rhoR coverage - 99.23%

Files
Source

File	Lines	Relevant	Covered	Missed	Hits / Line	Coverage
R/genPcombo.R	21	4	0	4	0	0.00%
src/rhor.cpp	470	127	127	0	251651	100.00%
R/utils.R	138	60	60	0	10	100.00%
R/createSimulatedCodeSet.R	77	39	39	0	8	100.00%
R/getHandSet.R	94	24	24	0	14261	100.00%
R/rhoK.R	62	21	21	0	29	100.00%
R/rho.R	85	21	21	0	5	100.00%
R/rhoCT.R	52	18	18	0	3	100.00%
R/prset.R	36	17	17	0	14970	100.00%
R/generateKPs.R	34	16	16	0	2493	100.00%
R/rhoSet.R	49	16	16	0	5	100.00%
R/getHandSetIndices.R	33	14	14	0	2	100.00%
R/calcRho.R	44	13	13	0	16813	100.00%
R/calcKappa.R	34	13	13	0	27580	100.00%
R/genPKcombo.R	23	12	12	0	16111	100.00%
R/rho.file.R	46	11	11	0	2	100.00%
R/getTestSet.R	34	11	11	0	2	100.00%
R/rhoMin.R	39	10	10	0	4	100.00%
R/createRandomSet.R	27	8	8	0	47	100.00%
R/calculations.R	15	6	6	0	8	100.00%
R/kappaCT.R	24	6	6	0	18	100.00%
R/contingencyToSet.R	11	6	6	0	31	100.00%
R/baserateCT.R	27	5	5	0	18	100.00%
R/baserateSet.R	24	4	4	0	11	100.00%
R/checkBRPKcombo.R	18	4	4	0	30356	100.00%
R/getBootPvalue.R	19	4	4	0	104	100.00%
R/kappaSet.R	23	3	3	0	47	100.00%
R/kappa.R	28	3	3	0	41	100.00%
R/getR.R	15	3	3	0	36355	100.00%
R/baserate.R	36	3	3	0	5	100.00%

###
#' @title Calculate kappa (Set)
#' 
#' @description 
#' This function calculates Cohen's kappa for a given \code{\link{codeSet}}. Called by \code{\link{kappa}}.
#' 
#' @export
#' @param set A \code{\link{codeSet}}
#' 
#' @seealso \code{\link{kappa}} and \code{\link{kappaCT}}
#' 
#' @return 
#' The kappa of the \code{\link{codeSet}}
#' 
###
kappaSet = function(set){
  if(any(set != 1 & set != 0)) {
    stop("Each set must only consist of zeros and ones.")
  }
  
  return(calcKappa(set,isSet = TRUE,kappaThreshold = NULL));
}


###
# Create A Random Set
#
# This creates a set with given parameters
# @param setLength The length of the set to be created
# @param baserate The baserate of the set to be created
# @param kappa The kappa of the set to be created
# @param minPrecision The minimum precision accepted in this set. Defaults to 0
# @param maxPrecision The maximum precision accepted in this set. Defaults to 1
# @return This returns a set with the given parameters
###
createRandomSet = function(setLength, baserate, kappaMin, kappaMax, minPrecision = 0, maxPrecision = 1, type = "set"){
  #generates a valid precision with the given kappa and baserate
  KP = generateKPs(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision)[[1]]
  kappa = KP$kappa
  precision = KP$precision
  recall = getR(kappa, baserate, precision)
  
  if(type == "ct") {
    fullSet = pr.ct(precision = precision, recall = recall, length = setLength, baserate = baserate);
  } 
  else {
    fullSet = prset(precision = precision, recall = recall, length = setLength, baserate = baserate);
  }
  
  return(fullSet);
}

###
# Precision Recall Set
#
# This creates a set with a given recall and precision
# @param precision This is the precision of the given set
# @param recall This is the recall of the given set
# @param length This is the length of the set
# @param baserate This is the baserate of the set
# @return returns a set with the given parameters
###
prset = function(precision, recall, length, baserate) {
  #generates a set given the different 4 quadrants
  gold1s = max(round(baserate * length , 0), 1); # TP + FN
  gold0s = length - gold1s; # FP + FN
  TP = max(round(gold1s * recall , 0) , 1); # TP
  FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP
  gold = c(rep(1, gold1s), rep(0, gold0s));
  silver = c(rep(1, TP),rep(0, gold1s - TP), rep(1, FP), rep(0, gold0s - FP));

  return(cbind(gold, silver));
}

pr.ct = function(precision, recall, length, baserate) {
  #generates a set given the different 4 quadrants
  gold1s = max(round(baserate * length , 0), 1); # TP + FN
  gold0s = length - gold1s; # FP + FN
  TP = max(round(gold1s * recall , 0) , 1); # TP
  FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP
  
  ct = matrix(0, ncol = 2, nrow = 2)
  ct[1,1] = TP
  ct[1,2] = gold1s - TP
  ct[2,1] = FP
  ct[2,2] = gold0s - FP
  return(ct);
}

any_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) {
  any(sapply(target, function(targ) abs(current - targ) < tolerance ))
}
all_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) {
  all(sapply(target, function(targ) abs(current - targ) < tolerance ))
}


#' Convert a codeset to a contingency table
#'
#' @param x codeset 
#'
#' @return contingency table as a 2x2 matrix
#' 
#' @export
as.contingency.table <- function(x) {
  y <- NULL
  
  if ( nrow(x) > 2 && ncol(x) == 2 ) {
    tp = length(which(x[,1] == 1 & x[,2] == 1))
    tn = length(which(x[,1] == 0 & x[,2] == 0))
    fp = length(which(x[,1] == 0 & x[,2] == 1))
    fn = length(which(x[,1] == 1 & x[,2] == 0))
    
    y <- matrix(
      c(tp, fp, fn, tn), 
      ncol = 2, nrow = 2
    )
  }
  else {
    y <- x
  }
  
  dimnames(y) = list(c("first_positive", "first_negative"), c("second_positive", "second_negative"))
  class(y) <- c("contingency.table", "rating.set", class(y))
  attr(y, "agreement") <- list(
    "true_positive" = y[1, 1],
    "false_negative" = y[1, 2],
    "false_positive" = y[2, 1],
    "true_negative" = y[2, 2]
  )
  attr(y, "baserate") <- baserateCT(y)
  attr(y, "kappa") <- kappaCT(y)
  
  return(y)
}

#' Convert codeset to contingency table
#'
#' @param x matrix contingency table (2x2)
#'
#' @return 2-column matrix representation of the contingency table
#' 
#' @export
as.code.set <- function(x) {
  y <- NULL
  if (all(dim(x) == c(2,2))) {
    y <- matrix(
      c(rep(1, x[1,1]*2), rep(1:0, x[1,2]), rep(0:1, x[2,1]), rep(0, x[2,2]*2)),
      byrow = TRUE,
      ncol = 2
    )
  } 
  else {
    y <- x
  }
  
  class(y) <- c("code.set", "rating.set", class(y))
  attr(y, "baserate") <- baserateSet(y)
  attr(y, "kappa") <- kappaSet(y)
  colnames(y) <- c("first_rater", "second_rater")
  
  return(y)
}

#' Helper function to return special values on a rating set
#' 
#' @param x Set or Contingency.Table
#'
#' @param i Value to search for
#'
#' @export
"$.rating.set" <- function (x, i) {
  if (i %in% names(attributes(x))) {
    return(attr(x, i))
  }
  else {
    if (is(x, "code.set")) {
      if (i %in% colnames(x)) {
        return(x[, which(colnames(x) == i)])
      }
    }
    else if (is(x, "contingency.table")) {
      if (i %in% colnames(x)) {
        return(sum(x[, which(colnames(x) == i)]))
      }
      else if (i %in% rownames(x)) {
        return(sum(x[which(rownames(x) == i), ]))
      }
    }
  }
  
  return(NULL)
}

#' @export
.DollarNames.rating.set <- function(x, pattern) {
  vals <- .codegen_dollar(x, pattern)
  
  cols <- colnames(x)
  if (!is.null(cols)) {
    vals <- c(cols, vals)
  }

  rows <- rownames(x)
  if (!is.null(rows)) {
    vals <- c(rows, vals)
  }
  
  vals
}

.codegen_dollar <- function(x, pattern = "") {
  attrs <- names(attributes(x))
  vals <- attrs[attrs %in% c("baserate", "kappa", "agreement")]
  return(vals)
}

#' @export
print.rating.set <- function(x, ...) {
  y <- unclass(x)

  for(a in additional_attributes) {
    attr(y, a) <- NULL
  }
  
  print(y, ...)
}

###
#' @title Calculate Baserate (Set)
#' 
#' @description
#' This function will calculate the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}.
#' 
#' @param set The \code{\link{codeSet}} for which the baserate is calculated
#' 
#' @seealso \code{\link{baserate}} and \code{\link{baserateCT}}
#' 
#' @export
#' @return A list of the format:\describe{
#' \item{firstBaserate}{The percentage that a positive code, or a 1, appears in the first rater}
#' \item{secondBaserate}{The percentage that a positive code, or a 1, appears in the second rater}
#' \item{averageBaserate}{The average percentage that a positive code, or a 1, appears in either of the two raters}
#' }
#' 
###
baserateSet = function(set){
  firstBaserate = length(which(set[,1] == 1))/nrow(set)
  secondBaserate = length(which(set[,2] == 1))/nrow(set)
  averageBaserate = mean(c(firstBaserate,secondBaserate))
  return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate))
}

rating_set_recall <- function (x) {
  if (!is(x, "contingency.table")) {
    x <- as.contingency.table(x)
  }
  
  x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_negative)
}

rating_set_precision <- function (x) {
  if (!is(x, "contingency.table")) {
    x <- as.contingency.table(x)
  }
  
  x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_positive)
}

genPKcombo = function(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate){
  # currKappa = sample(kappaDistribution, 1, replace = FALSE, prob = kappaProbability)
  # currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability)
  currKappa = kappaDistribution[sample.int(length(kappaDistribution), 1, replace = FALSE, prob = kappaProbability)]
  currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)]

  if(!checkBRPKcombo(baserate, currPrecision, currKappa)){
    precisionMin = (2*baserate*currKappa - 2*baserate - currKappa)/(currKappa - 2)
    indicies = which(precisionDistribution > precisionMin)
    
    if (length(indicies) == 0) {
      return(genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate))
    }
    
    precisionDistribution = precisionDistribution[indicies]
    precisionProbability = precisionProbability[indicies]
    # currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability)
    currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)]
    return(list(precision = currPrecision, kappa = currKappa))
  } else {
    return(list(precision = currPrecision, kappa = currKappa))
  }
}

###
#' @title Calculate kappa (contingency Table)
#' 
#' @description 
#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}}. Called by \code{\link{kappa}}. 
#' 
#' @param ct A \code{\link{contingencyTable}}
#' 
#' @seealso \code{\link{kappa}} and \code{\link{kappaSet}}
#' 
#' @export
#' @return  The kappa of the \code{\link{contingencyTable}}
###
kappaCT = function(ct){
  if(any(ct < 0)){stop("Values in Contingency Table must be positive")}
  if(ncol(ct) != 2 || nrow(ct) != 2) stop("Incorrect number of dimensions: Contingency table must be 2x2.")
  for(i in 1:nrow(ct)) {
    for(j in 1:ncol(ct)) {
      if (ct[i,j]%%1 != 0) stop("Contingency table values must be positive integers.")
    }  
  }
  
  return(calcKappa(ct,isSet = FALSE,kappaThreshold = NULL));
}

###
#' @title Get Handset
#' @description This function is to get a handset of a set and calculate the kappa
#' @param set This is the set to take a handset of
#' @param handSetLength This is the length of the handset to take
#' @param handSetBaserate This is the minimum baserate to inflate the handset to
#' @param returnSet If TRUE, then return the handSet if FALSE, return the kappa of the handSet
#' 
#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not
#' @export
###
getHandSet = function(set, handSetLength, handSetBaserate, returnSet=FALSE) {
  #positives is the minimum number of positive pairs in the first rater
  positives = ceiling(handSetLength * handSetBaserate);
  posInd = which(set[,1] == 1);

  if(positives > length(posInd)){stop("Not enough positives in first rater to inflate to this level")}
  #if there is an inflated baserate positives will be > 0, so if handSetBaserate > 0, positives > 0
  if (positives > 0) {
    #goes through and picks a set that fits the number of positives first then randomly samples from the rest
    #if the set created is not a valid set, then it reruns the process until a valid set is found
    positiveIndices = posInd[sample.int(length(posInd),size=positives,replace=FALSE)];
    others = set[!(1:nrow(set) %in% positiveIndices),];
    otherIndices = sample.int(nrow(others),size=(handSetLength - positives),replace=FALSE);
    this.set = rbind(set[positiveIndices,], others[otherIndices,]);
  } else if (positives == 0){
    #if there is no restriction on positives, then the set is generated by randomly sampling from the entire set
    #this.set=matrix(0,2,2);
    #while ((sum(this.set[,2]) == 0 | sum(this.set[,2]) == nrow(this.set))) {
      theseIndices = sample.int(nrow(set),size=handSetLength,replace=FALSE);
      this.set = set[theseIndices,];
    #}
  }

  #if return set is true return set rather than kappa
  if (returnSet) {
    this.set <- as.code.set(this.set);
    return(this.set);
  }

  #otherwise return the kappa of the set
  handSetKappa = calcKappa(this.set);
  
  return(handSetKappa);
}

###
#' @title Get Handset
#' @description This function is to get a handset of a set and calculate the kappa
#' 
#' @param full.ct This is the set to take a handset of
#' @param handSetLength This is the length of the handset to take
#' @param handSetBaserate This is the minimum baserate to inflate the handset to
#' @param as_kappa If FALSE then return the handSet, if TRUE (default) return the kappa of the handSet
#' 
#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not
#' @export
###
getHandCT <- function(full.ct, handSetLength, handSetBaserate, as_kappa = TRUE) {
  positives <- ceiling(handSetLength * handSetBaserate)
  if (positives > sum(full.ct[1,])) {
    stop("Not enough positives in first rater to inflate to this level")
  }
  
  # positiveIndices <- sample_ct(full.ct[1,], positives)
  # 
  # other_ct <- full.ct
  # other_ct[1, ] <- other_ct[1, ] - positiveIndices
  # 
  # other_ct_vec <- as.vector(other_ct)
  # this_ct <- sample_ct(other_ct_vec,  handSetLength - positives)
  # this_ct[1, ] <- this_ct[1, ] + positiveIndices
  
  this_ct <- getHand_ct(ct = full.ct, handSetLength = handSetLength, handSetBaserate = handSetBaserate)

  if (!as_kappa) {
    this_ct <- as.contingency.table(this_ct);
    return(this_ct)
  } 

  return(kappa_ct(this_ct))
}

# sample_ct <- function(x, size) {
#   x_vec <- as.vector(x)
#   x_sampled <- matrix(rep(0, length(x)), ncol = length(x_vec) / 2)
#   for(i in seq(size)) {
#     s <- sample(seq(x_vec), 1, replace = F, prob = x_vec / sum(x_vec))
#     x_sampled[[s]] = x_sampled[[s]] + 1
#     x_vec[s] = x_vec[s] - 1
#   }
# 
#   return(x_sampled)
# }

###
#' @title Create Simulated codeSet
#' 
#' @description
#' Creates a simulated \code{\link{codeSet}} with the given parameters
#' 
#' @param length the length of the simulated \code{\link{codeSet}} to be created
#' @param baserate the \code{\link{baserate}} of the simulated \code{\link{codeSet}}
#' @param kappaMin the minimum kappa of the simulated \code{\link{codeSet}}
#' @param kappaMax the maximum kappa of the simulated \code{\link{codeSet}}
#' @param precisionMin the minimum precision of the simulated \code{\link{codeSet}}
#' @param precisionMax the maximum precision of the simulated \code{\link{codeSet}}
#' @param ... Parameters passed to createRandomSet (e.g. type = "set" or type = "ct")
#' @param tries the maximum number of tries to generate a valid set, smaller set lengths may require an increased number of tries
#' 
#' @details
#' \code{\link{codeSet}}s are generated by first picking a random kappa within its range and a random precision within its range.  If the random kappa, random precision, and baserate are not mathematically possible, then the precision is resampled from a range of mathematically possible values within its range.  A unique simulated \code{\link{codeSet}} is then constructed given these parameters.
#' 
#' @export
#' @return A \code{\link{codeSet}} that fulfills the given parameters
###
createSimulatedCodeSet = function(length, baserate, kappaMin, kappaMax, precisionMin, precisionMax, ..., tries = 50){
  if(length <= 0){stop("length must be positive")}
  if(baserate < 0){stop("baserate must be positive")}
  if(kappaMin < 0 | kappaMin > 1) stop("kappaMin must be between 0 and 1.")
  if(kappaMax < 0 | kappaMax > 1) stop("kappaMax must be between 0 and 1.")
  if(kappaMin > kappaMax){stop("kappaMin must be less than kappaMax")}
  if(precisionMin < 0 | precisionMin > 1) stop("precisionMin must be between 0 and 1.")
  if(precisionMax < 0 | precisionMax > 1) stop("precisionMax must be between 0 and 1.")
  if(precisionMin > precisionMax){stop("precisionMin must be less than precisionMax")}
  if(tries < 1){stop("tries must be greater than 1")}
  
  setFound = NULL
  validSet = F
  triesLeft = tries
  Ks = c()
  while(!validSet && triesLeft > 0) {
    set = createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax)
    k = kappa(set)
    
    if ((k > kappaMin && k < kappaMax) || any_equal(k, c(kappaMin, kappaMax))) {
      setFound = set
      validSet = T
    }
    
    Ks = c(Ks, k)
    triesLeft = triesLeft - 1
  }
  
  if (is.null(setFound)) {
    Ks_under_min <- Ks[which(Ks < kappaMin)]
    Kdist.to.min <- kappaMin - Ks_under_min
    Kdist.to.min.low <- ifelse(length(Ks_under_min) > 0, min(Kdist.to.min), NA)
    
    Ks_over_min  <- Ks[which(Ks > kappaMax)]
    Kdist.to.max <- Ks_over_min - kappaMax
    Kdist.to.max.low <- ifelse(length(Ks_over_min) > 0, min(Kdist.to.max), NA)
    
    Kdist.all_mins <- c(Kdist.to.min, Kdist.to.max)
    
    both_mins <- c(Kdist.to.min.low, Kdist.to.max.low)
    both_mins <- both_mins[!is.na(both_mins)]
    closest <- c(Ks_under_min, Ks_over_min) [which(Kdist.all_mins == min(both_mins))][1]
    
    stop(paste0(
      "Unable to create a set of the provided length (", length, ") ",
        "in the provided kappa range (", kappaMin, "\U2014", kappaMax, "). ",
        "The closest kappa obtained after ", tries, " tries was: ", 
        round(x = closest, digits = 5), "\n",
        "\tTry increasing the set length or the range of valid kappas"
    ))
  }
  
  return(setFound)
  
  # return(createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax, ...))
}

###
#' @title Rho using a file
#' 
#' @description 
#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}} as defined by the file 
#' and columns (col1, col2), and returns a list containing both values. Called by \code{\link{rho}}.
#' 
#' @export
#' 
#' @param col1 The first column from file
#' @param col2 The second column from file
#' @template rho-params
#' 
#' @return A list of the format:\describe{
#' \item{rho}{The rho of the \code{\link{codeSet}}}
#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}}
#' }
###
rho.file <- function(
  x, col1, col2, 
  OcSBaserate = NULL,
  testSetBaserateInflation = 0,
  OcSLength = 10000,
  replicates = 800,
  ScSKappaThreshold = 0.9,
  ScSKappaMin = .40,
  ScSPrecisionMin = 0.6,
  ScSPrecisionMax = 1
) {
  setFile = utils::read.csv(x)
  set = as.matrix(setFile[,c(col1, col2)])
  
  if (!any(is.na(set))) {
    rhoSet(
      set,
      OcSBaserate,
      testSetBaserateInflation, 
      OcSLength, replicates, 
      ScSKappaThreshold, ScSKappaMin, 
      ScSPrecisionMin, ScSPrecisionMax
    )
  }
  else {
    stop("The columns provided for col1 and col2 must not contain NA")
  }
}

generateKPs = function(
  numNeeded, baserate,
  kappaMin, kappaMax,
  precisionMin, precisionMax,
  distributionType = 'FLAT',
  distributionLength = 100000
) {
  kappaDistribution = seq(kappaMin, kappaMax, length = distributionLength)

  if (distributionType == 'FLAT') {
    #probability for a flat distribution
    kappaProbability = NULL
  }
  else if (distributionType == 'BELL') {
    #probability for a bell curve
    kappaProbability = stats::dnorm(kappaDistribution, mean = 0.9, sd = 0.1)
  }

  precisionDistribution = seq(precisionMin, precisionMax, length = 10000)
  precisionProbability = NULL

  KPs = tryCatch({
    KPs = list()
    for (i in 1:numNeeded) {
      KPs[[length(KPs) + 1]] = genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate)
    }
    KPs
  },error=function(e){
    NULL
  })
  if(is.null(KPs)){stop("Could not generate a valid set given ranges of kappa and precision")}

  return (KPs);
}

###
#' @title Rho (contingency Table)
#' 
#' @description 
#' This function calculates rho and kappa for a given \code{\link{contingencyTable}}, and returns a list containing both values. Called by \code{\link{rho}}.
#' 
#' 
#' @template rho-params
#' 
#' @export
#' @return A list of the format:\describe{
#' \item{rho}{The rho of the \code{\link{contingencyTable}}}
#' \item{kappa}{The Cohen's Kappa of the \code{\link{contingencyTable}}}
#' }
###
rhoCT = function(
  x,
  OcSBaserate = NULL,
  testSetBaserateInflation = 0,
  OcSLength = 10000,
  replicates = 800,
  ScSKappaThreshold = 0.9,
  ScSKappaMin = .40,
  ScSPrecisionMin = 0.6,
  ScSPrecisionMax = 1
){
  if (any(x < 0)) {
    stop("Values in Contingency Table must be positive")
  }
  kappa = kappa_ct(x);
  
  if (is.null(OcSBaserate)) {
    OcSBaserate = (sum(x[1,])) / (sum(x))
  }
  
  rho_result <- rhoK(
    kappa, 
    OcSBaserate,
    sum(x),
    testSetBaserateInflation,
    OcSLength,
    replicates,
    ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
  )
  
  return(list(
    rho = rho_result, 
    kappa = kappa,
    recall = rating_set_recall(x),
    precision = rating_set_precision(x)
  ))
}

###
#' @title Rho (kappa)
#' 
#' @description 
#' This function calculates rho for an observed kappa value with associated set parameters
#' (testSetLength and OcSBaserate). Called by \code{\link{rho}}. A p-value is returned and if 
#' this value is less than 0.05, it is said that the handset does generalize to the entire set
#' 
#' @template rho-params
#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
#' @param method set to "c" to calculate using the C++ implmentation. Defaults to "standard"
#' 
#' @export
#' @return rho for the given parameters
###
rhoK = function(
  x, 
  OcSBaserate,
  testSetLength,
  testSetBaserateInflation = 0, 
  OcSLength = 10000, 
  replicates = 800, 
  ScSKappaThreshold = 0.9, 
  ScSKappaMin = 0.40, 
  ScSPrecisionMin = 0.6, 
  ScSPrecisionMax = 1,
  method = "standard"
) {
  if (
    (x > 1 | x < 0) || 
    (OcSBaserate > 1 | OcSBaserate < 0)
  ) stop("x and OcSBaserate must be between 0 and 1.")

  if (OcSBaserate < 0 || ScSKappaThreshold < 0 || ScSKappaMin < 0) 
    stop("OcSBaserate, ScSKappaThreshold, and ScSKappaMin must be positive.")

  if (
    testSetLength %% 1 != 0 ||
    OcSLength %% 1 != 0 ||
    replicates %% 1 != 0
  ) stop("testSetLength, OcSLength, and replicates values must be a positive integer.")

  if (ScSKappaThreshold < ScSKappaMin) 
    stop("ScSKappaThreshold must be greater than ScSKappaMin.")

  if (
    (ScSPrecisionMin < 0 | ScSPrecisionMin > 1) ||
    (ScSPrecisionMax < 0 | ScSPrecisionMax > 1)
  ) stop("ScSPrecisionMin and ScSPrecisionMax must be between 0 and 1.")

  if (ScSPrecisionMax < ScSPrecisionMin) 
    stop("ScSPrecisionMax must be greater than ScSPrecisionMin.")

  result <- NULL
  if (method == "standard") {
    result <- calcRho(  x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
  }
  else {
    result <- calcRho_c(x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
  }
  return(result)
}

#' Generate a Handset
#' 
#' @description Generate a vector representing indices of set, using the handSetBaserate
#' to determine the minimum number of indices that are positive
#' 
#' @param set matrix of two columns
#' @param handSetLength number of indices to find
#' @param handSetBaserate number between 0 and 1 to use as a minimum number of positive indices
#'
#' @return vector of indices from set
#' @export
getHandSetIndices = function(set, handSetLength = 20, handSetBaserate = 0.2) {
  positives = ceiling(handSetLength * handSetBaserate);
  posInd = which(set[, 1] == 1);
  
  if (positives > length(posInd)) {
    stop("Not enough positives in first rater to inflate to this level")
  }
  
  this.set = NULL
  if (positives > 0) {
    positiveIndices = posInd[sample.int(length(posInd), size=positives, replace=FALSE)]
    others = which(!(1:nrow(set) %in% positiveIndices))
    otherIndices = sample.int(length(others), size = (handSetLength - positives), replace = FALSE)
    this.set = c(positiveIndices, others[otherIndices])
  }
  else if (positives == 0) {
    theseIndices = sample.int(length(set),size=handSetLength,replace=FALSE)
    this.set = theseIndices
  }
  
  return(sample(this.set))
}

###
# This function calculates rho given the parameters of the initial set and the kappa of the observed handset and is
# called by rhoK()
###
calcRho = function(
  x, 
  OcSBaserate, 
  testSetLength, 
  testSetBaserateInflation = 0,
  OcSLength = 10000, 
  replicates = 800, 
  ScSKappaThreshold = 0.9, 
  ScSKappaMin = 0.40, 
  ScSPrecisionMin = 0.6, 
  ScSPrecisionMax = 1, 
  KPs = NULL
) {
  
  if(is.null(KPs)) {
    KPs = generateKPs(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax)
  }
  if(length(KPs) < replicates) {
    replicates = length(KPs)
  }
  
  #creates distribution from random kappa sets
  savedKappas = rep(0,replicates);
  
  # cat("Runs: ", runs, "\n")
  # cat("KPs length:, ", length(KPs), "\n")
  for (i in 1:replicates) {
    KP = KPs[i]
    kappa = KP[[1]]$kappa
    precision = KP[[1]]$precision
    recall = getR(kappa, OcSBaserate, precision)
    fullSet = prset(precision = precision, recall = recall, length = OcSLength, baserate = OcSBaserate);
    
    #computes kappa of handset to add to distribution
    savedKappas[i] = getHandSet(set = fullSet, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation);
  }

  # compare observedKappa to distribution
  return(getBootPvalue(savedKappas, x));
}

###
#' @title Calculate kappa
#'
#' @description
#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}} or a \code{\link{codeSet}}
#'
#' @param data A \code{\link{contingencyTable}} or a \code{\link{codeSet}}
#'
#'
#' @seealso \code{\link{kappaSet}} and \code{\link{kappaCT}}
#'
#' @examples
#' #Given a code set
#' kappa(data = codeSet)
#'
#' #Given a contingency Table
#' kappa(data = contingencyTable)
#'
#' @export
#' @return The kappa of the \code{\link{contingencyTable}} or \code{\link{codeSet}}
###
kappa <- function(data) {
  if (all(dim(data) == 2)) {
    return(kappaCT(data))
  } else {
    return(kappaSet(data))
  }
}

###
# Get Recall
#
# This gets the recall from a pair
# @param kappa This is the kappa of the pair
# @param BR This is the baserate of the pair
# @param P this is the precision of the pair
# @return returns the recall given the parameters
###
getR = function(kappa, BR, P){
  #gets recall that corresponds with the kappa, baserate, and precision
  top = kappa*P;
  R = top / (2*P-2*BR-kappa+2*BR*kappa);
  return (R);
}

###
#' @title Rho Min
#' 
#' @description 
#' This function calculates the minimum testSetLength where it is possible to get a rho less than alpha for the given parameters of rho.
#' 
#' @param baserate A \code{\link{baserate}}
#' @param alpha The threshold of significance for rho (similar to an alpha level for a p value), defaulted to 0.05
#' @param inc An integer indicating by how much the testSetLength should increase each iteration
#' @param printInc A boolean indicating whether to print out each increment value with it's corresponding significance for rho
#' @param ... Any additional parameters passed into \code{\link{rho}}
#' 
#' @examples
#' #Add testSetBaserateInflation as an additional parameter
#' rhoMin(0.2, testSetBaserateInflation = 0.33)
#' 
#' #Add testSetBaserateInflation as well as changing inc and selecting printInc
#' rhoMin(0.2, inc = 5, printInc = TRUE, testSetBaserateInflation = 0.33)
#' 
#' @export
#' @return  The minimum length of testSet, to the nearest multiple of inc, greater than the minimum length, that would give a value where rho less than alpha becomes mathematically possible.
###
rhoMin = function(baserate, alpha = 0.05, inc = 10, printInc = FALSE, ...){
  rhoVal = 1
  testSetLength = 0
  if(inc%%1 != 0 | inc <= 0) stop("Inc value must be a positive integer.")
  if(alpha > 1 | alpha < 0) stop("Alpha must be between 0 and 1.")
  
  
  while(rhoVal > alpha){
    testSetLength = testSetLength + inc
    rhoVal = rho(1, baserate, testSetLength, ...)
    if(printInc) {
      cat(testSetLength, rhoVal,"\n")
    }
  }
  return(testSetLength)
  
}

###
# Check Baserate, Precision, Kappa Combo
#
# This function checks to make sure the combination of BR, P, and K will create a valid set
# @param BR This is the supplied baserate
# @param P This is the supplied precision
# @param K This is the supplied kappa
# @return returns TRUE if the combination is valid, FALSE otherwise
###
checkBRPKcombo = function(BR, P, K) {
  #if right is less than P, then it is a valid combination. If not, then the pair of kappa, precision, and baserate does not correspond to a valid set
  right = (2*BR*K - 2*BR - K)/(K - 2);
  if(P > right){
    return(TRUE);
  }else{
    return(FALSE);
  }
}

###
# Calculate Kappa
#
# This function calculates kappa given a set
# @param set This is the set to calculate kappa from
# @param isSet TRUE if set FALSE if contigency table
# @param kappaThreshold if null then return kappa otherwise return list of kappa and above or not
# @return This returns the kappa of the set given
###
calcKappa <- function(set, isSet = TRUE, kappaThreshold = NULL) {
  if (!isSet) {
    set <- contingencyToSet(set[1, 1], set[2, 1], set[1, 2], set[2, 2])
  }

  if (
    length(which(set[, 1] == set[, 2] & set[, 1] == 1)) == nrow(set) |
    length(which(set[, 1] == set[, 2] & set[, 1] == 0)) == nrow(set)
  ) {
    return(1)
  }

  #calculates kappa by calculating the agreement and the baserates and then creating the adjacenty matrix
  agreement <- length(which(set[, 1] == set[, 2])) / nrow(set);
  baserate2 <- length(which(1 == set[, 2])) / nrow(set);
  baserate1 <- length(which(1 == set[, 1])) / nrow(set);
  randomAgreement <- baserate1 * baserate2 + (1 - baserate1) * (1 - baserate2);
  kappa <- (agreement - randomAgreement) / (1 - randomAgreement);

  if (is.null(kappaThreshold)) {
    return(kappa);
  } else {
    return(list(kappa = kappa, above = kappa > kappaThreshold))
  }
}

###
#' @title Calculate Baserate
#' 
#' @description
#' This function calculates the baserate of the first rater, second rater, and the average of both the raters.
#' 
#' @details
#' A baserate is the percentage, as a decimal, that a positive code appears in data (either a \code{\link{codeSet}} or \code{\link{contingencyTable}}) for a given rater.  It is assumed that the first rater is more experienced and thus provides a better estimation of the actual baserate for a given code, so the first rater's baserate is often used as if it is the actual baserate.  If the raters are assumed to have the same experience level, the average baserate may give a better estimation.  If the second rater is more experienced, the second rater's baserate may give a better estimation.  Functions assume that the first rater is the more experienced rater and thus uses the first rater's baserate as the overall baserate estimation.
#' 
#' 
#' @param data The \code{\link[=getTestSet]{testSet}} or \code{\link{contingencyTable}} for which the baserate is calculatede
#' 
#' @seealso \code{\link{baserateSet}} and \code{\link{baserateCT}}
#'
#' @examples
#' #Given a code set
#' baserate(data = codeSet)
#' 
#' #Given a contingency Table
#' baserate(data = contingencyTable)
#'
#' @export
#' @return A list of the format:\describe{
#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater}
#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater}
#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.}
#' }
#' 
###
baserate = function(data){
  if (nrow(data) != 2) {
    return(baserateSet(data))
  } else {
    return(baserateCT(data))
  }
}

###
# Generate Precision Combo
#
# This function generates a precision that fits within the range and will create a valid set
# @param kappa This is the kappa of the desired pair
# @param Pmin This is the minimum precision desired
# @param Pmax This is the maximum precision desired
# @param BR This is the baserate of the desired pair
# @return This returns a precision value that will generate a valid set with the other parameters
####

genPcombo = function(kappa, Pmin, Pmax, BR){
  #gets a random precision in the precision range
  currPrecision = stats::runif(1, Pmin, Pmax);
  #checks is the combination of precision, kappa, and baserate is valid.  If it is vaid, return the precision, if not, recall the function until a valid precision is found
  if(!checkBRPKcombo(BR, currPrecision, kappa)){
    genPcombo(kappa, Pmin, Pmax, BR);
  }else{
    return(currPrecision);
  }
}

###
#' @title Get Test Set
#' 
#' @description
#' This function gets a \emph{testSet} from a larger \code{\link{codeSet}} given certain sampling parameters.
#' 
#' @details
#' A \emph{testSet} is a \code{\link{codeSet}} that is a subset of a larger \code{\link{codeSet}} with a given set of properties.  A \emph{testSet} is constructed by sampling (without replacement) P rows from rows in the larger \code{\link{codeSet}} where the first rater's code was 1, and then appending an additional sample (without replacement) of R rows taken at random from the larger \code{\link{codeSet}} excluding rows included in the first P rows sampled. P is computed as the minbaserate * length of the \emph{testset}. R is computed as testSetLength - P. The result of this sampling procedure is to create a sample with a minimum baserate regardless of the baserate of the larger \code{\link{codeSet}}.If \emph{testSetBaserateInflation} is set to zero, the function selects rows at random.
#' 
#' @param set The \code{\link{codeSet}} from which the \emph{testSet} is taken
#' @param testSetLength The length of the \emph{testSet} to be taken
#' @param testSetBaserateInflation The minimum guaranteed \code{\link{baserate}} of the \emph{testSet}. Default to 0
#' 
#' 
#' @export
#' @return A \code{\link{codeSet}} with the properties specified
###

getTestSet = function(set, testSetLength, testSetBaserateInflation = 0){
  if(length(unlist(set)) < testSetLength){stop("testSetLength must be less than the length of set")}
  if(testSetLength %% 1 | testSetLength < 1) {stop("testSetLength value must be a positive integer.")}
  if(testSetBaserateInflation < 0){stop("testSetBaserateInfation must be positive")}
  if(testSetBaserateInflation >= 1){stop("testSetBaserateInflation must be below 1")}
  if(length(which(set[,1] == 1 & set[,2] == 1)) == nrow(set)){
    return(set[1:testSetLength,])
  }else if(length(which(set[,1] == 0 & set[,2] == 0)) == nrow(set)){
    if(testSetBaserateInflation == 0){
      return(set[1:testSetLength,])
    }else{
      stop("Not enough positives in first rater to inflate to this level")
    }
  }
  return(getHandSet(set = set, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation, returnSet = T))
}

###
#' @title Calculate Baserate (CT)
#' 
#' @description
#' This function calculates the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}.
#' 
#' @export
#' 
#' @param CT The \code{\link{contingencyTable}} for which the baserate is calculated
#' 
#' @seealso \code{\link{baserate}} and \code{\link{baserateSet}}
#' 
#' @return A list of the format:\describe{
#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater}
#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater}
#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.}
#' }
#' 
###

baserateCT = function(CT){
  if(any(CT < 0)){stop("Values in Contingency Table must be positive")}
  firstBaserate = sum(CT[1,])/sum(CT)
  secondBaserate = sum(CT[,1])/sum(CT)
  averageBaserate = mean(c(firstBaserate,secondBaserate))
  return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate))
}

###
# Get Boot P Value
#
# This function gets the p value of the result in the distribution
# @param distribution The distribution that the result falls into
# @param result The value that you are evaluating where in the distribution it falls
# @return This returns the p value from the result into the distribution
###
getBootPvalue = function(distribution, result) {
  #returns the percentage of the time that the distribution was greater or equal to the observed kappa
  N = length(distribution);
  if(result < mean(distribution, na.rm = T)) {
    #if the result is less than the mean of the distribution, than the p value is 1
    return(1);
  } else {
    #return the number of times that the distribution is greater than the result as a percentage of the total number of items in the distribution
    return(length(which(distribution >= result)) / N);
  }
}

###
#' @include rhoK.R
#' @title Rho (set)
#' 
#' @description 
#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}}, and returns a list containing both values. Called by \code{\link{rho}}.
#' 
#' @export
#' 
#' @template rho-params
#' 
#' @return A list of the format:\describe{
#' \item{rho}{The rho of the \code{\link{codeSet}}}
#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}}
#' }
###
rhoSet = function(
  x,
  OcSBaserate = NULL,
  testSetBaserateInflation = 0,
  OcSLength = 10000,
  replicates = 800,
  ScSKappaThreshold = 0.9,
  ScSKappaMin = 0.40,
  ScSPrecisionMin = 0.6,
  ScSPrecisionMax = 1
) {
  kappa = kappaSet(x);
  
  row.count = nrow(x);
  if(is.null(OcSBaserate)) {
    OcSBaserate = length(which(x[,1] == 1)) / row.count;
  }

  return(list(
    rho = rhoK(
      kappa, 
      OcSBaserate,
      row.count,
      testSetBaserateInflation,
      OcSLength,
      replicates,
      ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
    ), 
    kappa = kappa,
    recall = rating_set_recall(x),
    precision = rating_set_precision(x)
  ))
}

### 
#' @title Rho
#' 
#' @description 
#' This function calculates rho for a \code{\link[=getTestSet]{testSet}}, \code{\link{contingencyTable}}, or an observed kappa value with associated set parameters (testSetLength and OcSBaserate). 
#' 
#' @details 
#' Rho is a Monte Carlo rejective method of interrater reliability statistics, implemented here for Cohen's Kappa. Rho constructs a collection of data sets in which kappa is below a specified threshold, and computes the empirical distribution on kappa based on the specified sampling procedure. Rho returns the percent of the empirical distribution greater than or equal to an observed kappa. As a result, Rho quantifies the type 1 error in generalizing from an observed test set to a true value of agreement between two raters.
#' 
#' Rho starts with an observed kappa value, calculated on a subset of a \code{\link{codeSet}}, known as an observed \code{\link[=getTestSet]{testSet}}, and a \emph{kappa threshold} which indicates what is considered significant agreement between raters.
#' 
#' It then generates a collection of fully-coded, simulated 
#' \code{\link[=getTestSet]{codeSets}} (ScS), further described in 
#' \code{\link{createSimulatedCodeSet}}, all of which have a kappa value below the kappa 
#' threshold and similar properties as the original \code{\link{codeSet}}.
#' 
#' Then, kappa is calculated on a \code{\link[=getTestSet]{testSet}} sampled from each of the ScSs in the
#' collection to create a null hypothesis distribution. These \code{\link[=getTestSet]{testSets}} mirror the observed \code{\link[=getTestSet]{testSets}} in their size and sampling method.  How these \code{\link[=getTestSet]{testSets}} are sampled is futher described in \code{\link{getTestSet}}.
#' 
#' The null hypothesis is that the observed \code{\link[=getTestSet]{testSet}}, was sampled from a data set, which, if both raters were to code in its entirety, would result in a level of agreement below the kappa threshold.
#' 
#' For example, using an alpha level of 0.05, if the observed kappa is greater than 95 percent of the kappas in the null hypothesis distribution, the null hypothesis is rejected. Then one can conclude that the two raters would have acceptable agreement had they coded the entire data set.
#'
#' @template rho-params
#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
#' 
#' @examples
#' # Given an observed kappa value
#' rho(x = 0.88, OcSBaserate = 0.2, testSetLength = 80)
#' 
#' # Given a test Set
#' rho(x = codeSet)
#' 
#' # Given a contingency Table
#' rho(x = contingencyTable)
#'
#' @export
#' @return rho and kappa for the given data and parameters (unless kappa is given)
###
rho = function(
  x,
  OcSBaserate = NULL,
  testSetLength = NULL,
  testSetBaserateInflation = 0,
  OcSLength = 10000,
  replicates = 800,
  ScSKappaThreshold = 0.9,
  ScSKappaMin = 0.40,
  ScSPrecisionMin = 0.6,
  ScSPrecisionMax = 1
) {

  ### Calculate rho given a kappa value
  if (is(x, "numeric")) {
    if (is.null(OcSBaserate)) { stop("Must give a baserate when a kappa value is given") }
    if (is.null(testSetLength)) { stop("Must give a testSetLength when a kappa value is given") }

    result <- rhoK(x, OcSBaserate,
               testSetLength,
               testSetBaserateInflation,
               OcSLength,
               replicates,
               ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) 
  }

  ### Calculate rho given a code set
  else if (nrow(x) != 2) {
    result <- rhoSet(x, OcSBaserate,
                  testSetBaserateInflation,
                  OcSLength,
                  replicates,
                  ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
  }

  ### Calculate rho given a contingency table
  else {
    result <- rhoCT(x, OcSBaserate,
                 testSetBaserateInflation,
                 OcSLength,
                 replicates,
                 ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
  }

  return(result)
}

#include <RcppArmadilloExtensions/sample.h>
// [[Rcpp::depends(RcppArmadillo)]]

#include <RcppArmadillo.h>
#include <Rcpp.h>
using namespace Rcpp;
using namespace arma;

// NumericMatrix toNumericMatrix(arma::imat x) {
//   int nRows=x.n_rows;
//   
//   NumericMatrix y(nRows, x.n_cols);
//   for (int j=0; j<x.n_rows; j++) {
//     for (int i=0; i<x.n_cols; i++) {
//       y(j,i) = x(j, i);
//     }
//   }
//   
//   return y;
// }

// // @name sample_weighted
// // @title sample_weighted
// // @param x vector
// // @param n integer
// // @export
// //' @export
// // [[Rcpp::export]]
// arma::vec sample_weighted(arma::vec xx, arma::vec probs, int n) {
//   // NumericVector ret(n);
//   // NumericVector xx(x);
//   arma::vec ret(n);
//   
//   // IntegerVector rng = seq(0, xx.size()-1);
//   arma::ivec rng = regspace<ivec>(0, xx.size() - 1);
//   for(int w = 0; w < n; w++) {
//     // int tot = sum(xx);
//     // NumericVector probs = xx / tot;
//     // arma::vec probs2 = toArmaVec(probs);
//   //   IntegerVector s = Rcpp::sugar::sample(rng, 1, false,  probs);
//     arma::ivec s = Rcpp::RcppArmadillo::sample(rng, 1, false, probs);
//     xx[s[0]] -= 1;
//     ret[w] = s[0];
//   }
//   // return(xx);
//   return(ret + 1);
// }

// // ' @export
// // [[Rcpp::export]]
// arma::ivec sample_weighted2(arma::imat xx, int n, bool forR = true) {
//   arma::ivec ret = arma::ivec(n);
//   NumericMatrix x = toNumericMatrix(xx);
//   IntegerVector rng = seq(0, x.size() - 1);
//   
//   for(int w = 0; w < n; w++) {
//     double tot = accu(xx);
//     NumericVector probs = x / tot;
//     IntegerVector s = sample(rng, 1, false,  probs);
//     x[s[0]] -= 1;
//     ret[w] = s[0];
//   }
//   
//   if(forR == true) {
//     return(ret + 1);
//   } else {
//     return(ret);
//   }
// }

//' @title sample_contingency_table
//' @name sample_contingency_table
//' 
//' @param xx contingency table matrix
//' @param n int size of the contingency table
//' @param forR bool if true, add 1 to the results accounting for R indices starting at 1
//' 
//' @export
// [[Rcpp::export]]
arma::ivec sample_contingency_table(arma::imat xx, int n, bool forR = true) {
  arma::ivec ret = arma::ivec(n);
  
  int maxInt;
  int s;
  for(int w = 0; w < n; w++) {
    maxInt = accu(xx);
    s = randi<int>(distr_param(0, maxInt));
    ret[w] = (s <= xx(0) ? 0 : (s <= (xx(0)+xx(1))) ? 1 : (s <= (xx(0)+xx(1)+xx(2))) ? 2 : 3);
    xx(ret[w])--;
  }
  
  if(forR == true) return(ret + 1);
  else return(ret);
}

//' @name getBootPvalue_c
//' @title getBootPvalue_c
//' 
//' @param distribution vector of calculated kappas
//' @param result double calculated kappa to compare against
//' 
//' @description 
//' returns the percentage of the time that the distribution was greater or equal to the observed kappa
//' if the result is less than the mean of the distribution, than the p value is 1
//' else return the number of times that the distribution is greater than the result as a percentage of the total number 
//' of items in the distribution
//'
//' @return double calculated p-value
//'
//' @export
// [[Rcpp::export]]
double getBootPvalue_c(arma::vec distribution, double result) {
  if(result < mean(distribution)) {
    return(1.0);
  } 

  else {
    arma::uvec matched = find(distribution >= result);
    
    double rho = (matched.size() * 1.0) / distribution.size();
    return( rho );
  }
}

// Validate a Baserate/Kappa combo against the supplied Precision. 
// Return true if Precision is greater than the calculated right value
// [[Rcpp::export]]
bool check_BRK_combo(double BR, double P, double K) {
  double right = ((2 * BR * K) - (2 * BR) - K) / (K - 2);
  
  return(P > right);
}

//' @name recall
//' @title recall
//' 
//' @param kappa double
//' @param BR double
//' @param P double
//' 
//' @return Recall calculated from provided kappa, BR, and P
//' 
//' @export
// [[Rcpp::export]]
double recall(double kappa, double BR, double P) {
  double top = kappa * P;
  double R = top / (2*P-2*BR-kappa+2*BR*kappa);
  return(R);
}

// Find a valid precision and kappa combo from a distribution of Kappas and Precisions given a baserate
//
// [[Rcpp::export]]
NumericVector find_valid_pk(
  arma::colvec kappaDistribution, arma::vec kappaProbability,
  arma::colvec precisionDistribution, arma::vec precisionProbability,
  double baserate
) {
  arma::ivec kappaRng = regspace<arma::ivec>(0, kappaDistribution.size() - 1);

  arma::ivec whKappa;
  if (kappaProbability.n_elem > 0) {
    whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false, kappaProbability);
  } else {
    whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false);
  }
  double currKappa = kappaDistribution[whKappa.at(0)];
  
  arma::ivec precRng = regspace<arma::ivec>(0, precisionDistribution.size() - 1);
  arma::ivec whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false);
  double currPrec = precisionDistribution[whPrec.at(0)];
  
  if (!check_BRK_combo(baserate, currPrec, currKappa)) {
    double precisionMin = (2 * baserate * currKappa - 2 * baserate - currKappa) / (currKappa - 2);
    arma::uvec indicies = find(precisionDistribution > precisionMin);

    if (indicies.size() == 0) {
      return(find_valid_pk(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate));
    }

    precisionDistribution = precisionDistribution.elem(indicies);
    
    if (precisionProbability.size() > 0) {
      precisionProbability = precisionProbability.elem(indicies);
    }
    
    precRng = regspace<ivec>(0, precisionDistribution.size() - 1);
    whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false);
    currPrec = precisionDistribution[whPrec.at(0)];
  }
  
  return(Rcpp::NumericVector::create(currPrec, currKappa));
}


//' @name generateKPs_c
//' @title generate_kp_list
//' 
//' @param numNeeded int
//' @param baserate double
//' @param kappaMin double
//' @param kappaMax double
//' @param precisionMin double
//' @param precisionMax double
//' @param distributionType int 0 - normal (default), 1 - bell
//' @param distributionLength long
//' 
//' @return matrix of kappa and precision values (column 1 as precision)
//' 
//' @export
// [[Rcpp::export]]
Rcpp::NumericMatrix generate_kp_list(
    int numNeeded, double baserate, 
    double kappaMin, double kappaMax, 
    double precisionMin, double precisionMax, 
    int distributionType = 0, long distributionLength = 10000
) {
  double kappaStep = ((kappaMax - kappaMin) / (distributionLength - 1));

  colvec kappaDistribution = regspace(kappaMin, kappaStep, kappaMax);

  arma::vec kappaProbability;
  if(distributionType == 1) {
    kappaProbability = normpdf(kappaDistribution, 0.9, 0.1);
  }

  double precStep = (precisionMax - precisionMin) / (10000 - 1);
  colvec precisionDistribution = regspace(precisionMin, precStep, precisionMax);

  arma::vec precisionProbability;
  Rcpp::NumericMatrix KPs(numNeeded,2);

  for(int i=0; i < numNeeded; i++) {
    KPs(i, _) = find_valid_pk(
      kappaDistribution,
      kappaProbability,
      precisionDistribution,
      precisionProbability, 
      baserate
    );
  }

  return (KPs);
}

//' @name contingency_table
//' @title contingency_table
//' @description Create a contingency table using the provied precision, recall, baserate, and length.
//' 
//' @param precision double
//' @param rec double
//' @param length int
//' @param baserate double
//' 
//' @export
// [[Rcpp::export]]
arma::imat contingency_table(double precision, double rec, int length, double baserate) {
  int gold1s = max(NumericVector::create(round(baserate * length), 1));
  int gold0s = length - gold1s;
  int TP = max(NumericVector::create(round(gold1s * rec), 1));
  int FP = min(NumericVector::create(gold0s, max(NumericVector::create(round(TP * (1 - precision) / precision),  1))));
  
  arma::imat ct = arma::imat(2,2);
  ct(0,0) = TP;
  ct(0,1) = gold1s - TP;
  ct(1,0) = FP;
  ct(1,1) = gold0s - FP;
  
  return(ct);
}

//' @name random_contingency_table
//' @title random_contingency_table
//' @param setLength [TBD]
//' @param baserate [TBD]
//' @param kappaMin [TBD]
//' @param kappaMax [TBD]
//' @param minPrecision [TBD]
//' @param maxPrecision [TBD]
//' 
//' @export
// [[Rcpp::export]]
arma::imat random_contingency_table(
    int setLength,double baserate, 
    double kappaMin, double kappaMax, 
    double minPrecision = 0, double maxPrecision = 1
) {
  NumericMatrix KP = generate_kp_list(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision);
  double kappa = KP(0,1);
  double precision = KP(0,0);
  double rec = recall(kappa, baserate, precision);
  
  arma::imat ct = contingency_table(precision, rec, setLength, baserate);
  
  return(ct);
}

//' @title kappa_ct
//' @description Calculate kappa from a contingency table
//' @param ct [TBD]
//' 
//' @export
// [[Rcpp::export]]
double kappa_ct(arma::imat ct) {
  double a = ct(0,0); //gold 1 & silver 1
  double c = ct(0,1); //gold 1 & silver 0
  double b = ct(1,0); //gold 0 & silver 1
  double d = ct(1,1); //gold 0 & silver 0
  double size = accu(ct);
  
  double pZero = (a+d) / size;
  double pYes = ((a + b) / size) * ((a + c) / size);
  double pNo =  ((c + d) / size) * ((b + d) / size);
  double pE = (pYes + pNo);
  double k = (pZero - pE) / (1 - pE);
  
  return(k);
}

// [[Rcpp::export]]
arma::imat getHand_ct(arma::imat ct, int handSetLength, double handSetBaserate) {
  int positives = ceil(handSetLength * handSetBaserate);
  arma::ivec positiveIndices; 
  arma::imat otherCT(ct);
  
  if(positives > 0) {
    arma::irowvec gold1s = ct.row(0);
    positiveIndices = sample_contingency_table(gold1s, positives, false);
  
    double sumOnes = sum(positiveIndices == 0);
    double sumTwos = sum(positiveIndices == 1);
    positiveIndices *= 2;
    otherCT(0,0) = otherCT(0,0) - sumOnes;
    otherCT(0,1) = otherCT(0,1) - sumTwos;
  }
  
  arma::ivec otherAsVector = otherCT.as_col();
  arma::ivec otherIndices = sample_contingency_table(otherCT, handSetLength - positives, false);
  arma::ivec allIndices = join_cols<ivec>(positiveIndices, otherIndices);
  arma::imat newCT = arma::imat(2,2);
  
  newCT(0,0) = sum(allIndices == 0);
  newCT(1,0) = sum(allIndices == 1);
  newCT(0,1) = sum(allIndices == 2);
  newCT(1,1) = sum(allIndices == 3);
  
  return(newCT);
}

//' @name getHand_kappa
//' @title getHand_kappa
//' 
//' @description This function returns kappa calculated from a Handset taken from  a larger Contingency Table
//' 
//' @param ct KPs matrix of kappa (column 1) and precision (column 2) values
//' @param handSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
//' @param handSetBaserate baserate to inflate the sampled contingency table to
//' 
//' @return Kappa as double
//' 
//' @export
// [[Rcpp::export]]
double getHand_kappa(arma::imat ct, int handSetLength, double handSetBaserate) {
  arma::imat newCT = getHand_ct(ct, handSetLength, handSetBaserate);
  
  double k = kappa_ct(newCT);
  return(k);
}

// This function calculates rho given the parameters of the initial set and 
// the kappa of the observed handset and is called by rhoK()
//
// [[Rcpp::export]]
double calcRho_c(
  double x, 
  double OcSBaserate, 
  int testSetLength, 
  double testSetBaserateInflation = 0,
  int OcSLength = 10000, 
  int replicates = 800, 
  double ScSKappaThreshold = 0.9,
  double ScSKappaMin = 0.40,
  double ScSPrecisionMin = 0.6, 
  double ScSPrecisionMax = 1.0,
  NumericMatrix KPs = NumericMatrix(0)
) {
  if (KPs.size() == 1 && KPs[0] == 0) {
    KPs = generate_kp_list(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax);
  }

  if(KPs.nrow() < replicates) {
    replicates = KPs.nrow();
  }

  arma::vec savedKappas = arma::vec(replicates);
  for (int KProw = 0; KProw < replicates; KProw++) {
    NumericVector KP = KPs(KProw, _);
    double precision = KP[0];
    double kappa = KP[1];
    double rec = recall(kappa, OcSBaserate, precision);

    arma::imat fullCT = contingency_table(precision, rec, OcSLength, OcSBaserate);
    double kk = getHand_kappa(fullCT, testSetLength, testSetBaserateInflation);
    savedKappas[KProw] = kk;
  };

  return(getBootPvalue_c(savedKappas, x));
}

// // @name rhoCT_c
// // @title rhoCT_ct
// // @param contingencyTable [TBD]
// // @param testSetBaserateInflation [TBD]
// // @param replicates [TBD]
// // @param OcSLength [TBD]
// // @param OcSBaserate [TBD]
// // @param ScSKappaThreshold [TBD]
// // @param ScSKappaMin [TBD]
// // @param ScSPrecisionMin [TBD]
// // @param ScSPrecisionMax [TBD]
// //' @export
// // [[Rcpp::export]]
// arma::vec rhoCT_c(
//   arma::imat contingencyTable,
//   double testSetBaserateInflation = 0.2,
//   int replicates = 800,
//   int OcSLength = 10000,
//   double OcSBaserate = -1.0,
//   double ScSKappaThreshold = 0.65,
//   double ScSKappaMin = 0.40,
//   double ScSPrecisionMin = 0.6,
//   double ScSPrecisionMax = 1
// ){
//   // Observed kappa
//   double kappa = kappa_ct(contingencyTable);
//   double size = accu(contingencyTable);
//   double gold1;
//   
//   if(OcSBaserate == -1.0) {
//     gold1 = accu(contingencyTable.row(0));
//     OcSBaserate = gold1 / size;
//   }
//   
//   double rho = calcRho_c(
//     kappa, OcSBaserate,
//     size, testSetBaserateInflation,
//     OcSLength, replicates,
//     ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
//   );
//   
//   return(NumericVector::create(kappa, rho));
// }


// //' @export
// // [[Rcpp::export]]
// bool run_rho_in_c(
//     double threshold,  double infl,
//     int handsetLength = 20,
//     double handsetBaserate = 0.2,
//     int replicates = 10000
// ) {
//   arma::imat ct = random_contingency_table(10000, infl, 0.4, threshold, 0.2, 0.8);
//   // handset --> testset
//   arma::imat handset = getHand_ct(ct, handsetLength, handsetBaserate);
//   arma::vec res = rhoCT_c(handset, handsetBaserate, replicates, 10000, -1, 0.65, 0.4, 0.6, 1);
//   
//   return(res[0]>threshold);
// }


1		contingencyToSet = function(TP, FP, FN, TN){
2	31x	setLength = TP + FP + FN + TN;
3
4	31x	gold1s = TP + FN;
5	31x	gold0s = setLength - gold1s;
6
7	31x	gold = c(rep(1, gold1s), rep(0, gold0s));
8	31x	silver = c(rep(1, TP),rep(0, gold1s - TP), rep(1, FP), rep(0, gold0s - FP));
9
10	31x	return(cbind(gold, silver));
11		};

1		###
2		#' @title Calculate kappa (Set)
3		#'
4		#' @description
5		#' This function calculates Cohen's kappa for a given \code{\link{codeSet}}. Called by \code{\link{kappa}}.
6		#'
7		#' @export
8		#' @param set A \code{\link{codeSet}}
9		#'
10		#' @seealso \code{\link{kappa}} and \code{\link{kappaCT}}
11		#'
12		#' @return
13		#' The kappa of the \code{\link{codeSet}}
14		#'
15		###
16		kappaSet = function(set){
17	70x	if(any(set != 1 & set != 0)) {
18	1x	stop("Each set must only consist of zeros and ones.")
19		}
20
21	69x	return(calcKappa(set,isSet = TRUE,kappaThreshold = NULL));
22		}
23

1		###
2		# Create A Random Set
3		#
4		# This creates a set with given parameters
5		# @param setLength The length of the set to be created
6		# @param baserate The baserate of the set to be created
7		# @param kappa The kappa of the set to be created
8		# @param minPrecision The minimum precision accepted in this set. Defaults to 0
9		# @param maxPrecision The maximum precision accepted in this set. Defaults to 1
10		# @return This returns a set with the given parameters
11		###
12		createRandomSet = function(setLength, baserate, kappaMin, kappaMax, minPrecision = 0, maxPrecision = 1, type = "set"){
13		#generates a valid precision with the given kappa and baserate
14	54x	KP = generateKPs(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision)[[1]]
15	54x	kappa = KP$kappa
16	54x	precision = KP$precision
17	54x	recall = getR(kappa, baserate, precision)
18
19	54x	if(type == "ct") {
20	1x	fullSet = pr.ct(precision = precision, recall = recall, length = setLength, baserate = baserate);
21		}
22		else {
23	53x	fullSet = prset(precision = precision, recall = recall, length = setLength, baserate = baserate);
24		}
25
26	54x	return(fullSet);
27		}

1		###
2		# Precision Recall Set
3		#
4		# This creates a set with a given recall and precision
5		# @param precision This is the precision of the given set
6		# @param recall This is the recall of the given set
7		# @param length This is the length of the set
8		# @param baserate This is the baserate of the set
9		# @return returns a set with the given parameters
10		###
11		prset = function(precision, recall, length, baserate) {
12		#generates a set given the different 4 quadrants
13	36354x	gold1s = max(round(baserate * length , 0), 1); # TP + FN
14	36354x	gold0s = length - gold1s; # FP + FN
15	36354x	TP = max(round(gold1s * recall , 0) , 1); # TP
16	36354x	FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP
17	36354x	gold = c(rep(1, gold1s), rep(0, gold0s));
18	36354x	silver = c(rep(1, TP),rep(0, gold1s - TP), rep(1, FP), rep(0, gold0s - FP));
19
20	36354x	return(cbind(gold, silver));
21		}
22
23		pr.ct = function(precision, recall, length, baserate) {
24		#generates a set given the different 4 quadrants
25	1x	gold1s = max(round(baserate * length , 0), 1); # TP + FN
26	1x	gold0s = length - gold1s; # FP + FN
27	1x	TP = max(round(gold1s * recall , 0) , 1); # TP
28	1x	FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP
29
30	1x	ct = matrix(0, ncol = 2, nrow = 2)
31	1x	ct[1,1] = TP
32	1x	ct[1,2] = gold1s - TP
33	1x	ct[2,1] = FP
34	1x	ct[2,2] = gold0s - FP
35	1x	return(ct);
36		}

1		any_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) {
2	54x	any(sapply(target, function(targ) abs(current - targ) < tolerance ))
3		}
4		all_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) {
5	5x	all(sapply(target, function(targ) abs(current - targ) < tolerance ))
6		}
7
8
9		#' Convert a codeset to a contingency table
10		#'
11		#' @param x codeset
12		#'
13		#' @return contingency table as a 2x2 matrix
14		#'
15		#' @export
16		as.contingency.table <- function(x) {
17	19x	y <- NULL
18
19	19x	if ( nrow(x) > 2 && ncol(x) == 2 ) {
20	12x	tp = length(which(x[,1] == 1 & x[,2] == 1))
21	12x	tn = length(which(x[,1] == 0 & x[,2] == 0))
22	12x	fp = length(which(x[,1] == 0 & x[,2] == 1))
23	12x	fn = length(which(x[,1] == 1 & x[,2] == 0))
24
25	12x	y <- matrix(
26	12x	c(tp, fp, fn, tn),
27	12x	ncol = 2, nrow = 2
28		)
29		}
30		else {
31	7x	y <- x
32		}
33
34	19x	dimnames(y) = list(c("first_positive", "first_negative"), c("second_positive", "second_negative"))
35	19x	class(y) <- c("contingency.table", "rating.set", class(y))
36	19x	attr(y, "agreement") <- list(
37	19x	"true_positive" = y[1, 1],
38	19x	"false_negative" = y[1, 2],
39	19x	"false_positive" = y[2, 1],
40	19x	"true_negative" = y[2, 2]
41		)
42	19x	attr(y, "baserate") <- baserateCT(y)
43	19x	attr(y, "kappa") <- kappaCT(y)
44
45	19x	return(y)
46		}
47
48		#' Convert codeset to contingency table
49		#'
50		#' @param x matrix contingency table (2x2)
51		#'
52		#' @return 2-column matrix representation of the contingency table
53		#'
54		#' @export
55		as.code.set <- function(x) {
56	7x	y <- NULL
57	7x	if (all(dim(x) == c(2,2))) {
58	3x	y <- matrix(
59	3x	c(rep(1, x[1,1]2), rep(1:0, x[1,2]), rep(0:1, x[2,1]), rep(0, x[2,2]2)),
60	3x	byrow = TRUE,
61	3x	ncol = 2
62		)
63		}
64		else {
65	4x	y <- x
66		}
67
68	7x	class(y) <- c("code.set", "rating.set", class(y))
69	7x	attr(y, "baserate") <- baserateSet(y)
70	7x	attr(y, "kappa") <- kappaSet(y)
71	7x	colnames(y) <- c("first_rater", "second_rater")
72
73	7x	return(y)
74		}
75
76		#' Helper function to return special values on a rating set
77		#'
78		#' @param x Set or Contingency.Table
79		#'
80		#' @param i Value to search for
81		#'
82		#' @export
83		"$.rating.set" <- function (x, i) {
84	55x	if (i %in% names(attributes(x))) {
85	50x	return(attr(x, i))
86		}
87		else {
88	5x	if (is(x, "code.set")) {
89	2x	if (i %in% colnames(x)) {
90	1x	return(x[, which(colnames(x) == i)])
91		}
92		}
93	3x	else if (is(x, "contingency.table")) {
94	3x	if (i %in% colnames(x)) {
95	1x	return(sum(x[, which(colnames(x) == i)]))
96		}
97	2x	else if (i %in% rownames(x)) {
98	1x	return(sum(x[which(rownames(x) == i), ]))
99		}
100		}
101		}
102
103	2x	return(NULL)
104		}
105
106		#' @export
107		.DollarNames.rating.set <- function(x, pattern) {
108	2x	vals <- .codegen_dollar(x, pattern)
109
110	2x	cols <- colnames(x)
111	2x	if (!is.null(cols)) {
112	2x	vals <- c(cols, vals)
113		}
114
115	2x	rows <- rownames(x)
116	2x	if (!is.null(rows)) {
117	1x	vals <- c(rows, vals)
118		}
119
120	2x	vals
121		}
122
123		.codegen_dollar <- function(x, pattern = "") {
124	3x	attrs <- names(attributes(x))
125	3x	vals <- attrs[attrs %in% c("baserate", "kappa", "agreement")]
126	3x	return(vals)
127		}
128
129		#' @export
130		print.rating.set <- function(x, ...) {
131	1x	y <- unclass(x)
132
133	1x	for(a in additional_attributes) {
134	3x	attr(y, a) <- NULL
135		}
136
137	1x	print(y, ...)
138		}

1		###
2		#' @title Calculate Baserate (Set)
3		#'
4		#' @description
5		#' This function will calculate the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}.
6		#'
7		#' @param set The \code{\link{codeSet}} for which the baserate is calculated
8		#'
9		#' @seealso \code{\link{baserate}} and \code{\link{baserateCT}}
10		#'
11		#' @export
12		#' @return A list of the format:\describe{
13		#' \item{firstBaserate}{The percentage that a positive code, or a 1, appears in the first rater}
14		#' \item{secondBaserate}{The percentage that a positive code, or a 1, appears in the second rater}
15		#' \item{averageBaserate}{The average percentage that a positive code, or a 1, appears in either of the two raters}
16		#' }
17		#'
18		###
19		baserateSet = function(set){
20	11x	firstBaserate = length(which(set[,1] == 1))/nrow(set)
21	11x	secondBaserate = length(which(set[,2] == 1))/nrow(set)
22	11x	averageBaserate = mean(c(firstBaserate,secondBaserate))
23	11x	return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate))
24		}

1		rating_set_recall <- function (x) {
2	8x	if (!is(x, "contingency.table")) {
3	8x	x <- as.contingency.table(x)
4		}
5
6	8x	x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_negative)
7		}
8
9		rating_set_precision <- function (x) {
10	8x	if (!is(x, "contingency.table")) {
11	8x	x <- as.contingency.table(x)
12		}
13
14	8x	x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_positive)
15		}

1		genPKcombo = function(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate){
2		# currKappa = sample(kappaDistribution, 1, replace = FALSE, prob = kappaProbability)
3		# currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability)
4	40473x	currKappa = kappaDistribution[sample.int(length(kappaDistribution), 1, replace = FALSE, prob = kappaProbability)]
5	40473x	currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)]
6
7	40473x	if(!checkBRPKcombo(baserate, currPrecision, currKappa)){
8	6044x	precisionMin = (2baseratecurrKappa - 2*baserate - currKappa)/(currKappa - 2)
9	6044x	indicies = which(precisionDistribution > precisionMin)
10
11	6044x	if (length(indicies) == 0) {
12	1607x	return(genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate))
13		}
14
15	4437x	precisionDistribution = precisionDistribution[indicies]
16	4437x	precisionProbability = precisionProbability[indicies]
17		# currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability)
18	4437x	currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)]
19	4437x	return(list(precision = currPrecision, kappa = currKappa))
20		} else {
21	34428x	return(list(precision = currPrecision, kappa = currKappa))
22		}
23		}

1		###
2		#' @title Calculate kappa (contingency Table)
3		#'
4		#' @description
5		#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}}. Called by \code{\link{kappa}}.
6		#'
7		#' @param ct A \code{\link{contingencyTable}}
8		#'
9		#' @seealso \code{\link{kappa}} and \code{\link{kappaSet}}
10		#'
11		#' @export
12		#' @return The kappa of the \code{\link{contingencyTable}}
13		###
14		kappaCT = function(ct){
15	1x	if(any(ct < 0)){stop("Values in Contingency Table must be positive")}
16	1x	if(ncol(ct) != 2 \|\| nrow(ct) != 2) stop("Incorrect number of dimensions: Contingency table must be 2x2.")
17	26x	for(i in 1:nrow(ct)) {
18	51x	for(j in 1:ncol(ct)) {
19	1x	if (ct[i,j]%%1 != 0) stop("Contingency table values must be positive integers.")
20		}
21		}
22
23	25x	return(calcKappa(ct,isSet = FALSE,kappaThreshold = NULL));
24		}

1		###
2		#' @title Get Handset
3		#' @description This function is to get a handset of a set and calculate the kappa
4		#' @param set This is the set to take a handset of
5		#' @param handSetLength This is the length of the handset to take
6		#' @param handSetBaserate This is the minimum baserate to inflate the handset to
7		#' @param returnSet If TRUE, then return the handSet if FALSE, return the kappa of the handSet
8		#'
9		#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not
10		#' @export
11		###
12		getHandSet = function(set, handSetLength, handSetBaserate, returnSet=FALSE) {
13		#positives is the minimum number of positive pairs in the first rater
14	36306x	positives = ceiling(handSetLength * handSetBaserate);
15	36306x	posInd = which(set[,1] == 1);
16
17	1x	if(positives > length(posInd)){stop("Not enough positives in first rater to inflate to this level")}
18		#if there is an inflated baserate positives will be > 0, so if handSetBaserate > 0, positives > 0
19	36305x	if (positives > 0) {
20		#goes through and picks a set that fits the number of positives first then randomly samples from the rest
21		#if the set created is not a valid set, then it reruns the process until a valid set is found
22	15503x	positiveIndices = posInd[sample.int(length(posInd),size=positives,replace=FALSE)];
23	15503x	others = set[!(1:nrow(set) %in% positiveIndices),];
24	15503x	otherIndices = sample.int(nrow(others),size=(handSetLength - positives),replace=FALSE);
25	15503x	this.set = rbind(set[positiveIndices,], others[otherIndices,]);
26	20802x	} else if (positives == 0){
27		#if there is no restriction on positives, then the set is generated by randomly sampling from the entire set
28		#this.set=matrix(0,2,2);
29		#while ((sum(this.set[,2]) == 0 \| sum(this.set[,2]) == nrow(this.set))) {
30	20802x	theseIndices = sample.int(nrow(set),size=handSetLength,replace=FALSE);
31	20802x	this.set = set[theseIndices,];
32		#}
33		}
34
35		#if return set is true return set rather than kappa
36	36305x	if (returnSet) {
37	4x	this.set <- as.code.set(this.set);
38	4x	return(this.set);
39		}
40
41		#otherwise return the kappa of the set
42	36301x	handSetKappa = calcKappa(this.set);
43
44	36301x	return(handSetKappa);
45		}
46
47		###
48		#' @title Get Handset
49		#' @description This function is to get a handset of a set and calculate the kappa
50		#'
51		#' @param full.ct This is the set to take a handset of
52		#' @param handSetLength This is the length of the handset to take
53		#' @param handSetBaserate This is the minimum baserate to inflate the handset to
54		#' @param as_kappa If FALSE then return the handSet, if TRUE (default) return the kappa of the handSet
55		#'
56		#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not
57		#' @export
58		###
59		getHandCT <- function(full.ct, handSetLength, handSetBaserate, as_kappa = TRUE) {
60	3x	positives <- ceiling(handSetLength * handSetBaserate)
61	3x	if (positives > sum(full.ct[1,])) {
62	1x	stop("Not enough positives in first rater to inflate to this level")
63		}
64
65		# positiveIndices <- sample_ct(full.ct[1,], positives)
66		#
67		# other_ct <- full.ct
68		# other_ct[1, ] <- other_ct[1, ] - positiveIndices
69		#
70		# other_ct_vec <- as.vector(other_ct)
71		# this_ct <- sample_ct(other_ct_vec, handSetLength - positives)
72		# this_ct[1, ] <- this_ct[1, ] + positiveIndices
73
74	2x	this_ct <- getHand_ct(ct = full.ct, handSetLength = handSetLength, handSetBaserate = handSetBaserate)
75
76	2x	if (!as_kappa) {
77	1x	this_ct <- as.contingency.table(this_ct);
78	1x	return(this_ct)
79		}
80
81	1x	return(kappa_ct(this_ct))
82		}
83
84		# sample_ct <- function(x, size) {
85		# x_vec <- as.vector(x)
86		# x_sampled <- matrix(rep(0, length(x)), ncol = length(x_vec) / 2)
87		# for(i in seq(size)) {
88		# s <- sample(seq(x_vec), 1, replace = F, prob = x_vec / sum(x_vec))
89		# x_sampled[[s]] = x_sampled[[s]] + 1
90		# x_vec[s] = x_vec[s] - 1
91		# }
92		#
93		# return(x_sampled)
94		# }

1		###
2		#' @title Create Simulated codeSet
3		#'
4		#' @description
5		#' Creates a simulated \code{\link{codeSet}} with the given parameters
6		#'
7		#' @param length the length of the simulated \code{\link{codeSet}} to be created
8		#' @param baserate the \code{\link{baserate}} of the simulated \code{\link{codeSet}}
9		#' @param kappaMin the minimum kappa of the simulated \code{\link{codeSet}}
10		#' @param kappaMax the maximum kappa of the simulated \code{\link{codeSet}}
11		#' @param precisionMin the minimum precision of the simulated \code{\link{codeSet}}
12		#' @param precisionMax the maximum precision of the simulated \code{\link{codeSet}}
13		#' @param ... Parameters passed to createRandomSet (e.g. type = "set" or type = "ct")
14		#' @param tries the maximum number of tries to generate a valid set, smaller set lengths may require an increased number of tries
15		#'
16		#' @details
17		#' \code{\link{codeSet}}s are generated by first picking a random kappa within its range and a random precision within its range. If the random kappa, random precision, and baserate are not mathematically possible, then the precision is resampled from a range of mathematically possible values within its range. A unique simulated \code{\link{codeSet}} is then constructed given these parameters.
18		#'
19		#' @export
20		#' @return A \code{\link{codeSet}} that fulfills the given parameters
21		###
22		createSimulatedCodeSet = function(length, baserate, kappaMin, kappaMax, precisionMin, precisionMax, ..., tries = 50){
23	1x	if(length <= 0){stop("length must be positive")}
24	1x	if(baserate < 0){stop("baserate must be positive")}
25	2x	if(kappaMin < 0 \| kappaMin > 1) stop("kappaMin must be between 0 and 1.")
26	2x	if(kappaMax < 0 \| kappaMax > 1) stop("kappaMax must be between 0 and 1.")
27	1x	if(kappaMin > kappaMax){stop("kappaMin must be less than kappaMax")}
28	2x	if(precisionMin < 0 \| precisionMin > 1) stop("precisionMin must be between 0 and 1.")
29	2x	if(precisionMax < 0 \| precisionMax > 1) stop("precisionMax must be between 0 and 1.")
30	1x	if(precisionMin > precisionMax){stop("precisionMin must be less than precisionMax")}
31	1x	if(tries < 1){stop("tries must be greater than 1")}
32
33	2x	setFound = NULL
34	2x	validSet = F
35	2x	triesLeft = tries
36	2x	Ks = c()
37	2x	while(!validSet && triesLeft > 0) {
38	51x	set = createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax)
39	51x	k = kappa(set)
40
41	51x	if ((k > kappaMin && k < kappaMax) \|\| any_equal(k, c(kappaMin, kappaMax))) {
42	1x	setFound = set
43	1x	validSet = T
44		}
45
46	51x	Ks = c(Ks, k)
47	51x	triesLeft = triesLeft - 1
48		}
49
50	2x	if (is.null(setFound)) {
51	1x	Ks_under_min <- Ks[which(Ks < kappaMin)]
52	1x	Kdist.to.min <- kappaMin - Ks_under_min
53	1x	Kdist.to.min.low <- ifelse(length(Ks_under_min) > 0, min(Kdist.to.min), NA)
54
55	1x	Ks_over_min <- Ks[which(Ks > kappaMax)]
56	1x	Kdist.to.max <- Ks_over_min - kappaMax
57	1x	Kdist.to.max.low <- ifelse(length(Ks_over_min) > 0, min(Kdist.to.max), NA)
58
59	1x	Kdist.all_mins <- c(Kdist.to.min, Kdist.to.max)
60
61	1x	both_mins <- c(Kdist.to.min.low, Kdist.to.max.low)
62	1x	both_mins <- both_mins[!is.na(both_mins)]
63	1x	closest <- c(Ks_under_min, Ks_over_min) [which(Kdist.all_mins == min(both_mins))][1]
64
65	1x	stop(paste0(
66	1x	"Unable to create a set of the provided length (", length, ") ",
67	1x	"in the provided kappa range (", kappaMin, "\U2014", kappaMax, "). ",
68	1x	"The closest kappa obtained after ", tries, " tries was: ",
69	1x	round(x = closest, digits = 5), "\n",
70	1x	"\tTry increasing the set length or the range of valid kappas"
71		))
72		}
73
74	1x	return(setFound)
75
76		# return(createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax, ...))
77		}

1		###
2		#' @title Rho using a file
3		#'
4		#' @description
5		#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}} as defined by the file
6		#' and columns (col1, col2), and returns a list containing both values. Called by \code{\link{rho}}.
7		#'
8		#' @export
9		#'
10		#' @param col1 The first column from file
11		#' @param col2 The second column from file
12		#' @template rho-params
13		#'
14		#' @return A list of the format:\describe{
15		#' \item{rho}{The rho of the \code{\link{codeSet}}}
16		#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}}
17		#' }
18		###
19		rho.file <- function(
20		x, col1, col2,
21		OcSBaserate = NULL,
22		testSetBaserateInflation = 0,
23		OcSLength = 10000,
24		replicates = 800,
25		ScSKappaThreshold = 0.9,
26		ScSKappaMin = .40,
27		ScSPrecisionMin = 0.6,
28		ScSPrecisionMax = 1
29		) {
30	3x	setFile = utils::read.csv(x)
31	3x	set = as.matrix(setFile[,c(col1, col2)])
32
33	3x	if (!any(is.na(set))) {
34	2x	rhoSet(
35	2x	set,
36	2x	OcSBaserate,
37	2x	testSetBaserateInflation,
38	2x	OcSLength, replicates,
39	2x	ScSKappaThreshold, ScSKappaMin,
40	2x	ScSPrecisionMin, ScSPrecisionMax
41		)
42		}
43		else {
44	1x	stop("The columns provided for col1 and col2 must not contain NA")
45		}
46		}

1		generateKPs = function(
2		numNeeded, baserate,
3		kappaMin, kappaMax,
4		precisionMin, precisionMax,
5		distributionType = 'FLAT',
6		distributionLength = 100000
7		) {
8	93x	kappaDistribution = seq(kappaMin, kappaMax, length = distributionLength)
9
10	93x	if (distributionType == 'FLAT') {
11		#probability for a flat distribution
12	92x	kappaProbability = NULL
13		}
14	1x	else if (distributionType == 'BELL') {
15		#probability for a bell curve
16	1x	kappaProbability = stats::dnorm(kappaDistribution, mean = 0.9, sd = 0.1)
17		}
18
19	93x	precisionDistribution = seq(precisionMin, precisionMax, length = 10000)
20	93x	precisionProbability = NULL
21
22	93x	KPs = tryCatch({
23	93x	KPs = list()
24	93x	for (i in 1:numNeeded) {
25	38865x	KPs[[length(KPs) + 1]] = genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate)
26		}
27	92x	KPs
28	93x	},error=function(e){
29	1x	NULL
30		})
31	1x	if(is.null(KPs)){stop("Could not generate a valid set given ranges of kappa and precision")}
32
33	92x	return (KPs);
34		}

1		###
2		#' @title Rho (contingency Table)
3		#'
4		#' @description
5		#' This function calculates rho and kappa for a given \code{\link{contingencyTable}}, and returns a list containing both values. Called by \code{\link{rho}}.
6		#'
7		#'
8		#' @template rho-params
9		#'
10		#' @export
11		#' @return A list of the format:\describe{
12		#' \item{rho}{The rho of the \code{\link{contingencyTable}}}
13		#' \item{kappa}{The Cohen's Kappa of the \code{\link{contingencyTable}}}
14		#' }
15		###
16		rhoCT = function(
17		x,
18		OcSBaserate = NULL,
19		testSetBaserateInflation = 0,
20		OcSLength = 10000,
21		replicates = 800,
22		ScSKappaThreshold = 0.9,
23		ScSKappaMin = .40,
24		ScSPrecisionMin = 0.6,
25		ScSPrecisionMax = 1
26		){
27	4x	if (any(x < 0)) {
28	1x	stop("Values in Contingency Table must be positive")
29		}
30	3x	kappa = kappa_ct(x);
31
32	3x	if (is.null(OcSBaserate)) {
33	3x	OcSBaserate = (sum(x[1,])) / (sum(x))
34		}
35
36	3x	rho_result <- rhoK(
37	3x	kappa,
38	3x	OcSBaserate,
39	3x	sum(x),
40	3x	testSetBaserateInflation,
41	3x	OcSLength,
42	3x	replicates,
43	3x	ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
44		)
45
46	3x	return(list(
47	3x	rho = rho_result,
48	3x	kappa = kappa,
49	3x	recall = rating_set_recall(x),
50	3x	precision = rating_set_precision(x)
51		))
52		}

1		###
2		#' @title Rho (kappa)
3		#'
4		#' @description
5		#' This function calculates rho for an observed kappa value with associated set parameters
6		#' (testSetLength and OcSBaserate). Called by \code{\link{rho}}. A p-value is returned and if
7		#' this value is less than 0.05, it is said that the handset does generalize to the entire set
8		#'
9		#' @template rho-params
10		#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
11		#' @param method set to "c" to calculate using the C++ implmentation. Defaults to "standard"
12		#'
13		#' @export
14		#' @return rho for the given parameters
15		###
16		rhoK = function(
17		x,
18		OcSBaserate,
19		testSetLength,
20		testSetBaserateInflation = 0,
21		OcSLength = 10000,
22		replicates = 800,
23		ScSKappaThreshold = 0.9,
24		ScSKappaMin = 0.40,
25		ScSPrecisionMin = 0.6,
26		ScSPrecisionMax = 1,
27		method = "standard"
28		) {
29		if (
30	50x	(x > 1 \| x < 0) \|\|
31	50x	(OcSBaserate > 1 \| OcSBaserate < 0)
32	3x	) stop("x and OcSBaserate must be between 0 and 1.")
33
34	47x	if (OcSBaserate < 0 \|\| ScSKappaThreshold < 0 \|\| ScSKappaMin < 0)
35	2x	stop("OcSBaserate, ScSKappaThreshold, and ScSKappaMin must be positive.")
36
37		if (
38	45x	testSetLength %% 1 != 0 \|\|
39	45x	OcSLength %% 1 != 0 \|\|
40	45x	replicates %% 1 != 0
41	3x	) stop("testSetLength, OcSLength, and replicates values must be a positive integer.")
42
43	42x	if (ScSKappaThreshold < ScSKappaMin)
44	1x	stop("ScSKappaThreshold must be greater than ScSKappaMin.")
45
46		if (
47	41x	(ScSPrecisionMin < 0 \| ScSPrecisionMin > 1) \|\|
48	41x	(ScSPrecisionMax < 0 \| ScSPrecisionMax > 1)
49	4x	) stop("ScSPrecisionMin and ScSPrecisionMax must be between 0 and 1.")
50
51	37x	if (ScSPrecisionMax < ScSPrecisionMin)
52	1x	stop("ScSPrecisionMax must be greater than ScSPrecisionMin.")
53
54	36x	result <- NULL
55	36x	if (method == "standard") {
56	26x	result <- calcRho( x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
57		}
58		else {
59	10x	result <- calcRho_c(x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
60		}
61	36x	return(result)
62		}

1		#' Generate a Handset
2		#'
3		#' @description Generate a vector representing indices of set, using the handSetBaserate
4		#' to determine the minimum number of indices that are positive
5		#'
6		#' @param set matrix of two columns
7		#' @param handSetLength number of indices to find
8		#' @param handSetBaserate number between 0 and 1 to use as a minimum number of positive indices
9		#'
10		#' @return vector of indices from set
11		#' @export
12		getHandSetIndices = function(set, handSetLength = 20, handSetBaserate = 0.2) {
13	4x	positives = ceiling(handSetLength * handSetBaserate);
14	4x	posInd = which(set[, 1] == 1);
15
16	4x	if (positives > length(posInd)) {
17	1x	stop("Not enough positives in first rater to inflate to this level")
18		}
19
20	3x	this.set = NULL
21	3x	if (positives > 0) {
22	2x	positiveIndices = posInd[sample.int(length(posInd), size=positives, replace=FALSE)]
23	2x	others = which(!(1:nrow(set) %in% positiveIndices))
24	2x	otherIndices = sample.int(length(others), size = (handSetLength - positives), replace = FALSE)
25	2x	this.set = c(positiveIndices, others[otherIndices])
26		}
27	1x	else if (positives == 0) {
28	1x	theseIndices = sample.int(length(set),size=handSetLength,replace=FALSE)
29	1x	this.set = theseIndices
30		}
31
32	3x	return(sample(this.set))
33		}

1		###
2		# This function calculates rho given the parameters of the initial set and the kappa of the observed handset and is
3		# called by rhoK()
4		###
5		calcRho = function(
6		x,
7		OcSBaserate,
8		testSetLength,
9		testSetBaserateInflation = 0,
10		OcSLength = 10000,
11		replicates = 800,
12		ScSKappaThreshold = 0.9,
13		ScSKappaMin = 0.40,
14		ScSPrecisionMin = 0.6,
15		ScSPrecisionMax = 1,
16		KPs = NULL
17		) {
18
19	136x	if(is.null(KPs)) {
20	36x	KPs = generateKPs(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax)
21		}
22	136x	if(length(KPs) < replicates) {
23	50x	replicates = length(KPs)
24		}
25
26		#creates distribution from random kappa sets
27	136x	savedKappas = rep(0,replicates);
28
29		# cat("Runs: ", runs, "\n")
30		# cat("KPs length:, ", length(KPs), "\n")
31	136x	for (i in 1:replicates) {
32	36300x	KP = KPs[i]
33	36300x	kappa = KP[[1]]$kappa
34	36300x	precision = KP[[1]]$precision
35	36300x	recall = getR(kappa, OcSBaserate, precision)
36	36300x	fullSet = prset(precision = precision, recall = recall, length = OcSLength, baserate = OcSBaserate);
37
38		#computes kappa of handset to add to distribution
39	36300x	savedKappas[i] = getHandSet(set = fullSet, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation);
40		}
41
42		# compare observedKappa to distribution
43	136x	return(getBootPvalue(savedKappas, x));
44		}

1		###
2		#' @title Calculate kappa
3		#'
4		#' @description
5		#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}} or a \code{\link{codeSet}}
6		#'
7		#' @param data A \code{\link{contingencyTable}} or a \code{\link{codeSet}}
8		#'
9		#'
10		#' @seealso \code{\link{kappaSet}} and \code{\link{kappaCT}}
11		#'
12		#' @examples
13		#' #Given a code set
14		#' kappa(data = codeSet)
15		#'
16		#' #Given a contingency Table
17		#' kappa(data = contingencyTable)
18		#'
19		#' @export
20		#' @return The kappa of the \code{\link{contingencyTable}} or \code{\link{codeSet}}
21		###
22		kappa <- function(data) {
23	61x	if (all(dim(data) == 2)) {
24	5x	return(kappaCT(data))
25		} else {
26	56x	return(kappaSet(data))
27		}
28		}

1		###
2		# Get Recall
3		#
4		# This gets the recall from a pair
5		# @param kappa This is the kappa of the pair
6		# @param BR This is the baserate of the pair
7		# @param P this is the precision of the pair
8		# @return returns the recall given the parameters
9		###
10		getR = function(kappa, BR, P){
11		#gets recall that corresponds with the kappa, baserate, and precision
12	36355x	top = kappa*P;
13	36355x	R = top / (2P-2BR-kappa+2BRkappa);
14	36355x	return (R);
15		}

1		###
2		#' @title Rho Min
3		#'
4		#' @description
5		#' This function calculates the minimum testSetLength where it is possible to get a rho less than alpha for the given parameters of rho.
6		#'
7		#' @param baserate A \code{\link{baserate}}
8		#' @param alpha The threshold of significance for rho (similar to an alpha level for a p value), defaulted to 0.05
9		#' @param inc An integer indicating by how much the testSetLength should increase each iteration
10		#' @param printInc A boolean indicating whether to print out each increment value with it's corresponding significance for rho
11		#' @param ... Any additional parameters passed into \code{\link{rho}}
12		#'
13		#' @examples
14		#' #Add testSetBaserateInflation as an additional parameter
15		#' rhoMin(0.2, testSetBaserateInflation = 0.33)
16		#'
17		#' #Add testSetBaserateInflation as well as changing inc and selecting printInc
18		#' rhoMin(0.2, inc = 5, printInc = TRUE, testSetBaserateInflation = 0.33)
19		#'
20		#' @export
21		#' @return The minimum length of testSet, to the nearest multiple of inc, greater than the minimum length, that would give a value where rho less than alpha becomes mathematically possible.
22		###
23		rhoMin = function(baserate, alpha = 0.05, inc = 10, printInc = FALSE, ...){
24	4x	rhoVal = 1
25	4x	testSetLength = 0
26	1x	if(inc%%1 != 0 \| inc <= 0) stop("Inc value must be a positive integer.")
27	1x	if(alpha > 1 \| alpha < 0) stop("Alpha must be between 0 and 1.")
28
29
30	2x	while(rhoVal > alpha){
31	8x	testSetLength = testSetLength + inc
32	8x	rhoVal = rho(1, baserate, testSetLength, ...)
33	8x	if(printInc) {
34	4x	cat(testSetLength, rhoVal,"\n")
35		}
36		}
37	2x	return(testSetLength)
38
39		}

1		###
2		# Check Baserate, Precision, Kappa Combo
3		#
4		# This function checks to make sure the combination of BR, P, and K will create a valid set
5		# @param BR This is the supplied baserate
6		# @param P This is the supplied precision
7		# @param K This is the supplied kappa
8		# @return returns TRUE if the combination is valid, FALSE otherwise
9		###
10		checkBRPKcombo = function(BR, P, K) {
11		#if right is less than P, then it is a valid combination. If not, then the pair of kappa, precision, and baserate does not correspond to a valid set
12	40475x	right = (2BRK - 2*BR - K)/(K - 2);
13	40474x	if(P > right){
14	34429x	return(TRUE);
15		}else{
16	6045x	return(FALSE);
17		}
18		}

1		###
2		# Calculate Kappa
3		#
4		# This function calculates kappa given a set
5		# @param set This is the set to calculate kappa from
6		# @param isSet TRUE if set FALSE if contigency table
7		# @param kappaThreshold if null then return kappa otherwise return list of kappa and above or not
8		# @return This returns the kappa of the set given
9		###
10		calcKappa <- function(set, isSet = TRUE, kappaThreshold = NULL) {
11	36401x	if (!isSet) {
12	31x	set <- contingencyToSet(set[1, 1], set[2, 1], set[1, 2], set[2, 2])
13		}
14
15		if (
16	36401x	length(which(set[, 1] == set[, 2] & set[, 1] == 1)) == nrow(set) \|
17	36401x	length(which(set[, 1] == set[, 2] & set[, 1] == 0)) == nrow(set)
18		) {
19	917x	return(1)
20		}
21
22		#calculates kappa by calculating the agreement and the baserates and then creating the adjacenty matrix
23	35484x	agreement <- length(which(set[, 1] == set[, 2])) / nrow(set);
24	35484x	baserate2 <- length(which(1 == set[, 2])) / nrow(set);
25	35484x	baserate1 <- length(which(1 == set[, 1])) / nrow(set);
26	35484x	randomAgreement <- baserate1 * baserate2 + (1 - baserate1) * (1 - baserate2);
27	35484x	kappa <- (agreement - randomAgreement) / (1 - randomAgreement);
28
29	35484x	if (is.null(kappaThreshold)) {
30	35482x	return(kappa);
31		} else {
32	2x	return(list(kappa = kappa, above = kappa > kappaThreshold))
33		}
34		}

1		###
2		#' @title Calculate Baserate
3		#'
4		#' @description
5		#' This function calculates the baserate of the first rater, second rater, and the average of both the raters.
6		#'
7		#' @details
8		#' A baserate is the percentage, as a decimal, that a positive code appears in data (either a \code{\link{codeSet}} or \code{\link{contingencyTable}}) for a given rater. It is assumed that the first rater is more experienced and thus provides a better estimation of the actual baserate for a given code, so the first rater's baserate is often used as if it is the actual baserate. If the raters are assumed to have the same experience level, the average baserate may give a better estimation. If the second rater is more experienced, the second rater's baserate may give a better estimation. Functions assume that the first rater is the more experienced rater and thus uses the first rater's baserate as the overall baserate estimation.
9		#'
10		#'
11		#' @param data The \code{\link[=getTestSet]{testSet}} or \code{\link{contingencyTable}} for which the baserate is calculatede
12		#'
13		#' @seealso \code{\link{baserateSet}} and \code{\link{baserateCT}}
14		#'
15		#' @examples
16		#' #Given a code set
17		#' baserate(data = codeSet)
18		#'
19		#' #Given a contingency Table
20		#' baserate(data = contingencyTable)
21		#'
22		#' @export
23		#' @return A list of the format:\describe{
24		#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater}
25		#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater}
26		#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.}
27		#' }
28		#'
29		###
30		baserate = function(data){
31	7x	if (nrow(data) != 2) {
32	4x	return(baserateSet(data))
33		} else {
34	3x	return(baserateCT(data))
35		}
36		}

1		###
2		# Generate Precision Combo
3		#
4		# This function generates a precision that fits within the range and will create a valid set
5		# @param kappa This is the kappa of the desired pair
6		# @param Pmin This is the minimum precision desired
7		# @param Pmax This is the maximum precision desired
8		# @param BR This is the baserate of the desired pair
9		# @return This returns a precision value that will generate a valid set with the other parameters
10		####
11
12		genPcombo = function(kappa, Pmin, Pmax, BR){
13		#gets a random precision in the precision range
14	!	currPrecision = stats::runif(1, Pmin, Pmax);
15		#checks is the combination of precision, kappa, and baserate is valid. If it is vaid, return the precision, if not, recall the function until a valid precision is found
16	!	if(!checkBRPKcombo(BR, currPrecision, kappa)){
17	!	genPcombo(kappa, Pmin, Pmax, BR);
18		}else{
19	!	return(currPrecision);
20		}
21		}

1		###
2		#' @title Get Test Set
3		#'
4		#' @description
5		#' This function gets a \emph{testSet} from a larger \code{\link{codeSet}} given certain sampling parameters.
6		#'
7		#' @details
8		#' A \emph{testSet} is a \code{\link{codeSet}} that is a subset of a larger \code{\link{codeSet}} with a given set of properties. A \emph{testSet} is constructed by sampling (without replacement) P rows from rows in the larger \code{\link{codeSet}} where the first rater's code was 1, and then appending an additional sample (without replacement) of R rows taken at random from the larger \code{\link{codeSet}} excluding rows included in the first P rows sampled. P is computed as the minbaserate * length of the \emph{testset}. R is computed as testSetLength - P. The result of this sampling procedure is to create a sample with a minimum baserate regardless of the baserate of the larger \code{\link{codeSet}}.If \emph{testSetBaserateInflation} is set to zero, the function selects rows at random.
9		#'
10		#' @param set The \code{\link{codeSet}} from which the \emph{testSet} is taken
11		#' @param testSetLength The length of the \emph{testSet} to be taken
12		#' @param testSetBaserateInflation The minimum guaranteed \code{\link{baserate}} of the \emph{testSet}. Default to 0
13		#'
14		#'
15		#' @export
16		#' @return A \code{\link{codeSet}} with the properties specified
17		###
18
19		getTestSet = function(set, testSetLength, testSetBaserateInflation = 0){
20	1x	if(length(unlist(set)) < testSetLength){stop("testSetLength must be less than the length of set")}
21	1x	if(testSetLength %% 1 \| testSetLength < 1) {stop("testSetLength value must be a positive integer.")}
22	1x	if(testSetBaserateInflation < 0){stop("testSetBaserateInfation must be positive")}
23	1x	if(testSetBaserateInflation >= 1){stop("testSetBaserateInflation must be below 1")}
24	5x	if(length(which(set[,1] == 1 & set[,2] == 1)) == nrow(set)){
25	1x	return(set[1:testSetLength,])
26	4x	}else if(length(which(set[,1] == 0 & set[,2] == 0)) == nrow(set)){
27	2x	if(testSetBaserateInflation == 0){
28	1x	return(set[1:testSetLength,])
29		}else{
30	1x	stop("Not enough positives in first rater to inflate to this level")
31		}
32		}
33	2x	return(getHandSet(set = set, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation, returnSet = T))
34		}

1		###
2		#' @title Calculate Baserate (CT)
3		#'
4		#' @description
5		#' This function calculates the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}.
6		#'
7		#' @export
8		#'
9		#' @param CT The \code{\link{contingencyTable}} for which the baserate is calculated
10		#'
11		#' @seealso \code{\link{baserate}} and \code{\link{baserateSet}}
12		#'
13		#' @return A list of the format:\describe{
14		#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater}
15		#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater}
16		#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.}
17		#' }
18		#'
19		###
20
21		baserateCT = function(CT){
22	1x	if(any(CT < 0)){stop("Values in Contingency Table must be positive")}
23	22x	firstBaserate = sum(CT[1,])/sum(CT)
24	22x	secondBaserate = sum(CT[,1])/sum(CT)
25	22x	averageBaserate = mean(c(firstBaserate,secondBaserate))
26	22x	return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate))
27		}

1		###
2		# Get Boot P Value
3		#
4		# This function gets the p value of the result in the distribution
5		# @param distribution The distribution that the result falls into
6		# @param result The value that you are evaluating where in the distribution it falls
7		# @return This returns the p value from the result into the distribution
8		###
9		getBootPvalue = function(distribution, result) {
10		#returns the percentage of the time that the distribution was greater or equal to the observed kappa
11	139x	N = length(distribution);
12	139x	if(result < mean(distribution, na.rm = T)) {
13		#if the result is less than the mean of the distribution, than the p value is 1
14	16x	return(1);
15		} else {
16		#return the number of times that the distribution is greater than the result as a percentage of the total number of items in the distribution
17	123x	return(length(which(distribution >= result)) / N);
18		}
19		}

1		###
2		#' @include rhoK.R
3		#' @title Rho (set)
4		#'
5		#' @description
6		#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}}, and returns a list containing both values. Called by \code{\link{rho}}.
7		#'
8		#' @export
9		#'
10		#' @template rho-params
11		#'
12		#' @return A list of the format:\describe{
13		#' \item{rho}{The rho of the \code{\link{codeSet}}}
14		#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}}
15		#' }
16		###
17		rhoSet = function(
18		x,
19		OcSBaserate = NULL,
20		testSetBaserateInflation = 0,
21		OcSLength = 10000,
22		replicates = 800,
23		ScSKappaThreshold = 0.9,
24		ScSKappaMin = 0.40,
25		ScSPrecisionMin = 0.6,
26		ScSPrecisionMax = 1
27		) {
28	5x	kappa = kappaSet(x);
29
30	5x	row.count = nrow(x);
31	5x	if(is.null(OcSBaserate)) {
32	4x	OcSBaserate = length(which(x[,1] == 1)) / row.count;
33		}
34
35	5x	return(list(
36	5x	rho = rhoK(
37	5x	kappa,
38	5x	OcSBaserate,
39	5x	row.count,
40	5x	testSetBaserateInflation,
41	5x	OcSLength,
42	5x	replicates,
43	5x	ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
44		),
45	5x	kappa = kappa,
46	5x	recall = rating_set_recall(x),
47	5x	precision = rating_set_precision(x)
48		))
49		}

1		###
2		#' @title Rho
3		#'
4		#' @description
5		#' This function calculates rho for a \code{\link[=getTestSet]{testSet}}, \code{\link{contingencyTable}}, or an observed kappa value with associated set parameters (testSetLength and OcSBaserate).
6		#'
7		#' @details
8		#' Rho is a Monte Carlo rejective method of interrater reliability statistics, implemented here for Cohen's Kappa. Rho constructs a collection of data sets in which kappa is below a specified threshold, and computes the empirical distribution on kappa based on the specified sampling procedure. Rho returns the percent of the empirical distribution greater than or equal to an observed kappa. As a result, Rho quantifies the type 1 error in generalizing from an observed test set to a true value of agreement between two raters.
9		#'
10		#' Rho starts with an observed kappa value, calculated on a subset of a \code{\link{codeSet}}, known as an observed \code{\link[=getTestSet]{testSet}}, and a \emph{kappa threshold} which indicates what is considered significant agreement between raters.
11		#'
12		#' It then generates a collection of fully-coded, simulated
13		#' \code{\link[=getTestSet]{codeSets}} (ScS), further described in
14		#' \code{\link{createSimulatedCodeSet}}, all of which have a kappa value below the kappa
15		#' threshold and similar properties as the original \code{\link{codeSet}}.
16		#'
17		#' Then, kappa is calculated on a \code{\link[=getTestSet]{testSet}} sampled from each of the ScSs in the
18		#' collection to create a null hypothesis distribution. These \code{\link[=getTestSet]{testSets}} mirror the observed \code{\link[=getTestSet]{testSets}} in their size and sampling method. How these \code{\link[=getTestSet]{testSets}} are sampled is futher described in \code{\link{getTestSet}}.
19		#'
20		#' The null hypothesis is that the observed \code{\link[=getTestSet]{testSet}}, was sampled from a data set, which, if both raters were to code in its entirety, would result in a level of agreement below the kappa threshold.
21		#'
22		#' For example, using an alpha level of 0.05, if the observed kappa is greater than 95 percent of the kappas in the null hypothesis distribution, the null hypothesis is rejected. Then one can conclude that the two raters would have acceptable agreement had they coded the entire data set.
23		#'
24		#' @template rho-params
25		#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
26		#'
27		#' @examples
28		#' # Given an observed kappa value
29		#' rho(x = 0.88, OcSBaserate = 0.2, testSetLength = 80)
30		#'
31		#' # Given a test Set
32		#' rho(x = codeSet)
33		#'
34		#' # Given a contingency Table
35		#' rho(x = contingencyTable)
36		#'
37		#' @export
38		#' @return rho and kappa for the given data and parameters (unless kappa is given)
39		###
40		rho = function(
41		x,
42		OcSBaserate = NULL,
43		testSetLength = NULL,
44		testSetBaserateInflation = 0,
45		OcSLength = 10000,
46		replicates = 800,
47		ScSKappaThreshold = 0.9,
48		ScSKappaMin = 0.40,
49		ScSPrecisionMin = 0.6,
50		ScSPrecisionMax = 1
51		) {
52
53		### Calculate rho given a kappa value
54	15x	if (is(x, "numeric")) {
55	1x	if (is.null(OcSBaserate)) { stop("Must give a baserate when a kappa value is given") }
56	1x	if (is.null(testSetLength)) { stop("Must give a testSetLength when a kappa value is given") }
57
58	8x	result <- rhoK(x, OcSBaserate,
59	8x	testSetLength,
60	8x	testSetBaserateInflation,
61	8x	OcSLength,
62	8x	replicates,
63	8x	ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
64		}
65
66		### Calculate rho given a code set
67	5x	else if (nrow(x) != 2) {
68	2x	result <- rhoSet(x, OcSBaserate,
69	2x	testSetBaserateInflation,
70	2x	OcSLength,
71	2x	replicates,
72	2x	ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
73		}
74
75		### Calculate rho given a contingency table
76		else {
77	3x	result <- rhoCT(x, OcSBaserate,
78	3x	testSetBaserateInflation,
79	3x	OcSLength,
80	3x	replicates,
81	3x	ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax)
82		}
83
84	13x	return(result)
85		}

1		#include <RcppArmadilloExtensions/sample.h>
2		// [[Rcpp::depends(RcppArmadillo)]]
3
4		#include <RcppArmadillo.h>
5		#include <Rcpp.h>
6		using namespace Rcpp;
7		using namespace arma;
8
9		// NumericMatrix toNumericMatrix(arma::imat x) {
10		// int nRows=x.n_rows;
11		//
12		// NumericMatrix y(nRows, x.n_cols);
13		// for (int j=0; j<x.n_rows; j++) {
14		// for (int i=0; i<x.n_cols; i++) {
15		// y(j,i) = x(j, i);
16		// }
17		// }
18		//
19		// return y;
20		// }
21
22		// // @name sample_weighted
23		// // @title sample_weighted
24		// // @param x vector
25		// // @param n integer
26		// // @export
27		// //' @export
28		// // [[Rcpp::export]]
29		// arma::vec sample_weighted(arma::vec xx, arma::vec probs, int n) {
30		// // NumericVector ret(n);
31		// // NumericVector xx(x);
32		// arma::vec ret(n);
33		//
34		// // IntegerVector rng = seq(0, xx.size()-1);
35		// arma::ivec rng = regspace<ivec>(0, xx.size() - 1);
36		// for(int w = 0; w < n; w++) {
37		// // int tot = sum(xx);
38		// // NumericVector probs = xx / tot;
39		// // arma::vec probs2 = toArmaVec(probs);
40		// // IntegerVector s = Rcpp::sugar::sample(rng, 1, false, probs);
41		// arma::ivec s = Rcpp::RcppArmadillo::sample(rng, 1, false, probs);
42		// xx[s[0]] -= 1;
43		// ret[w] = s[0];
44		// }
45		// // return(xx);
46		// return(ret + 1);
47		// }
48
49		// // ' @export
50		// // [[Rcpp::export]]
51		// arma::ivec sample_weighted2(arma::imat xx, int n, bool forR = true) {
52		// arma::ivec ret = arma::ivec(n);
53		// NumericMatrix x = toNumericMatrix(xx);
54		// IntegerVector rng = seq(0, x.size() - 1);
55		//
56		// for(int w = 0; w < n; w++) {
57		// double tot = accu(xx);
58		// NumericVector probs = x / tot;
59		// IntegerVector s = sample(rng, 1, false, probs);
60		// x[s[0]] -= 1;
61		// ret[w] = s[0];
62		// }
63		//
64		// if(forR == true) {
65		// return(ret + 1);
66		// } else {
67		// return(ret);
68		// }
69		// }
70
71		//' @title sample_contingency_table
72		//' @name sample_contingency_table
73		//'
74		//' @param xx contingency table matrix
75		//' @param n int size of the contingency table
76		//' @param forR bool if true, add 1 to the results accounting for R indices starting at 1
77		//'
78		//' @export
79		// [[Rcpp::export]]
80	39004x	arma::ivec sample_contingency_table(arma::imat xx, int n, bool forR = true) {
81	78008x	arma::ivec ret = arma::ivec(n);
82
83		int maxInt;
84		int s;
85	2229214x	for(int w = 0; w < n; w++) {
86	2190210x	maxInt = accu(xx);
87	2190210x	s = randi<int>(distr_param(0, maxInt));
88	14595227x	ret[w] = (s <= xx(0) ? 0 : (s <= (xx(0)+xx(1))) ? 1 : (s <= (xx(0)+xx(1)+xx(2))) ? 2 : 3);
89	6570630x	xx(ret[w])--;
90		}
91
92	39004x	if(forR == true) return(ret + 1);
93	39004x	else return(ret);
94		}
95
96		//' @name getBootPvalue_c
97		//' @title getBootPvalue_c
98		//'
99		//' @param distribution vector of calculated kappas
100		//' @param result double calculated kappa to compare against
101		//'
102		//' @description
103		//' returns the percentage of the time that the distribution was greater or equal to the observed kappa
104		//' if the result is less than the mean of the distribution, than the p value is 1
105		//' else return the number of times that the distribution is greater than the result as a percentage of the total number
106		//' of items in the distribution
107		//'
108		//' @return double calculated p-value
109		//'
110		//' @export
111		// [[Rcpp::export]]
112	122x	double getBootPvalue_c(arma::vec distribution, double result) {
113	122x	if(result < mean(distribution)) {
114	17x	return(1.0);
115		}
116
117		else {
118	210x	arma::uvec matched = find(distribution >= result);
119
120	105x	double rho = (matched.size() * 1.0) / distribution.size();
121	105x	return( rho );
122		}
123		}
124
125		// Validate a Baserate/Kappa combo against the supplied Precision.
126		// Return true if Precision is greater than the calculated right value
127		// [[Rcpp::export]]
128	79441x	bool check_BRK_combo(double BR, double P, double K) {
129	79441x	double right = ((2 * BR * K) - (2 * BR) - K) / (K - 2);
130
131	79441x	return(P > right);
132		}
133
134		//' @name recall
135		//' @title recall
136		//'
137		//' @param kappa double
138		//' @param BR double
139		//' @param P double
140		//'
141		//' @return Recall calculated from provided kappa, BR, and P
142		//'
143		//' @export
144		// [[Rcpp::export]]
145	23502x	double recall(double kappa, double BR, double P) {
146	23502x	double top = kappa * P;
147	23502x	double R = top / (2P-2BR-kappa+2BRkappa);
148	23502x	return(R);
149		}
150
151		// Find a valid precision and kappa combo from a distribution of Kappas and Precisions given a baserate
152		//
153		// [[Rcpp::export]]
154	79441x	NumericVector find_valid_pk(
155		arma::colvec kappaDistribution, arma::vec kappaProbability,
156		arma::colvec precisionDistribution, arma::vec precisionProbability,
157		double baserate
158		) {
159	158882x	arma::ivec kappaRng = regspace<arma::ivec>(0, kappaDistribution.size() - 1);
160
161	158882x	arma::ivec whKappa;
162	79441x	if (kappaProbability.n_elem > 0) {
163	53440x	whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false, kappaProbability);
164		} else {
165	26001x	whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false);
166		}
167	158882x	double currKappa = kappaDistribution[whKappa.at(0)];
168
169	158882x	arma::ivec precRng = regspace<arma::ivec>(0, precisionDistribution.size() - 1);
170	158882x	arma::ivec whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false);
171	158882x	double currPrec = precisionDistribution[whPrec.at(0)];
172
173	79441x	if (!check_BRK_combo(baserate, currPrec, currKappa)) {
174	46824x	double precisionMin = (2 * baserate * currKappa - 2 * baserate - currKappa) / (currKappa - 2);
175	90210x	arma::uvec indicies = find(precisionDistribution > precisionMin);
176
177	46824x	if (indicies.size() == 0) {
178	3438x	return(find_valid_pk(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate));
179		}
180
181	86772x	precisionDistribution = precisionDistribution.elem(indicies);
182
183	43386x	if (precisionProbability.size() > 0) {
184	2x	precisionProbability = precisionProbability.elem(indicies);
185		}
186
187	43386x	precRng = regspace<ivec>(0, precisionDistribution.size() - 1);
188	43386x	whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false);
189	86772x	currPrec = precisionDistribution[whPrec.at(0)];
190		}
191
192	76003x	return(Rcpp::NumericVector::create(currPrec, currKappa));
193		}
194
195
196		//' @name generateKPs_c
197		//' @title generate_kp_list
198		//'
199		//' @param numNeeded int
200		//' @param baserate double
201		//' @param kappaMin double
202		//' @param kappaMax double
203		//' @param precisionMin double
204		//' @param precisionMax double
205		//' @param distributionType int 0 - normal (default), 1 - bell
206		//' @param distributionLength long
207		//'
208		//' @return matrix of kappa and precision values (column 1 as precision)
209		//'
210		//' @export
211		// [[Rcpp::export]]
212	23x	Rcpp::NumericMatrix generate_kp_list(
213		int numNeeded, double baserate,
214		double kappaMin, double kappaMax,
215		double precisionMin, double precisionMax,
216		int distributionType = 0, long distributionLength = 10000
217		) {
218	23x	double kappaStep = ((kappaMax - kappaMin) / (distributionLength - 1));
219
220	46x	colvec kappaDistribution = regspace(kappaMin, kappaStep, kappaMax);
221
222	46x	arma::vec kappaProbability;
223	23x	if(distributionType == 1) {
224	1x	kappaProbability = normpdf(kappaDistribution, 0.9, 0.1);
225		}
226
227	23x	double precStep = (precisionMax - precisionMin) / (10000 - 1);
228	46x	colvec precisionDistribution = regspace(precisionMin, precStep, precisionMax);
229
230	46x	arma::vec precisionProbability;
231	23x	Rcpp::NumericMatrix KPs(numNeeded,2);
232
233	76024x	for(int i=0; i < numNeeded; i++) {
234	152002x	KPs(i, _) = find_valid_pk(
235		kappaDistribution,
236		kappaProbability,
237		precisionDistribution,
238		precisionProbability,
239		baserate
240		);
241		}
242
243	46x	return (KPs);
244		}
245
246		//' @name contingency_table
247		//' @title contingency_table
248		//' @description Create a contingency table using the provied precision, recall, baserate, and length.
249		//'
250		//' @param precision double
251		//' @param rec double
252		//' @param length int
253		//' @param baserate double
254		//'
255		//' @export
256		// [[Rcpp::export]]
257	23503x	arma::imat contingency_table(double precision, double rec, int length, double baserate) {
258	23503x	int gold1s = max(NumericVector::create(round(baserate * length), 1));
259	23503x	int gold0s = length - gold1s;
260	23503x	int TP = max(NumericVector::create(round(gold1s * rec), 1));
261	23503x	int FP = min(NumericVector::create(gold0s, max(NumericVector::create(round(TP * (1 - precision) / precision), 1))));
262
263	23503x	arma::imat ct = arma::imat(2,2);
264	23503x	ct(0,0) = TP;
265	23503x	ct(0,1) = gold1s - TP;
266	23503x	ct(1,0) = FP;
267	23503x	ct(1,1) = gold0s - FP;
268
269	23503x	return(ct);
270		}
271
272		//' @name random_contingency_table
273		//' @title random_contingency_table
274		//' @param setLength [TBD]
275		//' @param baserate [TBD]
276		//' @param kappaMin [TBD]
277		//' @param kappaMax [TBD]
278		//' @param minPrecision [TBD]
279		//' @param maxPrecision [TBD]
280		//'
281		//' @export
282		// [[Rcpp::export]]
283	1x	arma::imat random_contingency_table(
284		int setLength,double baserate,
285		double kappaMin, double kappaMax,
286		double minPrecision = 0, double maxPrecision = 1
287		) {
288	2x	NumericMatrix KP = generate_kp_list(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision);
289	1x	double kappa = KP(0,1);
290	1x	double precision = KP(0,0);
291	1x	double rec = recall(kappa, baserate, precision);
292
293	1x	arma::imat ct = contingency_table(precision, rec, setLength, baserate);
294
295	2x	return(ct);
296		}
297
298		//' @title kappa_ct
299		//' @description Calculate kappa from a contingency table
300		//' @param ct [TBD]
301		//'
302		//' @export
303		// [[Rcpp::export]]
304	23505x	double kappa_ct(arma::imat ct) {
305	23505x	double a = ct(0,0); //gold 1 & silver 1
306	23505x	double c = ct(0,1); //gold 1 & silver 0
307	23505x	double b = ct(1,0); //gold 0 & silver 1
308	23505x	double d = ct(1,1); //gold 0 & silver 0
309	23505x	double size = accu(ct);
310
311	23505x	double pZero = (a+d) / size;
312	23505x	double pYes = ((a + b) / size) * ((a + c) / size);
313	23505x	double pNo = ((c + d) / size) * ((b + d) / size);
314	23505x	double pE = (pYes + pNo);
315	23505x	double k = (pZero - pE) / (1 - pE);
316
317	23505x	return(k);
318		}
319
320		// [[Rcpp::export]]
321	23502x	arma::imat getHand_ct(arma::imat ct, int handSetLength, double handSetBaserate) {
322	23502x	int positives = ceil(handSetLength * handSetBaserate);
323	47004x	arma::ivec positiveIndices;
324	47004x	arma::imat otherCT(ct);
325
326	23502x	if(positives > 0) {
327	31004x	arma::irowvec gold1s = ct.row(0);
328	15502x	positiveIndices = sample_contingency_table(gold1s, positives, false);
329
330	15502x	double sumOnes = sum(positiveIndices == 0);
331	15502x	double sumTwos = sum(positiveIndices == 1);
332	15502x	positiveIndices *= 2;
333	31004x	otherCT(0,0) = otherCT(0,0) - sumOnes;
334	31004x	otherCT(0,1) = otherCT(0,1) - sumTwos;
335		}
336
337	47004x	arma::ivec otherAsVector = otherCT.as_col();
338	47004x	arma::ivec otherIndices = sample_contingency_table(otherCT, handSetLength - positives, false);
339	70506x	arma::ivec allIndices = join_cols<ivec>(positiveIndices, otherIndices);
340	23502x	arma::imat newCT = arma::imat(2,2);
341
342	23502x	newCT(0,0) = sum(allIndices == 0);
343	23502x	newCT(1,0) = sum(allIndices == 1);
344	23502x	newCT(0,1) = sum(allIndices == 2);
345	23502x	newCT(1,1) = sum(allIndices == 3);
346
347	47004x	return(newCT);
348		}
349
350		//' @name getHand_kappa
351		//' @title getHand_kappa
352		//'
353		//' @description This function returns kappa calculated from a Handset taken from a larger Contingency Table
354		//'
355		//' @param ct KPs matrix of kappa (column 1) and precision (column 2) values
356		//' @param handSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value)
357		//' @param handSetBaserate baserate to inflate the sampled contingency table to
358		//'
359		//' @return Kappa as double
360		//'
361		//' @export
362		// [[Rcpp::export]]
363	23500x	double getHand_kappa(arma::imat ct, int handSetLength, double handSetBaserate) {
364	47000x	arma::imat newCT = getHand_ct(ct, handSetLength, handSetBaserate);
365
366	23500x	double k = kappa_ct(newCT);
367	47000x	return(k);
368		}
369
370		// This function calculates rho given the parameters of the initial set and
371		// the kappa of the observed handset and is called by rhoK()
372		//
373		// [[Rcpp::export]]
374	120x	double calcRho_c(
375		double x,
376		double OcSBaserate,
377		int testSetLength,
378		double testSetBaserateInflation = 0,
379		int OcSLength = 10000,
380		int replicates = 800,
381		double ScSKappaThreshold = 0.9,
382		double ScSKappaMin = 0.40,
383		double ScSPrecisionMin = 0.6,
384		double ScSPrecisionMax = 1.0,
385		NumericMatrix KPs = NumericMatrix(0)
386		) {
387	120x	if (KPs.size() == 1 && KPs[0] == 0) {
388	20x	KPs = generate_kp_list(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax);
389		}
390
391	120x	if(KPs.nrow() < replicates) {
392	50x	replicates = KPs.nrow();
393		}
394
395	240x	arma::vec savedKappas = arma::vec(replicates);
396	23620x	for (int KProw = 0; KProw < replicates; KProw++) {
397	47000x	NumericVector KP = KPs(KProw, _);
398	23500x	double precision = KP[0];
399	23500x	double kappa = KP[1];
400	23500x	double rec = recall(kappa, OcSBaserate, precision);
401
402	47000x	arma::imat fullCT = contingency_table(precision, rec, OcSLength, OcSBaserate);
403	23500x	double kk = getHand_kappa(fullCT, testSetLength, testSetBaserateInflation);
404	47000x	savedKappas[KProw] = kk;
405		};
406
407	240x	return(getBootPvalue_c(savedKappas, x));
408		}
409
410		// // @name rhoCT_c
411		// // @title rhoCT_ct
412		// // @param contingencyTable [TBD]
413		// // @param testSetBaserateInflation [TBD]
414		// // @param replicates [TBD]
415		// // @param OcSLength [TBD]
416		// // @param OcSBaserate [TBD]
417		// // @param ScSKappaThreshold [TBD]
418		// // @param ScSKappaMin [TBD]
419		// // @param ScSPrecisionMin [TBD]
420		// // @param ScSPrecisionMax [TBD]
421		// //' @export
422		// // [[Rcpp::export]]
423		// arma::vec rhoCT_c(
424		// arma::imat contingencyTable,
425		// double testSetBaserateInflation = 0.2,
426		// int replicates = 800,
427		// int OcSLength = 10000,
428		// double OcSBaserate = -1.0,
429		// double ScSKappaThreshold = 0.65,
430		// double ScSKappaMin = 0.40,
431		// double ScSPrecisionMin = 0.6,
432		// double ScSPrecisionMax = 1
433		// ){
434		// // Observed kappa
435		// double kappa = kappa_ct(contingencyTable);
436		// double size = accu(contingencyTable);
437		// double gold1;
438		//
439		// if(OcSBaserate == -1.0) {
440		// gold1 = accu(contingencyTable.row(0));
441		// OcSBaserate = gold1 / size;
442		// }
443		//
444		// double rho = calcRho_c(
445		// kappa, OcSBaserate,
446		// size, testSetBaserateInflation,
447		// OcSLength, replicates,
448		// ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax
449		// );
450		//
451		// return(NumericVector::create(kappa, rho));
452		// }
453
454
455		// //' @export
456		// // [[Rcpp::export]]
457		// bool run_rho_in_c(
458		// double threshold, double infl,
459		// int handsetLength = 20,
460		// double handsetBaserate = 0.2,
461		// int replicates = 10000
462		// ) {
463		// arma::imat ct = random_contingency_table(10000, infl, 0.4, threshold, 0.2, 0.8);
464		// // handset --> testset
465		// arma::imat handset = getHand_ct(ct, handsetLength, handsetBaserate);
466		// arma::vec res = rhoCT_c(handset, handsetBaserate, replicates, 10000, -1, 0.65, 0.4, 0.6, 1);
467		//
468		// return(res[0]>threshold);
469		// }
470