1 |
### |
|
2 |
#' @title Calculate kappa (Set) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates Cohen's kappa for a given \code{\link{codeSet}}. Called by \code{\link{kappa}}. |
|
6 |
#' |
|
7 |
#' @export |
|
8 |
#' @param set A \code{\link{codeSet}} |
|
9 |
#' |
|
10 |
#' @seealso \code{\link{kappa}} and \code{\link{kappaCT}} |
|
11 |
#' |
|
12 |
#' @return |
|
13 |
#' The kappa of the \code{\link{codeSet}} |
|
14 |
#' |
|
15 |
### |
|
16 |
kappaSet = function(set){ |
|
17 | 70x |
if(any(set != 1 & set != 0)) { |
18 | 1x |
stop("Each set must only consist of zeros and ones.") |
19 |
} |
|
20 |
|
|
21 | 69x |
return(calcKappa(set,isSet = TRUE,kappaThreshold = NULL)); |
22 |
} |
|
23 |
1 |
### |
|
2 |
# Create A Random Set |
|
3 |
# |
|
4 |
# This creates a set with given parameters |
|
5 |
# @param setLength The length of the set to be created |
|
6 |
# @param baserate The baserate of the set to be created |
|
7 |
# @param kappa The kappa of the set to be created |
|
8 |
# @param minPrecision The minimum precision accepted in this set. Defaults to 0 |
|
9 |
# @param maxPrecision The maximum precision accepted in this set. Defaults to 1 |
|
10 |
# @return This returns a set with the given parameters |
|
11 |
### |
|
12 |
createRandomSet = function(setLength, baserate, kappaMin, kappaMax, minPrecision = 0, maxPrecision = 1, type = "set"){ |
|
13 |
#generates a valid precision with the given kappa and baserate |
|
14 | 54x |
KP = generateKPs(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision)[[1]] |
15 | 54x |
kappa = KP$kappa |
16 | 54x |
precision = KP$precision |
17 | 54x |
recall = getR(kappa, baserate, precision) |
18 |
|
|
19 | 54x |
if(type == "ct") { |
20 | 1x |
fullSet = pr.ct(precision = precision, recall = recall, length = setLength, baserate = baserate); |
21 |
} |
|
22 |
else { |
|
23 | 53x |
fullSet = prset(precision = precision, recall = recall, length = setLength, baserate = baserate); |
24 |
} |
|
25 |
|
|
26 | 54x |
return(fullSet); |
27 |
} |
1 |
### |
|
2 |
# Precision Recall Set |
|
3 |
# |
|
4 |
# This creates a set with a given recall and precision |
|
5 |
# @param precision This is the precision of the given set |
|
6 |
# @param recall This is the recall of the given set |
|
7 |
# @param length This is the length of the set |
|
8 |
# @param baserate This is the baserate of the set |
|
9 |
# @return returns a set with the given parameters |
|
10 |
### |
|
11 |
prset = function(precision, recall, length, baserate) { |
|
12 |
#generates a set given the different 4 quadrants |
|
13 | 36354x |
gold1s = max(round(baserate * length , 0), 1); # TP + FN |
14 | 36354x |
gold0s = length - gold1s; # FP + FN |
15 | 36354x |
TP = max(round(gold1s * recall , 0) , 1); # TP |
16 | 36354x |
FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP |
17 | 36354x |
gold = c(rep(1, gold1s), rep(0, gold0s)); |
18 | 36354x |
silver = c(rep(1, TP),rep(0, gold1s - TP), rep(1, FP), rep(0, gold0s - FP)); |
19 | ||
20 | 36354x |
return(cbind(gold, silver)); |
21 |
} |
|
22 | ||
23 |
pr.ct = function(precision, recall, length, baserate) { |
|
24 |
#generates a set given the different 4 quadrants |
|
25 | 1x |
gold1s = max(round(baserate * length , 0), 1); # TP + FN |
26 | 1x |
gold0s = length - gold1s; # FP + FN |
27 | 1x |
TP = max(round(gold1s * recall , 0) , 1); # TP |
28 | 1x |
FP = min(gold0s, max(round(TP * (1 - precision) / precision , 0), 1)); # FP |
29 |
|
|
30 | 1x |
ct = matrix(0, ncol = 2, nrow = 2) |
31 | 1x |
ct[1,1] = TP |
32 | 1x |
ct[1,2] = gold1s - TP |
33 | 1x |
ct[2,1] = FP |
34 | 1x |
ct[2,2] = gold0s - FP |
35 | 1x |
return(ct); |
36 |
} |
1 |
any_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) { |
|
2 | 54x |
any(sapply(target, function(targ) abs(current - targ) < tolerance )) |
3 |
} |
|
4 |
all_equal <- function(target, current, tolerance = sqrt(.Machine$double.eps)) { |
|
5 | 5x |
all(sapply(target, function(targ) abs(current - targ) < tolerance )) |
6 |
} |
|
7 | ||
8 | ||
9 |
#' Convert a codeset to a contingency table |
|
10 |
#' |
|
11 |
#' @param x codeset |
|
12 |
#' |
|
13 |
#' @return contingency table as a 2x2 matrix |
|
14 |
#' |
|
15 |
#' @export |
|
16 |
as.contingency.table <- function(x) { |
|
17 | 19x |
y <- NULL |
18 |
|
|
19 | 19x |
if ( nrow(x) > 2 && ncol(x) == 2 ) { |
20 | 12x |
tp = length(which(x[,1] == 1 & x[,2] == 1)) |
21 | 12x |
tn = length(which(x[,1] == 0 & x[,2] == 0)) |
22 | 12x |
fp = length(which(x[,1] == 0 & x[,2] == 1)) |
23 | 12x |
fn = length(which(x[,1] == 1 & x[,2] == 0)) |
24 |
|
|
25 | 12x |
y <- matrix( |
26 | 12x |
c(tp, fp, fn, tn), |
27 | 12x |
ncol = 2, nrow = 2 |
28 |
) |
|
29 |
} |
|
30 |
else { |
|
31 | 7x |
y <- x |
32 |
} |
|
33 |
|
|
34 | 19x |
dimnames(y) = list(c("first_positive", "first_negative"), c("second_positive", "second_negative")) |
35 | 19x |
class(y) <- c("contingency.table", "rating.set", class(y)) |
36 | 19x |
attr(y, "agreement") <- list( |
37 | 19x |
"true_positive" = y[1, 1], |
38 | 19x |
"false_negative" = y[1, 2], |
39 | 19x |
"false_positive" = y[2, 1], |
40 | 19x |
"true_negative" = y[2, 2] |
41 |
) |
|
42 | 19x |
attr(y, "baserate") <- baserateCT(y) |
43 | 19x |
attr(y, "kappa") <- kappaCT(y) |
44 |
|
|
45 | 19x |
return(y) |
46 |
} |
|
47 | ||
48 |
#' Convert codeset to contingency table |
|
49 |
#' |
|
50 |
#' @param x matrix contingency table (2x2) |
|
51 |
#' |
|
52 |
#' @return 2-column matrix representation of the contingency table |
|
53 |
#' |
|
54 |
#' @export |
|
55 |
as.code.set <- function(x) { |
|
56 | 7x |
y <- NULL |
57 | 7x |
if (all(dim(x) == c(2,2))) { |
58 | 3x |
y <- matrix( |
59 | 3x |
c(rep(1, x[1,1]*2), rep(1:0, x[1,2]), rep(0:1, x[2,1]), rep(0, x[2,2]*2)), |
60 | 3x |
byrow = TRUE, |
61 | 3x |
ncol = 2 |
62 |
) |
|
63 |
} |
|
64 |
else { |
|
65 | 4x |
y <- x |
66 |
} |
|
67 |
|
|
68 | 7x |
class(y) <- c("code.set", "rating.set", class(y)) |
69 | 7x |
attr(y, "baserate") <- baserateSet(y) |
70 | 7x |
attr(y, "kappa") <- kappaSet(y) |
71 | 7x |
colnames(y) <- c("first_rater", "second_rater") |
72 |
|
|
73 | 7x |
return(y) |
74 |
} |
|
75 | ||
76 |
#' Helper function to return special values on a rating set |
|
77 |
#' |
|
78 |
#' @param x Set or Contingency.Table |
|
79 |
#' |
|
80 |
#' @param i Value to search for |
|
81 |
#' |
|
82 |
#' @export |
|
83 |
"$.rating.set" <- function (x, i) { |
|
84 | 55x |
if (i %in% names(attributes(x))) { |
85 | 50x |
return(attr(x, i)) |
86 |
} |
|
87 |
else { |
|
88 | 5x |
if (is(x, "code.set")) { |
89 | 2x |
if (i %in% colnames(x)) { |
90 | 1x |
return(x[, which(colnames(x) == i)]) |
91 |
} |
|
92 |
} |
|
93 | 3x |
else if (is(x, "contingency.table")) { |
94 | 3x |
if (i %in% colnames(x)) { |
95 | 1x |
return(sum(x[, which(colnames(x) == i)])) |
96 |
} |
|
97 | 2x |
else if (i %in% rownames(x)) { |
98 | 1x |
return(sum(x[which(rownames(x) == i), ])) |
99 |
} |
|
100 |
} |
|
101 |
} |
|
102 |
|
|
103 | 2x |
return(NULL) |
104 |
} |
|
105 | ||
106 |
#' @export |
|
107 |
.DollarNames.rating.set <- function(x, pattern) { |
|
108 | 2x |
vals <- .codegen_dollar(x, pattern) |
109 |
|
|
110 | 2x |
cols <- colnames(x) |
111 | 2x |
if (!is.null(cols)) { |
112 | 2x |
vals <- c(cols, vals) |
113 |
} |
|
114 | ||
115 | 2x |
rows <- rownames(x) |
116 | 2x |
if (!is.null(rows)) { |
117 | 1x |
vals <- c(rows, vals) |
118 |
} |
|
119 |
|
|
120 | 2x |
vals |
121 |
} |
|
122 | ||
123 |
.codegen_dollar <- function(x, pattern = "") { |
|
124 | 3x |
attrs <- names(attributes(x)) |
125 | 3x |
vals <- attrs[attrs %in% c("baserate", "kappa", "agreement")] |
126 | 3x |
return(vals) |
127 |
} |
|
128 | ||
129 |
#' @export |
|
130 |
print.rating.set <- function(x, ...) { |
|
131 | 1x |
y <- unclass(x) |
132 | ||
133 | 1x |
for(a in additional_attributes) { |
134 | 3x |
attr(y, a) <- NULL |
135 |
} |
|
136 |
|
|
137 | 1x |
print(y, ...) |
138 |
} |
1 |
### |
|
2 |
#' @title Calculate Baserate (Set) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function will calculate the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}. |
|
6 |
#' |
|
7 |
#' @param set The \code{\link{codeSet}} for which the baserate is calculated |
|
8 |
#' |
|
9 |
#' @seealso \code{\link{baserate}} and \code{\link{baserateCT}} |
|
10 |
#' |
|
11 |
#' @export |
|
12 |
#' @return A list of the format:\describe{ |
|
13 |
#' \item{firstBaserate}{The percentage that a positive code, or a 1, appears in the first rater} |
|
14 |
#' \item{secondBaserate}{The percentage that a positive code, or a 1, appears in the second rater} |
|
15 |
#' \item{averageBaserate}{The average percentage that a positive code, or a 1, appears in either of the two raters} |
|
16 |
#' } |
|
17 |
#' |
|
18 |
### |
|
19 |
baserateSet = function(set){ |
|
20 | 11x |
firstBaserate = length(which(set[,1] == 1))/nrow(set) |
21 | 11x |
secondBaserate = length(which(set[,2] == 1))/nrow(set) |
22 | 11x |
averageBaserate = mean(c(firstBaserate,secondBaserate)) |
23 | 11x |
return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate)) |
24 |
} |
1 |
rating_set_recall <- function (x) { |
|
2 | 8x |
if (!is(x, "contingency.table")) { |
3 | 8x |
x <- as.contingency.table(x) |
4 |
} |
|
5 |
|
|
6 | 8x |
x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_negative) |
7 |
} |
|
8 | ||
9 |
rating_set_precision <- function (x) { |
|
10 | 8x |
if (!is(x, "contingency.table")) { |
11 | 8x |
x <- as.contingency.table(x) |
12 |
} |
|
13 |
|
|
14 | 8x |
x$agreement$true_positive / (x$agreement$true_positive + x$agreement$false_positive) |
15 |
} |
1 |
genPKcombo = function(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate){ |
|
2 |
# currKappa = sample(kappaDistribution, 1, replace = FALSE, prob = kappaProbability) |
|
3 |
# currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability) |
|
4 | 40473x |
currKappa = kappaDistribution[sample.int(length(kappaDistribution), 1, replace = FALSE, prob = kappaProbability)] |
5 | 40473x |
currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)] |
6 | ||
7 | 40473x |
if(!checkBRPKcombo(baserate, currPrecision, currKappa)){ |
8 | 6044x |
precisionMin = (2*baserate*currKappa - 2*baserate - currKappa)/(currKappa - 2) |
9 | 6044x |
indicies = which(precisionDistribution > precisionMin) |
10 |
|
|
11 | 6044x |
if (length(indicies) == 0) { |
12 | 1607x |
return(genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate)) |
13 |
} |
|
14 |
|
|
15 | 4437x |
precisionDistribution = precisionDistribution[indicies] |
16 | 4437x |
precisionProbability = precisionProbability[indicies] |
17 |
# currPrecision = sample(precisionDistribution, 1, replace = FALSE, prob = precisionProbability) |
|
18 | 4437x |
currPrecision = precisionDistribution[sample.int(length(precisionDistribution), 1, replace = FALSE, prob = precisionProbability)] |
19 | 4437x |
return(list(precision = currPrecision, kappa = currKappa)) |
20 |
} else { |
|
21 | 34428x |
return(list(precision = currPrecision, kappa = currKappa)) |
22 |
} |
|
23 |
} |
1 |
### |
|
2 |
#' @title Calculate kappa (contingency Table) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}}. Called by \code{\link{kappa}}. |
|
6 |
#' |
|
7 |
#' @param ct A \code{\link{contingencyTable}} |
|
8 |
#' |
|
9 |
#' @seealso \code{\link{kappa}} and \code{\link{kappaSet}} |
|
10 |
#' |
|
11 |
#' @export |
|
12 |
#' @return The kappa of the \code{\link{contingencyTable}} |
|
13 |
### |
|
14 |
kappaCT = function(ct){ |
|
15 | 1x |
if(any(ct < 0)){stop("Values in Contingency Table must be positive")} |
16 | 1x |
if(ncol(ct) != 2 || nrow(ct) != 2) stop("Incorrect number of dimensions: Contingency table must be 2x2.") |
17 | 26x |
for(i in 1:nrow(ct)) { |
18 | 51x |
for(j in 1:ncol(ct)) { |
19 | 1x |
if (ct[i,j]%%1 != 0) stop("Contingency table values must be positive integers.") |
20 |
} |
|
21 |
} |
|
22 |
|
|
23 | 25x |
return(calcKappa(ct,isSet = FALSE,kappaThreshold = NULL)); |
24 |
} |
1 |
### |
|
2 |
#' @title Get Handset |
|
3 |
#' @description This function is to get a handset of a set and calculate the kappa |
|
4 |
#' @param set This is the set to take a handset of |
|
5 |
#' @param handSetLength This is the length of the handset to take |
|
6 |
#' @param handSetBaserate This is the minimum baserate to inflate the handset to |
|
7 |
#' @param returnSet If TRUE, then return the handSet if FALSE, return the kappa of the handSet |
|
8 |
#' |
|
9 |
#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not |
|
10 |
#' @export |
|
11 |
### |
|
12 |
getHandSet = function(set, handSetLength, handSetBaserate, returnSet=FALSE) { |
|
13 |
#positives is the minimum number of positive pairs in the first rater |
|
14 | 36306x |
positives = ceiling(handSetLength * handSetBaserate); |
15 | 36306x |
posInd = which(set[,1] == 1); |
16 | ||
17 | 1x |
if(positives > length(posInd)){stop("Not enough positives in first rater to inflate to this level")} |
18 |
#if there is an inflated baserate positives will be > 0, so if handSetBaserate > 0, positives > 0 |
|
19 | 36305x |
if (positives > 0) { |
20 |
#goes through and picks a set that fits the number of positives first then randomly samples from the rest |
|
21 |
#if the set created is not a valid set, then it reruns the process until a valid set is found |
|
22 | 15503x |
positiveIndices = posInd[sample.int(length(posInd),size=positives,replace=FALSE)]; |
23 | 15503x |
others = set[!(1:nrow(set) %in% positiveIndices),]; |
24 | 15503x |
otherIndices = sample.int(nrow(others),size=(handSetLength - positives),replace=FALSE); |
25 | 15503x |
this.set = rbind(set[positiveIndices,], others[otherIndices,]); |
26 | 20802x |
} else if (positives == 0){ |
27 |
#if there is no restriction on positives, then the set is generated by randomly sampling from the entire set |
|
28 |
#this.set=matrix(0,2,2); |
|
29 |
#while ((sum(this.set[,2]) == 0 | sum(this.set[,2]) == nrow(this.set))) { |
|
30 | 20802x |
theseIndices = sample.int(nrow(set),size=handSetLength,replace=FALSE); |
31 | 20802x |
this.set = set[theseIndices,]; |
32 |
#} |
|
33 |
} |
|
34 | ||
35 |
#if return set is true return set rather than kappa |
|
36 | 36305x |
if (returnSet) { |
37 | 4x |
this.set <- as.code.set(this.set); |
38 | 4x |
return(this.set); |
39 |
} |
|
40 | ||
41 |
#otherwise return the kappa of the set |
|
42 | 36301x |
handSetKappa = calcKappa(this.set); |
43 |
|
|
44 | 36301x |
return(handSetKappa); |
45 |
} |
|
46 | ||
47 |
### |
|
48 |
#' @title Get Handset |
|
49 |
#' @description This function is to get a handset of a set and calculate the kappa |
|
50 |
#' |
|
51 |
#' @param full.ct This is the set to take a handset of |
|
52 |
#' @param handSetLength This is the length of the handset to take |
|
53 |
#' @param handSetBaserate This is the minimum baserate to inflate the handset to |
|
54 |
#' @param as_kappa If FALSE then return the handSet, if TRUE (default) return the kappa of the handSet |
|
55 |
#' |
|
56 |
#' @return The function returns the handSet if returnSet is TRUE or the kappa of the handSet if not |
|
57 |
#' @export |
|
58 |
### |
|
59 |
getHandCT <- function(full.ct, handSetLength, handSetBaserate, as_kappa = TRUE) { |
|
60 | 3x |
positives <- ceiling(handSetLength * handSetBaserate) |
61 | 3x |
if (positives > sum(full.ct[1,])) { |
62 | 1x |
stop("Not enough positives in first rater to inflate to this level") |
63 |
} |
|
64 |
|
|
65 |
# positiveIndices <- sample_ct(full.ct[1,], positives) |
|
66 |
# |
|
67 |
# other_ct <- full.ct |
|
68 |
# other_ct[1, ] <- other_ct[1, ] - positiveIndices |
|
69 |
# |
|
70 |
# other_ct_vec <- as.vector(other_ct) |
|
71 |
# this_ct <- sample_ct(other_ct_vec, handSetLength - positives) |
|
72 |
# this_ct[1, ] <- this_ct[1, ] + positiveIndices |
|
73 |
|
|
74 | 2x |
this_ct <- getHand_ct(ct = full.ct, handSetLength = handSetLength, handSetBaserate = handSetBaserate) |
75 | ||
76 | 2x |
if (!as_kappa) { |
77 | 1x |
this_ct <- as.contingency.table(this_ct); |
78 | 1x |
return(this_ct) |
79 |
} |
|
80 | ||
81 | 1x |
return(kappa_ct(this_ct)) |
82 |
} |
|
83 | ||
84 |
# sample_ct <- function(x, size) { |
|
85 |
# x_vec <- as.vector(x) |
|
86 |
# x_sampled <- matrix(rep(0, length(x)), ncol = length(x_vec) / 2) |
|
87 |
# for(i in seq(size)) { |
|
88 |
# s <- sample(seq(x_vec), 1, replace = F, prob = x_vec / sum(x_vec)) |
|
89 |
# x_sampled[[s]] = x_sampled[[s]] + 1 |
|
90 |
# x_vec[s] = x_vec[s] - 1 |
|
91 |
# } |
|
92 |
# |
|
93 |
# return(x_sampled) |
|
94 |
# } |
1 |
### |
|
2 |
#' @title Create Simulated codeSet |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' Creates a simulated \code{\link{codeSet}} with the given parameters |
|
6 |
#' |
|
7 |
#' @param length the length of the simulated \code{\link{codeSet}} to be created |
|
8 |
#' @param baserate the \code{\link{baserate}} of the simulated \code{\link{codeSet}} |
|
9 |
#' @param kappaMin the minimum kappa of the simulated \code{\link{codeSet}} |
|
10 |
#' @param kappaMax the maximum kappa of the simulated \code{\link{codeSet}} |
|
11 |
#' @param precisionMin the minimum precision of the simulated \code{\link{codeSet}} |
|
12 |
#' @param precisionMax the maximum precision of the simulated \code{\link{codeSet}} |
|
13 |
#' @param ... Parameters passed to createRandomSet (e.g. type = "set" or type = "ct") |
|
14 |
#' @param tries the maximum number of tries to generate a valid set, smaller set lengths may require an increased number of tries |
|
15 |
#' |
|
16 |
#' @details |
|
17 |
#' \code{\link{codeSet}}s are generated by first picking a random kappa within its range and a random precision within its range. If the random kappa, random precision, and baserate are not mathematically possible, then the precision is resampled from a range of mathematically possible values within its range. A unique simulated \code{\link{codeSet}} is then constructed given these parameters. |
|
18 |
#' |
|
19 |
#' @export |
|
20 |
#' @return A \code{\link{codeSet}} that fulfills the given parameters |
|
21 |
### |
|
22 |
createSimulatedCodeSet = function(length, baserate, kappaMin, kappaMax, precisionMin, precisionMax, ..., tries = 50){ |
|
23 | 1x |
if(length <= 0){stop("length must be positive")} |
24 | 1x |
if(baserate < 0){stop("baserate must be positive")} |
25 | 2x |
if(kappaMin < 0 | kappaMin > 1) stop("kappaMin must be between 0 and 1.") |
26 | 2x |
if(kappaMax < 0 | kappaMax > 1) stop("kappaMax must be between 0 and 1.") |
27 | 1x |
if(kappaMin > kappaMax){stop("kappaMin must be less than kappaMax")} |
28 | 2x |
if(precisionMin < 0 | precisionMin > 1) stop("precisionMin must be between 0 and 1.") |
29 | 2x |
if(precisionMax < 0 | precisionMax > 1) stop("precisionMax must be between 0 and 1.") |
30 | 1x |
if(precisionMin > precisionMax){stop("precisionMin must be less than precisionMax")} |
31 | 1x |
if(tries < 1){stop("tries must be greater than 1")} |
32 |
|
|
33 | 2x |
setFound = NULL |
34 | 2x |
validSet = F |
35 | 2x |
triesLeft = tries |
36 | 2x |
Ks = c() |
37 | 2x |
while(!validSet && triesLeft > 0) { |
38 | 51x |
set = createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax) |
39 | 51x |
k = kappa(set) |
40 |
|
|
41 | 51x |
if ((k > kappaMin && k < kappaMax) || any_equal(k, c(kappaMin, kappaMax))) { |
42 | 1x |
setFound = set |
43 | 1x |
validSet = T |
44 |
} |
|
45 |
|
|
46 | 51x |
Ks = c(Ks, k) |
47 | 51x |
triesLeft = triesLeft - 1 |
48 |
} |
|
49 |
|
|
50 | 2x |
if (is.null(setFound)) { |
51 | 1x |
Ks_under_min <- Ks[which(Ks < kappaMin)] |
52 | 1x |
Kdist.to.min <- kappaMin - Ks_under_min |
53 | 1x |
Kdist.to.min.low <- ifelse(length(Ks_under_min) > 0, min(Kdist.to.min), NA) |
54 |
|
|
55 | 1x |
Ks_over_min <- Ks[which(Ks > kappaMax)] |
56 | 1x |
Kdist.to.max <- Ks_over_min - kappaMax |
57 | 1x |
Kdist.to.max.low <- ifelse(length(Ks_over_min) > 0, min(Kdist.to.max), NA) |
58 |
|
|
59 | 1x |
Kdist.all_mins <- c(Kdist.to.min, Kdist.to.max) |
60 |
|
|
61 | 1x |
both_mins <- c(Kdist.to.min.low, Kdist.to.max.low) |
62 | 1x |
both_mins <- both_mins[!is.na(both_mins)] |
63 | 1x |
closest <- c(Ks_under_min, Ks_over_min) [which(Kdist.all_mins == min(both_mins))][1] |
64 |
|
|
65 | 1x |
stop(paste0( |
66 | 1x |
"Unable to create a set of the provided length (", length, ") ", |
67 | 1x |
"in the provided kappa range (", kappaMin, "\U2014", kappaMax, "). ", |
68 | 1x |
"The closest kappa obtained after ", tries, " tries was: ", |
69 | 1x |
round(x = closest, digits = 5), "\n", |
70 | 1x |
"\tTry increasing the set length or the range of valid kappas" |
71 |
)) |
|
72 |
} |
|
73 |
|
|
74 | 1x |
return(setFound) |
75 |
|
|
76 |
# return(createRandomSet(setLength = length, baserate = baserate, kappaMin = kappaMin, kappaMax = kappaMax, minPrecision = precisionMin, maxPrecision = precisionMax, ...)) |
|
77 |
} |
1 |
### |
|
2 |
#' @title Rho using a file |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}} as defined by the file |
|
6 |
#' and columns (col1, col2), and returns a list containing both values. Called by \code{\link{rho}}. |
|
7 |
#' |
|
8 |
#' @export |
|
9 |
#' |
|
10 |
#' @param col1 The first column from file |
|
11 |
#' @param col2 The second column from file |
|
12 |
#' @template rho-params |
|
13 |
#' |
|
14 |
#' @return A list of the format:\describe{ |
|
15 |
#' \item{rho}{The rho of the \code{\link{codeSet}}} |
|
16 |
#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}} |
|
17 |
#' } |
|
18 |
### |
|
19 |
rho.file <- function( |
|
20 |
x, col1, col2, |
|
21 |
OcSBaserate = NULL, |
|
22 |
testSetBaserateInflation = 0, |
|
23 |
OcSLength = 10000, |
|
24 |
replicates = 800, |
|
25 |
ScSKappaThreshold = 0.9, |
|
26 |
ScSKappaMin = .40, |
|
27 |
ScSPrecisionMin = 0.6, |
|
28 |
ScSPrecisionMax = 1 |
|
29 |
) { |
|
30 | 3x |
setFile = utils::read.csv(x) |
31 | 3x |
set = as.matrix(setFile[,c(col1, col2)]) |
32 |
|
|
33 | 3x |
if (!any(is.na(set))) { |
34 | 2x |
rhoSet( |
35 | 2x |
set, |
36 | 2x |
OcSBaserate, |
37 | 2x |
testSetBaserateInflation, |
38 | 2x |
OcSLength, replicates, |
39 | 2x |
ScSKappaThreshold, ScSKappaMin, |
40 | 2x |
ScSPrecisionMin, ScSPrecisionMax |
41 |
) |
|
42 |
} |
|
43 |
else { |
|
44 | 1x |
stop("The columns provided for col1 and col2 must not contain NA") |
45 |
} |
|
46 |
} |
1 |
generateKPs = function( |
|
2 |
numNeeded, baserate, |
|
3 |
kappaMin, kappaMax, |
|
4 |
precisionMin, precisionMax, |
|
5 |
distributionType = 'FLAT', |
|
6 |
distributionLength = 100000 |
|
7 |
) { |
|
8 | 93x |
kappaDistribution = seq(kappaMin, kappaMax, length = distributionLength) |
9 | ||
10 | 93x |
if (distributionType == 'FLAT') { |
11 |
#probability for a flat distribution |
|
12 | 92x |
kappaProbability = NULL |
13 |
} |
|
14 | 1x |
else if (distributionType == 'BELL') { |
15 |
#probability for a bell curve |
|
16 | 1x |
kappaProbability = stats::dnorm(kappaDistribution, mean = 0.9, sd = 0.1) |
17 |
} |
|
18 | ||
19 | 93x |
precisionDistribution = seq(precisionMin, precisionMax, length = 10000) |
20 | 93x |
precisionProbability = NULL |
21 | ||
22 | 93x |
KPs = tryCatch({ |
23 | 93x |
KPs = list() |
24 | 93x |
for (i in 1:numNeeded) { |
25 | 38865x |
KPs[[length(KPs) + 1]] = genPKcombo(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate) |
26 |
} |
|
27 | 92x |
KPs |
28 | 93x |
},error=function(e){ |
29 | 1x |
NULL |
30 |
}) |
|
31 | 1x |
if(is.null(KPs)){stop("Could not generate a valid set given ranges of kappa and precision")} |
32 | ||
33 | 92x |
return (KPs); |
34 |
} |
1 |
### |
|
2 |
#' @title Rho (contingency Table) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates rho and kappa for a given \code{\link{contingencyTable}}, and returns a list containing both values. Called by \code{\link{rho}}. |
|
6 |
#' |
|
7 |
#' |
|
8 |
#' @template rho-params |
|
9 |
#' |
|
10 |
#' @export |
|
11 |
#' @return A list of the format:\describe{ |
|
12 |
#' \item{rho}{The rho of the \code{\link{contingencyTable}}} |
|
13 |
#' \item{kappa}{The Cohen's Kappa of the \code{\link{contingencyTable}}} |
|
14 |
#' } |
|
15 |
### |
|
16 |
rhoCT = function( |
|
17 |
x, |
|
18 |
OcSBaserate = NULL, |
|
19 |
testSetBaserateInflation = 0, |
|
20 |
OcSLength = 10000, |
|
21 |
replicates = 800, |
|
22 |
ScSKappaThreshold = 0.9, |
|
23 |
ScSKappaMin = .40, |
|
24 |
ScSPrecisionMin = 0.6, |
|
25 |
ScSPrecisionMax = 1 |
|
26 |
){ |
|
27 | 4x |
if (any(x < 0)) { |
28 | 1x |
stop("Values in Contingency Table must be positive") |
29 |
} |
|
30 | 3x |
kappa = kappa_ct(x); |
31 |
|
|
32 | 3x |
if (is.null(OcSBaserate)) { |
33 | 3x |
OcSBaserate = (sum(x[1,])) / (sum(x)) |
34 |
} |
|
35 |
|
|
36 | 3x |
rho_result <- rhoK( |
37 | 3x |
kappa, |
38 | 3x |
OcSBaserate, |
39 | 3x |
sum(x), |
40 | 3x |
testSetBaserateInflation, |
41 | 3x |
OcSLength, |
42 | 3x |
replicates, |
43 | 3x |
ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax |
44 |
) |
|
45 |
|
|
46 | 3x |
return(list( |
47 | 3x |
rho = rho_result, |
48 | 3x |
kappa = kappa, |
49 | 3x |
recall = rating_set_recall(x), |
50 | 3x |
precision = rating_set_precision(x) |
51 |
)) |
|
52 |
} |
1 |
### |
|
2 |
#' @title Rho (kappa) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates rho for an observed kappa value with associated set parameters |
|
6 |
#' (testSetLength and OcSBaserate). Called by \code{\link{rho}}. A p-value is returned and if |
|
7 |
#' this value is less than 0.05, it is said that the handset does generalize to the entire set |
|
8 |
#' |
|
9 |
#' @template rho-params |
|
10 |
#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value) |
|
11 |
#' @param method set to "c" to calculate using the C++ implmentation. Defaults to "standard" |
|
12 |
#' |
|
13 |
#' @export |
|
14 |
#' @return rho for the given parameters |
|
15 |
### |
|
16 |
rhoK = function( |
|
17 |
x, |
|
18 |
OcSBaserate, |
|
19 |
testSetLength, |
|
20 |
testSetBaserateInflation = 0, |
|
21 |
OcSLength = 10000, |
|
22 |
replicates = 800, |
|
23 |
ScSKappaThreshold = 0.9, |
|
24 |
ScSKappaMin = 0.40, |
|
25 |
ScSPrecisionMin = 0.6, |
|
26 |
ScSPrecisionMax = 1, |
|
27 |
method = "standard" |
|
28 |
) { |
|
29 |
if ( |
|
30 | 50x |
(x > 1 | x < 0) || |
31 | 50x |
(OcSBaserate > 1 | OcSBaserate < 0) |
32 | 3x |
) stop("x and OcSBaserate must be between 0 and 1.") |
33 | ||
34 | 47x |
if (OcSBaserate < 0 || ScSKappaThreshold < 0 || ScSKappaMin < 0) |
35 | 2x |
stop("OcSBaserate, ScSKappaThreshold, and ScSKappaMin must be positive.") |
36 | ||
37 |
if ( |
|
38 | 45x |
testSetLength %% 1 != 0 || |
39 | 45x |
OcSLength %% 1 != 0 || |
40 | 45x |
replicates %% 1 != 0 |
41 | 3x |
) stop("testSetLength, OcSLength, and replicates values must be a positive integer.") |
42 | ||
43 | 42x |
if (ScSKappaThreshold < ScSKappaMin) |
44 | 1x |
stop("ScSKappaThreshold must be greater than ScSKappaMin.") |
45 | ||
46 |
if ( |
|
47 | 41x |
(ScSPrecisionMin < 0 | ScSPrecisionMin > 1) || |
48 | 41x |
(ScSPrecisionMax < 0 | ScSPrecisionMax > 1) |
49 | 4x |
) stop("ScSPrecisionMin and ScSPrecisionMax must be between 0 and 1.") |
50 | ||
51 | 37x |
if (ScSPrecisionMax < ScSPrecisionMin) |
52 | 1x |
stop("ScSPrecisionMax must be greater than ScSPrecisionMin.") |
53 | ||
54 | 36x |
result <- NULL |
55 | 36x |
if (method == "standard") { |
56 | 26x |
result <- calcRho( x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) |
57 |
} |
|
58 |
else { |
|
59 | 10x |
result <- calcRho_c(x, OcSBaserate, testSetLength, testSetBaserateInflation, OcSLength, replicates, ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) |
60 |
} |
|
61 | 36x |
return(result) |
62 |
} |
1 |
#' Generate a Handset |
|
2 |
#' |
|
3 |
#' @description Generate a vector representing indices of set, using the handSetBaserate |
|
4 |
#' to determine the minimum number of indices that are positive |
|
5 |
#' |
|
6 |
#' @param set matrix of two columns |
|
7 |
#' @param handSetLength number of indices to find |
|
8 |
#' @param handSetBaserate number between 0 and 1 to use as a minimum number of positive indices |
|
9 |
#' |
|
10 |
#' @return vector of indices from set |
|
11 |
#' @export |
|
12 |
getHandSetIndices = function(set, handSetLength = 20, handSetBaserate = 0.2) { |
|
13 | 4x |
positives = ceiling(handSetLength * handSetBaserate); |
14 | 4x |
posInd = which(set[, 1] == 1); |
15 |
|
|
16 | 4x |
if (positives > length(posInd)) { |
17 | 1x |
stop("Not enough positives in first rater to inflate to this level") |
18 |
} |
|
19 |
|
|
20 | 3x |
this.set = NULL |
21 | 3x |
if (positives > 0) { |
22 | 2x |
positiveIndices = posInd[sample.int(length(posInd), size=positives, replace=FALSE)] |
23 | 2x |
others = which(!(1:nrow(set) %in% positiveIndices)) |
24 | 2x |
otherIndices = sample.int(length(others), size = (handSetLength - positives), replace = FALSE) |
25 | 2x |
this.set = c(positiveIndices, others[otherIndices]) |
26 |
} |
|
27 | 1x |
else if (positives == 0) { |
28 | 1x |
theseIndices = sample.int(length(set),size=handSetLength,replace=FALSE) |
29 | 1x |
this.set = theseIndices |
30 |
} |
|
31 |
|
|
32 | 3x |
return(sample(this.set)) |
33 |
} |
1 |
### |
|
2 |
# This function calculates rho given the parameters of the initial set and the kappa of the observed handset and is |
|
3 |
# called by rhoK() |
|
4 |
### |
|
5 |
calcRho = function( |
|
6 |
x, |
|
7 |
OcSBaserate, |
|
8 |
testSetLength, |
|
9 |
testSetBaserateInflation = 0, |
|
10 |
OcSLength = 10000, |
|
11 |
replicates = 800, |
|
12 |
ScSKappaThreshold = 0.9, |
|
13 |
ScSKappaMin = 0.40, |
|
14 |
ScSPrecisionMin = 0.6, |
|
15 |
ScSPrecisionMax = 1, |
|
16 |
KPs = NULL |
|
17 |
) { |
|
18 |
|
|
19 | 136x |
if(is.null(KPs)) { |
20 | 36x |
KPs = generateKPs(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax) |
21 |
} |
|
22 | 136x |
if(length(KPs) < replicates) { |
23 | 50x |
replicates = length(KPs) |
24 |
} |
|
25 |
|
|
26 |
#creates distribution from random kappa sets |
|
27 | 136x |
savedKappas = rep(0,replicates); |
28 |
|
|
29 |
# cat("Runs: ", runs, "\n") |
|
30 |
# cat("KPs length:, ", length(KPs), "\n") |
|
31 | 136x |
for (i in 1:replicates) { |
32 | 36300x |
KP = KPs[i] |
33 | 36300x |
kappa = KP[[1]]$kappa |
34 | 36300x |
precision = KP[[1]]$precision |
35 | 36300x |
recall = getR(kappa, OcSBaserate, precision) |
36 | 36300x |
fullSet = prset(precision = precision, recall = recall, length = OcSLength, baserate = OcSBaserate); |
37 |
|
|
38 |
#computes kappa of handset to add to distribution |
|
39 | 36300x |
savedKappas[i] = getHandSet(set = fullSet, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation); |
40 |
} |
|
41 | ||
42 |
# compare observedKappa to distribution |
|
43 | 136x |
return(getBootPvalue(savedKappas, x)); |
44 |
} |
1 |
contingencyToSet = function(TP, FP, FN, TN){ |
|
2 | 31x |
setLength = TP + FP + FN + TN; |
3 | ||
4 | 31x |
gold1s = TP + FN; |
5 | 31x |
gold0s = setLength - gold1s; |
6 | ||
7 | 31x |
gold = c(rep(1, gold1s), rep(0, gold0s)); |
8 | 31x |
silver = c(rep(1, TP),rep(0, gold1s - TP), rep(1, FP), rep(0, gold0s - FP)); |
9 | ||
10 | 31x |
return(cbind(gold, silver)); |
11 |
}; |
1 |
### |
|
2 |
#' @title Calculate kappa |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates Cohen's kappa on a \code{\link{contingencyTable}} or a \code{\link{codeSet}} |
|
6 |
#' |
|
7 |
#' @param data A \code{\link{contingencyTable}} or a \code{\link{codeSet}} |
|
8 |
#' |
|
9 |
#' |
|
10 |
#' @seealso \code{\link{kappaSet}} and \code{\link{kappaCT}} |
|
11 |
#' |
|
12 |
#' @examples |
|
13 |
#' #Given a code set |
|
14 |
#' kappa(data = codeSet) |
|
15 |
#' |
|
16 |
#' #Given a contingency Table |
|
17 |
#' kappa(data = contingencyTable) |
|
18 |
#' |
|
19 |
#' @export |
|
20 |
#' @return The kappa of the \code{\link{contingencyTable}} or \code{\link{codeSet}} |
|
21 |
### |
|
22 |
kappa <- function(data) { |
|
23 | 61x |
if (all(dim(data) == 2)) { |
24 | 5x |
return(kappaCT(data)) |
25 |
} else { |
|
26 | 56x |
return(kappaSet(data)) |
27 |
} |
|
28 |
} |
1 |
### |
|
2 |
# Get Recall |
|
3 |
# |
|
4 |
# This gets the recall from a pair |
|
5 |
# @param kappa This is the kappa of the pair |
|
6 |
# @param BR This is the baserate of the pair |
|
7 |
# @param P this is the precision of the pair |
|
8 |
# @return returns the recall given the parameters |
|
9 |
### |
|
10 |
getR = function(kappa, BR, P){ |
|
11 |
#gets recall that corresponds with the kappa, baserate, and precision |
|
12 | 36355x |
top = kappa*P; |
13 | 36355x |
R = top / (2*P-2*BR-kappa+2*BR*kappa); |
14 | 36355x |
return (R); |
15 |
} |
1 |
### |
|
2 |
#' @title Rho Min |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates the minimum testSetLength where it is possible to get a rho less than alpha for the given parameters of rho. |
|
6 |
#' |
|
7 |
#' @param baserate A \code{\link{baserate}} |
|
8 |
#' @param alpha The threshold of significance for rho (similar to an alpha level for a p value), defaulted to 0.05 |
|
9 |
#' @param inc An integer indicating by how much the testSetLength should increase each iteration |
|
10 |
#' @param printInc A boolean indicating whether to print out each increment value with it's corresponding significance for rho |
|
11 |
#' @param ... Any additional parameters passed into \code{\link{rho}} |
|
12 |
#' |
|
13 |
#' @examples |
|
14 |
#' #Add testSetBaserateInflation as an additional parameter |
|
15 |
#' rhoMin(0.2, testSetBaserateInflation = 0.33) |
|
16 |
#' |
|
17 |
#' #Add testSetBaserateInflation as well as changing inc and selecting printInc |
|
18 |
#' rhoMin(0.2, inc = 5, printInc = TRUE, testSetBaserateInflation = 0.33) |
|
19 |
#' |
|
20 |
#' @export |
|
21 |
#' @return The minimum length of testSet, to the nearest multiple of inc, greater than the minimum length, that would give a value where rho less than alpha becomes mathematically possible. |
|
22 |
### |
|
23 |
rhoMin = function(baserate, alpha = 0.05, inc = 10, printInc = FALSE, ...){ |
|
24 | 4x |
rhoVal = 1 |
25 | 4x |
testSetLength = 0 |
26 | 1x |
if(inc%%1 != 0 | inc <= 0) stop("Inc value must be a positive integer.") |
27 | 1x |
if(alpha > 1 | alpha < 0) stop("Alpha must be between 0 and 1.") |
28 |
|
|
29 |
|
|
30 | 2x |
while(rhoVal > alpha){ |
31 | 8x |
testSetLength = testSetLength + inc |
32 | 8x |
rhoVal = rho(1, baserate, testSetLength, ...) |
33 | 8x |
if(printInc) { |
34 | 4x |
cat(testSetLength, rhoVal,"\n") |
35 |
} |
|
36 |
} |
|
37 | 2x |
return(testSetLength) |
38 |
|
|
39 |
} |
1 |
### |
|
2 |
# Check Baserate, Precision, Kappa Combo |
|
3 |
# |
|
4 |
# This function checks to make sure the combination of BR, P, and K will create a valid set |
|
5 |
# @param BR This is the supplied baserate |
|
6 |
# @param P This is the supplied precision |
|
7 |
# @param K This is the supplied kappa |
|
8 |
# @return returns TRUE if the combination is valid, FALSE otherwise |
|
9 |
### |
|
10 |
checkBRPKcombo = function(BR, P, K) { |
|
11 |
#if right is less than P, then it is a valid combination. If not, then the pair of kappa, precision, and baserate does not correspond to a valid set |
|
12 | 40475x |
right = (2*BR*K - 2*BR - K)/(K - 2); |
13 | 40474x |
if(P > right){ |
14 | 34429x |
return(TRUE); |
15 |
}else{ |
|
16 | 6045x |
return(FALSE); |
17 |
} |
|
18 |
} |
1 |
### |
|
2 |
# Calculate Kappa |
|
3 |
# |
|
4 |
# This function calculates kappa given a set |
|
5 |
# @param set This is the set to calculate kappa from |
|
6 |
# @param isSet TRUE if set FALSE if contigency table |
|
7 |
# @param kappaThreshold if null then return kappa otherwise return list of kappa and above or not |
|
8 |
# @return This returns the kappa of the set given |
|
9 |
### |
|
10 |
calcKappa <- function(set, isSet = TRUE, kappaThreshold = NULL) { |
|
11 | 36401x |
if (!isSet) { |
12 | 31x |
set <- contingencyToSet(set[1, 1], set[2, 1], set[1, 2], set[2, 2]) |
13 |
} |
|
14 | ||
15 |
if ( |
|
16 | 36401x |
length(which(set[, 1] == set[, 2] & set[, 1] == 1)) == nrow(set) | |
17 | 36401x |
length(which(set[, 1] == set[, 2] & set[, 1] == 0)) == nrow(set) |
18 |
) { |
|
19 | 917x |
return(1) |
20 |
} |
|
21 | ||
22 |
#calculates kappa by calculating the agreement and the baserates and then creating the adjacenty matrix |
|
23 | 35484x |
agreement <- length(which(set[, 1] == set[, 2])) / nrow(set); |
24 | 35484x |
baserate2 <- length(which(1 == set[, 2])) / nrow(set); |
25 | 35484x |
baserate1 <- length(which(1 == set[, 1])) / nrow(set); |
26 | 35484x |
randomAgreement <- baserate1 * baserate2 + (1 - baserate1) * (1 - baserate2); |
27 | 35484x |
kappa <- (agreement - randomAgreement) / (1 - randomAgreement); |
28 | ||
29 | 35484x |
if (is.null(kappaThreshold)) { |
30 | 35482x |
return(kappa); |
31 |
} else { |
|
32 | 2x |
return(list(kappa = kappa, above = kappa > kappaThreshold)) |
33 |
} |
|
34 |
} |
1 |
### |
|
2 |
#' @title Calculate Baserate |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates the baserate of the first rater, second rater, and the average of both the raters. |
|
6 |
#' |
|
7 |
#' @details |
|
8 |
#' A baserate is the percentage, as a decimal, that a positive code appears in data (either a \code{\link{codeSet}} or \code{\link{contingencyTable}}) for a given rater. It is assumed that the first rater is more experienced and thus provides a better estimation of the actual baserate for a given code, so the first rater's baserate is often used as if it is the actual baserate. If the raters are assumed to have the same experience level, the average baserate may give a better estimation. If the second rater is more experienced, the second rater's baserate may give a better estimation. Functions assume that the first rater is the more experienced rater and thus uses the first rater's baserate as the overall baserate estimation. |
|
9 |
#' |
|
10 |
#' |
|
11 |
#' @param data The \code{\link[=getTestSet]{testSet}} or \code{\link{contingencyTable}} for which the baserate is calculatede |
|
12 |
#' |
|
13 |
#' @seealso \code{\link{baserateSet}} and \code{\link{baserateCT}} |
|
14 |
#' |
|
15 |
#' @examples |
|
16 |
#' #Given a code set |
|
17 |
#' baserate(data = codeSet) |
|
18 |
#' |
|
19 |
#' #Given a contingency Table |
|
20 |
#' baserate(data = contingencyTable) |
|
21 |
#' |
|
22 |
#' @export |
|
23 |
#' @return A list of the format:\describe{ |
|
24 |
#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater} |
|
25 |
#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater} |
|
26 |
#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.} |
|
27 |
#' } |
|
28 |
#' |
|
29 |
### |
|
30 |
baserate = function(data){ |
|
31 | 7x |
if (nrow(data) != 2) { |
32 | 4x |
return(baserateSet(data)) |
33 |
} else { |
|
34 | 3x |
return(baserateCT(data)) |
35 |
} |
|
36 |
} |
1 |
### |
|
2 |
# Generate Precision Combo |
|
3 |
# |
|
4 |
# This function generates a precision that fits within the range and will create a valid set |
|
5 |
# @param kappa This is the kappa of the desired pair |
|
6 |
# @param Pmin This is the minimum precision desired |
|
7 |
# @param Pmax This is the maximum precision desired |
|
8 |
# @param BR This is the baserate of the desired pair |
|
9 |
# @return This returns a precision value that will generate a valid set with the other parameters |
|
10 |
#### |
|
11 | ||
12 |
genPcombo = function(kappa, Pmin, Pmax, BR){ |
|
13 |
#gets a random precision in the precision range |
|
14 | ! |
currPrecision = stats::runif(1, Pmin, Pmax); |
15 |
#checks is the combination of precision, kappa, and baserate is valid. If it is vaid, return the precision, if not, recall the function until a valid precision is found |
|
16 | ! |
if(!checkBRPKcombo(BR, currPrecision, kappa)){ |
17 | ! |
genPcombo(kappa, Pmin, Pmax, BR); |
18 |
}else{ |
|
19 | ! |
return(currPrecision); |
20 |
} |
|
21 |
} |
1 |
### |
|
2 |
#' @title Get Test Set |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function gets a \emph{testSet} from a larger \code{\link{codeSet}} given certain sampling parameters. |
|
6 |
#' |
|
7 |
#' @details |
|
8 |
#' A \emph{testSet} is a \code{\link{codeSet}} that is a subset of a larger \code{\link{codeSet}} with a given set of properties. A \emph{testSet} is constructed by sampling (without replacement) P rows from rows in the larger \code{\link{codeSet}} where the first rater's code was 1, and then appending an additional sample (without replacement) of R rows taken at random from the larger \code{\link{codeSet}} excluding rows included in the first P rows sampled. P is computed as the minbaserate * length of the \emph{testset}. R is computed as testSetLength - P. The result of this sampling procedure is to create a sample with a minimum baserate regardless of the baserate of the larger \code{\link{codeSet}}.If \emph{testSetBaserateInflation} is set to zero, the function selects rows at random. |
|
9 |
#' |
|
10 |
#' @param set The \code{\link{codeSet}} from which the \emph{testSet} is taken |
|
11 |
#' @param testSetLength The length of the \emph{testSet} to be taken |
|
12 |
#' @param testSetBaserateInflation The minimum guaranteed \code{\link{baserate}} of the \emph{testSet}. Default to 0 |
|
13 |
#' |
|
14 |
#' |
|
15 |
#' @export |
|
16 |
#' @return A \code{\link{codeSet}} with the properties specified |
|
17 |
### |
|
18 | ||
19 |
getTestSet = function(set, testSetLength, testSetBaserateInflation = 0){ |
|
20 | 1x |
if(length(unlist(set)) < testSetLength){stop("testSetLength must be less than the length of set")} |
21 | 1x |
if(testSetLength %% 1 | testSetLength < 1) {stop("testSetLength value must be a positive integer.")} |
22 | 1x |
if(testSetBaserateInflation < 0){stop("testSetBaserateInfation must be positive")} |
23 | 1x |
if(testSetBaserateInflation >= 1){stop("testSetBaserateInflation must be below 1")} |
24 | 5x |
if(length(which(set[,1] == 1 & set[,2] == 1)) == nrow(set)){ |
25 | 1x |
return(set[1:testSetLength,]) |
26 | 4x |
}else if(length(which(set[,1] == 0 & set[,2] == 0)) == nrow(set)){ |
27 | 2x |
if(testSetBaserateInflation == 0){ |
28 | 1x |
return(set[1:testSetLength,]) |
29 |
}else{ |
|
30 | 1x |
stop("Not enough positives in first rater to inflate to this level") |
31 |
} |
|
32 |
} |
|
33 | 2x |
return(getHandSet(set = set, handSetLength = testSetLength, handSetBaserate = testSetBaserateInflation, returnSet = T)) |
34 |
} |
1 |
### |
|
2 |
#' @title Calculate Baserate (CT) |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates the baserate of the first rater, second rater, and the average of both the raters. Called by \code{\link{baserate}}. |
|
6 |
#' |
|
7 |
#' @export |
|
8 |
#' |
|
9 |
#' @param CT The \code{\link{contingencyTable}} for which the baserate is calculated |
|
10 |
#' |
|
11 |
#' @seealso \code{\link{baserate}} and \code{\link{baserateSet}} |
|
12 |
#' |
|
13 |
#' @return A list of the format:\describe{ |
|
14 |
#' \item{firstBaserate}{The percentage of the data for which a positive code, or a 1, appears in the first rater} |
|
15 |
#' \item{secondBaserate}{The percentage of the data for which a positive code, or a 1, appears in the second rater} |
|
16 |
#' \item{averageBaserate}{The average of the firstBaserate and secondBaserate.} |
|
17 |
#' } |
|
18 |
#' |
|
19 |
### |
|
20 | ||
21 |
baserateCT = function(CT){ |
|
22 | 1x |
if(any(CT < 0)){stop("Values in Contingency Table must be positive")} |
23 | 22x |
firstBaserate = sum(CT[1,])/sum(CT) |
24 | 22x |
secondBaserate = sum(CT[,1])/sum(CT) |
25 | 22x |
averageBaserate = mean(c(firstBaserate,secondBaserate)) |
26 | 22x |
return(list(firstBaserate = firstBaserate, secondBaserate = secondBaserate, averageBaserate = averageBaserate)) |
27 |
} |
1 |
### |
|
2 |
# Get Boot P Value |
|
3 |
# |
|
4 |
# This function gets the p value of the result in the distribution |
|
5 |
# @param distribution The distribution that the result falls into |
|
6 |
# @param result The value that you are evaluating where in the distribution it falls |
|
7 |
# @return This returns the p value from the result into the distribution |
|
8 |
### |
|
9 |
getBootPvalue = function(distribution, result) { |
|
10 |
#returns the percentage of the time that the distribution was greater or equal to the observed kappa |
|
11 | 139x |
N = length(distribution); |
12 | 139x |
if(result < mean(distribution, na.rm = T)) { |
13 |
#if the result is less than the mean of the distribution, than the p value is 1 |
|
14 | 16x |
return(1); |
15 |
} else { |
|
16 |
#return the number of times that the distribution is greater than the result as a percentage of the total number of items in the distribution |
|
17 | 123x |
return(length(which(distribution >= result)) / N); |
18 |
} |
|
19 |
} |
1 |
### |
|
2 |
#' @include rhoK.R |
|
3 |
#' @title Rho (set) |
|
4 |
#' |
|
5 |
#' @description |
|
6 |
#' This function calculates rho and kappa for a given \code{\link[=getTestSet]{testSet}}, and returns a list containing both values. Called by \code{\link{rho}}. |
|
7 |
#' |
|
8 |
#' @export |
|
9 |
#' |
|
10 |
#' @template rho-params |
|
11 |
#' |
|
12 |
#' @return A list of the format:\describe{ |
|
13 |
#' \item{rho}{The rho of the \code{\link{codeSet}}} |
|
14 |
#' \item{kappa}{The Cohen's Kappa of the \code{\link{codeSet}}} |
|
15 |
#' } |
|
16 |
### |
|
17 |
rhoSet = function( |
|
18 |
x, |
|
19 |
OcSBaserate = NULL, |
|
20 |
testSetBaserateInflation = 0, |
|
21 |
OcSLength = 10000, |
|
22 |
replicates = 800, |
|
23 |
ScSKappaThreshold = 0.9, |
|
24 |
ScSKappaMin = 0.40, |
|
25 |
ScSPrecisionMin = 0.6, |
|
26 |
ScSPrecisionMax = 1 |
|
27 |
) { |
|
28 | 5x |
kappa = kappaSet(x); |
29 |
|
|
30 | 5x |
row.count = nrow(x); |
31 | 5x |
if(is.null(OcSBaserate)) { |
32 | 4x |
OcSBaserate = length(which(x[,1] == 1)) / row.count; |
33 |
} |
|
34 | ||
35 | 5x |
return(list( |
36 | 5x |
rho = rhoK( |
37 | 5x |
kappa, |
38 | 5x |
OcSBaserate, |
39 | 5x |
row.count, |
40 | 5x |
testSetBaserateInflation, |
41 | 5x |
OcSLength, |
42 | 5x |
replicates, |
43 | 5x |
ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax |
44 |
), |
|
45 | 5x |
kappa = kappa, |
46 | 5x |
recall = rating_set_recall(x), |
47 | 5x |
precision = rating_set_precision(x) |
48 |
)) |
|
49 |
} |
1 |
### |
|
2 |
#' @title Rho |
|
3 |
#' |
|
4 |
#' @description |
|
5 |
#' This function calculates rho for a \code{\link[=getTestSet]{testSet}}, \code{\link{contingencyTable}}, or an observed kappa value with associated set parameters (testSetLength and OcSBaserate). |
|
6 |
#' |
|
7 |
#' @details |
|
8 |
#' Rho is a Monte Carlo rejective method of interrater reliability statistics, implemented here for Cohen's Kappa. Rho constructs a collection of data sets in which kappa is below a specified threshold, and computes the empirical distribution on kappa based on the specified sampling procedure. Rho returns the percent of the empirical distribution greater than or equal to an observed kappa. As a result, Rho quantifies the type 1 error in generalizing from an observed test set to a true value of agreement between two raters. |
|
9 |
#' |
|
10 |
#' Rho starts with an observed kappa value, calculated on a subset of a \code{\link{codeSet}}, known as an observed \code{\link[=getTestSet]{testSet}}, and a \emph{kappa threshold} which indicates what is considered significant agreement between raters. |
|
11 |
#' |
|
12 |
#' It then generates a collection of fully-coded, simulated |
|
13 |
#' \code{\link[=getTestSet]{codeSets}} (ScS), further described in |
|
14 |
#' \code{\link{createSimulatedCodeSet}}, all of which have a kappa value below the kappa |
|
15 |
#' threshold and similar properties as the original \code{\link{codeSet}}. |
|
16 |
#' |
|
17 |
#' Then, kappa is calculated on a \code{\link[=getTestSet]{testSet}} sampled from each of the ScSs in the |
|
18 |
#' collection to create a null hypothesis distribution. These \code{\link[=getTestSet]{testSets}} mirror the observed \code{\link[=getTestSet]{testSets}} in their size and sampling method. How these \code{\link[=getTestSet]{testSets}} are sampled is futher described in \code{\link{getTestSet}}. |
|
19 |
#' |
|
20 |
#' The null hypothesis is that the observed \code{\link[=getTestSet]{testSet}}, was sampled from a data set, which, if both raters were to code in its entirety, would result in a level of agreement below the kappa threshold. |
|
21 |
#' |
|
22 |
#' For example, using an alpha level of 0.05, if the observed kappa is greater than 95 percent of the kappas in the null hypothesis distribution, the null hypothesis is rejected. Then one can conclude that the two raters would have acceptable agreement had they coded the entire data set. |
|
23 |
#' |
|
24 |
#' @template rho-params |
|
25 |
#' @param testSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value) |
|
26 |
#' |
|
27 |
#' @examples |
|
28 |
#' # Given an observed kappa value |
|
29 |
#' rho(x = 0.88, OcSBaserate = 0.2, testSetLength = 80) |
|
30 |
#' |
|
31 |
#' # Given a test Set |
|
32 |
#' rho(x = codeSet) |
|
33 |
#' |
|
34 |
#' # Given a contingency Table |
|
35 |
#' rho(x = contingencyTable) |
|
36 |
#' |
|
37 |
#' @export |
|
38 |
#' @return rho and kappa for the given data and parameters (unless kappa is given) |
|
39 |
### |
|
40 |
rho = function( |
|
41 |
x, |
|
42 |
OcSBaserate = NULL, |
|
43 |
testSetLength = NULL, |
|
44 |
testSetBaserateInflation = 0, |
|
45 |
OcSLength = 10000, |
|
46 |
replicates = 800, |
|
47 |
ScSKappaThreshold = 0.9, |
|
48 |
ScSKappaMin = 0.40, |
|
49 |
ScSPrecisionMin = 0.6, |
|
50 |
ScSPrecisionMax = 1 |
|
51 |
) { |
|
52 | ||
53 |
### Calculate rho given a kappa value |
|
54 | 15x |
if (is(x, "numeric")) { |
55 | 1x |
if (is.null(OcSBaserate)) { stop("Must give a baserate when a kappa value is given") } |
56 | 1x |
if (is.null(testSetLength)) { stop("Must give a testSetLength when a kappa value is given") } |
57 | ||
58 | 8x |
result <- rhoK(x, OcSBaserate, |
59 | 8x |
testSetLength, |
60 | 8x |
testSetBaserateInflation, |
61 | 8x |
OcSLength, |
62 | 8x |
replicates, |
63 | 8x |
ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) |
64 |
} |
|
65 | ||
66 |
### Calculate rho given a code set |
|
67 | 5x |
else if (nrow(x) != 2) { |
68 | 2x |
result <- rhoSet(x, OcSBaserate, |
69 | 2x |
testSetBaserateInflation, |
70 | 2x |
OcSLength, |
71 | 2x |
replicates, |
72 | 2x |
ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) |
73 |
} |
|
74 | ||
75 |
### Calculate rho given a contingency table |
|
76 |
else { |
|
77 | 3x |
result <- rhoCT(x, OcSBaserate, |
78 | 3x |
testSetBaserateInflation, |
79 | 3x |
OcSLength, |
80 | 3x |
replicates, |
81 | 3x |
ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax) |
82 |
} |
|
83 | ||
84 | 13x |
return(result) |
85 |
} |
1 |
#include <RcppArmadilloExtensions/sample.h> |
|
2 |
// [[Rcpp::depends(RcppArmadillo)]] |
|
3 | ||
4 |
#include <RcppArmadillo.h> |
|
5 |
#include <Rcpp.h> |
|
6 |
using namespace Rcpp; |
|
7 |
using namespace arma; |
|
8 | ||
9 |
// NumericMatrix toNumericMatrix(arma::imat x) { |
|
10 |
// int nRows=x.n_rows; |
|
11 |
// |
|
12 |
// NumericMatrix y(nRows, x.n_cols); |
|
13 |
// for (int j=0; j<x.n_rows; j++) { |
|
14 |
// for (int i=0; i<x.n_cols; i++) { |
|
15 |
// y(j,i) = x(j, i); |
|
16 |
// } |
|
17 |
// } |
|
18 |
// |
|
19 |
// return y; |
|
20 |
// } |
|
21 | ||
22 |
// // @name sample_weighted |
|
23 |
// // @title sample_weighted |
|
24 |
// // @param x vector |
|
25 |
// // @param n integer |
|
26 |
// // @export |
|
27 |
// //' @export |
|
28 |
// // [[Rcpp::export]] |
|
29 |
// arma::vec sample_weighted(arma::vec xx, arma::vec probs, int n) { |
|
30 |
// // NumericVector ret(n); |
|
31 |
// // NumericVector xx(x); |
|
32 |
// arma::vec ret(n); |
|
33 |
// |
|
34 |
// // IntegerVector rng = seq(0, xx.size()-1); |
|
35 |
// arma::ivec rng = regspace<ivec>(0, xx.size() - 1); |
|
36 |
// for(int w = 0; w < n; w++) { |
|
37 |
// // int tot = sum(xx); |
|
38 |
// // NumericVector probs = xx / tot; |
|
39 |
// // arma::vec probs2 = toArmaVec(probs); |
|
40 |
// // IntegerVector s = Rcpp::sugar::sample(rng, 1, false, probs); |
|
41 |
// arma::ivec s = Rcpp::RcppArmadillo::sample(rng, 1, false, probs); |
|
42 |
// xx[s[0]] -= 1; |
|
43 |
// ret[w] = s[0]; |
|
44 |
// } |
|
45 |
// // return(xx); |
|
46 |
// return(ret + 1); |
|
47 |
// } |
|
48 | ||
49 |
// // ' @export |
|
50 |
// // [[Rcpp::export]] |
|
51 |
// arma::ivec sample_weighted2(arma::imat xx, int n, bool forR = true) { |
|
52 |
// arma::ivec ret = arma::ivec(n); |
|
53 |
// NumericMatrix x = toNumericMatrix(xx); |
|
54 |
// IntegerVector rng = seq(0, x.size() - 1); |
|
55 |
// |
|
56 |
// for(int w = 0; w < n; w++) { |
|
57 |
// double tot = accu(xx); |
|
58 |
// NumericVector probs = x / tot; |
|
59 |
// IntegerVector s = sample(rng, 1, false, probs); |
|
60 |
// x[s[0]] -= 1; |
|
61 |
// ret[w] = s[0]; |
|
62 |
// } |
|
63 |
// |
|
64 |
// if(forR == true) { |
|
65 |
// return(ret + 1); |
|
66 |
// } else { |
|
67 |
// return(ret); |
|
68 |
// } |
|
69 |
// } |
|
70 | ||
71 |
//' @title sample_contingency_table |
|
72 |
//' @name sample_contingency_table |
|
73 |
//' |
|
74 |
//' @param xx contingency table matrix |
|
75 |
//' @param n int size of the contingency table |
|
76 |
//' @param forR bool if true, add 1 to the results accounting for R indices starting at 1 |
|
77 |
//' |
|
78 |
//' @export |
|
79 |
// [[Rcpp::export]] |
|
80 | 39004x |
arma::ivec sample_contingency_table(arma::imat xx, int n, bool forR = true) { |
81 | 78008x |
arma::ivec ret = arma::ivec(n); |
82 |
|
|
83 |
int maxInt; |
|
84 |
int s; |
|
85 | 2229214x |
for(int w = 0; w < n; w++) { |
86 | 2190210x |
maxInt = accu(xx); |
87 | 2190210x |
s = randi<int>(distr_param(0, maxInt)); |
88 | 14595227x |
ret[w] = (s <= xx(0) ? 0 : (s <= (xx(0)+xx(1))) ? 1 : (s <= (xx(0)+xx(1)+xx(2))) ? 2 : 3); |
89 | 6570630x |
xx(ret[w])--; |
90 |
} |
|
91 |
|
|
92 | 39004x |
if(forR == true) return(ret + 1); |
93 | 39004x |
else return(ret); |
94 |
} |
|
95 | ||
96 |
//' @name getBootPvalue_c |
|
97 |
//' @title getBootPvalue_c |
|
98 |
//' |
|
99 |
//' @param distribution vector of calculated kappas |
|
100 |
//' @param result double calculated kappa to compare against |
|
101 |
//' |
|
102 |
//' @description |
|
103 |
//' returns the percentage of the time that the distribution was greater or equal to the observed kappa |
|
104 |
//' if the result is less than the mean of the distribution, than the p value is 1 |
|
105 |
//' else return the number of times that the distribution is greater than the result as a percentage of the total number |
|
106 |
//' of items in the distribution |
|
107 |
//' |
|
108 |
//' @return double calculated p-value |
|
109 |
//' |
|
110 |
//' @export |
|
111 |
// [[Rcpp::export]] |
|
112 | 122x |
double getBootPvalue_c(arma::vec distribution, double result) { |
113 | 122x |
if(result < mean(distribution)) { |
114 | 17x |
return(1.0); |
115 |
} |
|
116 | ||
117 |
else { |
|
118 | 210x |
arma::uvec matched = find(distribution >= result); |
119 |
|
|
120 | 105x |
double rho = (matched.size() * 1.0) / distribution.size(); |
121 | 105x |
return( rho ); |
122 |
} |
|
123 |
} |
|
124 | ||
125 |
// Validate a Baserate/Kappa combo against the supplied Precision. |
|
126 |
// Return true if Precision is greater than the calculated right value |
|
127 |
// [[Rcpp::export]] |
|
128 | 79441x |
bool check_BRK_combo(double BR, double P, double K) { |
129 | 79441x |
double right = ((2 * BR * K) - (2 * BR) - K) / (K - 2); |
130 |
|
|
131 | 79441x |
return(P > right); |
132 |
} |
|
133 | ||
134 |
//' @name recall |
|
135 |
//' @title recall |
|
136 |
//' |
|
137 |
//' @param kappa double |
|
138 |
//' @param BR double |
|
139 |
//' @param P double |
|
140 |
//' |
|
141 |
//' @return Recall calculated from provided kappa, BR, and P |
|
142 |
//' |
|
143 |
//' @export |
|
144 |
// [[Rcpp::export]] |
|
145 | 23502x |
double recall(double kappa, double BR, double P) { |
146 | 23502x |
double top = kappa * P; |
147 | 23502x |
double R = top / (2*P-2*BR-kappa+2*BR*kappa); |
148 | 23502x |
return(R); |
149 |
} |
|
150 | ||
151 |
// Find a valid precision and kappa combo from a distribution of Kappas and Precisions given a baserate |
|
152 |
// |
|
153 |
// [[Rcpp::export]] |
|
154 | 79441x |
NumericVector find_valid_pk( |
155 |
arma::colvec kappaDistribution, arma::vec kappaProbability, |
|
156 |
arma::colvec precisionDistribution, arma::vec precisionProbability, |
|
157 |
double baserate |
|
158 |
) { |
|
159 | 158882x |
arma::ivec kappaRng = regspace<arma::ivec>(0, kappaDistribution.size() - 1); |
160 | ||
161 | 158882x |
arma::ivec whKappa; |
162 | 79441x |
if (kappaProbability.n_elem > 0) { |
163 | 53440x |
whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false, kappaProbability); |
164 |
} else { |
|
165 | 26001x |
whKappa = Rcpp::RcppArmadillo::sample(kappaRng, 1, false); |
166 |
} |
|
167 | 158882x |
double currKappa = kappaDistribution[whKappa.at(0)]; |
168 |
|
|
169 | 158882x |
arma::ivec precRng = regspace<arma::ivec>(0, precisionDistribution.size() - 1); |
170 | 158882x |
arma::ivec whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false); |
171 | 158882x |
double currPrec = precisionDistribution[whPrec.at(0)]; |
172 |
|
|
173 | 79441x |
if (!check_BRK_combo(baserate, currPrec, currKappa)) { |
174 | 46824x |
double precisionMin = (2 * baserate * currKappa - 2 * baserate - currKappa) / (currKappa - 2); |
175 | 90210x |
arma::uvec indicies = find(precisionDistribution > precisionMin); |
176 | ||
177 | 46824x |
if (indicies.size() == 0) { |
178 | 3438x |
return(find_valid_pk(kappaDistribution, kappaProbability, precisionDistribution, precisionProbability, baserate)); |
179 |
} |
|
180 | ||
181 | 86772x |
precisionDistribution = precisionDistribution.elem(indicies); |
182 |
|
|
183 | 43386x |
if (precisionProbability.size() > 0) { |
184 | 2x |
precisionProbability = precisionProbability.elem(indicies); |
185 |
} |
|
186 |
|
|
187 | 43386x |
precRng = regspace<ivec>(0, precisionDistribution.size() - 1); |
188 | 43386x |
whPrec = Rcpp::RcppArmadillo::sample(precRng, 1, false); |
189 | 86772x |
currPrec = precisionDistribution[whPrec.at(0)]; |
190 |
} |
|
191 |
|
|
192 | 76003x |
return(Rcpp::NumericVector::create(currPrec, currKappa)); |
193 |
} |
|
194 | ||
195 | ||
196 |
//' @name generateKPs_c |
|
197 |
//' @title generate_kp_list |
|
198 |
//' |
|
199 |
//' @param numNeeded int |
|
200 |
//' @param baserate double |
|
201 |
//' @param kappaMin double |
|
202 |
//' @param kappaMax double |
|
203 |
//' @param precisionMin double |
|
204 |
//' @param precisionMax double |
|
205 |
//' @param distributionType int 0 - normal (default), 1 - bell |
|
206 |
//' @param distributionLength long |
|
207 |
//' |
|
208 |
//' @return matrix of kappa and precision values (column 1 as precision) |
|
209 |
//' |
|
210 |
//' @export |
|
211 |
// [[Rcpp::export]] |
|
212 | 23x |
Rcpp::NumericMatrix generate_kp_list( |
213 |
int numNeeded, double baserate, |
|
214 |
double kappaMin, double kappaMax, |
|
215 |
double precisionMin, double precisionMax, |
|
216 |
int distributionType = 0, long distributionLength = 10000 |
|
217 |
) { |
|
218 | 23x |
double kappaStep = ((kappaMax - kappaMin) / (distributionLength - 1)); |
219 | ||
220 | 46x |
colvec kappaDistribution = regspace(kappaMin, kappaStep, kappaMax); |
221 | ||
222 | 46x |
arma::vec kappaProbability; |
223 | 23x |
if(distributionType == 1) { |
224 | 1x |
kappaProbability = normpdf(kappaDistribution, 0.9, 0.1); |
225 |
} |
|
226 | ||
227 | 23x |
double precStep = (precisionMax - precisionMin) / (10000 - 1); |
228 | 46x |
colvec precisionDistribution = regspace(precisionMin, precStep, precisionMax); |
229 | ||
230 | 46x |
arma::vec precisionProbability; |
231 | 23x |
Rcpp::NumericMatrix KPs(numNeeded,2); |
232 | ||
233 | 76024x |
for(int i=0; i < numNeeded; i++) { |
234 | 152002x |
KPs(i, _) = find_valid_pk( |
235 |
kappaDistribution, |
|
236 |
kappaProbability, |
|
237 |
precisionDistribution, |
|
238 |
precisionProbability, |
|
239 |
baserate |
|
240 |
); |
|
241 |
} |
|
242 | ||
243 | 46x |
return (KPs); |
244 |
} |
|
245 | ||
246 |
//' @name contingency_table |
|
247 |
//' @title contingency_table |
|
248 |
//' @description Create a contingency table using the provied precision, recall, baserate, and length. |
|
249 |
//' |
|
250 |
//' @param precision double |
|
251 |
//' @param rec double |
|
252 |
//' @param length int |
|
253 |
//' @param baserate double |
|
254 |
//' |
|
255 |
//' @export |
|
256 |
// [[Rcpp::export]] |
|
257 | 23503x |
arma::imat contingency_table(double precision, double rec, int length, double baserate) { |
258 | 23503x |
int gold1s = max(NumericVector::create(round(baserate * length), 1)); |
259 | 23503x |
int gold0s = length - gold1s; |
260 | 23503x |
int TP = max(NumericVector::create(round(gold1s * rec), 1)); |
261 | 23503x |
int FP = min(NumericVector::create(gold0s, max(NumericVector::create(round(TP * (1 - precision) / precision), 1)))); |
262 |
|
|
263 | 23503x |
arma::imat ct = arma::imat(2,2); |
264 | 23503x |
ct(0,0) = TP; |
265 | 23503x |
ct(0,1) = gold1s - TP; |
266 | 23503x |
ct(1,0) = FP; |
267 | 23503x |
ct(1,1) = gold0s - FP; |
268 |
|
|
269 | 23503x |
return(ct); |
270 |
} |
|
271 | ||
272 |
//' @name random_contingency_table |
|
273 |
//' @title random_contingency_table |
|
274 |
//' @param setLength [TBD] |
|
275 |
//' @param baserate [TBD] |
|
276 |
//' @param kappaMin [TBD] |
|
277 |
//' @param kappaMax [TBD] |
|
278 |
//' @param minPrecision [TBD] |
|
279 |
//' @param maxPrecision [TBD] |
|
280 |
//' |
|
281 |
//' @export |
|
282 |
// [[Rcpp::export]] |
|
283 | 1x |
arma::imat random_contingency_table( |
284 |
int setLength,double baserate, |
|
285 |
double kappaMin, double kappaMax, |
|
286 |
double minPrecision = 0, double maxPrecision = 1 |
|
287 |
) { |
|
288 | 2x |
NumericMatrix KP = generate_kp_list(1, baserate, kappaMin, kappaMax, minPrecision, maxPrecision); |
289 | 1x |
double kappa = KP(0,1); |
290 | 1x |
double precision = KP(0,0); |
291 | 1x |
double rec = recall(kappa, baserate, precision); |
292 |
|
|
293 | 1x |
arma::imat ct = contingency_table(precision, rec, setLength, baserate); |
294 |
|
|
295 | 2x |
return(ct); |
296 |
} |
|
297 | ||
298 |
//' @title kappa_ct |
|
299 |
//' @description Calculate kappa from a contingency table |
|
300 |
//' @param ct [TBD] |
|
301 |
//' |
|
302 |
//' @export |
|
303 |
// [[Rcpp::export]] |
|
304 | 23505x |
double kappa_ct(arma::imat ct) { |
305 | 23505x |
double a = ct(0,0); //gold 1 & silver 1 |
306 | 23505x |
double c = ct(0,1); //gold 1 & silver 0 |
307 | 23505x |
double b = ct(1,0); //gold 0 & silver 1 |
308 | 23505x |
double d = ct(1,1); //gold 0 & silver 0 |
309 | 23505x |
double size = accu(ct); |
310 |
|
|
311 | 23505x |
double pZero = (a+d) / size; |
312 | 23505x |
double pYes = ((a + b) / size) * ((a + c) / size); |
313 | 23505x |
double pNo = ((c + d) / size) * ((b + d) / size); |
314 | 23505x |
double pE = (pYes + pNo); |
315 | 23505x |
double k = (pZero - pE) / (1 - pE); |
316 |
|
|
317 | 23505x |
return(k); |
318 |
} |
|
319 | ||
320 |
// [[Rcpp::export]] |
|
321 | 23502x |
arma::imat getHand_ct(arma::imat ct, int handSetLength, double handSetBaserate) { |
322 | 23502x |
int positives = ceil(handSetLength * handSetBaserate); |
323 | 47004x |
arma::ivec positiveIndices; |
324 | 47004x |
arma::imat otherCT(ct); |
325 |
|
|
326 | 23502x |
if(positives > 0) { |
327 | 31004x |
arma::irowvec gold1s = ct.row(0); |
328 | 15502x |
positiveIndices = sample_contingency_table(gold1s, positives, false); |
329 |
|
|
330 | 15502x |
double sumOnes = sum(positiveIndices == 0); |
331 | 15502x |
double sumTwos = sum(positiveIndices == 1); |
332 | 15502x |
positiveIndices *= 2; |
333 | 31004x |
otherCT(0,0) = otherCT(0,0) - sumOnes; |
334 | 31004x |
otherCT(0,1) = otherCT(0,1) - sumTwos; |
335 |
} |
|
336 |
|
|
337 | 47004x |
arma::ivec otherAsVector = otherCT.as_col(); |
338 | 47004x |
arma::ivec otherIndices = sample_contingency_table(otherCT, handSetLength - positives, false); |
339 | 70506x |
arma::ivec allIndices = join_cols<ivec>(positiveIndices, otherIndices); |
340 | 23502x |
arma::imat newCT = arma::imat(2,2); |
341 |
|
|
342 | 23502x |
newCT(0,0) = sum(allIndices == 0); |
343 | 23502x |
newCT(1,0) = sum(allIndices == 1); |
344 | 23502x |
newCT(0,1) = sum(allIndices == 2); |
345 | 23502x |
newCT(1,1) = sum(allIndices == 3); |
346 |
|
|
347 | 47004x |
return(newCT); |
348 |
} |
|
349 | ||
350 |
//' @name getHand_kappa |
|
351 |
//' @title getHand_kappa |
|
352 |
//' |
|
353 |
//' @description This function returns kappa calculated from a Handset taken from a larger Contingency Table |
|
354 |
//' |
|
355 |
//' @param ct KPs matrix of kappa (column 1) and precision (column 2) values |
|
356 |
//' @param handSetLength The length of the \code{\link[=getTestSet]{testSet}} (ignored unless \emph{data} is an observed kappa value) |
|
357 |
//' @param handSetBaserate baserate to inflate the sampled contingency table to |
|
358 |
//' |
|
359 |
//' @return Kappa as double |
|
360 |
//' |
|
361 |
//' @export |
|
362 |
// [[Rcpp::export]] |
|
363 | 23500x |
double getHand_kappa(arma::imat ct, int handSetLength, double handSetBaserate) { |
364 | 47000x |
arma::imat newCT = getHand_ct(ct, handSetLength, handSetBaserate); |
365 |
|
|
366 | 23500x |
double k = kappa_ct(newCT); |
367 | 47000x |
return(k); |
368 |
} |
|
369 | ||
370 |
// This function calculates rho given the parameters of the initial set and |
|
371 |
// the kappa of the observed handset and is called by rhoK() |
|
372 |
// |
|
373 |
// [[Rcpp::export]] |
|
374 | 120x |
double calcRho_c( |
375 |
double x, |
|
376 |
double OcSBaserate, |
|
377 |
int testSetLength, |
|
378 |
double testSetBaserateInflation = 0, |
|
379 |
int OcSLength = 10000, |
|
380 |
int replicates = 800, |
|
381 |
double ScSKappaThreshold = 0.9, |
|
382 |
double ScSKappaMin = 0.40, |
|
383 |
double ScSPrecisionMin = 0.6, |
|
384 |
double ScSPrecisionMax = 1.0, |
|
385 |
NumericMatrix KPs = NumericMatrix(0) |
|
386 |
) { |
|
387 | 120x |
if (KPs.size() == 1 && KPs[0] == 0) { |
388 | 20x |
KPs = generate_kp_list(replicates, OcSBaserate, ScSKappaMin, ScSKappaThreshold, ScSPrecisionMin, ScSPrecisionMax); |
389 |
} |
|
390 | ||
391 | 120x |
if(KPs.nrow() < replicates) { |
392 | 50x |
replicates = KPs.nrow(); |
393 |
} |
|
394 | ||
395 | 240x |
arma::vec savedKappas = arma::vec(replicates); |
396 | 23620x |
for (int KProw = 0; KProw < replicates; KProw++) { |
397 | 47000x |
NumericVector KP = KPs(KProw, _); |
398 | 23500x |
double precision = KP[0]; |
399 | 23500x |
double kappa = KP[1]; |
400 | 23500x |
double rec = recall(kappa, OcSBaserate, precision); |
401 | ||
402 | 47000x |
arma::imat fullCT = contingency_table(precision, rec, OcSLength, OcSBaserate); |
403 | 23500x |
double kk = getHand_kappa(fullCT, testSetLength, testSetBaserateInflation); |
404 | 47000x |
savedKappas[KProw] = kk; |
405 |
}; |
|
406 | ||
407 | 240x |
return(getBootPvalue_c(savedKappas, x)); |
408 |
} |
|
409 | ||
410 |
// // @name rhoCT_c |
|
411 |
// // @title rhoCT_ct |
|
412 |
// // @param contingencyTable [TBD] |
|
413 |
// // @param testSetBaserateInflation [TBD] |
|
414 |
// // @param replicates [TBD] |
|
415 |
// // @param OcSLength [TBD] |
|
416 |
// // @param OcSBaserate [TBD] |
|
417 |
// // @param ScSKappaThreshold [TBD] |
|
418 |
// // @param ScSKappaMin [TBD] |
|
419 |
// // @param ScSPrecisionMin [TBD] |
|
420 |
// // @param ScSPrecisionMax [TBD] |
|
421 |
// //' @export |
|
422 |
// // [[Rcpp::export]] |
|
423 |
// arma::vec rhoCT_c( |
|
424 |
// arma::imat contingencyTable, |
|
425 |
// double testSetBaserateInflation = 0.2, |
|
426 |
// int replicates = 800, |
|
427 |
// int OcSLength = 10000, |
|
428 |
// double OcSBaserate = -1.0, |
|
429 |
// double ScSKappaThreshold = 0.65, |
|
430 |
// double ScSKappaMin = 0.40, |
|
431 |
// double ScSPrecisionMin = 0.6, |
|
432 |
// double ScSPrecisionMax = 1 |
|
433 |
// ){ |
|
434 |
// // Observed kappa |
|
435 |
// double kappa = kappa_ct(contingencyTable); |
|
436 |
// double size = accu(contingencyTable); |
|
437 |
// double gold1; |
|
438 |
// |
|
439 |
// if(OcSBaserate == -1.0) { |
|
440 |
// gold1 = accu(contingencyTable.row(0)); |
|
441 |
// OcSBaserate = gold1 / size; |
|
442 |
// } |
|
443 |
// |
|
444 |
// double rho = calcRho_c( |
|
445 |
// kappa, OcSBaserate, |
|
446 |
// size, testSetBaserateInflation, |
|
447 |
// OcSLength, replicates, |
|
448 |
// ScSKappaThreshold, ScSKappaMin, ScSPrecisionMin, ScSPrecisionMax |
|
449 |
// ); |
|
450 |
// |
|
451 |
// return(NumericVector::create(kappa, rho)); |
|
452 |
// } |
|
453 | ||
454 | ||
455 |
// //' @export |
|
456 |
// // [[Rcpp::export]] |
|
457 |
// bool run_rho_in_c( |
|
458 |
// double threshold, double infl, |
|
459 |
// int handsetLength = 20, |
|
460 |
// double handsetBaserate = 0.2, |
|
461 |
// int replicates = 10000 |
|
462 |
// ) { |
|
463 |
// arma::imat ct = random_contingency_table(10000, infl, 0.4, threshold, 0.2, 0.8); |
|
464 |
// // handset --> testset |
|
465 |
// arma::imat handset = getHand_ct(ct, handsetLength, handsetBaserate); |
|
466 |
// arma::vec res = rhoCT_c(handset, handsetBaserate, replicates, 10000, -1, 0.65, 0.4, 0.6, 1); |
|
467 |
// |
|
468 |
// return(res[0]>threshold); |
|
469 |
// } |
|
470 |