Perform JER calibration from randomization p-values

Usage

calibrate(
  p0,
  m,
  alpha,
  family = c("Linear", "Beta", "Simes"),
  K = nrow(p0),
  p = NULL,
  max_steps_down = 10L,
  piv_stat0 = NULL
)

calibrate0(
  p0,
  m,
  alpha,
  family = c("Linear", "Beta", "Simes"),
  K = nrow(p0),
  piv_stat0 = NULL
)

Arguments

p0: A matrix with B rows. Each row is a vector of null p-values
m: The total number of tested hypotheses
alpha: A numeric value in [0,1], the target (JER) risk
family: A character value, the name of a threshold family. Should be one of "Linear", "Beta" and "Simes", or "Oracle". "Linear" and "Simes" families are identical.
K: An integer value in [1,m], the number of elements in the reference family. Defaults to m
p: A vector of 'm' p.values, used for step-down control. If not provided, single step control is performed.
max_steps_down: A numeric value, the maximum number of steps down to perform. Defaults to 10 (but the algorithm generally converges in 1 or 2 steps).
piv_stat0: Don't use! Should be removed soon...

Value

A list with elements

thr: A numeric vector of length K, such that the estimated probability that there exists an index k between 1 and K such that the k-th maximum of the test statistics of is greater than thr[k], is less than alpha
piv_stat: A vector of B pivotal statitics
lambda: A numeric value, the result of the calibration

Details

'calibrate0' performs single step calibration, whereas 'calibrate' performs step-down calibration. Hence the output of 'calibrate(..., max_steps_down = 0)' and 'calibrate0(...)' should be identical

References

Blanchard, G., Neuvial, P., & Roquain, E. (2020). Post hoc confidence bounds on false positives using reference families. Annals of Statistics, 48(3), 1281-1303.

Examples


set.seed(0xBEEF)
m <- 50
sim <- gaussianSamples(m = m, rho = 0.3, n = 45, 
                       pi0 = 0.8, SNR = 3, prob = 0.5)
Y <- sim$X
groups <- sim$categ
p <- rowWelchTests(Y, groups)$p.value

B <- 100
null_groups <- replicate(B, sample(groups))
p0 <- rowWelchTests(Y, null_groups)$p.value

calib0 <- calibrate0(p0, m, alpha = 0.1) # single step
calib <- calibrate(p0, m, alpha = 0.1, p = p)
calib$lambda >= calib0$lambda 
#>  10% 
#> TRUE 

maxFP(p, calib$thr)
#> [1] 45

if (FALSE) { # \dontrun{
# Gene expression
data(expr_ALL, package = "sanssouci.data")
X <- expr_ALL; rm(expr_ALL)
groups <- ifelse(colnames(X) == "BCR/ABL", 1, 0) # map to 0/1

null_groups <- replicate(500, sample(groups))
perm <- rowWelchTests(X, null_groups)
p0 <- perm$p.value

alpha <- 0.1
m <- nrow(X)
p <- rowWelchTests(X, groups)$p.value
calib_L <- calibrate(p0, m, alpha, family = "Linear")
calib_B <- calibrate(p0, m, alpha, family = "Beta", K = 100)

## post hoc bounds
thr <- calib_L$thr
minTP(p, thr)  ## lower bound on true positives


## example of user selection: rejections of BH(0.05) procedure
adjp <- p.adjust(p, method = "BH") 
sel <- which(adjp < 0.05)
length(sel)

minTP(p[sel], thr)

# confidence bound on the FDP
FDP_bound <- sanssouci:::curveMaxFP(sort(p), thr)/seq(along = p)
plot(head(FDP_bound, 300), t = 's', 
  xlab = "Number of features",
  ylab = "Upper bound on False Discovery Proportion")
} # }