Difference between revisions of "Golubsim.R"
m (New page: # {{R}}{{#security:*|Sven}}) |
(Function added) |
||
Line 1: | Line 1: | ||
# {{R}}{{#security:*|Sven}} | # {{R}}{{#security:*|Sven}} | ||
+ | nCr <- function(n,r) { | ||
+ | factorial(n) / (factorial(n-r) * factorial(r)) | ||
+ | } | ||
+ | golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05, | ||
+ | adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1, | ||
+ | datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05), | ||
+ | ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE, | ||
+ | pad=TRUE, FDR = FALSE) | ||
+ | { | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | # m = numeric number of genes to simulate | ||
+ | # itter = numeric number of iterations | ||
+ | # sepi0 = numeric vector of True pi0 values to simulate | ||
+ | # alpha = numeric number [0,1] type I probability test cutoff | ||
+ | # adaptive = logical Finner adaptive FDR control | ||
+ | # use.siggenes = logical to use pi0.est function in siggenes package | ||
+ | # lambdaseq = numeric vector lambda range for pi0.est smoothing splines | ||
+ | # ncs.value = character string "paper " or "max" | ||
+ | # use.convest = logical to use convest function in limma package | ||
+ | # pad = logical Populate padded lists for speed | ||
+ | # ----------------------------------------------------------------------------- # | ||
+ | |||
+ | # 1) ---------------------------------- Setup --------------------------------- # | ||
+ | # Check and create directories for save(...) of objects | ||
+ | if(length(dir(datadir))==0) { | ||
+ | create.dir(datadir) | ||
+ | } | ||
+ | |||
+ | # Check and install multtest | ||
+ | if(!require(multtest, quietly=TRUE)) { | ||
+ | cat("Package:Multtest not available attempting to install\n") | ||
+ | install.packages("multtest") | ||
+ | } | ||
+ | |||
+ | data(golub) | ||
+ | m <- nrow(golub) | ||
+ | |||
+ | # AML slides=11 | ||
+ | n <- 11 | ||
+ | r <- 6 | ||
+ | |||
+ | m <- nCr(n,r) | ||
+ | AML.samples <- list() | ||
+ | i <- 1 | ||
+ | while(i <= m) { | ||
+ | value <- sort(sample(n, r, replace=FALSE)) | ||
+ | key <- paste(value, collapse=" ") | ||
+ | if(is.null(AML.samples[[key]])) { | ||
+ | AML.samples[[key]] <- value | ||
+ | i <-i+1 | ||
+ | print(i) | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # ALL slides=27 | ||
+ | n <- 27 | ||
+ | r <- 6 | ||
+ | |||
+ | ALL.samples <- list() | ||
+ | i <- 1 | ||
+ | while(i <= m) { | ||
+ | value <- sort(sample(n, r, replace=FALSE)) | ||
+ | key <- paste(value, collapse=" ") | ||
+ | if(is.null(ALL.samples[[key]])) { | ||
+ | ALL.samples[[key]] <- value | ||
+ | i <-i+1 | ||
+ | print(i) | ||
+ | } | ||
+ | } | ||
+ | |||
+ | # Need to randomise the way the keys are drawn | ||
+ | ALLdraw <- sample(m,m, replace=FALSE) | ||
+ | AMLdraw <- sample(m,m, replace=FALSE) | ||
+ | |||
+ | golub.ALL <- golub[,golub.cl==0] | ||
+ | golub.AML <- golub[,golub.cl==1] | ||
+ | |||
+ | golub.clSample <- rep(0:1, each=6) | ||
+ | |||
+ | design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample) | ||
+ | |||
+ | |||
+ | # Check and install limma | ||
+ | if(!require(limma, quietly=TRUE)) { | ||
+ | cat("Package:limma not available attempting to install\n") | ||
+ | install.packages("limma") | ||
+ | } | ||
+ | # Check and install siggenes | ||
+ | if(!require(siggenes, quietly=TRUE)) { | ||
+ | cat("Package:siggenes not available attempting to install\n") | ||
+ | install.packages("siggenes") | ||
+ | } | ||
+ | |||
+ | set.seed(seed) | ||
+ | starttime <- proc.time()[3] | ||
+ | |||
+ | # Set up False discovery and False non discovery vectors | ||
+ | #FD <- rep(NA, iiter) | ||
+ | #FND <- rep(NA, iiter) | ||
+ | |||
+ | # Output lists | ||
+ | |||
+ | FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list()) | ||
+ | |||
+ | |||
+ | # 2) ---------------------------- pi0 outer loop ------------------------------ # | ||
+ | # Main loop for each value of pi0, the proportion of truly Null hypotheses | ||
+ | |||
+ | #for(pi0 in seqpi0) | ||
+ | # { | ||
+ | # False discovery threshold | ||
+ | # if(adaptive) | ||
+ | # { | ||
+ | # Adaptive control adjustment on true pi0 (finner & Roters, 2002) | ||
+ | # alphastar<- alpha/pi0 | ||
+ | # } else { | ||
+ | # Non adaptive control | ||
+ | alphastar <- alpha | ||
+ | # } | ||
+ | |||
+ | # m1 <- round(m * (1-pi0)) | ||
+ | # m0 <- m - m1 | ||
+ | # DEmeans <- c(rep(mu1,m1), rep(mu0,m0)) | ||
+ | # DEflag <- as.logical(DEmeans) | ||
+ | |||
+ | # 3) --------------------------- iiter inner loop ----------------------------- # | ||
+ | for(i in seq(iiter)){ | ||
+ | |||
+ | # Names for list elements | ||
+ | # pi0char <- as.character(round(pi0, 5)) | ||
+ | |||
+ | # 4) ------------------------ Permutation analysis ---------------------------- # | ||
+ | x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]], | ||
+ | golub.AML[,AML.samples[[AMLdraw[i]]]] | ||
+ | ) | ||
+ | |||
+ | fit <- lmFit(x, design.Sample) | ||
+ | fit <- eBayes(fit) | ||
+ | pvalues <- fit$p.value[,"AMLvsALL"] | ||
+ | # p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") | ||
+ | # pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE) | ||
+ | |||
+ | # 5) ----------------------------- Saving statistics -------------------------- # | ||
+ | # pi0 estimation | ||
+ | # Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95) | ||
+ | # Maximum value of lambda needs to be less than 1 estimated by (m-1)/m | ||
+ | |||
+ | if(use.convest) {# Convest estimation | ||
+ | FDlist[["convest"]][i] <- convest(pvalues, niter=50) | ||
+ | } | ||
+ | |||
+ | # Siggenes or first principles | ||
+ | if( use.siggenes ) { | ||
+ | require(siggenes, quietly=TRUE) | ||
+ | # A) weighting by 1-lambda | ||
+ | pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, | ||
+ | ncs.weights=1-lambdaseq)$p0 | ||
+ | FDlist[["p0"]][i] <- round(pi0hat,8) | ||
+ | # B) no 1-lambda weighting | ||
+ | pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, | ||
+ | ncs.weights=NULL)$p0 | ||
+ | FDlist[["p0NW"]][i] <- round(pi0hat,8) | ||
+ | } else { | ||
+ | pi0lambda <- tapply(lambdaseq, lambdaseq, | ||
+ | function(x){sum(pvalues>x)/(m*(1-x))}) | ||
+ | # A) weighting by 1-lambda | ||
+ | pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, | ||
+ | df=3)$y[length(lambdaseq)],1) | ||
+ | FDlist[["p0"]][i] <- round(pi0hat,8) | ||
+ | # B) no 1-lambda weighting | ||
+ | pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, | ||
+ | df=3)$y[length(lambdaseq)],1) | ||
+ | FDlist[["p0NW"]][i] <- round(pi0hat,8) | ||
+ | } | ||
+ | |||
+ | # if(FDR) { | ||
+ | # Step down fdr Bengamini & Hochberg control | ||
+ | # Porder <- order(pvalues) | ||
+ | # Sorted pvalues | ||
+ | # cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m | ||
+ | #Reordered DEflag | ||
+ | # DEorder <- DEflag[Porder] | ||
+ | # rejected <- DEorder[!cutoff] | ||
+ | # accepted <- DEorder[cutoff] | ||
+ | # FD[i] <- sum(!rejected) | ||
+ | # FND[i] <- sum(accepted) | ||
+ | # } | ||
+ | # } | ||
+ | # Writing to FDlist every iteration of p | ||
+ | # if(FDR) { | ||
+ | # FDlist[["V"]][[pi0char]] <- FD | ||
+ | # FDlist[["T"]][[pi0char]] <- FND | ||
+ | # } | ||
+ | save(FDlist, file=file.path(datadir, outfile), compress=T) | ||
+ | } # End of main loop | ||
+ | |||
+ | # Return simulation time | ||
+ | endtime <- proc.time()[3] - starttime | ||
+ | return(endtime) | ||
+ | } | ||
+ | |||
+ | #fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5), | ||
+ | # alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE) | ||
+ | #load(file.path("Data", "tmp")) | ||
+ | |||
+ | #source("plotfun.R") | ||
+ | #plotfun(FDlist) |
Revision as of 02:05, 20 June 2007
Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {
factorial(n) / (factorial(n-r) * factorial(r))
} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,
adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1, datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05), ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE, pad=TRUE, FDR = FALSE)
{
- ----------------------------------------------------------------------------- #
- m = numeric number of genes to simulate
- itter = numeric number of iterations
- sepi0 = numeric vector of True pi0 values to simulate
- alpha = numeric number [0,1] type I probability test cutoff
- adaptive = logical Finner adaptive FDR control
- use.siggenes = logical to use pi0.est function in siggenes package
- lambdaseq = numeric vector lambda range for pi0.est smoothing splines
- ncs.value = character string "paper " or "max"
- use.convest = logical to use convest function in limma package
- pad = logical Populate padded lists for speed
- ----------------------------------------------------------------------------- #
- 1) ---------------------------------- Setup --------------------------------- #
- Check and create directories for save(...) of objects
if(length(dir(datadir))==0) {
create.dir(datadir)
}
- Check and install multtest
if(!require(multtest, quietly=TRUE)) {
cat("Package:Multtest not available attempting to install\n") install.packages("multtest")
}
data(golub) m <- nrow(golub)
- AML slides=11
n <- 11 r <- 6
m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {
value <- sort(sample(n, r, replace=FALSE)) key <- paste(value, collapse=" ") if(is.null(AML.sampleskey)) { AML.sampleskey <- value i <-i+1 print(i) }
}
- ALL slides=27
n <- 27 r <- 6
ALL.samples <- list() i <- 1 while(i <= m) {
value <- sort(sample(n, r, replace=FALSE)) key <- paste(value, collapse=" ") if(is.null(ALL.sampleskey)) { ALL.sampleskey <- value i <-i+1 print(i) }
}
- Need to randomise the way the keys are drawn
ALLdraw <- sample(m,m, replace=FALSE) AMLdraw <- sample(m,m, replace=FALSE)
golub.ALL <- golub[,golub.cl==0] golub.AML <- golub[,golub.cl==1]
golub.clSample <- rep(0:1, each=6)
design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
- Check and install limma
if(!require(limma, quietly=TRUE)) {
cat("Package:limma not available attempting to install\n") install.packages("limma")
}
- Check and install siggenes
if(!require(siggenes, quietly=TRUE)) {
cat("Package:siggenes not available attempting to install\n") install.packages("siggenes")
}
set.seed(seed) starttime <- proc.time()[3]
- Set up False discovery and False non discovery vectors
- FD <- rep(NA, iiter)
- FND <- rep(NA, iiter)
- Output lists
FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
- 2) ---------------------------- pi0 outer loop ------------------------------ #
- Main loop for each value of pi0, the proportion of truly Null hypotheses
- for(pi0 in seqpi0)
- {
# False discovery threshold
- if(adaptive)
- {
# Adaptive control adjustment on true pi0 (finner & Roters, 2002)
- alphastar<- alpha/pi0
- } else {
# Non adaptive control
alphastar <- alpha
- }
- m1 <- round(m * (1-pi0))
- m0 <- m - m1
- DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
- DEflag <- as.logical(DEmeans)
- 3) --------------------------- iiter inner loop ----------------------------- #
for(i in seq(iiter)){
# Names for list elements
- pi0char <- as.character(round(pi0, 5))
- 4) ------------------------ Permutation analysis ---------------------------- #
x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]], golub.AML[,AML.samples[[AMLdraw[i]]]] ) fit <- lmFit(x, design.Sample) fit <- eBayes(fit) pvalues <- fit$p.value[,"AMLvsALL"]
- p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
- pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
- 5) ----------------------------- Saving statistics -------------------------- #
- pi0 estimation
- Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
- Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
if(use.convest) {# Convest estimation FDlist"convest"[i] <- convest(pvalues, niter=50) } # Siggenes or first principles if( use.siggenes ) { require(siggenes, quietly=TRUE) # A) weighting by 1-lambda pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, ncs.weights=1-lambdaseq)$p0 FDlist"p0"[i] <- round(pi0hat,8) # B) no 1-lambda weighting pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, ncs.weights=NULL)$p0 FDlist"p0NW"[i] <- round(pi0hat,8) } else { pi0lambda <- tapply(lambdaseq, lambdaseq, function(x){sum(pvalues>x)/(m*(1-x))}) # A) weighting by 1-lambda pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, df=3)$y[length(lambdaseq)],1) FDlist"p0"[i] <- round(pi0hat,8) # B) no 1-lambda weighting pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, df=3)$y[length(lambdaseq)],1) FDlist"p0NW"[i] <- round(pi0hat,8) }
- if(FDR) {
# Step down fdr Bengamini & Hochberg control
- Porder <- order(pvalues)
# Sorted pvalues
- cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
#Reordered DEflag
- DEorder <- DEflag[Porder]
- rejected <- DEorder[!cutoff]
- accepted <- DEorder[cutoff]
- FD[i] <- sum(!rejected)
- FND[i] <- sum(accepted)
- }
- }
# Writing to FDlist every iteration of p
save(FDlist, file=file.path(datadir, outfile), compress=T)
} # End of main loop
- Return simulation time
endtime <- proc.time()[3] - starttime return(endtime) }
- fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
- alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
- load(file.path("Data", "tmp"))
- source("plotfun.R")
- plotfun(FDlist)