Difference between revisions of "Golubsim.R"
From Organic Design wiki
(Comments removed) |
(Bug fixes) |
||
| Line 3: | Line 3: | ||
factorial(n) / (factorial(n-r) * factorial(r)) | factorial(n) / (factorial(n-r) * factorial(r)) | ||
} | } | ||
| − | golubsim <- function(outfile = "tmp", iiter=nCr(11,6) | + | golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1, |
| − | |||
datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05), | datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05), | ||
| − | ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE | + | ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE |
| − | + | ) { | |
| − | { | ||
# ----------------------------------------------------------------------------- # | # ----------------------------------------------------------------------------- # | ||
| − | |||
# itter = numeric number of iterations | # itter = numeric number of iterations | ||
| − | |||
# alpha = numeric number [0,1] type I probability test cutoff | # alpha = numeric number [0,1] type I probability test cutoff | ||
| − | |||
# use.siggenes = logical to use pi0.est function in siggenes package | # use.siggenes = logical to use pi0.est function in siggenes package | ||
# lambdaseq = numeric vector lambda range for pi0.est smoothing splines | # lambdaseq = numeric vector lambda range for pi0.est smoothing splines | ||
# ncs.value = character string "paper " or "max" | # ncs.value = character string "paper " or "max" | ||
# use.convest = logical to use convest function in limma package | # use.convest = logical to use convest function in limma package | ||
| − | |||
# ----------------------------------------------------------------------------- # | # ----------------------------------------------------------------------------- # | ||
# 1) ---------------------------------- Setup --------------------------------- # | # 1) ---------------------------------- Setup --------------------------------- # | ||
# Check and create directories for save(...) of objects | # Check and create directories for save(...) of objects | ||
| − | if(length(dir(datadir))==0) { | + | if(length(dir(datadir))==0) { |
| − | + | create.dir(datadir) | |
| − | } | + | } |
# Check and install multtest | # Check and install multtest | ||
| − | if(!require(multtest, quietly=TRUE)) { | + | if(!require(multtest, quietly=TRUE)) { |
| − | + | cat("Package:Multtest not available attempting to install\n") | |
| − | + | install.packages("multtest") | |
| − | } | + | } |
| − | data(golub) | + | data(golub) |
| − | m <- nrow(golub) | + | m <- nrow(golub) |
# AML slides=11 | # AML slides=11 | ||
| − | n <- 11 | + | n <- 11 |
| − | r <- 6 | + | r <- 6 |
| − | + | ||
| − | AML.samples <- list() | + | AML.samples <- list() |
| − | i <- 1 | + | i <- 1 |
| − | while(i <= | + | while(i <= iiter) { |
| − | + | value <- sort(sample(n, r, replace=FALSE)) | |
| − | + | key <- paste(value, collapse=" ") | |
| − | + | if(is.null(AML.samples[[key]])) { | |
| − | + | AML.samples[[key]] <- value | |
| − | + | i <-i+1 | |
| − | + | # if(!(i %% 10)){ # print every 10th | |
| + | # print(i) | ||
| + | # } | ||
| + | } | ||
} | } | ||
| − | + | ||
# ALL slides=27 | # ALL slides=27 | ||
| − | n <- 27 | + | n <- 27 |
| − | r <- 6 | + | r <- 6 |
| − | + | ||
| − | ALL.samples <- list() | + | ALL.samples <- list() |
| − | i <- 1 | + | i <- 1 |
| − | while(i <= | + | while(i <= iiter) { |
| − | + | value <- sort(sample(n, r, replace=FALSE)) | |
| − | + | key <- paste(value, collapse=" ") | |
| − | + | if(is.null(ALL.samples[[key]])) { | |
| − | + | ALL.samples[[key]] <- value | |
| − | + | i <-i+1 | |
| − | + | # if(!(i %% 10)){ # print every 10th | |
| + | # print(i) | ||
| + | # } | ||
| + | } | ||
} | } | ||
| − | + | cat("Column permutations sampled\n") | |
# Need to randomise the way the keys are drawn | # Need to randomise the way the keys are drawn | ||
| − | ALLdraw <- sample( | + | ALLdraw <- sample(iiter,iiter, replace=FALSE) |
| − | AMLdraw <- sample( | + | AMLdraw <- sample(iiter,iiter, replace=FALSE) |
| − | |||
| − | |||
| − | |||
| − | |||
| − | |||
| − | design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample) | + | golub.ALL <- golub[,golub.cl==0] |
| − | + | golub.AML <- golub[,golub.cl==1] | |
| + | |||
| + | golub.clSample <- rep(0:1, each=6) | ||
| + | |||
| + | design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample) | ||
| + | |||
# Check and install limma | # Check and install limma | ||
| − | if(!require(limma, quietly=TRUE)) { | + | if(!require(limma, quietly=TRUE)) { |
| − | + | cat("Package:limma not available attempting to install\n") | |
| − | + | install.packages("limma") | |
| − | } | + | } |
# Check and install siggenes | # Check and install siggenes | ||
| − | if(!require(siggenes, quietly=TRUE)) { | + | if(!require(siggenes, quietly=TRUE)) { |
| − | + | cat("Package:siggenes not available attempting to install\n") | |
| − | + | install.packages("siggenes") | |
| − | } | + | } |
| − | set.seed(seed) | + | set.seed(seed) |
| − | starttime <- proc.time()[3] | + | starttime <- proc.time()[3] |
# Output lists | # Output lists | ||
| − | FDlist <- list(m=m, alpha=alpha, p0= | + | FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter)) |
| − | alphastar <- alpha # Do we need this? | + | alphastar <- alpha # Do we need this? |
# 3) --------------------------- iiter loop ----------------------------- # | # 3) --------------------------- iiter loop ----------------------------- # | ||
| − | for(i in seq(iiter)){ | + | for(i in seq(iiter)){ |
# 4) ------------------------ Permutation analysis ---------------------------- # | # 4) ------------------------ Permutation analysis ---------------------------- # | ||
| − | + | x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]], | |
| − | + | golub.AML[,AML.samples[[AMLdraw[i]]]] | |
| − | + | ) | |
| − | + | fit <- lmFit(x, design.Sample) | |
| − | + | fit <- eBayes(fit) | |
| − | + | pvalues <- fit$p.value[,"AMLvsALL"] | |
# p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") | # p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") | ||
# pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE) | # pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE) | ||
| Line 119: | Line 119: | ||
# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95) | # Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95) | ||
# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m | # Maximum value of lambda needs to be less than 1 estimated by (m-1)/m | ||
| − | + | if(use.convest) {# Convest estimation | |
| − | + | FDlist[["convest"]][i] <- convest(pvalues, niter=50) | |
| − | + | } | |
# Siggenes or first principles | # Siggenes or first principles | ||
| − | + | if( use.siggenes ) { | |
| − | + | require(siggenes, quietly=TRUE) | |
# A) weighting by 1-lambda | # A) weighting by 1-lambda | ||
| − | + | pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, | |
| − | + | ncs.weights=1-lambdaseq)$p0 | |
| − | + | FDlist[["p0"]][i] <- round(pi0hat,8) | |
# B) no 1-lambda weighting | # B) no 1-lambda weighting | ||
| − | + | pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value, | |
| − | + | ncs.weights=NULL)$p0 | |
| − | + | FDlist[["p0NW"]][i] <- round(pi0hat,8) | |
| − | + | } else { | |
| − | + | pi0lambda <- tapply(lambdaseq, lambdaseq, | |
| − | + | function(x){sum(pvalues>x)/(m*(1-x))}) | |
# A) weighting by 1-lambda | # A) weighting by 1-lambda | ||
| − | + | pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, | |
| − | + | df=3)$y[length(lambdaseq)],1) | |
| − | + | FDlist[["p0"]][i] <- round(pi0hat,8) | |
# B) no 1-lambda weighting | # B) no 1-lambda weighting | ||
| − | + | pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, | |
| − | + | df=3)$y[length(lambdaseq)],1) | |
| − | + | FDlist[["p0NW"]][i] <- round(pi0hat,8) | |
| − | + | } | |
| − | + | save(FDlist, file=file.path(datadir, outfile), compress=T) | |
| − | } # End of main loop | + | } # End of main loop |
# Return simulation time | # Return simulation time | ||
| − | endtime <- proc.time()[3] - starttime | + | endtime <- proc.time()[3] - starttime |
| − | return(endtime) | + | return(endtime) |
} | } | ||
Latest revision as of 02:27, 20 June 2007
Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {
factorial(n) / (factorial(n-r) * factorial(r))
} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), alpha = 0.05, seed=1,
datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE
) {
- ----------------------------------------------------------------------------- #
- itter = numeric number of iterations
- alpha = numeric number [0,1] type I probability test cutoff
- use.siggenes = logical to use pi0.est function in siggenes package
- lambdaseq = numeric vector lambda range for pi0.est smoothing splines
- ncs.value = character string "paper " or "max"
- use.convest = logical to use convest function in limma package
- ----------------------------------------------------------------------------- #
- 1) ---------------------------------- Setup --------------------------------- #
- Check and create directories for save(...) of objects
if(length(dir(datadir))==0) {
create.dir(datadir)
}
- Check and install multtest
if(!require(multtest, quietly=TRUE)) {
cat("Package:Multtest not available attempting to install\n")
install.packages("multtest")
}
data(golub) m <- nrow(golub)
- AML slides=11
n <- 11 r <- 6
AML.samples <- list()
i <- 1
while(i <= iiter) {
value <- sort(sample(n, r, replace=FALSE))
key <- paste(value, collapse=" ")
if(is.null(AML.sampleskey)) {
AML.sampleskey <- value
i <-i+1
- if(!(i %% 10)){ # print every 10th
- print(i)
- }
} }
- ALL slides=27
n <- 27
r <- 6
ALL.samples <- list()
i <- 1
while(i <= iiter) {
value <- sort(sample(n, r, replace=FALSE))
key <- paste(value, collapse=" ")
if(is.null(ALL.sampleskey)) {
ALL.sampleskey <- value
i <-i+1
- if(!(i %% 10)){ # print every 10th
- print(i)
- }
} }
cat("Column permutations sampled\n")
- Need to randomise the way the keys are drawn
ALLdraw <- sample(iiter,iiter, replace=FALSE)
AMLdraw <- sample(iiter,iiter, replace=FALSE)
golub.ALL <- golub[,golub.cl==0]
golub.AML <- golub[,golub.cl==1]
golub.clSample <- rep(0:1, each=6)
design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
- Check and install limma
if(!require(limma, quietly=TRUE)) {
cat("Package:limma not available attempting to install\n")
install.packages("limma")
}
- Check and install siggenes
if(!require(siggenes, quietly=TRUE)) {
cat("Package:siggenes not available attempting to install\n")
install.packages("siggenes")
}
set.seed(seed) starttime <- proc.time()[3]
- Output lists
FDlist <- list(m=m, alpha=alpha, p0=rep(NA,iiter), p0NW=rep(NA,iiter), convest=rep(NA,iiter))
alphastar <- alpha # Do we need this?
- 3) --------------------------- iiter loop ----------------------------- #
for(i in seq(iiter)){
- 4) ------------------------ Permutation analysis ---------------------------- #
x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
golub.AML[,AML.samples[[AMLdraw[i]]]]
)
fit <- lmFit(x, design.Sample)
fit <- eBayes(fit)
pvalues <- fit$p.value[,"AMLvsALL"]
- p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
- pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
- 5) ----------------------------- Saving statistics -------------------------- #
- pi0 estimation
- Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
- Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
if(use.convest) {# Convest estimation
FDlist"convest"[i] <- convest(pvalues, niter=50)
}
# Siggenes or first principles
if( use.siggenes ) {
require(siggenes, quietly=TRUE)
# A) weighting by 1-lambda
pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
ncs.weights=1-lambdaseq)$p0
FDlist"p0"[i] <- round(pi0hat,8)
# B) no 1-lambda weighting
pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
ncs.weights=NULL)$p0
FDlist"p0NW"[i] <- round(pi0hat,8)
} else {
pi0lambda <- tapply(lambdaseq, lambdaseq,
function(x){sum(pvalues>x)/(m*(1-x))})
# A) weighting by 1-lambda
pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
df=3)$y[length(lambdaseq)],1)
FDlist"p0"[i] <- round(pi0hat,8)
# B) no 1-lambda weighting
pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
df=3)$y[length(lambdaseq)],1)
FDlist"p0NW"[i] <- round(pi0hat,8)
}
save(FDlist, file=file.path(datadir, outfile), compress=T)
} # End of main loop
- Return simulation time
endtime <- proc.time()[3] - starttime return(endtime)
}
- load(file.path("Data", "tmp"))
- source("plotfun.R")
- plotfun(FDlist)



