Difference between revisions of "Golubsim.R"

Revision as of 02:07, 20 June 2007

Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {

 factorial(n) / (factorial(n-r) * factorial(r))

} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,

                    adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
                    pad=TRUE, FDR = FALSE)

{

----------------------------------------------------------------------------- #
m = numeric number of genes to simulate
itter = numeric number of iterations
sepi0 = numeric vector of True pi0 values to simulate
alpha = numeric number [0,1] type I probability test cutoff
adaptive = logical Finner adaptive FDR control
use.siggenes = logical to use pi0.est function in siggenes package
lambdaseq = numeric vector lambda range for pi0.est smoothing splines
ncs.value = character string "paper " or "max"
use.convest = logical to use convest function in limma package
pad = logical Populate padded lists for speed
----------------------------------------------------------------------------- #

1) ---------------------------------- Setup --------------------------------- #
Check and create directories for save(...) of objects

if(length(dir(datadir))==0) {

 create.dir(datadir)

}

Check and install multtest

if(!require(multtest, quietly=TRUE)) {

 cat("Package:Multtest not available attempting to install\n")
 install.packages("multtest")

}

data(golub) m <- nrow(golub)

AML slides=11

n <- 11 r <- 6

m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(AML.sampleskey)) {
   AML.sampleskey <- value
   i <-i+1
   print(i)
 }

}

ALL slides=27

n <- 27 r <- 6

ALL.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(ALL.sampleskey)) {
   ALL.sampleskey <- value
   i <-i+1
   print(i)
 }

}

Need to randomise the way the keys are drawn

ALLdraw <- sample(m,m, replace=FALSE) AMLdraw <- sample(m,m, replace=FALSE)

golub.ALL <- golub[,golub.cl==0] golub.AML <- golub[,golub.cl==1]

golub.clSample <- rep(0:1, each=6)

design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)

Check and install limma

if(!require(limma, quietly=TRUE)) {

 cat("Package:limma not available attempting to install\n")
 install.packages("limma")

}

Check and install siggenes

if(!require(siggenes, quietly=TRUE)) {

 cat("Package:siggenes not available attempting to install\n")
 install.packages("siggenes")

}

set.seed(seed) starttime <- proc.time()[3]

Output lists

FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())

alphastar <- alpha # Do we need this?

3) --------------------------- iiter loop ----------------------------- #

for(i in seq(iiter)){

4) ------------------------ Permutation analysis ---------------------------- #

 x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
            golub.AML[,AML.samples[[AMLdraw[i]]]]
            )

 fit <- lmFit(x, design.Sample) 
 fit <- eBayes(fit)
 pvalues <- fit$p.value[,"AMLvsALL"]

p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

5) ----------------------------- Saving statistics -------------------------- #
pi0 estimation
Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
Maximum value of lambda needs to be less than 1 estimated by (m-1)/m

 if(use.convest) {# Convest estimation
   FDlist"convest"[i] <- convest(pvalues, niter=50)
 }
 # Siggenes or first principles
 if( use.siggenes ) {
   require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=1-lambdaseq)$p0
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=NULL)$p0
   FDlist"p0NW"[i] <- round(pi0hat,8)
 } else {
   pi0lambda <- tapply(lambdaseq, lambdaseq,
                       function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0NW"[i] <- round(pi0hat,8) 
 }
 save(FDlist, file=file.path(datadir, outfile), compress=T)

} # End of main loop

Return simulation time

endtime <- proc.time()[3] - starttime return(endtime) }

load(file.path("Data", "tmp"))

source("plotfun.R")
plotfun(FDlist)

@@ Line 95: / Line 95: @@
 set.seed(seed)
 starttime <- proc.time()[3]
-# Set up False discovery and False non discovery vectors
-#FD <- rep(NA, iiter)
-#FND <- rep(NA, iiter)
 # Output lists
@@ Line 104: / Line 100: @@
 FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
+alphastar <- alpha # Do we need this?
-# 2) ---------------------------- pi0 outer loop ------------------------------ #
+# 3) --------------------------- iiter loop ----------------------------- #
-# Main loop for each value of pi0, the proportion of truly Null hypotheses
-#for(pi0 in seqpi0)
-#  {
-  # False discovery threshold
-#    if(adaptive)
-#      {
-    # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
-#        alphastar<- alpha/pi0
-#      } else {
-    # Non  adaptive control
-alphastar <- alpha
-#      }
-#    m1 <- round(m * (1-pi0))
-#    m0 <- m - m1
-#    DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
-#    DEflag <- as.logical(DEmeans)
-# 3) --------------------------- iiter inner loop ----------------------------- #
 for(i in seq(iiter)){
-      # Names for list elements
-#      pi0char <- as.character(round(pi0, 5))
 # 4) ------------------------ Permutation analysis ---------------------------- #
    x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
@@ Line 146: / Line 119: @@
 # Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
 # Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
    if(use.convest) {# Convest estimation
      FDlist[["convest"]][i] <- convest(pvalues, niter=50)
    }
+  # Siggenes or first principles
-        # Siggenes or first principles
    if( use.siggenes ) {
      require(siggenes, quietly=TRUE)
@@ Line 174: / Line 145: @@
      FDlist[["p0NW"]][i] <- round(pi0hat,8)
    }
-#      if(FDR) {
-      # Step down fdr Bengamini & Hochberg control
-#        Porder <- order(pvalues)
-      # Sorted pvalues
-#        cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
-      #Reordered DEflag
-#        DEorder  <- DEflag[Porder]
-#        rejected <- DEorder[!cutoff]
-#        accepted <- DEorder[cutoff]
-#        FD[i]  <- sum(!rejected)
-#        FND[i] <- sum(accepted)
-#      }
-#    }
-   # Writing to FDlist every iteration of p
-#    if(FDR) {
-#      FDlist[["V"]][[pi0char]] <- FD
-#      FDlist[["T"]][[pi0char]] <- FND
-#    }
    save(FDlist, file=file.path(datadir, outfile), compress=T)
 } # End of main loop
@@ Line 201: / Line 153: @@
 }
-#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
-# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
 #load(file.path("Data", "tmp"))
 #source("plotfun.R")
 #plotfun(FDlist)

Difference between revisions of "Golubsim.R"

Revision as of 02:07, 20 June 2007

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Site map

Tools