Difference between revisions of "Golubsim.R"

Revision as of 02:05, 20 June 2007

Code snipits and programs written in R, S or S-PLUS{{#security:*|Sven}} nCr <- function(n,r) {

 factorial(n) / (factorial(n-r) * factorial(r))

} golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,

                    adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
                    datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
                    ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
                    pad=TRUE, FDR = FALSE)

{

----------------------------------------------------------------------------- #
m = numeric number of genes to simulate
itter = numeric number of iterations
sepi0 = numeric vector of True pi0 values to simulate
alpha = numeric number [0,1] type I probability test cutoff
adaptive = logical Finner adaptive FDR control
use.siggenes = logical to use pi0.est function in siggenes package
lambdaseq = numeric vector lambda range for pi0.est smoothing splines
ncs.value = character string "paper " or "max"
use.convest = logical to use convest function in limma package
pad = logical Populate padded lists for speed
----------------------------------------------------------------------------- #

1) ---------------------------------- Setup --------------------------------- #
Check and create directories for save(...) of objects

if(length(dir(datadir))==0) {

 create.dir(datadir)

}

Check and install multtest

if(!require(multtest, quietly=TRUE)) {

 cat("Package:Multtest not available attempting to install\n")
 install.packages("multtest")

}

data(golub) m <- nrow(golub)

AML slides=11

n <- 11 r <- 6

m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(AML.sampleskey)) {
   AML.sampleskey <- value
   i <-i+1
   print(i)
 }

}

ALL slides=27

n <- 27 r <- 6

ALL.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(ALL.sampleskey)) {
   ALL.sampleskey <- value
   i <-i+1
   print(i)
 }

}

Need to randomise the way the keys are drawn

ALLdraw <- sample(m,m, replace=FALSE) AMLdraw <- sample(m,m, replace=FALSE)

golub.ALL <- golub[,golub.cl==0] golub.AML <- golub[,golub.cl==1]

golub.clSample <- rep(0:1, each=6)

design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)

Check and install limma

if(!require(limma, quietly=TRUE)) {

 cat("Package:limma not available attempting to install\n")
 install.packages("limma")

}

Check and install siggenes

if(!require(siggenes, quietly=TRUE)) {

 cat("Package:siggenes not available attempting to install\n")
 install.packages("siggenes")

}

set.seed(seed) starttime <- proc.time()[3]

Set up False discovery and False non discovery vectors
FD <- rep(NA, iiter)
FND <- rep(NA, iiter)

Output lists

FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())

2) ---------------------------- pi0 outer loop ------------------------------ #
Main loop for each value of pi0, the proportion of truly Null hypotheses

for(pi0 in seqpi0)
{

 # False discovery threshold

if(adaptive)
{

   # Adaptive control adjustment on true pi0 (finner & Roters, 2002)

alphastar<- alpha/pi0
} else {

   # Non  adaptive control

alphastar <- alpha

}

m1 <- round(m * (1-pi0))
m0 <- m - m1
DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
DEflag <- as.logical(DEmeans)

3) --------------------------- iiter inner loop ----------------------------- #

for(i in seq(iiter)){

     # Names for list elements

pi0char <- as.character(round(pi0, 5))

4) ------------------------ Permutation analysis ---------------------------- #

 x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
            golub.AML[,AML.samples[[AMLdraw[i]]]]
            )

 fit <- lmFit(x, design.Sample) 
 fit <- eBayes(fit)
 pvalues <- fit$p.value[,"AMLvsALL"]

p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

5) ----------------------------- Saving statistics -------------------------- #
pi0 estimation
Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
Maximum value of lambda needs to be less than 1 estimated by (m-1)/m

 if(use.convest) {# Convest estimation
   FDlist"convest"[i] <- convest(pvalues, niter=50)
 }
       
       # Siggenes or first principles
 if( use.siggenes ) {
   require(siggenes, quietly=TRUE)
         # A) weighting by 1-lambda
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=1-lambdaseq)$p0
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
                     ncs.weights=NULL)$p0
   FDlist"p0NW"[i] <- round(pi0hat,8)
 } else {
   pi0lambda <- tapply(lambdaseq, lambdaseq,
                       function(x){sum(pvalues>x)/(m*(1-x))})
         # A) weighting by 1-lambda
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq, 
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0"[i] <- round(pi0hat,8) 
         # B) no 1-lambda weighting
   pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
                               df=3)$y[length(lambdaseq)],1)
   FDlist"p0NW"[i] <- round(pi0hat,8) 
 }

if(FDR) {

     # Step down fdr Bengamini & Hochberg control

Porder <- order(pvalues)

     # Sorted pvalues

cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m

     #Reordered DEflag

DEorder <- DEflag[Porder]
rejected <- DEorder[!cutoff]
accepted <- DEorder[cutoff]
FD[i] <- sum(!rejected)
FND[i] <- sum(accepted)
}
}

  # Writing to FDlist every iteration of p

if(FDR) {
FDlist"V"pi0char <- FD
FDlist"T"pi0char <- FND
}

 save(FDlist, file=file.path(datadir, outfile), compress=T)

} # End of main loop

Return simulation time

endtime <- proc.time()[3] - starttime return(endtime) }

fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
load(file.path("Data", "tmp"))

source("plotfun.R")
plotfun(FDlist)

Difference between revisions of "Golubsim.R"

Revision as of 02:05, 20 June 2007

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Recent activity

Site map

Tools

@@ Line 1: / Line 1: @@
 # {{R}}{{#security:*|Sven}}
+nCr <- function(n,r) {
+  factorial(n) / (factorial(n-r) * factorial(r))
+}
+golubsim <- function(outfile = "tmp", iiter=nCr(11,6), seqpi0=1, alpha = 0.05,
+                     adaptive=FALSE, mu0=0, mu1=3, sigma=1, seed=1,
+                     datadir = "Data", lambdaseq = seq(0, 0.95, by=0.05),
+                     ncs.value="paper", use.siggenes=TRUE, use.convest = TRUE,
+                     pad=TRUE, FDR = FALSE)
+{
+# ----------------------------------------------------------------------------- #
+# m            = numeric number of genes to simulate
+# itter        = numeric number of iterations
+# sepi0        = numeric vector of True pi0 values to simulate
+# alpha        = numeric number [0,1] type I probability test cutoff
+# adaptive     = logical Finner adaptive FDR control
+# use.siggenes = logical to use pi0.est function in siggenes package
+# lambdaseq    = numeric vector lambda range for pi0.est smoothing splines
+# ncs.value    =  character string "paper  " or "max"
+# use.convest  = logical to use convest function in limma package
+# pad          = logical Populate padded lists for speed
+# ----------------------------------------------------------------------------- #
+# 1) ---------------------------------- Setup --------------------------------- #
+# Check and create directories for save(...) of objects
+if(length(dir(datadir))==0) {
+  create.dir(datadir)
+}
+# Check and install multtest
+if(!require(multtest, quietly=TRUE)) {
+  cat("Package:Multtest not available attempting to install\n")
+  install.packages("multtest")
+}
+data(golub)
+m <- nrow(golub)
+# AML slides=11
+n <- 11
+r <-  6
+m <- nCr(n,r)
+AML.samples <- list()
+i <- 1
+while(i <= m) {
+  value <- sort(sample(n, r, replace=FALSE))
+  key   <- paste(value, collapse=" ")
+  if(is.null(AML.samples[[key]])) {
+    AML.samples[[key]] <- value
+    i <-i+1
+    print(i)
+  }
+}
+# ALL slides=27
+n <- 27
+r <-  6
+ALL.samples <- list()
+i <- 1
+while(i <= m) {
+  value <- sort(sample(n, r, replace=FALSE))
+  key   <- paste(value, collapse=" ")
+  if(is.null(ALL.samples[[key]])) {
+    ALL.samples[[key]] <- value
+    i <-i+1
+    print(i)
+  }
+}
+# Need to randomise the way the keys are drawn
+ALLdraw <- sample(m,m, replace=FALSE)
+AMLdraw <- sample(m,m, replace=FALSE)
+golub.ALL <- golub[,golub.cl==0]
+golub.AML <- golub[,golub.cl==1]
+golub.clSample <- rep(0:1, each=6)
+design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
+# Check and install limma
+if(!require(limma, quietly=TRUE)) {
+  cat("Package:limma not available attempting to install\n")
+  install.packages("limma")
+}
+# Check and install siggenes
+if(!require(siggenes, quietly=TRUE)) {
+  cat("Package:siggenes not available attempting to install\n")
+  install.packages("siggenes")
+}
+set.seed(seed)
+starttime <- proc.time()[3]
+# Set up False discovery and False non discovery vectors
+#FD <- rep(NA, iiter)
+#FND <- rep(NA, iiter)
+# Output lists
+FDlist <- list(m=m, alpha=alpha, p0=list(),p0NW=list(), convest=list())
+# 2) ---------------------------- pi0 outer loop ------------------------------ #
+# Main loop for each value of pi0, the proportion of truly Null hypotheses
+#for(pi0 in seqpi0)
+#  {
+  # False discovery threshold
+#    if(adaptive)
+#      {
+    # Adaptive control adjustment on true pi0 (finner & Roters, 2002)
+#        alphastar<- alpha/pi0
+#      } else {
+    # Non  adaptive control
+alphastar <- alpha
+#      }
+#    m1 <- round(m * (1-pi0))
+#    m0 <- m - m1
+#    DEmeans <- c(rep(mu1,m1), rep(mu0,m0))
+#    DEflag <- as.logical(DEmeans)
+# 3) --------------------------- iiter inner loop ----------------------------- #
+for(i in seq(iiter)){
+      # Names for list elements
+#      pi0char <- as.character(round(pi0, 5))
+# 4) ------------------------ Permutation analysis ---------------------------- #
+  x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
+             golub.AML[,AML.samples[[AMLdraw[i]]]]
+             )
+  fit <- lmFit(x, design.Sample)
+  fit <- eBayes(fit)
+  pvalues <- fit$p.value[,"AMLvsALL"]
+#  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
+#  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
+# 5) ----------------------------- Saving statistics -------------------------- #
+# pi0 estimation
+# Storey uses seq(0,0.95, by=0.05) for lambda, estimates at pi(0.95)
+# Maximum value of lambda needs to be less than 1 estimated by (m-1)/m
+  if(use.convest) {# Convest estimation
+    FDlist[["convest"]][i] <- convest(pvalues, niter=50)
+  }
+        # Siggenes or first principles
+  if( use.siggenes ) {
+    require(siggenes, quietly=TRUE)
+          # A) weighting by 1-lambda
+    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+                      ncs.weights=1-lambdaseq)$p0
+    FDlist[["p0"]][i] <- round(pi0hat,8)
+          # B) no 1-lambda weighting
+    pi0hat <- pi0.est(pvalues, lambdaseq, ncs.value=ncs.value,
+                      ncs.weights=NULL)$p0
+    FDlist[["p0NW"]][i] <- round(pi0hat,8)
+  } else {
+    pi0lambda <- tapply(lambdaseq, lambdaseq,
+                        function(x){sum(pvalues>x)/(m*(1-x))})
+          # A) weighting by 1-lambda
+    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda, w=1-lambdaseq,
+                                df=3)$y[length(lambdaseq)],1)
+    FDlist[["p0"]][i] <- round(pi0hat,8)
+          # B) no 1-lambda weighting
+    pi0hat <- min(smooth.spline(lambdaseq, pi0lambda,
+                                df=3)$y[length(lambdaseq)],1)
+    FDlist[["p0NW"]][i] <- round(pi0hat,8)
+  }
+#      if(FDR) {
+      # Step down fdr Bengamini & Hochberg control
+#        Porder <- order(pvalues)
+      # Sorted pvalues
+#        cutoff <- pvalues[Porder] > (seq(m) * alphastar)/m
+      #Reordered DEflag
+#        DEorder  <- DEflag[Porder]
+#        rejected <- DEorder[!cutoff]
+#        accepted <- DEorder[cutoff]
+#        FD[i]  <- sum(!rejected)
+#        FND[i] <- sum(accepted)
+#      }
+#    }
+   # Writing to FDlist every iteration of p
+#    if(FDR) {
+#      FDlist[["V"]][[pi0char]] <- FD
+#      FDlist[["T"]][[pi0char]] <- FND
+#    }
+  save(FDlist, file=file.path(datadir, outfile), compress=T)
+} # End of main loop
+# Return simulation time
+endtime <- proc.time()[3] - starttime
+return(endtime)
+}
+#fdrsim(outfile = "tmp", m=100, iiter=1000, seqpi0=1, #seqpi0= seq(0,1, length=5),
+# alpha = 0.05, adaptive=F, mu0=0, mu1=3, sigma=1,seed=2, Estpi0=TRUE)
+#load(file.path("Data", "tmp"))
+#source("plotfun.R")
+#plotfun(FDlist)