Difference between revisions of "ExploreGolub.R"

Latest revision as of 03:08, 19 June 2007

Code snipits and programs written in R, S or S-PLUS {{#security:*|Sven,Mik}} library(multtest) library(locfdr) library(NutriTools) library(limma) library(siggenes)

data(golub)

1) Golub struture - Affymetrix dataset

dim(golub)

golub.cl 0= ALL (27 slides) 1=AML (11 slides)

table(golub.cl)

2) Exploratory analysis
Clustering
Slide 21 different from others in ALL

clusterPlot(t(golub[,golub.cl==0]))

Slide 2 different from others in ALL

clusterPlot(t(golub[,golub.cl==1]))

library(affy) mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE)

nbreaks <- 100 x <- rowMeans(golub[,golub.cl==0]) y <- rowMeans(golub[,golub.cl==1])

mdplot(cbind(x,y))

hist(y-x, breaks=nbreaks)

Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
Mixture could be of exponential and low expressing distribution on raw scale

hist((y+x)/2, breaks=nbreaks)

hist(2^(y-x), breaks=nbreaks)

Abundance, observed exponential like decay on raw scale

hist(2^((y+x)/2), breaks=nbreaks) qqplot(rexp(10000,1), 2^((y+x)/2)) abline(0,1)

2) Analysis

design <- cbind("ALL"=1, "AMLvsALL"=golub.cl) fit <- lmFit(golub, design) fit <- eBayes(fit) p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH") pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

convest(fit$p.value[, "AMLvsALL"]) pi0.est(fit$p.value[, "AMLvsALL"])

Check t is moderated t-statistic

topTable(fit, coef="AMLvsALL", number=1) fit$t[829,"AMLvsALL"]

3) Efrons Bayesian H0 estimation

locfdr(fit$t[,"AMLvsALL"])

z-transformed locfdr

z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior)) locfdr(z)

Sampling of golub.cl

n <- 11 r <- 6

m <- nCr(n,r) AML.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(AML.sampleskey)) {
   AML.sampleskey <- value
   i <-i+1
   print(i)
 }

}

n <- 27 r <- 6

ALL.samples <- list() i <- 1 while(i <= m) {

 value <- sort(sample(n, r, replace=FALSE))
 key   <- paste(value, collapse=" ")
 if(is.null(ALL.sampleskey)) {
   ALL.sampleskey <- value
   i <-i+1
   print(i)
 }

}

Need to randomise the way the keys are drawn

ALLdraw <- sample(m,m, replace=FALSE) AMLdraw <- sample(m,m, replace=FALSE)

golub.ALL <- golub[,golub.cl==0] golub.AML <- golub[,golub.cl==1]

golub.clSample <- rep(0:1, each=6)

design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample) for(i in 1:10) {

 x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
            golub.AML[,AML.samples[[AMLdraw[i]]]]
            )

 fit <- lmFit(x, design.Sample) 
 fit <- eBayes(fit)
 p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
 pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)

 pi0.est(fit$p.value[,"AMLvsALL"])$p0
 convest(fit$p.value[,"AMLvsALL"])

3) Efrons Bayesian H0 estimation

 locfdr(fit$t[,"AMLvsALL"])

}

Difference between revisions of "ExploreGolub.R"

Latest revision as of 03:08, 19 June 2007

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Site map

Tools

@@ Line 1: / Line 1: @@
+# {{R}} {{#security:*|Sven,Mik}}
 library(multtest)
+library(locfdr)
+library(NutriTools)
+library(limma)
+library(siggenes)
 data(golub)
-nbreaks=100
+# 1) Golub struture - Affymetrix dataset
+dim(golub)
+# golub.cl 0= ALL (27 slides) 1=AML (11 slides)
+table(golub.cl)
+# 2) Exploratory analysis
+# Clustering
+# Slide 21 different from others in ALL
+clusterPlot(t(golub[,golub.cl==0]))
+# Slide 2 different from others in ALL
+clusterPlot(t(golub[,golub.cl==1]))
+library(affy)
+mva.pairs(golub[,golub.cl==1][,1:5], log.it=FALSE)
+nbreaks <- 100
 x <- rowMeans(golub[,golub.cl==0])
 y <- rowMeans(golub[,golub.cl==1])
 mdplot(cbind(x,y))
 hist(y-x, breaks=nbreaks)
-hist(2^(y-x), breaks=nbreaks)
 # Abundance apears to be a slight mixture (hump more prevelant with background subtraction)
 # Mixture could be of exponential and low expressing distribution on raw scale
 hist((y+x)/2, breaks=nbreaks)
+hist(2^(y-x), breaks=nbreaks)
 # Abundance, observed exponential like decay on raw scale
 hist(2^((y+x)/2), breaks=nbreaks)
 qqplot(rexp(10000,1), 2^((y+x)/2))
 abline(0,1)
+# 2) Analysis
+design <- cbind("ALL"=1, "AMLvsALL"=golub.cl)
+fit <- lmFit(golub, design)
+fit <- eBayes(fit)
+p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
+pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
+convest(fit$p.value[, "AMLvsALL"])
+pi0.est(fit$p.value[, "AMLvsALL"])
+# Check t is moderated t-statistic
+topTable(fit, coef="AMLvsALL", number=1)
+fit$t[829,"AMLvsALL"]
+# 3) Efrons Bayesian H0 estimation
+locfdr(fit$t[,"AMLvsALL"])
+# z-transformed locfdr
+z <- qnorm(pt(fit$t[,"AMLvsALL"], fit$df.residual + fit$df.prior))
+locfdr(z)
+# Sampling of golub.cl
+n <- 11
+r <-  6
+m <- nCr(n,r)
+AML.samples <- list()
+i <- 1
+while(i <= m) {
+  value <- sort(sample(n, r, replace=FALSE))
+  key   <- paste(value, collapse=" ")
+  if(is.null(AML.samples[[key]])) {
+    AML.samples[[key]] <- value
+    i <-i+1
+    print(i)
+  }
+}
+n <- 27
+r <-  6
+ALL.samples <- list()
+i <- 1
+while(i <= m) {
+  value <- sort(sample(n, r, replace=FALSE))
+  key   <- paste(value, collapse=" ")
+  if(is.null(ALL.samples[[key]])) {
+    ALL.samples[[key]] <- value
+    i <-i+1
+    print(i)
+  }
+}
+# Need to randomise the way the keys are drawn
+ALLdraw <- sample(m,m, replace=FALSE)
+AMLdraw <- sample(m,m, replace=FALSE)
+golub.ALL <- golub[,golub.cl==0]
+golub.AML <- golub[,golub.cl==1]
+golub.clSample <- rep(0:1, each=6)
+design.Sample <- cbind("ALL"=1, "AMLvsALL"=golub.clSample)
+for(i in 1:10) {
+  x <- cbind(golub.ALL[,ALL.samples[[ALLdraw[i]]]],
+             golub.AML[,AML.samples[[AMLdraw[i]]]]
+             )
+  fit <- lmFit(x, design.Sample)
+  fit <- eBayes(fit)
+  p.adjusted <- p.adjust(fit$p.value[,"AMLvsALL"], method="BH")
+  pGenes <- sum(p.adjusted < 0.05, na.rm=TRUE)
+  pi0.est(fit$p.value[,"AMLvsALL"])$p0
+  convest(fit$p.value[,"AMLvsALL"])
+# 3) Efrons Bayesian H0 estimation
+  locfdr(fit$t[,"AMLvsALL"])
+}