Difference between revisions of "FirstPrinciples.R"

Latest revision as of 07:58, 29 May 2007

Code snipits and programs written in R, S or S-PLUS library(limma) options(digits=2) packageDescription("limma", field="Version")

========================================================================
0) cDNA example analysing M matrix in MAList
========================================================================

simData <- function() {

Generating a RGList for transformation into MAList

 set.seed(2)
 n     <<- 4
 preps <<- 4

RG <- new("RGList") RG$R <- matrix(rnorm(n*preps,8,2), nc=preps) RG$G <- matrix(rnorm(n*preps,8,2), nc=preps)

Ordering genes same ass topTable for convenience

RG <<- RG[c(4,2,1,3),]

MA <<- MA.RG(RG) colnames(MA$M) <<- paste("array", 1:preps, sep="") rownames(MA$M) <<- paste("gene", 1:n, sep="") MA$M invisible() }

simData()

design <- rep(1, preps) fit <- lmFit(MA, design=design, weights=NULL) fit$coeff fit <- eBayes(fit) topTable(fit, adjust="none")

Checking M values

cbind(rowMeans=rowMeans(MA$M), fit$coef)

Ordinary t-statistics use unequal variance

ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma ordinary.t

Checking t-statistics, var.equal T/F is the same

check.t <- apply(MA$M,1, function(x){t.test(x, paired=FALSE,

                                           var.equal=TRUE)$statistic})

as.matrix(check.t) cbind(ordinary.t, check.t)

Sigma component - sd(x)

fit$sigma apply(MA$M, 1, sd)

Stderr component - se(x_bar)

drop(fit$stdev.unscaled) * fit$sigma apply(MA$M, 1, function(x){sd(x)/sqrt(length(x))})

========================================================================
2) Underlying lm.series fits a weighted linear regression
========================================================================

x <- as.matrix(design) Y <- t(MA$M)

lm.series, no weights usage lm.fit(x,Y)

lmfit <- lm.fit(x,Y) lmfit$coef

Same as rowmeans if no weights vector provided

rowMeans(MA$M)

lmFit coefficients

c(fit$coef)

gene 1, lm.fit(x,y)

lmfit <- lm.fit(x,Y) lmfit$coef

first principles

solve(t(x)%*%x)%*%t(x)%*%Y

========================================================================
3) Adding weights to genes
========================================================================

RG$weights <- matrix(1, n,preps) RG$weights[row(RG$weights)<col(RG$weights)] <- 0 RG$weights

MA <- MA.RG(RG)

design <- rep(1, preps) fit <- lmFit(MA, design=design, weights=MA$weights) fit$coeff fit <- eBayes(fit) topTable(fit, adjust="none")

fit$sigma

Sigma component - sd(x)

tmp <- MA$M tmp[MA$weights==0] <- NA apply(tmp, 1, sd, na.rm=TRUE)

Stderr component - se(x_bar)

drop(fit$stdev.unscaled) * fit$sigma apply(tmp, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE)

========================================================================
3) First principles using lm.wfit
========================================================================

fit on first gene only
y <- MA$M[1,]

W <- MA$weights

weighted loop over all four genes lm.wfit(x,y,w) ( remember Y is t(MA$M) )

for(i in 1:preps) {

 print(lm.wfit(x, Y[,i], W[i,])$coef)

} fit$coef

Equivalent subsetting/averaging out 0 weight information

for(i in 1:preps) {

 print(Y[,i] %*% W[i,] / sum(W[i,]))

}

first principles on gene 2

i <- 2 w <- t(MA$weights[i,]) y <- MA$M[i,] solve(t(x)%*%t(w)%*%w%*%x)%*%t(x)%*%t(w)%*%w%*%y

Modifying the first weight

w[1] <- 0.5 print(lm.wfit(x, y, w)$coef)

verification by multiplying weights by y

sum(w/sum(w) * y)

========================================================================
Adding blocks, duplicateCorrelation calls randomizedBlockFit (statmod)
corfit <- duplicateCorrelation(MA, ndups=1, block=c(1,1,2,2))
fit <- lmFit(MA, block=c(1,1,2,2), correlation=corfit$consensus)
========================================================================
========================================================================
3) Analysis using limma
========================================================================

block <- rep(1:2, each=2) corfit <- duplicateCorrelation(MA, ndups=1, block=block) fit <- lmFit(MA, block=block, correlation=corfit$consensus,

            weights=RG$weights)

design <- rep(1, preps) fit$coeff fit <- eBayes(fit) topTable(fit)

Block structure and 0 correlation identical to no blocking

fit2 <- lmFit(MA, block=block, correlation=0) fit2 <- eBayes(fit2) topTable(fit2)

fit3 <- lmFit(MA, correlation=0, block=block) fit3 <- eBayes(fit3) topTable(fit3)

for( cor in seq(0, 0.99, by=0.1) ) {

 fit <- lmFit(MA, block=block, correlation=cor)
 fit <- eBayes(fit)
 cat(sprintf("%4.2f", fit$p.value), "\n")

}

i <- 4 ndups <- 1 narrays <- preps A <- factor(rep(1:narrays, rep(ndups, narrays))) block <- rep(1:2, each=2) y <- MA$M[i,] X <- as.matrix(design)

under independence Z <- model.matrix(~0+factor(1:preps))

Z <- model.matrix(~0+block)

w <- MA$weights[i,] s <- randomizedBlockFit(y, X, Z, only.varcomp = TRUE,

                 maxit = 20)$varcomp

s2 <- randomizedBlockFit(y, X, Z, w, only.varcomp = TRUE,

                 maxit = 20)$varcomp

gls.series by first principles

ub <- unique(block) nblocks <- length(ub) Z <- matrix(block, narrays, nblocks) == matrix(ub, narrays,

            nblocks, byrow = TRUE)

V <- Z %*% (corfit$cor * t(Z)) diag(V) <- 1 wrs <- 1/sqrt(drop(MA$weights[i, ]))

cholV <- chol(V) y2 <- backsolve(cholV, y, transpose = TRUE) X2 <- backsolve(cholV, X, transpose = TRUE)

       out <- lm.fit(X2, y2)

Verification

fit$coef[4] out$coef

========================================================================
Section from lm.series which calls lm.fit, and lm.wfit
========================================================================
y <- as.vector(M[i, ])
obs <- is.finite(y)
if (sum(obs) > 0) {
X <- design[obs, , drop = FALSE]
y <- y[obs]
if (is.null(weights))
out <- lm.fit(X, y)
else {
w <- as.vector(weights[i, obs])
out <- lm.wfit(X, y, w)
}
========================================================================

fit <- lmFit(MA) names(fit)

ebayes adds df's, ted) (moderated) lods, F, and pvalues (usually adjusted)

fit2 <- ebayes(fit) names(fit2)

----------------------- Affymetrix approach for t-tests ------------------------ #

library(affy) eset <- new("exprSet") eset@exprs <- cbind(log2(RG$R), log2(RG$G))

design <- cbind(rep(1, preps*2), rep(0:1, each=preps)) fit <- lmFit(eset, design=design, weights=NULL) fit$coeff fit <- eBayes(fit) topTable(fit, coef=2, adjust="none")

fit$coef # First coef is M of group 1, second coef is the mean differences between groups for(i in 1:n) {

 print(t.test(eset@exprs[i,1:4], eset@exprs[i,5:8])$est)

}

ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma ordinary.t

Checking t-statistics (not paired because we are using M values)

check.t <- apply(exprs(eset),1, function(x){# var.equal T/F is the same

                                           n <- length(x)
                                           t.test(x[(n/2+1):n], x[1:(n/2)],
                                           paired=FALSE, var.equal=TRUE)$statistic})

check.t

----------------------- Subsetting out sigma component example ------------------------ #

MA <- new("MAList")
MA$M <- matrix(rnorm(100),nc=4)

fit <- lmFit(MA) fit <- eBayes(fit) topTable(fit, number=1)

Select first gene

rowID <- as.numeric(rownames(topTable(fit))[1]) fit$sigma[ rowID ] <- NA

topTable(fit, number=1)

First gene now different

fit <- eBayes(fit) topTable(fit, number=1)

Check gene 1 is now gene 4

topTable(fit,number=25)

x <- rep(1:4, 2:5)

Difference between revisions of "FirstPrinciples.R"

Latest revision as of 07:58, 29 May 2007

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Site map

Tools

@@ Line 1: / Line 1: @@
+# {{R}}
 library(limma)
 options(digits=2)
@@ Line 7: / Line 8: @@
 # ========================================================================
-set.seed(2)
+simData <- function() {
-n     <- 4
+#  Generating a RGList for transformation into MAList
-preps <- 4
-# ========================================================================
-# 1) Generating a RGList for transformation into MAList
-# ========================================================================
+  set.seed(2)
+  n     <<- 4
+  preps <<- 4
 RG <- new("RGList")
 RG$R <- matrix(rnorm(n*preps,8,2), nc=preps)
 RG$G <- matrix(rnorm(n*preps,8,2), nc=preps)
-RG$printer <-  structure(list(ngrid.c=1, ngrid.r=1, nspot.c=2, nspot.r=2),
-                         class="printlayout")
 # Ordering genes same ass topTable for convenience
-RG <- RG[c(4,2,1,3),]
+RG <<- RG[c(4,2,1,3),]
-MA <- MA.RG(RG)
+MA <<- MA.RG(RG)
-colnames(MA$M) <- paste("array", 1:preps, sep="")
+colnames(MA$M) <<- paste("array", 1:preps, sep="")
-rownames(MA$M) <- paste("gene", 1:n, sep="")
+rownames(MA$M) <<- paste("gene", 1:n, sep="")
 MA$M
+invisible()
+}
+simData()
 design <- rep(1, preps)
@@ Line 35: / Line 37: @@
 # Checking M values
-rowMeans(MA$M)
+cbind(rowMeans=rowMeans(MA$M), fit$coef)
 # Ordinary t-statistics use unequal variance
 ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma
 ordinary.t
-# Checking t-statistics (not paired because we are using M values)
+# Checking t-statistics, var.equal T/F is the same
-for(i in 1:n) { # var.equal T/F is the same
+check.t <- apply(MA$M,1, function(x){t.test(x, paired=FALSE,
-  print(t.test(MA$M[i,], paired=FALSE, var.equal=TRUE)$statistic)
+                                            var.equal=TRUE)$statistic})
-}
+as.matrix(check.t)
+cbind(ordinary.t, check.t)
 # Sigma component - sd(x)
@@ Line 55: / Line 58: @@
 # ========================================================================
-# 2) Underlying lm.series fits a linear regression (weighted)
+# 2) Underlying lm.series fits a weighted linear regression
 # ========================================================================
@@ Line 87: / Line 90: @@
 MA <- MA.RG(RG)
 design <- rep(1, preps)
 fit <- lmFit(MA, design=design, weights=MA$weights)
@@ Line 93: / Line 97: @@
 topTable(fit, adjust="none")
+fit$sigma
 # Sigma component - sd(x)
-MA$M[MA$weights==0] <- NA
+tmp <- MA$M
-fit$sigma
+tmp[MA$weights==0] <- NA
-apply(MA$M, 1, sd, na.rm=TRUE)
+apply(tmp, 1, sd, na.rm=TRUE)
 # Stderr component - se(x_bar)
 drop(fit$stdev.unscaled) * fit$sigma
-apply(MA$M, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE)
+apply(tmp, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE)
 # ========================================================================
@@ Line 151: / Line 156: @@
 topTable(fit)
-fit$coef
+# Block structure and 0 correlation identical to no blocking
+fit2 <- lmFit(MA, block=block, correlation=0)
+fit2 <- eBayes(fit2)
+topTable(fit2)
+fit3 <- lmFit(MA, correlation=0, block=block)
+fit3 <- eBayes(fit3)
+topTable(fit3)
+for( cor in seq(0, 0.99, by=0.1) ) {
+  fit <- lmFit(MA, block=block, correlation=cor)
+  fit <- eBayes(fit)
+  cat(sprintf("%4.2f", fit$p.value), "\n")
+}
 i <- 4
@@ Line 233: / Line 252: @@
 # Checking t-statistics (not paired because we are using M values)
-for(i in 1:n) { # var.equal T/F is the same
+check.t <- apply(exprs(eset),1, function(x){# var.equal T/F is the same
-  print(t.test(exprs(eset)[i,(1:n)+preps],exprs(eset)[i,(1:n)] , paired=FALSE, var.equal=TRUE)$statistic)
+                                            n <- length(x)
-}
+                                            t.test(x[(n/2+1):n], x[1:(n/2)],
+                                            paired=FALSE, var.equal=TRUE)$statistic})
+check.t
 #  ----------------------- Subsetting out sigma component example ------------------------ #
-MA <- new("MAList")
+#MA <- new("MAList")
-MA$M <- matrix(rnorm(100),nc=4)
+#MA$M <- matrix(rnorm(100),nc=4)
 fit <- lmFit(MA)
@@ Line 247: / Line 267: @@
 # Select first gene
-as.numeric(rownames(topTable(fit))[1])
+rowID <- as.numeric(rownames(topTable(fit))[1])
-fit$sigma[as.numeric(rownames(topTable(fit))[1])] <- NA
+fit$sigma[ rowID ] <- NA
 topTable(fit, number=1)
@@ Line 256: / Line 276: @@
 topTable(fit, number=1)
-# Check
+# Check gene 1 is now gene 4
 topTable(fit,number=25)
+x <- rep(1:4, 2:5)