Difference between revisions of "FirstPrinciples.R"
(Adding sigma subset example) |
(Test {{R}}) |
||
(5 intermediate revisions by the same user not shown) | |||
Line 1: | Line 1: | ||
+ | # {{R}} | ||
library(limma) | library(limma) | ||
options(digits=2) | options(digits=2) | ||
Line 7: | Line 8: | ||
# ======================================================================== | # ======================================================================== | ||
− | + | simData <- function() { | |
− | + | # Generating a RGList for transformation into MAList | |
− | |||
− | |||
− | |||
− | # | ||
− | |||
+ | set.seed(2) | ||
+ | n <<- 4 | ||
+ | preps <<- 4 | ||
RG <- new("RGList") | RG <- new("RGList") | ||
RG$R <- matrix(rnorm(n*preps,8,2), nc=preps) | RG$R <- matrix(rnorm(n*preps,8,2), nc=preps) | ||
RG$G <- matrix(rnorm(n*preps,8,2), nc=preps) | RG$G <- matrix(rnorm(n*preps,8,2), nc=preps) | ||
− | |||
− | |||
# Ordering genes same ass topTable for convenience | # Ordering genes same ass topTable for convenience | ||
− | RG <- RG[c(4,2,1,3),] | + | RG <<- RG[c(4,2,1,3),] |
− | MA <- MA.RG(RG) | + | MA <<- MA.RG(RG) |
− | colnames(MA$M) <- paste("array", 1:preps, sep="") | + | colnames(MA$M) <<- paste("array", 1:preps, sep="") |
− | rownames(MA$M) <- paste("gene", 1:n, sep="") | + | rownames(MA$M) <<- paste("gene", 1:n, sep="") |
MA$M | MA$M | ||
+ | invisible() | ||
+ | } | ||
+ | |||
+ | |||
+ | simData() | ||
design <- rep(1, preps) | design <- rep(1, preps) | ||
Line 35: | Line 37: | ||
# Checking M values | # Checking M values | ||
− | rowMeans(MA$M) | + | cbind(rowMeans=rowMeans(MA$M), fit$coef) |
# Ordinary t-statistics use unequal variance | # Ordinary t-statistics use unequal variance | ||
ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma | ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma | ||
ordinary.t | ordinary.t | ||
− | # Checking t-statistics | + | # Checking t-statistics, var.equal T/F is the same |
− | + | check.t <- apply(MA$M,1, function(x){t.test(x, paired=FALSE, | |
− | + | var.equal=TRUE)$statistic}) | |
− | + | as.matrix(check.t) | |
+ | cbind(ordinary.t, check.t) | ||
# Sigma component - sd(x) | # Sigma component - sd(x) | ||
Line 55: | Line 58: | ||
# ======================================================================== | # ======================================================================== | ||
− | # 2) Underlying lm.series fits a linear regression | + | # 2) Underlying lm.series fits a weighted linear regression |
# ======================================================================== | # ======================================================================== | ||
Line 87: | Line 90: | ||
MA <- MA.RG(RG) | MA <- MA.RG(RG) | ||
+ | |||
design <- rep(1, preps) | design <- rep(1, preps) | ||
fit <- lmFit(MA, design=design, weights=MA$weights) | fit <- lmFit(MA, design=design, weights=MA$weights) | ||
Line 93: | Line 97: | ||
topTable(fit, adjust="none") | topTable(fit, adjust="none") | ||
+ | fit$sigma | ||
# Sigma component - sd(x) | # Sigma component - sd(x) | ||
− | MA$M[MA$weights==0] <- NA | + | tmp <- MA$M |
− | + | tmp[MA$weights==0] <- NA | |
− | apply( | + | apply(tmp, 1, sd, na.rm=TRUE) |
# Stderr component - se(x_bar) | # Stderr component - se(x_bar) | ||
drop(fit$stdev.unscaled) * fit$sigma | drop(fit$stdev.unscaled) * fit$sigma | ||
− | apply( | + | apply(tmp, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE) |
# ======================================================================== | # ======================================================================== | ||
Line 151: | Line 156: | ||
topTable(fit) | topTable(fit) | ||
− | fit$ | + | # Block structure and 0 correlation identical to no blocking |
+ | fit2 <- lmFit(MA, block=block, correlation=0) | ||
+ | fit2 <- eBayes(fit2) | ||
+ | topTable(fit2) | ||
+ | |||
+ | fit3 <- lmFit(MA, correlation=0, block=block) | ||
+ | fit3 <- eBayes(fit3) | ||
+ | topTable(fit3) | ||
+ | |||
+ | for( cor in seq(0, 0.99, by=0.1) ) { | ||
+ | fit <- lmFit(MA, block=block, correlation=cor) | ||
+ | fit <- eBayes(fit) | ||
+ | cat(sprintf("%4.2f", fit$p.value), "\n") | ||
+ | } | ||
+ | |||
i <- 4 | i <- 4 | ||
Line 233: | Line 252: | ||
# Checking t-statistics (not paired because we are using M values) | # Checking t-statistics (not paired because we are using M values) | ||
− | + | check.t <- apply(exprs(eset),1, function(x){# var.equal T/F is the same | |
− | + | n <- length(x) | |
− | + | t.test(x[(n/2+1):n], x[1:(n/2)], | |
− | + | paired=FALSE, var.equal=TRUE)$statistic}) | |
+ | check.t | ||
# ----------------------- Subsetting out sigma component example ------------------------ # | # ----------------------- Subsetting out sigma component example ------------------------ # | ||
− | MA <- new("MAList") | + | #MA <- new("MAList") |
− | MA$M <- matrix(rnorm(100),nc=4) | + | #MA$M <- matrix(rnorm(100),nc=4) |
fit <- lmFit(MA) | fit <- lmFit(MA) | ||
Line 247: | Line 267: | ||
# Select first gene | # Select first gene | ||
− | as.numeric(rownames(topTable(fit))[1]) | + | rowID <- as.numeric(rownames(topTable(fit))[1]) |
− | fit$sigma[ | + | fit$sigma[ rowID ] <- NA |
topTable(fit, number=1) | topTable(fit, number=1) | ||
Line 256: | Line 276: | ||
topTable(fit, number=1) | topTable(fit, number=1) | ||
− | # Check | + | # Check gene 1 is now gene 4 |
topTable(fit,number=25) | topTable(fit,number=25) | ||
+ | |||
+ | |||
+ | x <- rep(1:4, 2:5) |
Latest revision as of 07:58, 29 May 2007
Code snipits and programs written in R, S or S-PLUS library(limma) options(digits=2) packageDescription("limma", field="Version")
- ========================================================================
- 0) cDNA example analysing M matrix in MAList
- ========================================================================
simData <- function() {
- Generating a RGList for transformation into MAList
set.seed(2) n <<- 4 preps <<- 4
RG <- new("RGList") RG$R <- matrix(rnorm(n*preps,8,2), nc=preps) RG$G <- matrix(rnorm(n*preps,8,2), nc=preps)
- Ordering genes same ass topTable for convenience
RG <<- RG[c(4,2,1,3),]
MA <<- MA.RG(RG) colnames(MA$M) <<- paste("array", 1:preps, sep="") rownames(MA$M) <<- paste("gene", 1:n, sep="") MA$M invisible() }
simData()
design <- rep(1, preps) fit <- lmFit(MA, design=design, weights=NULL) fit$coeff fit <- eBayes(fit) topTable(fit, adjust="none")
- Checking M values
cbind(rowMeans=rowMeans(MA$M), fit$coef)
- Ordinary t-statistics use unequal variance
ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma ordinary.t
- Checking t-statistics, var.equal T/F is the same
check.t <- apply(MA$M,1, function(x){t.test(x, paired=FALSE,
var.equal=TRUE)$statistic})
as.matrix(check.t) cbind(ordinary.t, check.t)
- Sigma component - sd(x)
fit$sigma apply(MA$M, 1, sd)
- Stderr component - se(x_bar)
drop(fit$stdev.unscaled) * fit$sigma apply(MA$M, 1, function(x){sd(x)/sqrt(length(x))})
- ========================================================================
- 2) Underlying lm.series fits a weighted linear regression
- ========================================================================
x <- as.matrix(design) Y <- t(MA$M)
- lm.series, no weights usage lm.fit(x,Y)
lmfit <- lm.fit(x,Y) lmfit$coef
- Same as rowmeans if no weights vector provided
rowMeans(MA$M)
- lmFit coefficients
c(fit$coef)
- gene 1, lm.fit(x,y)
lmfit <- lm.fit(x,Y) lmfit$coef
- first principles
solve(t(x)%*%x)%*%t(x)%*%Y
- ========================================================================
- 3) Adding weights to genes
- ========================================================================
RG$weights <- matrix(1, n,preps) RG$weights[row(RG$weights)<col(RG$weights)] <- 0 RG$weights
MA <- MA.RG(RG)
design <- rep(1, preps) fit <- lmFit(MA, design=design, weights=MA$weights) fit$coeff fit <- eBayes(fit) topTable(fit, adjust="none")
fit$sigma
- Sigma component - sd(x)
tmp <- MA$M tmp[MA$weights==0] <- NA apply(tmp, 1, sd, na.rm=TRUE)
- Stderr component - se(x_bar)
drop(fit$stdev.unscaled) * fit$sigma apply(tmp, 1, function(x,...){sd(x,...)/sqrt(length(x[!is.na(x)]))}, na.rm=TRUE)
- ========================================================================
- 3) First principles using lm.wfit
- ========================================================================
- fit on first gene only
- y <- MA$M[1,]
W <- MA$weights
- weighted loop over all four genes lm.wfit(x,y,w) ( remember Y is t(MA$M) )
for(i in 1:preps) {
print(lm.wfit(x, Y[,i], W[i,])$coef)
} fit$coef
- Equivalent subsetting/averaging out 0 weight information
for(i in 1:preps) {
print(Y[,i] %*% W[i,] / sum(W[i,]))
}
- first principles on gene 2
i <- 2 w <- t(MA$weights[i,]) y <- MA$M[i,] solve(t(x)%*%t(w)%*%w%*%x)%*%t(x)%*%t(w)%*%w%*%y
- Modifying the first weight
w[1] <- 0.5 print(lm.wfit(x, y, w)$coef)
- verification by multiplying weights by y
sum(w/sum(w) * y)
- ========================================================================
- Adding blocks, duplicateCorrelation calls randomizedBlockFit (statmod)
- corfit <- duplicateCorrelation(MA, ndups=1, block=c(1,1,2,2))
- fit <- lmFit(MA, block=c(1,1,2,2), correlation=corfit$consensus)
- ========================================================================
- ========================================================================
- 3) Analysis using limma
- ========================================================================
block <- rep(1:2, each=2) corfit <- duplicateCorrelation(MA, ndups=1, block=block) fit <- lmFit(MA, block=block, correlation=corfit$consensus,
weights=RG$weights)
design <- rep(1, preps) fit$coeff fit <- eBayes(fit) topTable(fit)
- Block structure and 0 correlation identical to no blocking
fit2 <- lmFit(MA, block=block, correlation=0) fit2 <- eBayes(fit2) topTable(fit2)
fit3 <- lmFit(MA, correlation=0, block=block) fit3 <- eBayes(fit3) topTable(fit3)
for( cor in seq(0, 0.99, by=0.1) ) {
fit <- lmFit(MA, block=block, correlation=cor) fit <- eBayes(fit) cat(sprintf("%4.2f", fit$p.value), "\n")
}
i <- 4
ndups <- 1
narrays <- preps
A <- factor(rep(1:narrays, rep(ndups, narrays)))
block <- rep(1:2, each=2)
y <- MA$M[i,]
X <- as.matrix(design)
- under independence Z <- model.matrix(~0+factor(1:preps))
Z <- model.matrix(~0+block)
w <- MA$weights[i,] s <- randomizedBlockFit(y, X, Z, only.varcomp = TRUE,
maxit = 20)$varcomp
s2 <- randomizedBlockFit(y, X, Z, w, only.varcomp = TRUE,
maxit = 20)$varcomp
- gls.series by first principles
ub <- unique(block) nblocks <- length(ub) Z <- matrix(block, narrays, nblocks) == matrix(ub, narrays,
nblocks, byrow = TRUE)
V <- Z %*% (corfit$cor * t(Z)) diag(V) <- 1 wrs <- 1/sqrt(drop(MA$weights[i, ]))
cholV <- chol(V) y2 <- backsolve(cholV, y, transpose = TRUE) X2 <- backsolve(cholV, X, transpose = TRUE)
out <- lm.fit(X2, y2)
- Verification
fit$coef[4] out$coef
- ========================================================================
- Section from lm.series which calls lm.fit, and lm.wfit
- ========================================================================
- y <- as.vector(M[i, ])
- obs <- is.finite(y)
- if (sum(obs) > 0) {
- X <- design[obs, , drop = FALSE]
- y <- y[obs]
- if (is.null(weights))
- out <- lm.fit(X, y)
- else {
- w <- as.vector(weights[i, obs])
- out <- lm.wfit(X, y, w)
- }
- ========================================================================
fit <- lmFit(MA) names(fit)
- ebayes adds df's, ted) (moderated) lods, F, and pvalues (usually adjusted)
fit2 <- ebayes(fit) names(fit2)
- ----------------------- Affymetrix approach for t-tests ------------------------ #
library(affy) eset <- new("exprSet") eset@exprs <- cbind(log2(RG$R), log2(RG$G))
design <- cbind(rep(1, preps*2), rep(0:1, each=preps)) fit <- lmFit(eset, design=design, weights=NULL) fit$coeff fit <- eBayes(fit) topTable(fit, coef=2, adjust="none")
fit$coef # First coef is M of group 1, second coef is the mean differences between groups for(i in 1:n) {
print(t.test(eset@exprs[i,1:4], eset@exprs[i,5:8])$est)
}
ordinary.t <- fit$coef / fit$stdev.unscaled / fit$sigma ordinary.t
- Checking t-statistics (not paired because we are using M values)
check.t <- apply(exprs(eset),1, function(x){# var.equal T/F is the same
n <- length(x) t.test(x[(n/2+1):n], x[1:(n/2)], paired=FALSE, var.equal=TRUE)$statistic})
check.t
- ----------------------- Subsetting out sigma component example ------------------------ #
- MA <- new("MAList")
- MA$M <- matrix(rnorm(100),nc=4)
fit <- lmFit(MA) fit <- eBayes(fit) topTable(fit, number=1)
- Select first gene
rowID <- as.numeric(rownames(topTable(fit))[1]) fit$sigma[ rowID ] <- NA
topTable(fit, number=1)
- First gene now different
fit <- eBayes(fit) topTable(fit, number=1)
- Check gene 1 is now gene 4
topTable(fit,number=25)
x <- rep(1:4, 2:5)