AffyVsInHouse.R

From Organic Design wiki
Revision as of 02:16, 29 May 2007 by Sven (talk | contribs) ({{R}})
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

Code snipits and programs written in R, S or S-PLUS library(limma) library(affy)

packageDescription("limma", field="Version") packageDescription("affy", field="Version")

vignette("affy")

  1. -------------------------------- Affymetrix --------------------------------- #

if(1) { # Change for HortResearch

 dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"

} else {

 dataDir  <- "/Users/admin/Desktop/DayNight/Celfiles/"
 Sys.putenv("DISPLAY"=":0")

}

dset<- ReadAffy(filenames=file.path(dataDir, dir(dataDir, pattern=".CEL")), widget = F) # loads CEL files into an affybatch object

un <- ".CEL" # remove extra names sampleNames(dset) <- gsub(un, "", sampleNames(dset))

  1. Obtaining indexes of sampleNames (affy slides) of interest

technicalreps <- grep("00 G048", sampleNames(dset)) techset <- dset[,technicalreps]

  1. Normalization

erma <- rma(techset)

  1. --------------------------------- In house ---------------------------------- #

library(limma)

if(1) { # Change for HortResearch

 dataDir <- "/Volumes/HD2/Max\ Planck/HortResearch/VariabilityStudy/Data"

}else {

 dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"

} files <- dir(dataDir, pattern="gpr")

  1. Examine genepix, genepix.median

RG <- read.maimages(files, path=dataDir, source="genepix", wt.fun=wtflags(0))

  1. Visually AC3 and AC4 most similar

pairs(log2(RG$R), pch=".") pairs(log2(RG$G), pch=".") dev.off() RG <- RG[,c("AC3","AC4")]

  1. All spots

nrow(RG)

  1. Good spots

apply(RG$weights, 2, sum)

  1. Bad spots

nrow(RG) - apply(RG$weights, 2, sum)

  1. Normalization (loess, printtiploess)

MA <- normalizeWithinArrays(RG, method="loess", bc.method="none") MA <- normalizeBetweenArrays(MA, method="scale") RG.norm <- RG.MA(MA)

  1. summary stats

iqr <- function(x, qlims=c(0.25, 0.75)) {

 IQR <- quantile(x, qlims[2]) - quantile(x, qlims[1])
 return(IQR)

}

  1. Use median and mad for skewed chisq distributions

calcStats <- function(x, type="median", format = "%2.3f", addText = TRUE) {

 xStat <- c()
 if(type=="median") {
   medx  <- sprintf(format, median(x))
   if(addText){
     xStat <- paste(expression(median(x)), "=", medx, sep=" ")
   } else {
     xStat <- medx
   }
 } else {
   madx  <- sprintf(format, mad(x))
   if(addText){
     xStat <- paste(expression(mad(x)), "=", madx, sep=" ")
   } else {
     xStat <- madx
   }
 }
 return(xStat)

}

  1. Test

calcStats(1:10, type="median") median(1:10) calcStats(1:10, type="mad") mad(1:10)

  1. --------------------------- Comparison plots -------------------------------- #
  1. Setup

size <- 8 AffyMain <- "Affymetrix technical replication" InHouseMain <- "In house technical replication"

graphics.off() X11(xpos=0, ypos=0, width=size, height=size) X11(xpos=600, ypos=0, width=size, height=size) X11(xpos=0, ypos=600+45, width=size/2, height=size/2)

dev.list()

  1. Pairs plots

pairs(log2(RG.norm$R), pch=".") pairs(log2(RG.norm$G), pch=".")

dev.set(2) plot(exprs(erma), main = AffyMain, pch=".") dev.set(3) plot(log2(RG.norm$R), main = InHouseMain, pch=".")

  1. Histograms
  2. Mean distributions

emean <- apply(exprs(erma), 1, mean) Rmean <- apply(log2(RG.norm$R), 1, mean) Gmean <- apply(log2(RG.norm$G), 1, mean)

RG1mean <- apply(log2(cbind(RG.norm$R[,1], RG.norm$G[,2])),1, mean) RG2mean <- apply(log2(cbind(RG.norm$R[,2], RG.norm$G[,1])),1, mean)

dev.set(2) hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal") text(x=15, y=1700, label=calcStats(emean, type="median"), adj=c(1,0)) text(x=15, y=1550, label=calcStats(emean, type="mad"), adj=c(1,0))

dev.set(3) hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal") text(x=8, y=1700, label=calcStats(Rmean, type="median", format="%2.1f"), adj=c(1,0)) text(x=8, y=1550, label=calcStats(Rmean, type="mad", format="%2.1f"), adj=c(1,0))

Sys.sleep(1) hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal") text(x=8, y=1700, label=calcStats(Gmean, type="median", format="%2.1f"), adj=c(1,0)) text(x=8, y=1550, label=calcStats(Gmean, type="mad", format="%2.1f"), adj=c(1,0))

dev.set(4)

  1. Dye pair not much different in mean signal

hist(RG1mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal") text(x=11, y=1700, label=calcStats(RG1mean, type="median"), adj=c(1,0)) text(x=11, y=1550, label=calcStats(RG1mean, type="mad"), adj=c(1,0))

hist(RG2mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal") text(x=11, y=1700, label=calcStats(RG2mean, type="median"), adj=c(1,0)) text(x=11, y=1550, label=calcStats(RG2mean, type="mad"), adj=c(1,0))

  1. SD distributions

esd <- apply(exprs(erma), 1, sd) Rsd <- apply(log2(RG.norm$R), 1, sd) Gsd <- apply(log2(RG.norm$G), 1, sd)

RG1sd <- apply(log2(cbind(RG.norm$R[,1], RG.norm$G[,2])), 1, sd) RG2sd <- apply(log2(cbind(RG.norm$R[,2], RG.norm$G[,1])), 1, sd)


dev.set(2) hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation") text(x=1, y=8000, label=calcStats(esd, type="median"), adj=c(1,0)) text(x=1, y=7300, label=calcStats(esd, type="mad"), adj=c(1,0))

dev.set(3) hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation") text(x=1, y=3000, label=calcStats(Rsd, type="median"), adj=c(1,0)) text(x=1, y=2700, label=calcStats(Rsd, type="mad"), adj=c(1,0)) Sys.sleep(1) hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation") text(x=1, y=3000, label=calcStats(Gsd, type="median"), adj=c(1,0)) text(x=1, y=2700, label=calcStats(Gsd, type="mad"), adj=c(1,0))

dev.set(4)

  1. Dye pair seems to add about another 10% noise (0.107/0.113, 0.107/0.114)

hist(RG1sd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3/Cy5 interspot standard deviation") text(x=1, y=3000, label=calcStats(RG1sd, type="median"), adj=c(1,0)) text(x=1, y=2700, label=calcStats(RG1sd, type="mad"), adj=c(1,0))

hist(RG2sd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3/Cy5 interspot standard deviation") text(x=1, y=3000, label=calcStats(RG2sd, type="median"), adj=c(1,0)) text(x=1, y=2700, label=calcStats(RG2sd, type="mad"), adj=c(1,0))

  1. CV

dev.set(2) hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation") text(x=0.15, y=6000, label=calcStats(esd/emean, type="median"), adj=c(1,0)) text(x=0.15, y=5500, label=calcStats(esd/emean, type="mad"), adj=c(1,0))

dev.set(3) hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation") text(x=0.15, y=3000, label=calcStats(Rsd/Rmean, type="median"), adj=c(1,0)) text(x=0.15, y=2700, label=calcStats(Rsd/Rmean, type="mad"), adj=c(1,0)) Sys.sleep(1) hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation") text(x=0.15, y=3000, label=calcStats(Gsd/Gmean, type="median"), adj=c(1,0)) text(x=0.15, y=2700, label=calcStats(Gsd/Gmean, type="mad"), adj=c(1,0))

dev.set(4)

  1. Checking a Dye swap, also about a 10% error added dye to dye swap (0.009/0.01)

hist(RG1sd/RG1mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation") text(x=0.15, y=3000, label=calcStats(RG1sd/RG1mean, type="median"), adj=c(1,0)) text(x=0.15, y=2700, label=calcStats(RG1sd/RG1mean, type="mad"), adj=c(1,0))

hist(RG2sd/RG2mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation") text(x=0.15, y=3000, label=calcStats(RG2sd/RG1mean, type="median"), adj=c(1,0)) text(x=0.15, y=2600, label=calcStats(RG2sd/RG1mean, type="mad"), adj=c(1,0))


  1. ------------------------- Summary diagnostics ------------------------------- #
  2. Means

signal <-

 c(
   calcStats(emean, type="median", addText=FALSE),
   calcStats(Rmean, type="median", addText=FALSE),
   calcStats(Gmean, type="median", addText=FALSE),
   calcStats(RG1mean, type="median", addText=FALSE),
   calcStats(RG2mean, type="median", addText=FALSE),
   )

noise <-

 c(
   calcStats(emean, type="mad", addText=FALSE),
   calcStats(Rmean, type="mad", addText=FALSE),
   calcStats(Gmean, type="mad", addText=FALSE),
   calcStats(RG1mean, type="mad", addText=FALSE),
   calcStats(RG2mean, type="mad", addText=FALSE)
   )

xmean <- cbind(signal, noise) rownames(xmean) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")

  1. SD's

signal <-

 c(
   calcStats(esd, type="median", addText=FALSE),
   calcStats(Rsd, type="median", addText=FALSE),
   calcStats(Gsd, type="median", addText=FALSE),
   calcStats(RG1sd, type="median", addText=FALSE),
   calcStats(RG2sd, type="median", addText=FALSE),
   )

noise <-

 c(
   calcStats(esd, type="mad", addText=FALSE),
   calcStats(Rsd, type="mad", addText=FALSE),
   calcStats(Gsd, type="mad", addText=FALSE),
   calcStats(RG1sd, type="mad", addText=FALSE),
   calcStats(RG2sd, type="mad", addText=FALSE)
   )

xsd <- cbind(signal, noise) rownames(xsd) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")

  1. CV's

signal <-

 c(
   calcStats(esd/emean, type="median", addText=FALSE),
   calcStats(Rsd/Rmean, type="median", addText=FALSE),
   calcStats(Gsd/Gmean, type="median", addText=FALSE),
   calcStats(RG1sd/RG1mean, type="median", addText=FALSE),
   calcStats(RG2sd/RG2mean, type="median", addText=FALSE),
   )

noise <-

 c(
   calcStats(esd/emean, type="mad", addText=FALSE),
   calcStats(Rsd/Rmean, type="mad", addText=FALSE),
   calcStats(Gsd/Gmean, type="mad", addText=FALSE),
   calcStats(RG1sd/RG1mean, type="mad", addText=FALSE),
   calcStats(RG2sd/RG2mean, type="mad", addText=FALSE)
   )

xcv <- cbind(signal, noise) rownames(xcv) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")

print(xmean) print(xsd) print(xcv)