Difference between revisions of "AffyVsInHouse.R"

From Organic Design wiki
(Added Cy3/Cy5 comparison)
(Formatting output changes)
Line 16: Line 16:
 
}
 
}
  
dset<- ReadAffy(filenames=file.path(dataDir, dir(dataDir,
+
dset<- ReadAffy(filenames=file.path(dataDir, dir(dataDir, pattern=".CEL")), widget = F)        # loads CEL files into an affybatch object
pattern=".CEL")), widget = F)        # loads CEL files into an affybatch object
 
  
 
un <- ".CEL"                        # remove extra names
 
un <- ".CEL"                        # remove extra names
Line 45: Line 44:
 
pairs(log2(RG$R), pch=".")
 
pairs(log2(RG$R), pch=".")
 
pairs(log2(RG$G), pch=".")
 
pairs(log2(RG$G), pch=".")
 
+
dev.off()
 
RG <- RG[,c("AC3","AC4")]
 
RG <- RG[,c("AC3","AC4")]
  
Line 57: Line 56:
 
# Normalization (loess, printtiploess)
 
# Normalization (loess, printtiploess)
 
MA      <- normalizeWithinArrays(RG, method="loess", bc.method="none")
 
MA      <- normalizeWithinArrays(RG, method="loess", bc.method="none")
MA      <- noralizeBetweenArrays(MA, method="scale")
+
MA      <- normalizeBetweenArrays(MA, method="scale")
 
RG.norm <- RG.MA(MA)
 
RG.norm <- RG.MA(MA)
  
Line 67: Line 66:
  
 
# Use median and mad for skewed chisq distributions
 
# Use median and mad for skewed chisq distributions
calcStats <- function(x, type="median", format = "%2.3f") {
+
calcStats <- function(x, type="median", format = "%2.3f", addText = TRUE) {
 
   xStat <- c()
 
   xStat <- c()
 
   if(type=="median") {
 
   if(type=="median") {
 
     medx  <- sprintf(format, median(x))
 
     medx  <- sprintf(format, median(x))
     xStat <- paste(expression(median(x)), "=", medx, sep=" ")
+
     if(addText){
 +
      xStat <- paste(expression(median(x)), "=", medx, sep=" ")
 +
    } else {
 +
      xStat <- medx
 +
    }
 
   } else {
 
   } else {
 
     madx  <- sprintf(format, mad(x))
 
     madx  <- sprintf(format, mad(x))
     xStat <- paste(expression(mad(x)), "=", madx, sep=" ")
+
     if(addText){
 +
      xStat <- paste(expression(mad(x)), "=", madx, sep=" ")
 +
    } else {
 +
      xStat <- madx
 +
    }
 
   }
 
   }
 
   return(xStat)
 
   return(xStat)
Line 94: Line 101:
 
X11(xpos=0, ypos=0, width=size, height=size)
 
X11(xpos=0, ypos=0, width=size, height=size)
 
X11(xpos=600, ypos=0, width=size, height=size)
 
X11(xpos=600, ypos=0, width=size, height=size)
 +
X11(xpos=0, ypos=600+45, width=size/2, height=size/2)
 +
 
dev.list()
 
dev.list()
  
Line 133: Line 142:
 
adj=c(1,0))
 
adj=c(1,0))
  
 +
dev.set(4)
 
# Dye pair not much different in mean signal
 
# Dye pair not much different in mean signal
 
hist(RG1mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal")
 
hist(RG1mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal")
text(x=8, y=1700, label=calcStats(RG1mean, type="median"), adj=c(1,0))
+
text(x=11, y=1700, label=calcStats(RG1mean, type="median"), adj=c(1,0))
text(x=8, y=1550, label=calcStats(RG1mean, type="mad"),  adj=c(1,0))
+
text(x=11, y=1550, label=calcStats(RG1mean, type="mad"),  adj=c(1,0))
  
 
hist(RG2mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal")
 
hist(RG2mean, main=InHouseMain, xlim=c(4,16), xlab="Cy3/Cy5 Average signal")
text(x=8, y=1700, label=calcStats(RG2mean, type="median"), adj=c(1,0))
+
text(x=11, y=1700, label=calcStats(RG2mean, type="median"), adj=c(1,0))
text(x=8, y=1550, label=calcStats(RG2mean, type="mad"),  adj=c(1,0))
+
text(x=11, y=1550, label=calcStats(RG2mean, type="mad"),  adj=c(1,0))
  
 
# SD distributions
 
# SD distributions
Line 165: Line 175:
 
text(x=1, y=2700, label=calcStats(Gsd, type="mad"),  adj=c(1,0))
 
text(x=1, y=2700, label=calcStats(Gsd, type="mad"),  adj=c(1,0))
  
 +
dev.set(4)
 
# Dye pair seems to add about another 10% noise (0.107/0.113, 0.107/0.114)
 
# Dye pair seems to add about another 10% noise (0.107/0.113, 0.107/0.114)
 
hist(RG1sd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3/Cy5 interspot standard deviation")
 
hist(RG1sd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3/Cy5 interspot standard deviation")
Line 190: Line 201:
 
text(x=0.15, y=2700, label=calcStats(Gsd/Gmean, type="mad"),  adj=c(1,0))
 
text(x=0.15, y=2700, label=calcStats(Gsd/Gmean, type="mad"),  adj=c(1,0))
  
 +
dev.set(4)
 
# Checking a Dye swap, also about a 10% error added dye to dye swap (0.009/0.01)
 
# Checking a Dye swap, also about a 10% error added dye to dye swap (0.009/0.01)
 
hist(RG1sd/RG1mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation")
 
hist(RG1sd/RG1mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation")
Line 197: Line 209:
 
hist(RG2sd/RG2mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation")
 
hist(RG2sd/RG2mean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3/Cy5 interspot coefficient of variation")
 
text(x=0.15, y=3000, label=calcStats(RG2sd/RG1mean, type="median"), adj=c(1,0))
 
text(x=0.15, y=3000, label=calcStats(RG2sd/RG1mean, type="median"), adj=c(1,0))
text(x=0.15, y=2700, label=calcStats(RG2sd/RG1mean, type="mad"),  adj=c(1,0))
+
text(x=0.15, y=2600, label=calcStats(RG2sd/RG1mean, type="mad"),  adj=c(1,0))
 +
 
 +
 
 +
# ------------------------- Summary diagnostics ------------------------------- #
 +
# Means
 +
signal <-
 +
  c(
 +
    calcStats(emean, type="median", addText=FALSE),
 +
    calcStats(Rmean, type="median", addText=FALSE),
 +
    calcStats(Gmean, type="median", addText=FALSE),
 +
    calcStats(RG1mean, type="median", addText=FALSE),
 +
    calcStats(RG2mean, type="median", addText=FALSE),
 +
    )
 +
noise <-
 +
  c(
 +
    calcStats(emean, type="mad", addText=FALSE),
 +
    calcStats(Rmean, type="mad", addText=FALSE),
 +
    calcStats(Gmean, type="mad", addText=FALSE),
 +
    calcStats(RG1mean, type="mad", addText=FALSE),
 +
    calcStats(RG2mean, type="mad", addText=FALSE)
 +
    )
 +
 
 +
xmean <- cbind(signal, noise)
 +
rownames(xmean) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")
 +
 
 +
#SD's
 +
signal <-
 +
  c(
 +
    calcStats(esd, type="median", addText=FALSE),
 +
    calcStats(Rsd, type="median", addText=FALSE),
 +
    calcStats(Gsd, type="median", addText=FALSE),
 +
    calcStats(RG1sd, type="median", addText=FALSE),
 +
    calcStats(RG2sd, type="median", addText=FALSE),
 +
    )
 +
noise <-
 +
  c(
 +
    calcStats(esd, type="mad", addText=FALSE),
 +
    calcStats(Rsd, type="mad", addText=FALSE),
 +
    calcStats(Gsd, type="mad", addText=FALSE),
 +
    calcStats(RG1sd, type="mad", addText=FALSE),
 +
    calcStats(RG2sd, type="mad", addText=FALSE)
 +
    )
  
 +
xsd <- cbind(signal, noise)
 +
rownames(xsd) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")
  
# ---------------------- Empirical validation of Chisq ------------------------ #
+
# CV's
df <- 10
+
signal <-  
ncp <- 0
+
  c(
empiricalSD <- rchisq(50000, df=df, ncp=ncp)
+
    calcStats(esd/emean, type="median", addText=FALSE),
sqrt(var(empiricalSD))
+
    calcStats(Rsd/Rmean, type="median", addText=FALSE),
sd(empiricalSD)
+
    calcStats(Gsd/Gmean, type="median", addText=FALSE),
mad(empiricalSD)
+
    calcStats(RG1sd/RG1mean, type="median", addText=FALSE),
 +
    calcStats(RG2sd/RG2mean, type="median", addText=FALSE),
 +
    )
 +
noise <-  
 +
  c(
 +
    calcStats(esd/emean, type="mad", addText=FALSE),
 +
    calcStats(Rsd/Rmean, type="mad", addText=FALSE),
 +
    calcStats(Gsd/Gmean, type="mad", addText=FALSE),
 +
    calcStats(RG1sd/RG1mean, type="mad", addText=FALSE),
 +
    calcStats(RG2sd/RG2mean, type="mad", addText=FALSE)
 +
    )
  
sqrt(2 * df)
+
xcv <- cbind(signal, noise)
 +
rownames(xcv) <- c("Affy", "Cy5", "Cy3","Cy3/Cy5 1", "Cy3/Cy5 2")
  
hist(empiricalSD)
+
print(xmean)
qqplot(empiricalSD, esd)
+
print(xsd)
 +
print(xcv)

Revision as of 21:11, 17 December 2006

# Load the two Bioconductor packages the script calls into.
# (One statement per line: the collapsed form `library(limma) library(affy)`
# is a parse error.)
library(limma)
library(affy)

# Show the installed package versions.
# `fields` is spelled out in full rather than relying on partial matching.
packageDescription("limma", fields = "Version")
packageDescription("affy", fields = "Version")

# Open the affy vignette for reference.
vignette("affy")

# -------------------------------- Affymetrix --------------------------------- #

# Pick the directory holding the Affymetrix CEL files.
if (1) { # Change for HortResearch
  dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"
} else {
  dataDir <- "/Users/admin/Desktop/DayNight/Celfiles/"
  # Sys.putenv() is defunct in current R; Sys.setenv() is its replacement.
  Sys.setenv(DISPLAY = ":0")
}

# Load the CEL files found in dataDir into an AffyBatch object.
# FALSE is spelled out because `F` is an ordinary, reassignable variable.
dset <- ReadAffy(
  filenames = file.path(dataDir, dir(dataDir, pattern = ".CEL")),
  widget = FALSE
)

# Remove the ".CEL" suffix from the sample names.
# fixed = TRUE makes the "." literal instead of a regex wildcard.
un <- ".CEL"
sampleNames(dset) <- gsub(un, "", sampleNames(dset), fixed = TRUE)

# Obtaining indexes of sampleNames (affy slides) of interest
technicalreps <- grep("00 G048", sampleNames(dset))
techset <- dset[, technicalreps]

# Normalization
erma <- rma(techset)

# --------------------------------- In house ---------------------------------- #

library(limma)

# Pick the directory holding the in-house GenePix (.gpr) result files.
if (1) { # Change for HortResearch
  # The space needs no escaping: "\ " is not a valid escape sequence in an R
  # string literal and stops the script at parse time.
  dataDir <- "/Volumes/HD2/Max Planck/HortResearch/VariabilityStudy/Data"
} else {
  dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"
}
files <- dir(dataDir, pattern = "gpr")

# Examine genepix, genepix.median
# wt.fun = wtflags(0) supplies the spot weights later read from RG$weights.
RG <- read.maimages(files, path = dataDir, source = "genepix", wt.fun = wtflags(0))

# Visually AC3 and AC4 most similar

# Scatterplot matrices of the raw log2 red and green channels across arrays,
# then close the device and keep only the two arrays chosen from the plots.
pairs(log2(RG$R), pch = ".")
pairs(log2(RG$G), pch = ".")
dev.off()
RG <- RG[, c("AC3", "AC4")]

# All spots
nrow(RG)

# Good spots
# (column sums of the weights; presumably 0/1 from wtflags(0) -- TODO confirm)
apply(RG$weights, 2, sum)

# Bad spots
nrow(RG) - apply(RG$weights, 2, sum)

# Normalization (loess, printtiploess)
MA <- normalizeWithinArrays(RG, method = "loess", bc.method = "none")
MA <- normalizeBetweenArrays(MA, method = "scale")
# Convert the normalized M/A values back to red/green intensities.
RG.norm <- RG.MA(MA)

# summary stats

iqr <- function(x, qlims=c(0.25, 0.75)) {
  # Distance between two quantiles of x.
  #
  # x:     numeric vector passed to quantile().
  # qlims: probabilities of the lower (first) and upper (second) quantile;
  #        the defaults give the interquartile range.
  #
  # Returns the upper quantile minus the lower one. The result keeps the name
  # quantile() gives the upper value (e.g. "75%").
  lowerQ <- quantile(x, qlims[1])
  upperQ <- quantile(x, qlims[2])
  upperQ - lowerQ
}

# Use median and mad for skewed chisq distributions

calcStats <- function(x, type="median", format = "%2.3f", addText = TRUE) {
  # Format a robust summary statistic of x as text.
  #
  # x:       numeric vector to summarise.
  # type:    "median" selects median(x); any other value selects mad(x).
  # format:  sprintf() format applied to the statistic.
  # addText: TRUE  -> "median(x) = <value>" / "mad(x) = <value>"
  #          FALSE -> just the formatted value.
  #
  # Returns a length-one character vector.
  useMedian <- type == "median"

  # The label is an unevaluated call; paste() turns it into its source text.
  label <- if (useMedian) expression(median(x)) else expression(mad(x))
  value <- if (useMedian) median(x) else mad(x)
  formatted <- sprintf(format, value)

  if (!addText) {
    return(formatted)
  }
  paste(label, "=", formatted, sep=" ")
}

# Test

# Print each formatted label next to the raw statistic it should contain.
# (One call per line: the four calls collapsed onto one line do not parse.)
calcStats(1:10, type = "median")
median(1:10)
calcStats(1:10, type = "mad")
mad(1:10)

# --------------------------- Comparison plots -------------------------------- #
# Setup

# Window size (passed to X11 width/height) and the two plot titles.
size <- 8
AffyMain <- "Affymetrix technical replication"
InHouseMain <- "In house technical replication"

# Close every open device, then open three X11 windows. After graphics.off()
# they are devices 2, 3 and 4, which is what the dev.set() calls below rely on.
graphics.off()
X11(xpos = 0, ypos = 0, width = size, height = size)
X11(xpos = 600, ypos = 0, width = size, height = size)
X11(xpos = 0, ypos = 600 + 45, width = size / 2, height = size / 2)

dev.list()

# Pairs plots
pairs(log2(RG.norm$R), pch = ".")
pairs(log2(RG.norm$G), pch = ".")

# Affymetrix replicates on device 2, in-house red channel on device 3.
dev.set(2)
plot(exprs(erma), main = AffyMain, pch = ".")
dev.set(3)
plot(log2(RG.norm$R), main = InHouseMain, pch = ".")

# Histograms
# Mean distributions

# Row-wise means across replicate arrays: Affymetrix (rma output), in-house
# red channel (labelled Cy5 below) and green channel (labelled Cy3 below).
emean <- apply(exprs(erma), 1, mean)
Rmean <- apply(log2(RG.norm$R), 1, mean)
Gmean <- apply(log2(RG.norm$G), 1, mean)

# Cross-dye pairs: red of one array combined with green of the other.
RG1mean <- apply(log2(cbind(RG.norm$R[, 1], RG.norm$G[, 2])), 1, mean)
RG2mean <- apply(log2(cbind(RG.norm$R[, 2], RG.norm$G[, 1])), 1, mean)

# Each histogram is annotated with its median and MAD via calcStats().
dev.set(2)
hist(emean, main = AffyMain, xlim = c(4, 16), xlab = "Average signal")
text(x = 15, y = 1700, label = calcStats(emean, type = "median"), adj = c(1, 0))
text(x = 15, y = 1550, label = calcStats(emean, type = "mad"), adj = c(1, 0))

dev.set(3)
hist(Rmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy5 Average signal")
text(x = 8, y = 1700, label = calcStats(Rmean, type = "median", format = "%2.1f"), adj = c(1, 0))
text(x = 8, y = 1550, label = calcStats(Rmean, type = "mad", format = "%2.1f"), adj = c(1, 0))

# Pause so the Cy5 plot is visible before the Cy3 plot replaces it on the
# same device.
Sys.sleep(1)
hist(Gmean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy3 Average signal")
text(x = 8, y = 1700, label = calcStats(Gmean, type = "median", format = "%2.1f"), adj = c(1, 0))
text(x = 8, y = 1550, label = calcStats(Gmean, type = "mad", format = "%2.1f"), adj = c(1, 0))

dev.set(4)

# Dye pair not much different in mean signal
hist(RG1mean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy3/Cy5 Average signal")
text(x = 11, y = 1700, label = calcStats(RG1mean, type = "median"), adj = c(1, 0))
text(x = 11, y = 1550, label = calcStats(RG1mean, type = "mad"), adj = c(1, 0))

hist(RG2mean, main = InHouseMain, xlim = c(4, 16), xlab = "Cy3/Cy5 Average signal")
text(x = 11, y = 1700, label = calcStats(RG2mean, type = "median"), adj = c(1, 0))
text(x = 11, y = 1550, label = calcStats(RG2mean, type = "mad"), adj = c(1, 0))

# SD distributions

# Row-wise standard deviations across replicate arrays, for the same five
# groupings used for the means.
esd <- apply(exprs(erma), 1, sd)
Rsd <- apply(log2(RG.norm$R), 1, sd)
Gsd <- apply(log2(RG.norm$G), 1, sd)

RG1sd <- apply(log2(cbind(RG.norm$R[, 1], RG.norm$G[, 2])), 1, sd)
RG2sd <- apply(log2(cbind(RG.norm$R[, 2], RG.norm$G[, 1])), 1, sd)

dev.set(2)
hist(esd, main = AffyMain, breaks = 12, xlim = c(0, 1.5), xlab = "Standard deviation")
text(x = 1, y = 8000, label = calcStats(esd, type = "median"), adj = c(1, 0))
text(x = 1, y = 7300, label = calcStats(esd, type = "mad"), adj = c(1, 0))

# Axis labels follow the convention used by every other plot and by the
# summary tables: the R (red) channel is Cy5 and the G (green) channel is Cy3.
# The two labels were previously swapped here.
dev.set(3)
hist(Rsd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5), xlab = "Cy5 interspot standard deviation")
text(x = 1, y = 3000, label = calcStats(Rsd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, label = calcStats(Rsd, type = "mad"), adj = c(1, 0))
Sys.sleep(1)
hist(Gsd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5), xlab = "Cy3 interspot standard deviation")
text(x = 1, y = 3000, label = calcStats(Gsd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, label = calcStats(Gsd, type = "mad"), adj = c(1, 0))

dev.set(4)

# Dye pair seems to add about another 10% noise (0.107/0.113, 0.107/0.114)
hist(RG1sd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5), xlab = "Cy3/Cy5 interspot standard deviation")
text(x = 1, y = 3000, label = calcStats(RG1sd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, label = calcStats(RG1sd, type = "mad"), adj = c(1, 0))

hist(RG2sd, main = InHouseMain, breaks = 100, xlim = c(0, 1.5), xlab = "Cy3/Cy5 interspot standard deviation")
text(x = 1, y = 3000, label = calcStats(RG2sd, type = "median"), adj = c(1, 0))
text(x = 1, y = 2700, label = calcStats(RG2sd, type = "mad"), adj = c(1, 0))

# CV

# Coefficient of variation (row SD divided by row mean) for each grouping,
# annotated with its median and MAD.
dev.set(2)
hist(esd / emean, main = AffyMain, breaks = 12, xlim = c(0, 0.2), xlab = "Coefficient of variation")
text(x = 0.15, y = 6000, label = calcStats(esd / emean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 5500, label = calcStats(esd / emean, type = "mad"), adj = c(1, 0))

dev.set(3)
hist(Rsd / Rmean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2), xlab = "Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000, label = calcStats(Rsd / Rmean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2700, label = calcStats(Rsd / Rmean, type = "mad"), adj = c(1, 0))
Sys.sleep(1)
hist(Gsd / Gmean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2), xlab = "Cy3 interspot coefficient of variation")
text(x = 0.15, y = 3000, label = calcStats(Gsd / Gmean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2700, label = calcStats(Gsd / Gmean, type = "mad"), adj = c(1, 0))

dev.set(4)

# Checking a Dye swap, also about a 10% error added dye to dye swap (0.009/0.01)
hist(RG1sd / RG1mean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2), xlab = "Cy3/Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000, label = calcStats(RG1sd / RG1mean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2700, label = calcStats(RG1sd / RG1mean, type = "mad"), adj = c(1, 0))

# The annotations must describe the plotted quantity, RG2sd / RG2mean.
# They previously divided by RG1mean, the other dye pair's mean.
hist(RG2sd / RG2mean, main = InHouseMain, breaks = 80, xlim = c(0, 0.2), xlab = "Cy3/Cy5 interspot coefficient of variation")
text(x = 0.15, y = 3000, label = calcStats(RG2sd / RG2mean, type = "median"), adj = c(1, 0))
text(x = 0.15, y = 2600, label = calcStats(RG2sd / RG2mean, type = "mad"), adj = c(1, 0))


# ------------------------- Summary diagnostics ------------------------------- #
# Means

# Median ("signal") and MAD ("noise") of the row means for each grouping.
# calcStats(addText = FALSE) returns formatted strings, so xmean is a
# character matrix.
# No comma after the last element: `c(a, b, )` fails with "argument is empty".
signal <-
  c(
    calcStats(emean, type = "median", addText = FALSE),
    calcStats(Rmean, type = "median", addText = FALSE),
    calcStats(Gmean, type = "median", addText = FALSE),
    calcStats(RG1mean, type = "median", addText = FALSE),
    calcStats(RG2mean, type = "median", addText = FALSE)
  )

noise <-
  c(
    calcStats(emean, type = "mad", addText = FALSE),
    calcStats(Rmean, type = "mad", addText = FALSE),
    calcStats(Gmean, type = "mad", addText = FALSE),
    calcStats(RG1mean, type = "mad", addText = FALSE),
    calcStats(RG2mean, type = "mad", addText = FALSE)
  )

# Row order matches the order of the calcStats() calls above.
xmean <- cbind(signal, noise)
rownames(xmean) <- c("Affy", "Cy5", "Cy3", "Cy3/Cy5 1", "Cy3/Cy5 2")

# SD's

# Median ("signal") and MAD ("noise") of the row standard deviations for each
# grouping. The values are formatted strings, so xsd is a character matrix.
# No comma after the last element: `c(a, b, )` fails with "argument is empty".
signal <-
  c(
    calcStats(esd, type = "median", addText = FALSE),
    calcStats(Rsd, type = "median", addText = FALSE),
    calcStats(Gsd, type = "median", addText = FALSE),
    calcStats(RG1sd, type = "median", addText = FALSE),
    calcStats(RG2sd, type = "median", addText = FALSE)
  )

noise <-
  c(
    calcStats(esd, type = "mad", addText = FALSE),
    calcStats(Rsd, type = "mad", addText = FALSE),
    calcStats(Gsd, type = "mad", addText = FALSE),
    calcStats(RG1sd, type = "mad", addText = FALSE),
    calcStats(RG2sd, type = "mad", addText = FALSE)
  )

# Row order matches the order of the calcStats() calls above.
xsd <- cbind(signal, noise)
rownames(xsd) <- c("Affy", "Cy5", "Cy3", "Cy3/Cy5 1", "Cy3/Cy5 2")

# CV's

# Median ("signal") and MAD ("noise") of the coefficients of variation
# (row SD / row mean) for each grouping. The values are formatted strings,
# so xcv is a character matrix.
# No comma after the last element: `c(a, b, )` fails with "argument is empty".
signal <-
  c(
    calcStats(esd / emean, type = "median", addText = FALSE),
    calcStats(Rsd / Rmean, type = "median", addText = FALSE),
    calcStats(Gsd / Gmean, type = "median", addText = FALSE),
    calcStats(RG1sd / RG1mean, type = "median", addText = FALSE),
    calcStats(RG2sd / RG2mean, type = "median", addText = FALSE)
  )

noise <-
  c(
    calcStats(esd / emean, type = "mad", addText = FALSE),
    calcStats(Rsd / Rmean, type = "mad", addText = FALSE),
    calcStats(Gsd / Gmean, type = "mad", addText = FALSE),
    calcStats(RG1sd / RG1mean, type = "mad", addText = FALSE),
    calcStats(RG2sd / RG2mean, type = "mad", addText = FALSE)
  )

# Row order matches the order of the calcStats() calls above.
xcv <- cbind(signal, noise)
rownames(xcv) <- c("Affy", "Cy5", "Cy3", "Cy3/Cy5 1", "Cy3/Cy5 2")

# Print the three summary tables (means, SDs, CVs).
print(xmean)
print(xsd)
print(xcv)