Difference between revisions of "AffyVsInHouse.R"

From Organic Design wiki
 
(Added summary stats etc)
Line 57: Line 57:
 
MA      <- normalizeWithinArrays(RG, method="loess", bc.method="none")
 
MA      <- normalizeWithinArrays(RG, method="loess", bc.method="none")
 
RG.norm <- RG.MA(MA)
 
RG.norm <- RG.MA(MA)
 +
 +
# summary stats
 +
iqr <- function(x, qlims=c(0.25, 0.75)) {
 +
  IQR <- quantile(x, qlims[2]) - quantile(x, qlims[1])
 +
  return(IQR)
 +
}
 +
 +
# Use median and mad for skewed chisq distributions
 +
median(esd)
 +
mean(esd)
 +
 +
mad(esd)
 +
sd(esd)
 +
sqrt(var(esd))
 +
iqr(esd)
 +
 +
calcStats <- function(x, type="median", format = "%4.3f") {
 +
  xStat <- c()
 +
  if(type=="median") {
 +
    medx  <- sprintf(format, median(x))
 +
    xStat <- paste(expression(median(x)), "=", medx, sep=" ")
 +
  } else {
 +
    madx  <- sprintf(format, mad(x))
 +
    xStat <- paste(expression(mad(x)), "=", madx, sep=" ")
 +
  }
 +
  return(xStat)
 +
}
 +
# Test
 +
calcStats(1:10, type="median")
 +
median(1:10)
 +
calcStats(1:10, type="mad")
 +
mad(1:10)
  
 
# --------------------------- Comparison plots -------------------------------- #
 
# --------------------------- Comparison plots -------------------------------- #
Line 87: Line 119:
 
dev.set(2)
 
dev.set(2)
 
hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal")
 
hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal")
 +
text(x=15, y=1700, label=calcStats(emean, type="median"), adj=c(1,0))
 +
text(x=15, y=1550, label=calcStats(emean, type="mad"),  adj=c(1,0))
 +
 
dev.set(3)
 
dev.set(3)
 
hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal")
 
hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal")
 +
text(x=8, y=1700, label=calcStats(Rmean, type="median", format="%2.1f"), adj=c(1,0))
 +
text(x=8, y=1550, label=calcStats(Rmean, type="mad", format="%2.1f"),  adj=c(1,0))
 +
 
Sys.sleep(1)
 
Sys.sleep(1)
 
hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal")
 
hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal")
 +
text(x=8, y=1700, label=calcStats(Gmean, type="median", format="%2.1f"), adj=c(1,0))
 +
text(x=8, y=1550, label=calcStats(Gmean, type="mad", format="%2.1f"),  adj=c(1,0))
  
 
# SD distributions
 
# SD distributions
Line 99: Line 139:
 
dev.set(2)
 
dev.set(2)
 
hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation")
 
hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation")
 +
text(x=1, y=8000, label=calcStats(esd, type="median"), adj=c(1,0))
 +
text(x=1, y=7300, label=calcStats(esd, type="mad"),  adj=c(1,0))
 +
 
dev.set(3)
 
dev.set(3)
hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation")  
+
hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation")
 +
text(x=1, y=3000, label=calcStats(Rsd, type="median"), adj=c(1,0))
 +
text(x=1, y=2700, label=calcStats(Rsd, type="mad"),  adj=c(1,0))
 
Sys.sleep(1)
 
Sys.sleep(1)
 
hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation")
 
hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation")
 
+
text(x=1, y=3000, label=calcStats(Gsd, type="median"), adj=c(1,0))
# summary stats
+
text(x=1, y=2700, label=calcStats(Gsd, type="mad"),  adj=c(1,0))
iqr <- function(x, qlims=c(0.25, 0.75)) {
 
  IQR <- quantile(x, qlims[2]) - quantile(x, qlims[1])
 
  return(IQR)
 
}
 
 
 
mean(esd)
 
mean(Rsd)
 
mean(Gsd)
 
 
 
sqrt(mean(esd^2))
 
sqrt(mean(Rsd^2))
 
sqrt(mean(Gsd^2))
 
 
 
iqr(esd)
 
iqr(Rsd)
 
iqr(Gsd)
 
 
 
  
 
# CV
 
# CV
 
dev.set(2)
 
dev.set(2)
 
hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation")
 
hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation")
 +
text(x=0.15, y=6000, label=calcStats(esd/emean, type="median"), adj=c(1,0))
 +
text(x=0.15, y=5500, label=calcStats(esd/emean, type="mad"),  adj=c(1,0))
 +
 
dev.set(3)
 
dev.set(3)
 
hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation")
 
hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation")
 +
text(x=0.15, y=3000, label=calcStats(Rsd/Rmean, type="median"), adj=c(1,0))
 +
text(x=0.15, y=2700, label=calcStats(Rsd/Rmean, type="mad"),  adj=c(1,0))
 
Sys.sleep(1)
 
Sys.sleep(1)
 
hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation")
 
hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation")
 +
text(x=0.15, y=3000, label=calcStats(Gsd/Gmean, type="median"), adj=c(1,0))
 +
text(x=0.15, y=2700, label=calcStats(Gsd/Gmean, type="mad"),  adj=c(1,0))
 +
 +
# ---------------------- Empirical validation of Chisq ------------------------ #
 +
df <- 10
 +
ncp <- 0
 +
empiricalSD <- rchisq(50000, df=df, ncp=ncp)
 +
sqrt(var(empiricalSD))
 +
sd(empiricalSD)
 +
mad(empiricalSD)
 +
 +
sqrt(2 * df)
 +
 +
hist(empiricalSD)
 +
qqplot(empiricalSD, esd)

Revision as of 07:55, 17 December 2006

library(limma)
library(affy)

# Report the installed package versions
packageDescription("limma", fields="Version")
packageDescription("affy", fields="Version")

vignette("affy")

# -------------------------------- Affymetrix --------------------------------- #

# Pick the directory holding the CEL files; the if(0) switch is flipped by hand
if(0) { # Change for HortResearch
  dataDir <- "/Volumes/HD2/Max Planck/Data/Affy/DayNight/Celfiles"
} else {
  dataDir  <- "/Users/admin/Desktop/DayNight/Celfiles/"
  # Sys.putenv() is defunct in current R; Sys.setenv() is its replacement
  Sys.setenv("DISPLAY"=":0")
}

dset <- ReadAffy(filenames=file.path(dataDir, dir(dataDir, pattern=".CEL")), widget = FALSE) # loads CEL files into an affybatch object

un <- ".CEL" # remove extra names
# fixed=TRUE so the dot is matched literally rather than as a regex wildcard
sampleNames(dset) <- gsub(un, "", sampleNames(dset), fixed=TRUE)

# Obtaining indexes of sampleNames (affy slides) of interest
technicalreps <- grep("00 G048", sampleNames(dset))
techset <- dset[,technicalreps]

# Normalization
erma <- rma(techset)

# --------------------------------- In house ---------------------------------- #

library(limma)

# Pick the directory holding the GenePix result files; the first branch is
# empty, so taking it would leave dataDir at whatever value it already has
if(0) { # Change for HortResearch

} else {
  dataDir <- "/Users/admin/Desktop/Directories/VariabilityStudy/Data"
}
files <- dir(dataDir, pattern="gpr")

# Examine genepix, genepix.median
RG <- read.maimages(files, path=dataDir, source="genepix", wt.fun=wtflags(0))

# Visually AC3 and AC4 most similar
pairs(log2(RG$R), pch=".")
pairs(log2(RG$G), pch=".")

# Keep only the two arrays selected above
RG <- RG[,c("AC3","AC4")]

# All spots
nrow(RG)

# Good spots
apply(RG$weights, 2, sum)

# Bad spots
nrow(RG) - apply(RG$weights, 2, sum)

# Normalization (loess, printtiploess)
MA <- normalizeWithinArrays(RG, method="loess", bc.method="none")
RG.norm <- RG.MA(MA)

# summary stats

iqr <- function(x, qlims=c(0.25, 0.75)) {
  # Spread between two quantiles of x.
  #   x     : numeric vector passed to quantile()
  #   qlims : lower and upper probabilities (first and second element)
  # Returns quantile(x, qlims[2]) - quantile(x, qlims[1]); the result keeps
  # the name of the upper quantile (e.g. "75%").
  upper <- quantile(x, qlims[2])
  lower <- quantile(x, qlims[1])
  upper - lower
}

# Use median and mad for skewed chisq distributions

# Location and spread summaries of esd.
# NOTE(review): esd is only assigned later in this script
# (esd <- apply(exprs(erma), 1, sd)), so that assignment must have been run
# before these lines are evaluated.
median(esd)
mean(esd)

mad(esd)
sd(esd)
sqrt(var(esd))
iqr(esd)

calcStats <- function(x, type="median", format = "%4.3f") {
  # Build a one-line label such as "median(x) = 5.500" for annotating plots.
  #   x      : numeric vector to summarise
  #   type   : "median" selects median(x); any other value selects mad(x)
  #   format : sprintf() format applied to the statistic
  # Returns a single character string "<stat>(x) = <formatted value>".
  if (type != "median") {
    return(paste("mad(x)", "=", sprintf(format, mad(x)), sep=" "))
  }
  paste("median(x)", "=", sprintf(format, median(x)), sep=" ")
}

# Test

# Each calcStats() label is followed by the raw statistic it should report
calcStats(1:10, type="median")
median(1:10)
calcStats(1:10, type="mad")
mad(1:10)

# --------------------------- Comparison plots -------------------------------- #
# Setup

# Device size and the plot titles shared by the comparison plots
size <- 8
AffyMain <- "Affymetrix technical replication"
InHouseMain <- "In house technical replication"

# Close all open devices, then open two X11 windows; they are addressed
# below through dev.set(2) and dev.set(3)
graphics.off()
X11(xpos=0, ypos=0, width=size, height=size)
X11(xpos=600, ypos=0, width=size, height=size)
dev.list()

# Pairs plots
pairs(log2(RG.norm$R), pch=".")
pairs(log2(RG.norm$G), pch=".")

dev.set(2)
plot(exprs(erma), main = AffyMain, pch=".")
dev.set(3)
plot(log2(RG.norm$R), main = InHouseMain, pch=".")

# Histograms
# Mean distributions

# Row-wise mean signal for the Affymetrix set and for each in-house channel
emean <- apply(exprs(erma), 1, mean)
Rmean <- apply(log2(RG.norm$R), 1, mean)
Gmean <- apply(log2(RG.norm$G), 1, mean)

# Each histogram is annotated with its median and MAD via calcStats()
dev.set(2)
hist(emean, main=AffyMain, xlim=c(4,16), xlab="Average signal")
text(x=15, y=1700, labels=calcStats(emean, type="median"), adj=c(1,0))
text(x=15, y=1550, labels=calcStats(emean, type="mad"), adj=c(1,0))

dev.set(3)
hist(Rmean, main=InHouseMain, xlim=c(4,16), xlab="Cy5 Average signal")
text(x=8, y=1700, labels=calcStats(Rmean, type="median", format="%2.1f"), adj=c(1,0))
text(x=8, y=1550, labels=calcStats(Rmean, type="mad", format="%2.1f"), adj=c(1,0))

# Pause before the next histogram is drawn on the same device
Sys.sleep(1)
hist(Gmean, main=InHouseMain, xlim=c(4,16), xlab="Cy3 Average signal")
text(x=8, y=1700, labels=calcStats(Gmean, type="median", format="%2.1f"), adj=c(1,0))
text(x=8, y=1550, labels=calcStats(Gmean, type="mad", format="%2.1f"), adj=c(1,0))

# SD distributions

# Row-wise standard deviation for the Affymetrix set and each in-house channel
esd <- apply(exprs(erma), 1, sd)
Rsd <- apply(log2(RG.norm$R), 1, sd)
Gsd <- apply(log2(RG.norm$G), 1, sd)

# Each histogram is annotated with its median and MAD via calcStats()
dev.set(2)
hist(esd, main=AffyMain, breaks = 12, xlim=c(0,1.5), xlab="Standard deviation")
text(x=1, y=8000, labels=calcStats(esd, type="median"), adj=c(1,0))
text(x=1, y=7300, labels=calcStats(esd, type="mad"), adj=c(1,0))

dev.set(3)
# Axis labels follow the rest of this script, where the R channel is
# labelled Cy5 and the G channel Cy3 (mean and CV histograms)
hist(Rsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy5 interspot standard deviation")
text(x=1, y=3000, labels=calcStats(Rsd, type="median"), adj=c(1,0))
text(x=1, y=2700, labels=calcStats(Rsd, type="mad"), adj=c(1,0))
# Pause before the next histogram is drawn on the same device
Sys.sleep(1)
hist(Gsd, main=InHouseMain, breaks = 100, xlim=c(0,1.5), xlab="Cy3 interspot standard deviation")
text(x=1, y=3000, labels=calcStats(Gsd, type="median"), adj=c(1,0))
text(x=1, y=2700, labels=calcStats(Gsd, type="mad"), adj=c(1,0))

# CV

# Coefficient of variation (sd / mean) histograms, each annotated with its
# median and MAD via calcStats()
dev.set(2)
hist(esd/emean, main=AffyMain, breaks = 12, xlim=c(0,0.2), xlab="Coefficient of variation")
text(x=0.15, y=6000, labels=calcStats(esd/emean, type="median"), adj=c(1,0))
text(x=0.15, y=5500, labels=calcStats(esd/emean, type="mad"), adj=c(1,0))

dev.set(3)
hist(Rsd/Rmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy5 interspot coefficient of variation")
text(x=0.15, y=3000, labels=calcStats(Rsd/Rmean, type="median"), adj=c(1,0))
text(x=0.15, y=2700, labels=calcStats(Rsd/Rmean, type="mad"), adj=c(1,0))
# Pause before the next histogram is drawn on the same device
Sys.sleep(1)
hist(Gsd/Gmean, main=InHouseMain, breaks=80, xlim=c(0,0.2), xlab="Cy3 interspot coefficient of variation")
text(x=0.15, y=3000, labels=calcStats(Gsd/Gmean, type="median"), adj=c(1,0))
text(x=0.15, y=2700, labels=calcStats(Gsd/Gmean, type="mad"), adj=c(1,0))

# ---------------------- Empirical validation of Chisq ------------------------ #

# Draw a chi-squared sample and compare its spread estimates
df <- 10
ncp <- 0
empiricalSD <- rchisq(50000, df=df, ncp=ncp)
sqrt(var(empiricalSD))
sd(empiricalSD)
mad(empiricalSD)

# Theoretical standard deviation of a central chi-squared with df degrees of freedom
sqrt(2 * df)

# Compare the simulated distribution with the observed esd values
hist(empiricalSD)
qqplot(empiricalSD, esd)