GDNA analysis.R
Code snipits and programs written in R, S or S-PLUS library(limma) source("plotDensities.R") dataDir <- "/Volumes/HD2/Data/Rosaceae/GPR" files <- dir(dataDir, pattern=".gpr") printRun <- substr(files,0,2)
for( i in unique(printRun) ) {
fname <- paste("RG.",i,sep="") tmp <- read.maimages(files=files[printRun==i], path=dataDir, source="genepix")
- assign(paste("RG.",i,sep=""), tmp)
assign(fname, tmp)
} save.image()
- Read in mapping file
mappingfname <- "gDNACy3Cy5mapping.csv" targets <- read.csv(file.path(dataDir, mappingfname), as.is=TRUE)
targets <- targets[-(61:64),]
for( i in unique(printRun)[-2]) {
RGname <- paste("RG.",i, sep="") OBJ <- get(RGname)
- Grab names of mapping files
Cy3files <- targets$gDNA.Cy3[grep(i, targets$gDNA.Cy3)] Cy5files <- targets$gDNA.Cy5[grep(i, targets$gDNA.Cy5)]
- Check files are in loaded RG.AC object
Cy5common <- Cy5files[ Cy5files %in% colnames(OBJ) ] Cy3common <- Cy3files[ Cy3files %in% colnames(OBJ) ]
- Subset
- ? plotDensities
- singlechannels: vector of integers indicating which individual-channels
- will be selected to be plotted. Values correspond to the
- columns of the matrix of 'cbind(R,G)' and range between
- '1:ncol(R)' for red channels and '(
- (ncol(R)+1):(ncol(R)+ncol(G)) )' for the green channels in
- 'object'. Defaults to all channels.
graphics.off() X11(width=6, height=6, xpos=1, ypos=1) X11(width=6, height=6, xpos=1, ypos=493) X11(width=6, height=6, xpos=450, ypos=1) X11(width=6, height=6, xpos=450, ypos=493)
ylim <- c(0,0.3)
dev.set(2) nCy5 <- ncol(OBJ[,Cy5common]) nCy3 <- ncol(OBJ[,Cy3common])
plotDensities(OBJ[,Cy5common], singlechannels=1:nCy5, ylim=ylim) mtext(paste(RGname, "Red Genomic"),3)
dev.set(3) plotDensities(OBJ[,Cy5common], singlechannels=(nCy5+1):(2*nCy5), ylim=ylim) mtext(paste(RGname,"Green RNA"),3)
dev.set(4) plotDensities(OBJ[,Cy3common], singlechannels=(nCy3+1):(2*nCy3), ylim=ylim) mtext(paste(RGname,"Green Genomic"),3)
dev.set(5) plotDensities(OBJ[,Cy3common], singlechannels=1:nCy3, ylim=ylim) mtext(paste(RGname,"Red RNA"),3) Sys.sleep(5) }
ls(pattern="RG\\.")
identical(RG.AC$genes, RG.AF$genes) # AC same as AF print only identical(RG.AF$genes, RG.AL$genes) identical(RG.AL$genes, RG.AO$genes) identical(RG.AO$genes, RG.AQ$genes) identical(RG.AQ$genes, RG.AU$genes) identical(RG.AU$genes, RG.BC$genes) identical(RG.BC$genes, RG.BE$genes) identical(RG.BE$genes, RG.BM$genes)
- Merging the print runs
tmp <- merge.RGList(RG.AC, RG.AF) tmp <- merge.RGList(tmp, RG.AL) tmp <- merge.RGList(tmp, RG.AO) tmp <- merge.RGList(tmp, RG.AQ) tmp <- merge.RGList(tmp, RG.AU) tmp <- merge.RGList(tmp, RG.BC) tmp <- merge.RGList(tmp, RG.BE) RG <- merge.RGList(tmp, RG.BM)
match(substr(colnames(RG),1,2), substr(ls(pattern="RG\\."),4,5))
- Quick check that merging is working
cbind(RG.AC$R[match(RG.AC$genes$Name, tmp$genes$Name),], tmp$R[match(tmp$genes$Name, RG.AC$genes$Name),])[1:1000,c("AC1", "AC1")]
- Robert Schaffers approach to normalizing gDNA/RNA dye swap pairs
- ----------------------------------------------------------------------------- #
- -------------------- Genomic DNA Normalisation Script ---------------------- #
- ------ Need microarray .gpr files from Genepix or other software package ---- #
- ------ SlideSummary file set up as described in limma ---------------------- #
- ------ >= R-2.0.0 (URL http://cran.r-project.org/) ------------------------ #
- -- limma package version 1.8.6 or above (URL http://www.bioconductor.org/) - #
- ----------------------------------------------------------------------------- #
gDNA_reference_norm<-function(Rawdata){ ### Select the gDNA channels and RNA channels into two separate files gDNA<-cbind(Rawdata$R[,targets[grep("gDNA",targets[,4],),1]], Rawdata$G[,targets[grep("gDNA",targets[,3],),1]]) RNA<-cbind(Rawdata$G[,targets[grep("RNA",targets[,3],),1]], Rawdata$R[,targets[grep("RNA",targets[,4],),1]])
### reorder so dye swaps are next to each other
gDNA<-gDNA[,match(targets[,2],dimnames(gDNA)2)] RNA<-RNA[,match(targets[,2],dimnames(RNA)2)]
### floor intensities that are less than 100 to 100
temp<-gDNA < 100 gDNA[temp] <- 100 temp<-RNA < 100 RNA[temp] <- 100
### flagged spots converted to NA (remove from analysis)
gDNA[Rawdata$weights == 0] <- NA RNA[Rawdata$weights == 0] <- NA
### global mean normalise
for (i in 1:length(gDNA[1,])){ gDNA[,i]<- 2^(log(gDNA[,i],2)- mean(log(gDNA[,i],2),na.rm=T)) }
for (i in 1:length(RNA[1,])){ RNA[,i]<- 2^(log(RNA[,i],2)- mean(log(RNA[,i],2),na.rm=T)) }
### normalise Quantiles (script within limma)
gDNAnorm<-normalizeQuantiles(gDNA) RNAnorm<-normalizeQuantiles(RNA)
### create ratios of RNA compared to gDNA (in log base 2 space)
RNAgDNArat<-RNAnorm/gDNAnorm RNAgDNAratlog<-log(RNAgDNArat,2)
### loess smooth M values
RNAgDNAratloglo<-matrix(data=NA,length(RNAgDNAratlog[,1]),length(RNAgDNAratlog[1,])) for (i in seq(1,length(RNAgDNAratlog[1,])-1,by=2)){ tempc<-matrix(data=NA,length(RNAgDNAratlog[,1]),2) tempa<-RNAgDNAratlog[,c(i,i+1)] tempb<-is.na(tempa) tempnam<-paste("a",1:length(RNAgDNAratlog[,1]),sep="") row.names(tempa)<-tempnam tempa[tempb[,1],1]<- tempa[tempb[,1],2] tempa[tempb[,2],2]<- tempa[tempb[,2],1] tempa<-na.omit(tempa) Mav <- (tempa[,1] + tempa[,2])/2 Mrat <- tempa[,1] - tempa[,2] Mrat.out <- Mrat - loess( Mrat ~ Mav,span=0.4,degree=2)$fitted Mrat.out<-Mrat.out[match(tempnam,dimnames(tempa)1)] Mav<-Mav[match(tempnam,dimnames(tempa)1)] tempc[,1] <- Mav + Mrat.out/2 tempc[,2] <- Mav - Mrat.out/2 tempc[tempb]<- NA RNAgDNAratloglo[,c(i,i+1)] <- tempc }
RNAgDNAratlo<-2^RNAgDNAratloglo
### Create Absolute RNA intensities
gDNAmedVals<-apply(gDNAnorm,1,median,na.rm=T)
AbsRNANorm<-RNAgDNAratlo*gDNAmedVals
return(AbsRNANorm) }
- Script
library(limma)
auxillaryDir <- "/Volumes/HD2/Data/MDMib7FourSlides/Auxillary" dataDir <- "/Volumes/HD2/Data/MDMib7FourSlides/GPR"
- set the directory to a working directory containing gpr files and slide summary (eg labelled microarray1)
- read in summary (script within limma)
targets <- readTargets(file.path(auxillaryDir, "targets.txt"))
- Read in datafiles (this example is with genepix, other formats available)
RG <- read.maimages(targets$filename, path=dataDir, source="genepix.median", wt.fun=wtflags(0))
- normalise
RGmatrix <- gDNA_reference_norm(RG)
mergeRG <- function(R, G, Rb, Gb) {
- ----------------------------------------------------------------------------- #
- Function for combining elements of two similar RGLists
- together for the purposes of exploratory graphical investigation
- Author: Marcus Davy
- ----------------------------------------------------------------------------- #
obj <- new("RGList") obj$R <- R obj$G <- G if(!(missing(Rb))) { obj$Rb <- Rb } if(!(missing(Gb))) { obj$Gb <- Gb } return(obj)
}
RGnorm <- mergeRG(R = RGmatrix[,c(1,3)], G = RGmatrix[,c(2,4)])
plot(log2(RGnorm$R[,1]), log2(RGnorm$G[,1]))
plotMA(RGnorm)