GDNA analysis.R

From Organic Design wiki

Code snipits and programs written in R, S or S-PLUS library(limma) source("plotDensities.R") dataDir <- "/Volumes/HD2/Data/Rosaceae/GPR" files <- dir(dataDir, pattern=".gpr") printRun <- substr(files,0,2)


for( i in unique(printRun) ) {

 fname <- paste("RG.",i,sep="")
 tmp <- read.maimages(files=files[printRun==i], path=dataDir, source="genepix")
  1. assign(paste("RG.",i,sep=""), tmp)
 assign(fname, tmp)

} save.image()

  1. Read in mapping file

mappingfname <- "gDNACy3Cy5mapping.csv" targets <- read.csv(file.path(dataDir, mappingfname), as.is=TRUE)

targets <- targets[-(61:64),]

for( i in unique(printRun)[-2]) {

 RGname <- paste("RG.",i, sep="")
 OBJ <- get(RGname)
  1. Grab names of mapping files
 Cy3files <- targets$gDNA.Cy3[grep(i, targets$gDNA.Cy3)]
 Cy5files <- targets$gDNA.Cy5[grep(i, targets$gDNA.Cy5)]
  1. Check files are in loaded RG.AC object
 Cy5common <- Cy5files[ Cy5files %in% colnames(OBJ)  ]
 Cy3common <- Cy3files[ Cy3files %in% colnames(OBJ)  ]


  1. Subset
  2. ? plotDensities
  3. singlechannels: vector of integers indicating which individual-channels
  4. will be selected to be plotted. Values correspond to the
  5. columns of the matrix of 'cbind(R,G)' and range between
  6. '1:ncol(R)' for red channels and '(
  7. (ncol(R)+1):(ncol(R)+ncol(G)) )' for the green channels in
  8. 'object'. Defaults to all channels.

graphics.off() X11(width=6, height=6, xpos=1, ypos=1) X11(width=6, height=6, xpos=1, ypos=493) X11(width=6, height=6, xpos=450, ypos=1) X11(width=6, height=6, xpos=450, ypos=493)

 ylim <- c(0,0.3)
 

dev.set(2) nCy5 <- ncol(OBJ[,Cy5common]) nCy3 <- ncol(OBJ[,Cy3common])

plotDensities(OBJ[,Cy5common], singlechannels=1:nCy5, ylim=ylim) mtext(paste(RGname, "Red Genomic"),3)

dev.set(3) plotDensities(OBJ[,Cy5common], singlechannels=(nCy5+1):(2*nCy5), ylim=ylim) mtext(paste(RGname,"Green RNA"),3)

dev.set(4) plotDensities(OBJ[,Cy3common], singlechannels=(nCy3+1):(2*nCy3), ylim=ylim) mtext(paste(RGname,"Green Genomic"),3)

dev.set(5) plotDensities(OBJ[,Cy3common], singlechannels=1:nCy3, ylim=ylim) mtext(paste(RGname,"Red RNA"),3) Sys.sleep(5) }


ls(pattern="RG\\.")

identical(RG.AC$genes, RG.AF$genes) # AC same as AF print only identical(RG.AF$genes, RG.AL$genes) identical(RG.AL$genes, RG.AO$genes) identical(RG.AO$genes, RG.AQ$genes) identical(RG.AQ$genes, RG.AU$genes) identical(RG.AU$genes, RG.BC$genes) identical(RG.BC$genes, RG.BE$genes) identical(RG.BE$genes, RG.BM$genes)

  1. Merging the print runs

tmp <- merge.RGList(RG.AC, RG.AF) tmp <- merge.RGList(tmp, RG.AL) tmp <- merge.RGList(tmp, RG.AO) tmp <- merge.RGList(tmp, RG.AQ) tmp <- merge.RGList(tmp, RG.AU) tmp <- merge.RGList(tmp, RG.BC) tmp <- merge.RGList(tmp, RG.BE) RG <- merge.RGList(tmp, RG.BM)

match(substr(colnames(RG),1,2), substr(ls(pattern="RG\\."),4,5))


  1. Quick check that merging is working

cbind(RG.AC$R[match(RG.AC$genes$Name, tmp$genes$Name),], tmp$R[match(tmp$genes$Name, RG.AC$genes$Name),])[1:1000,c("AC1", "AC1")]

  1. Robert Schaffers approach to normalizing gDNA/RNA dye swap pairs
  1. ----------------------------------------------------------------------------- #
  2. -------------------- Genomic DNA Normalisation Script ---------------------- #
  3. ------ Need microarray .gpr files from Genepix or other software package ---- #
  4. ------ SlideSummary file set up as described in limma ---------------------- #
  5. ------ >= R-2.0.0 (URL http://cran.r-project.org/) ------------------------ #
  6. -- limma package version 1.8.6 or above (URL http://www.bioconductor.org/) - #
  7. ----------------------------------------------------------------------------- #

gDNA_reference_norm<-function(Rawdata){ ### Select the gDNA channels and RNA channels into two separate files gDNA<-cbind(Rawdata$R[,targets[grep("gDNA",targets[,4],),1]], Rawdata$G[,targets[grep("gDNA",targets[,3],),1]]) RNA<-cbind(Rawdata$G[,targets[grep("RNA",targets[,3],),1]], Rawdata$R[,targets[grep("RNA",targets[,4],),1]])

### reorder so dye swaps are next to each other

gDNA<-gDNA[,match(targets[,2],dimnames(gDNA)2)] RNA<-RNA[,match(targets[,2],dimnames(RNA)2)]

### floor intensities that are less than 100 to 100

temp<-gDNA < 100 gDNA[temp] <- 100 temp<-RNA < 100 RNA[temp] <- 100

### flagged spots converted to NA (remove from analysis)

gDNA[Rawdata$weights == 0] <- NA RNA[Rawdata$weights == 0] <- NA

### global mean normalise

for (i in 1:length(gDNA[1,])){ gDNA[,i]<- 2^(log(gDNA[,i],2)- mean(log(gDNA[,i],2),na.rm=T)) }

for (i in 1:length(RNA[1,])){ RNA[,i]<- 2^(log(RNA[,i],2)- mean(log(RNA[,i],2),na.rm=T)) }

### normalise Quantiles (script within limma)

gDNAnorm<-normalizeQuantiles(gDNA) RNAnorm<-normalizeQuantiles(RNA)

### create ratios of RNA compared to gDNA (in log base 2 space)

RNAgDNArat<-RNAnorm/gDNAnorm RNAgDNAratlog<-log(RNAgDNArat,2)

### loess smooth M values

RNAgDNAratloglo<-matrix(data=NA,length(RNAgDNAratlog[,1]),length(RNAgDNAratlog[1,])) for (i in seq(1,length(RNAgDNAratlog[1,])-1,by=2)){ tempc<-matrix(data=NA,length(RNAgDNAratlog[,1]),2) tempa<-RNAgDNAratlog[,c(i,i+1)] tempb<-is.na(tempa) tempnam<-paste("a",1:length(RNAgDNAratlog[,1]),sep="") row.names(tempa)<-tempnam tempa[tempb[,1],1]<- tempa[tempb[,1],2] tempa[tempb[,2],2]<- tempa[tempb[,2],1] tempa<-na.omit(tempa) Mav <- (tempa[,1] + tempa[,2])/2 Mrat <- tempa[,1] - tempa[,2] Mrat.out <- Mrat - loess( Mrat ~ Mav,span=0.4,degree=2)$fitted Mrat.out<-Mrat.out[match(tempnam,dimnames(tempa)1)] Mav<-Mav[match(tempnam,dimnames(tempa)1)] tempc[,1] <- Mav + Mrat.out/2 tempc[,2] <- Mav - Mrat.out/2 tempc[tempb]<- NA RNAgDNAratloglo[,c(i,i+1)] <- tempc }

RNAgDNAratlo<-2^RNAgDNAratloglo

### Create Absolute RNA intensities

gDNAmedVals<-apply(gDNAnorm,1,median,na.rm=T)

AbsRNANorm<-RNAgDNAratlo*gDNAmedVals

return(AbsRNANorm) }



      1. Script

library(limma)

auxillaryDir <- "/Volumes/HD2/Data/MDMib7FourSlides/Auxillary" dataDir <- "/Volumes/HD2/Data/MDMib7FourSlides/GPR"

      1. set the directory to a working directory containing gpr files and slide summary (eg labelled microarray1)
      1. read in summary (script within limma)

targets <- readTargets(file.path(auxillaryDir, "targets.txt"))

      1. Read in datafiles (this example is with genepix, other formats available)

RG <- read.maimages(targets$filename, path=dataDir, source="genepix.median", wt.fun=wtflags(0))

      1. normalise

RGmatrix <- gDNA_reference_norm(RG)

mergeRG <- function(R, G, Rb, Gb) {

  1. ----------------------------------------------------------------------------- #
  2. Function for combining elements of two similar RGLists
  3. together for the purposes of exploratory graphical investigation
  4. Author: Marcus Davy
  5. ----------------------------------------------------------------------------- #
 obj <- new("RGList")
 obj$R <- R
 obj$G <- G
 if(!(missing(Rb))) {
   obj$Rb <- Rb
 }
 if(!(missing(Gb))) {
   obj$Gb <- Gb
 }
 return(obj)

}


RGnorm <- mergeRG(R = RGmatrix[,c(1,3)], G = RGmatrix[,c(2,4)]) plot(log2(RGnorm$R[,1]), log2(RGnorm$G[,1])) plotMA(RGnorm)