NutriTools.R

From Organic Design wiki

Code snipits and programs written in R, S or S-PLUS checkall <- function() {

 require(codetools)
 require(limma)
 for(i in ls(env=.GlobalEnv)) {
   cat(i, sep = "\n")
   checkUsage(get(i))
   cat(paste(rep("-", 80), collapse="", sep=""), sep="\n")
 }
 return("done")

} clusterPlot <- function(x, rowNames = NULL, dmethod = "euclidian", hmethod="complete", ...) {

 oldpar <- par(mfrow=c(2,1))  
 if(is.character(rowNames) & (length(rowNames) == nrow(x))) {
   rownames(x) <- rowNames
 }
 xDist <- dist(x, method=dmethod)
 
  1. 1) Tree dendrogram
 xClust <- hclust(xDist, method=hmethod)
 plclust(xClust, sub="")
 
  1. 2) Image plot...
 image(x = 1:nrow(x), y = 1:ncol(x), z = x[xClust$order, ], col=heat.colors(nrow(x)), ...)
 par(oldpar)
 invisible()

} corMA <- function(filter, MA, targets, plot=FALSE) {

 tCols <- colnames(targets)
 e <- new.env()
 for (i in tCols) {
   ei <- targetsi
 }
  toSub <- eval(parse(text = filter), envir = e)
  1. return(MA$M[,toSub])
 MAsub <- MA$M[,toSub]
 if(plot) {
   pairs(MAsub, pch=".")
 }
 return(list(targets=targets[toSub,], cor=cov2cor(var(MAsub, na.rm=TRUE))))

} createPlots <- function(RG, plotDir=getwd(),

                       bc.correct     = c("none","subtract","minimum", "movingmin"),
                       method.within  = c("none", "loess", "printtiploess"),
                       method.between = c("none","scale", "quantile")) {
 if(exists("RG")) {
  1. 1) Create directories
   dir.create(file.path(plotDir, "Densityplots"), recursive=TRUE)
   for(i in c("ImagePlots","MAplots")) {
     dir.create(file.path(plotDir, i), recursive=TRUE)
     if(i == "ImagePlots") {
       for(x in bc.correct) {
         for(y in method.within) {
           for(z in method.between) {
             for( j in c("R","G", "M","A")) {
               dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),j), recursive=TRUE)
               if(x=="none" & y =="none" & z =="none") {
                 for(k in c("Rb", "Gb")) {
                   dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep=""),k), recursive=TRUE)
                 }
               }
             }
           }
         }
       }
     }
     if(i == "MAplots") {
       for(x in bc.correct) {
         for(y in method.within) {
           for(z in method.between) {
             dir.create(file.path(plotDir, i, paste(x,"-",y,"-",z, sep="")), recursive=TRUE)
           }
         }
       }
     }
   }
  1. 2) Create plots
   for(i in bc.correct) {
     for( j in method.within) {
       MA <- normalizeWithinArrays(RG, method=j, bc.method=i)
       for (k in method.between) {
         MA.Between <- normalizeBetweenArrays(MA, method=k)
         print(paste("[ Graphing:",i,j,k,"]", sep=" "))
         # plotDensities
         png(file.path(plotDir, "Densityplots", paste(i,"-",j,"-",k,".png", sep="")))
         plotDensities(MA.Between)
         dev.off()
         # plotMA3by2
         plotMA3by2(MA.Between, path=file.path(plotDir, "MAplots", paste(i,"-",j,"-",k, sep="")))
         # Imageplots
         imageplot3by2(MA.Between, z="M", path = file.path(plotDir,"Imageplots",  paste(i,"-",j,"-",k, sep=""), "M"))
         imageplot3by2(MA.Between, z="A", path = file.path(plotDir,"Imageplots",  paste(i,"-",j,"-",k, sep=""), "A"))
         imageplot3by2(RG.MA(MA.Between), z="G", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"G"),
                       low="black", high="green")
         imageplot3by2(RG.MA(MA.Between), z="R", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"R"),
                       low="black", high="red")
         if(i=="none" & j=="none" & k=="none") {
           imageplot3by2(RG, z="Gb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Gb"),
                         low="black", high="green")
           imageplot3by2(RG, z="Rb", path = file.path(plotDir,"Imageplots", paste(i,"-",j,"-",k, sep=""),"Rb"),
                         low="black", high="red")
         }
       }
     }
   }
 }
 return("done")

} dir.exists <- function(x, curdir = ".", rootdir) {

  1. -------------------------------------------------------------------------------------- #
  2. Author : Marcus Davy
  3. This is a recursive program to take in a string such as:
  4. foo/fodda/fi, and create subdirectories foo, fodda, and fi
  5. from the current directory location (if they don't already exist)
  6. -------------------------------------------------------------------------------------- #
 if( missing(rootdir)) {
   rootdir <- getwd()
 }
 setwd(curdir)

 x <- unlist(strsplit(x,"/"))
 if( !file.exists(x[1]) ){
   dir.create(x[1])
   print(paste("Creating", file.path(getwd(), x[1])))
 }
 curdir <- x[1]
 if(length(x)==1) {
   setwd(rootdir)
 } else {
   return(dir.exists(x[-1], curdir = curdir,  rootdir = rootdir))
 }

} fixLabel <- function(x, string, sep=".") {

  1. ----------------------------------------------------------------------------- #
  2. Function for fixing labels for limma package functions:
  3. modelMatrix and make Contrasts e.g. 14DAFB converts to DAFB.14
  4. Author: Marcus Davy
  5. ----------------------------------------------------------------------------- #
 beginning <- regexpr(string, x)
 ending    <- nchar(x)
 labelstart   <- substr(x, beginning, ending)
 labelend <- substr(x, 0, beginning - 1)
 newlabel <- paste(labelstart, labelend, sep=sep)
 
 return(newlabel)

} formatTopTable <- function(OBJ, ...) {

 if(!is(OBJ, "MArrayLM")) {
   return("OBJ is not of MArrayLM class")
 }       
 genelist <- topTable(OBJ, ...)
 # Remove the B statistic column
 genelist <- genelist[,-grep("^B$", colnames(genelist))]
 rawt <- OBJ$coef / OBJ$stdev.unscaled / OBJ$sigma
 colnames(genelist)[grep("^t$", colnames(genelist))] <- "Mod.t"
 return(cbind(genelist[,1:(grep("^Mod.t$", colnames(genelist))-1)],
              "Raw.t" = rawt[as.numeric(rownames(genelist))],
              genelist[,grep("^Mod.t$", colnames(genelist)):length(colnames(genelist))])
        )

}

"gprDiagnostics" <-

 function(fnames, path = ".", annotation = c("Block","Column","Row", "Name", "ID")) {
   skipcol <- c()
   dimensions <- data.frame(matrix(NA,nc=2, nr=length(fnames)))
   dimnames(dimensions) <- list(fnames, c("nrows", "ncols"))
   diagnostics <- list()
   fullfnames <- file.path(path, fnames)
   for(i in 1:length(fullfnames)){
     y <- readLines(fullfnames[i], n = 100)
     skipcol[fnames[i]] <- grep(annotation[1], y)[1] - 1
     h <- scan(fullfnamesi, quiet=TRUE, what=character(1), sep="\t", skip = skipcol[fnames[i]], quote="\"", nlines=1)    
     names(h) <- h
     h[] <- rep("NULL", length(h))
     h[annotation] <- NA
     dimensions[fnamesi,] <- dim(read.table(fullfnames[i], sep="\t", quote="\"", colClasses=h, skip=skipcol[fnames[i]], header=T))
   }
   list(skipcol, dimensions)
 }

"getWikiCells" <-

 function (ids, repository = "ug") 

{

 if (is.list(ids)) {
   out <- vector()
   temp <- lapply(ids, getQueryLink, repository = repository)
   for (i in seq(along = ids)) {
     if (temp[i] != " ") {

out[i] <- paste("

[", tempi, " ", idsi, "]

", sep = "", collapse = "")

     }
     else out[i] <- temp[i]
   }
 }
 else {
   temp <- getQueryLink(ids, repository)
   blanks <- temp == " "
   out <- paste("[", temp, " ", ids, "]", sep = "")
   out[blanks] <- " "
 }
 return(out)

} "getWikiRows" <-

 function (ids, repository = "ug") 

{

 out <- paste("|", getWikiCells(ids, repository), sep = " ")
 return(out)

} hr <- function(n=80) {

 return(paste(rep("-", n), collapse=""))

} mergeRG <- function(R, G, Rb, Gb) {

  1. ----------------------------------------------------------------------------- #
  2. Function for combining elements of two similar RGLists
  3. together for the purposes of exploratory graphical investigation
  4. Author: Marcus Davy
  5. ----------------------------------------------------------------------------- #
 obj <- new("RGList")
 obj$R <- R
 obj$G <- G
 if(!(missing(Rb))) {
   obj$Rb <- Rb
 }
 if(!(missing(Gb))) {
   obj$Gb <- Gb
 }
 return(obj)

} NApadding <-

 function (OBJ, method = "weights") 

{

 choices = c("weights","unspotted")
 method <- match.arg(method, choices)
 if(method == "weights") {
   ind <- (OBJ$weights == 0)
 }
 if(method =="unspotted") {
   if(is.null(OBJ$genes$ID)) {
     stop("Expecting genepix RGList for method 'unspotted'")
   }
   ind <- (is.na(OBJ$genes$ID) | OBJ$genes$ID=="")
 }
 
 if (is(OBJ, "RGList")) {
   OBJ$R[ind] <- NA
   OBJ$G[ind] <- NA
   OBJ$Gb[ind] <- NA
   OBJ$Rb[ind] <- NA
 }
 else {
   OBJ$M[ind] <- NA
   OBJ$A[ind] <- NA
 }
 
 return(OBJ)

} "padGPR" <-

 function (RG) 

{

 mapGPRind <- function(RG) {
   N <- apply(RG$genes[, c("Block", "Column", "Row")], 2, 
              max)
   ind <- N["Column"] * (RG$genes[, "Row"] - 1) + RG$genes[, 
                                                           "Column"] + (RG$genes[, "Block"] - 1) * (N["Row"] * 
                                                                                                    N["Column"])
   return(ind)
 }
 OUT <- new("RGList")
 browser()
 OUT$R <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG)))
 OUT$Rb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG)))
 OUT$G <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG)))
 OUT$Gb <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG)))
 OUT$weights <- matrix(NA, nr = prod(unlist(RG$printer)), nc = ncol(RG), dimnames=list(NULL, colnames(RG)))
 OUT$genes <- data.frame(matrix(NA, nr = prod(unlist(RG$printer)), 
                                nc = ncol(RG$genes), dimnames=list(NULL, colnames(RG$genes))))
 OUT$targets <- RG$targets
 OUT$source <- RG$source
 OUT$printer <- RG$printer
 ind <- mapGPRind(RG)
 OUT$R[ind, ] <- RG$R
 OUT$Rb[ind, ] <- RG$Rb
 OUT$G[ind, ] <- RG$G
 OUT$Gb[ind, ] <- RG$Gb
 OUT$weights[ind, ] <- RG$weights
 OUT$genes[ind, ] <- RG$genes
 return(OUT)

} readAgilent <- function(dataDir="/Volumes/HD2/Data/Nutrigenomics/MultipleScans/Agilent"){

 require(limma)
  1. 1) Read in data using limma
 if(exists("targets")) {
   RGsub <- read.maimages(targets$Filename, path = dataDir, source="agilent", name=targets$Filetype)
 } else {
   RGsub <- read.maimages(dir(dataDir, pattern=".txt"), path = dataDir, source="agilent")
 }
  1. 2) Find Agilent indices from x ,y coords

getInd <- function(x,y, nx, byrow=T) {

 return( (x-1)* nx + y)

}

 ind <- getInd(RGsub$genes$Row, RGsub$genes$Col, nx=215)
  1. 3) Construct complete RGList
 RGfull <- new("RGList")
 RGfull$R <-RGfull$G <-RGfull$Rb <-RGfull$Gb <- matrix(NA, nc=ncol(RGsub), nr=max(ind))
 RGfull$genes <- data.frame(matrix(NA, nc=7, nr=max(ind), dimnames = list(NULL, names(RGsub$genes))))
 matrixNames <- dimnames(RGsub$R)
  1. 4) Populate complete list
 RGfull$R[ind,]     <- RGsub$R
 RGfull$Rb[ind,]    <- RGsub$Rb
 RGfull$G[ind,]     <- RGsub$G
 RGfull$Gb[ind,]    <- RGsub$Gb
 RGfull$genes[ind,] <- RGsub$genes
 RGfull$targets <- RGsub$targets
 dimnames(RGfull$R) <- dimnames(RGfull$Rb) <- dimnames(RGfull$G) <- dimnames(RGfull$Gb) <- matrixNames
  1. Checking the rows that needed padding
 rowstopad <- as.numeric(rownames(RGsub$genes) [ !(rownames(RGsub$genes) %in% ind) ])
 rowstopad
  1. Indexing looks ok
 RGfull[rowstopad,]
 RGfull$printer <- structure(list(ngrid.r=1, ngrid.c=1, nspot.r=105, nspot.c=215), class = "PrintLayout")
 rm(RGsub)
 return(RGfull)

} "readGPR" <-

 function (sourceType = "median", dataDir = "/Volumes/HD2/Data/Nutrigenomics/MultipleScans/GPR", 
           ...) 

{

 require(limma)
 print(sessionInfo())
 source.type <- switch(sourceType, mean = "genepix", median = "genepix.median")
 print(paste("source =", source.type))
 if (exists("targets")) {
   RG <- read.maimages(targets$Filename, path = dataDir, 
                       source = source.type, wt.fun = wtflags(0), names = targets$Filename, ...)
 }
 else {
   RG <- read.maimages(dir(dataDir, pattern = ".gpr"), path = dataDir, 
                       source = source.type, wt.fun = wtflags(0), ...)
 }
 RG$printer <- getLayout(RG$genes)
 return(RG)

}

  1. Temporarily modified toptable to allow unsorted output (masks limma version in Package:limma)

"toptable" <-

 function (fit, coef = 1, number = 10, genelist = NULL, A = NULL,
           eb = NULL, adjust.method = "BH", sort.by = "B", resort.by = NULL,
           ...)

{

 if (is.null(eb)) {
   fit$coefficients <- as.matrix(fit$coefficients)[, coef]
   fit$stdev.unscaled <- as.matrix(fit$stdev.unscaled)[,
                                                       coef]
   eb <- ebayes(fit, ...)
   coef <- 1
 }
 M <- as.matrix(fit$coefficients)[, coef]
 if (length(M) < number)
   number <- length(M)
 if (number < 1)
   return(data.frame())
 if (is.null(A)) {
   if (sort.by == "A")
     stop("Cannot sort by A-values as these have not been given")
 }
 else {
   if (NCOL(A) > 1)
     A <- rowMeans(A, na.rm = TRUE)
 }
 tstat <- as.matrix(eb$t)[, coef]
 P.Value <- as.matrix(eb$p.value)[, coef]
 B <- as.matrix(eb$lods)[, coef]
 sort.by <- match.arg(sort.by, c("M", "A", "P", "p", "T",
                                 "t", "B", "none"))
 ord <- switch(sort.by, M = order(abs(M), decreasing = TRUE),
               A = order(A, decreasing = TRUE), P = order(P.Value, decreasing = FALSE),
               p = order(P.Value, decreasing = FALSE), T = order(abs(tstat),
                                                         decreasing = TRUE), t = order(abs(tstat), decreasing = TRUE),
               B = order(B, decreasing = TRUE), none = 1:number)
 top <- ord[1:number]
 adj.P.Value <- p.adjust(P.Value, method = adjust.method)
 if (is.null(genelist))
   tab <- data.frame(M = M[top])
 else {
   if (is.null(dim(genelist)))
     tab <- data.frame(ProbeID = I(genelist[top]), M = M[top])
   else tab <- data.frame(genelist[top, , drop = FALSE],
                          M = M[top])
 }
 if (!is.null(A))
   tab <- data.frame(tab, A = A[top])
 tab <- data.frame(tab, t = tstat[top], P.Value = P.Value[top],
                   adj.P.Val = adj.P.Value[top], B = B[top])
 rownames(tab) <- as.character(1:length(M))[top]
 if (!is.null(resort.by)) {
 resort.by <- match.arg(resort.by, c("M", "A", "P", "p",
                                       "T", "t", "B"))
   ord <- switch(resort.by, M = order(tab$M, decreasing = TRUE),
                 A = order(tab$A, decreasing = TRUE), P = order(tab$P.Value,
                                                        decreasing = FALSE), p = order(tab$P.Value, decreasing = FALSE),
                 T = order(tab$t, decreasing = TRUE), t = order(tab$t,
                                                        decreasing = TRUE), B = order(tab$lods, decreasing = TRUE))
   tab <- tab[ord, ]
 }
 tab

}

"ttFilter" <-

 function( filter = "abs(logFC) > 0.69 & abs(t) > 2", fit, sort.by="t",  number = nrow(fit), ...) {
  1. ----------------------------------------------------------------------------- #
  2. Function for filtering out unwanted rows in a topTable output
  3. Author: Marcus Davy
  4. ----------------------------------------------------------------------------- #
   tt <- topTable(fit, sort.by = sort.by, number = number, ...)
   tCols <- colnames(tt)
   # create an enviromnent for topTable columns
   e <- new.env()
   for(i in tCols) {
     ei <- tti
   }
  # Obtain logical from filter
   toSub <- eval(parse(text=filter), envir=e)
   return( tt[toSub, ] )
 }

"wikitable" <-

 function (genelist, filename, title, othernames, table.head, 
           table.center = TRUE, repository = "ll") 

{

 nl <- "\n"
 outfile <- file(filename, "w")
 if (!(missing(title) | is.null(title))) {
   cat("=", title, "=", nl, file = outfile, sep = "")
 }
 cat("{| border=\"0\"", file = outfile, sep = "\n")
 if (!(missing(table.head) | is.null(table.head))) {
   headout <- paste("| ", "", table.head, "", sep="")
   cat("|-  align=\"center\" style=\"background:#277ec2; color:white\"", headout, file = outfile, sep = "\n")
   }
 if (is.list(genelist)) {
   if (all.equal(rep(length(genelist1), length(genelist)), 
                 unlist(lapply(genelist, length), use.names = FALSE))) {
     nrows <- length(genelist1)
   } else {
     stop("The lists of genes to annotate must all be of equal length.")
   }
 } else {
   if(!is.null(genelist)) { # nrows from either genelist or othernames
     nrows <- length(genelist)
   } else {
     nrows <- length(othernames1)
   }
 }
 if(!is.null(repository)) {
   if (is.list(repository)) {
     rows <- ""
     for (i in seq(along = repository)) {
       rows <- paste(rows, getWikiRows(genelisti, repositoryi), sep="\n")
     }
   } else {
     rows <- getWikiRows(genelist, repository, sep="\n")
   }
 } else {
   rows <- NULL
 }
   if (!missing(othernames)) {
     if (is.list(othernames)) {
       others <- ""
       for (nm in othernames) {
         if (is.matrix(nm)) {
           for (i in 1:dim(nm)[2]) {
             others <- paste(others, "\n| ", nm[, i], sep = "")
           }
       }
       if (is.list(nm)) {
         out <- vector()
         for (j in seq(along = nm)) {

out[j] <- paste("

", nmj, "

", sep = "",

                           collapse = "")
         }
         out <- paste("\n| ", out, sep = "")
         others <- paste(others, out, sep = "")
       }
       if ((is.vector(nm) || is.factor(nm)) && !is.list(nm)) 
         others <- paste(others, "\n| ", nm, sep = "")
     }
   } else {
     others <- NULL
   }
   if( !(is.null(rows) | is.null(others))) {
     if (length(rows) != length(others)) 
       stop(paste("There are", length(rows), "rows in your genelist, but", 
                  length(others), "rows in othernames.\n This will not give", 
                  "good results!\n"))
  }
     rows <- paste(rows, others)
     
 }
 for (i in 1:nrows) cat("|- align=\"left\" bgcolor=\"#f2f2f2\"", rows[i], nl, file = outfile, 
                        sep = "")
 cat("|}", file = outfile, sep="")
 close(outfile)

}