RP.R

From Organic Design wiki
  1. ----------------------- From BioC version 1.9 ---------------------- #

RP <- function (data, cl, num.perm = 100, logged = TRUE, na.rm = FALSE,

   gene.names = NULL, plot = FALSE, rand = NULL) 

{

   # Expects a matrix, warns if a vector
   if (is.vector(data)) {
       cat("Warning: There is only one gene in the data set", 
           "\n", "\n")
       data = matrix(data, nrow = 1)
   }
   # Looks like code taken from sam analysis
   # Comapre cl to data to make sure compatible
   total.sam <- length(cl)
   total.sam2 <- dim(data)[2]
   if (total.sam != total.sam2) 
       stop("Number of classes should match the columns in the data")
   # xyCall generates a list of indices based on structure of cl
   xy.out <- xyCall(cl)
   x <- xy.out$x
   y <- xy.out$y
   num.gene <- dim(data)[1]
   data <- as.matrix(data)
   # Force dataset to be numeric
   mode(data) <- "numeric"
   NA.genes <- NULL
   if (any(is.na(data))) {
   # Could simplify here by using which(..., arr.ind=TRUE)
       NA.genes <- unique(ceiling(which(is.na(t(data)))/ncol(data)))
       cat("Warning: There are", length(NA.genes), "genes with at least one missing value.", 
           "\n", "\n")
       if (na.rm) 
           data[NA.genes, ] <- NaReplace(data[NA.genes, ])
       if (!na.rm) 
           cat(" This value is not used to compute rank product.", 
               "\n", "\n")
   }
   if (!is.null(rand)) {
       set.seed(rand)
   }
   if (!is.null(y)) {
       num.class <- 2
       data1 <- as.matrix(data[, x])
       data2 <- as.matrix(data[, y])
       data1.ave = apply(data1, 1, mean)
       data2.ave = apply(data2, 1, mean)
       if (logged) {
           fold.change = data1.ave - data2.ave
       }
       else {
           fold.change = data1.ave/data2.ave
       }
   }
   if (is.null(y)) {
       num.class <- 1
       data1 <- as.matrix(data[, x])
       data2 <- as.matrix(data[, y])
       fold.change = apply(data1, 1, mean)
   }
   RP.ori.upin2 <- RankProd1(data1, data2, logged, num.class, 
       rev.sorting = FALSE)
   rank.ori.upin2 <- rank(RP.ori.upin2$RP)
   RP.ori.downin2 <- RankProd1(data1, data2, logged, num.class, 
       rev.sorting = TRUE)
   rank.ori.downin2 <- rank(RP.ori.downin2$RP)
   RP.perm.upin2 <- matrix(NA, num.gene, num.perm)
   RP.perm.downin2 <- matrix(NA, num.gene, num.perm)
   cat("Starting", num.perm, "permutations...", "\n")
   for (p in 1:num.perm) {
       temp.data <- Newdata(data1, data2, num.class)
       new.data1 <- temp.data$new.data1
       new.data2 = temp.data$new.data2
       RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, 
           logged, num.class, rev.sorting = FALSE)$RP
       RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, 
           logged, num.class, rev.sorting = TRUE)$RP
   }
   cat("Computing pfp ..", "\n")
   temp1 <- as.vector(cbind(RP.ori.upin2$RP, RP.perm.upin2))
   temp2 <- rank(temp1)[1:num.gene]
   order.temp <- match(temp2, sort(temp2))
   count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]
   exp.count <- count.perm/num.perm
   pval.upin2 <- count.perm/(num.perm * num.gene)
   pfp.upin2 <- exp.count/rank.ori.upin2
   if (plot) {
       par(mfrow = c(2, 1))
       plot(rank.ori.upin2, pfp.upin2, xlab = "sorted gene rank of the original rank product", 
           ylab = "estimated PFP")
       title("Identification of Up-regulated genes under class 2")
   }
   rm(temp1, temp2, order.temp, count.perm, exp.count)
   temp1 <- as.vector(cbind(RP.ori.downin2$RP, RP.perm.downin2))
   temp2 <- rank(temp1)[1:num.gene]
   order.temp <- match(temp2, sort(temp2))
   count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]
   exp.count <- count.perm/num.perm
   pval.downin2 <- count.perm/(num.perm * num.gene)
   pfp.downin2 <- exp.count/rank.ori.downin2
   if (plot) {
       plot(rank.ori.downin2, pfp.downin2, xlab = "sorted gene rank of the original rank product", 
           ylab = "estimated PFP")
       title("Identification of down-regulated genes under class 2")
   }
   rm(temp1, temp2, order.temp, count.perm, exp.count)
   cat("Outputing the results ..", "\n")
   pfp = data.frame(pfp.upin2, pfp.downin2)
   colnames(pfp) = c("class1 < class2", "class1 > class 2")
   pval = data.frame(pval.upin2, pval.downin2)
   colnames(pval) = c("class1 < class2", "class1 > class 2")
   RPs = data.frame(RP.ori.upin2$RP, RP.ori.downin2$RP)
   colnames(RPs) = c("class1 < class2", "class1 > class 2")
   RPrank = data.frame(rank.ori.upin2, rank.ori.downin2)
   colnames(RPrank) = c("class1 < class2", "class1 > class 2")
   Orirank = list(RP.ori.upin2$rank.all, RP.ori.downin2$rank.all)
   names(Orirank) = c("class1 < class2", "class1 > class 2")
   fold.change = t(t(fold.change))
   colnames(fold.change) = "log/unlog(class1/class2)"
   if (!is.null(gene.names)) {
       rownames(pfp) = gene.names
       rownames(pval) = gene.names
       rownames(RPs) = gene.names
       rownames(RPrank) = gene.names
       rownames(fold.change) = gene.names
   }
   list(pfp = pfp, pval = pval, RPs = RPs, RPrank = RPrank, 
       Orirank = Orirank, AveFC = fold.change)

}