RankProduct.R

From Organic Design wiki

Code snipits and programs written in R, S or S-PLUS library(RankProd) library(limma)


  1. ------------------------------ Generated dataset ---------------------------- #

set.seed(1) ncols <- 4 X <- round(matrix(rnorm(20), nc=ncols),2)

set.seed(2) num.perm <- 100 num.gene <- nrow(X) RP.out <- RP(X, rep(1,4), num.perm=num.perm) RP.out

  1. ------------------------------ First Principles ----------------------------- #

AveFC <- apply(X,1, mean) OrirankUp <- apply(X, 2, rank) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)

  1. Ripped from RP code

RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {

 temp.data <- Newdata(X, NULL, num.class=1)
 new.data1 <- temp.data$new.data1
 new.data2 = temp.data$new.data2
 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, 
                                 TRUE, num.class=1, rev.sorting = FALSE)$RP
 RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, 
                                   TRUE, num.class=1, rev.sorting = TRUE)$RP

}

temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]

  1. Original code: match(temp2, sort(temp2))

order.temp <- order(temp2)

  1. Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]

count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm

pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)

  1. Check values

RP.out$pfp[,1] pfp RP.out$pval[,1] pval RP.out$RPs[,1] RPs RP.out$Orirank1 OrirankUp RP.out$AveFC[,1] AveFC

  1. --------- testing with letters (alpha numerically ranked)

X[] <- LETTERS[OrirankUp] X

OrirankUp <- apply(X, 2, rank) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)

  1. From RP code

RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {

 temp.data <- Newdata(X, NULL, num.class=1)
 new.data1 <- temp.data$new.data1
 new.data2 = temp.data$new.data2
 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, 
                                 TRUE, num.class=1, rev.sorting = FALSE)$RP
 RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, 
                                   TRUE, num.class=1, rev.sorting = TRUE)$RP

}

temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]

  1. Original code: match(temp2, sort(temp2))

order.temp <- order(temp2)

  1. Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]

count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm

pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)

  1. Check values for character version of X

RP.out$pfp[,1] pfp RP.out$pval[,1] pval RP.out$RPs[,1] RPs RP.out$Orirank1 OrirankUp

  1. --------------------------- Simulated GGB datasets -------------------------- #

simulateGGB <- function(m=1000, nreps=5, pi0 =0.95,

                       params = c(2.74886, 1.36546, 4.12844), seed=1){
  1. p (genes) by n (slides) matrix
  2. m=p keeping fdr notation
 set.seed(seed)
 p <- m
 n   <- nreps * 2
 m1 <- round(m * (1-pi0))
 m0 <- p - m1
 
 a.shape  <- params[1]
 a0.shape <- params[2] 
 scale    <- params[3] 
 
 DEscales <- rgamma(2*m1, shape=a0.shape, rate=scale)
 EEscales <- rgamma(m0 , shape=a0.shape, rate=scale)
 
 scales   <- c(rep(DEscales, each=nreps), rep(EEscales, each=2*nreps))      
 X <- rgamma(n* p, a.shape, rate=scales) 
 dim(X) <- c(n,p)
 
 X <- t(X)
 require(limma)
 RG <- new("RGList")
 RG$R <- X[,1:nreps]
 RG$G <- X[,((nreps+1):n)]
 RG$genes <- as.data.frame(cbind(Name = paste("A", seq(m), sep="")),
                           stringsAsFactors = FALSE)
 MA <- MA.RG(RG)
 return(MA)

}

MA <- simulateGGB(params=c(12.53,0.82, 0.37), pi0 =1 - 0.05, seed=1) fit <- lmFit(MA, design=rep(1,ncol(MA))) fit <- eBayes(fit) tt1 <- topTable(fit, sort.by="t", number=50) tt1all <- topTable(fit, sort.by="t", number=nrow(MA)) t1 <- fit$t

MA <- simulateGGB(params=c(12.53,0.82, 0.37), pi0 =1 - 0.05, seed=2) fit <- lmFit(MA, design=rep(1,ncol(MA))) fit <- eBayes(fit) tt2 <- topTable(fit, sort.by="t", number=50) tt2all <- topTable(fit, sort.by="t", number=nrow(MA)) t2 <- fit$t


  1. Checking intersection between top 50 genes - 29 common

tt1Ind <- as.numeric(rownames(tt1)) tt2Ind <- as.numeric(rownames(tt2)) common <- intersect(tt1Ind, tt2Ind) length(common)

  1. Doesn't take into account the sign - 27 common

common <- intersect(paste(sign(tt1$logFC), tt2Ind), paste(sign(tt2$logFC), tt2Ind)) length(common)


x <- tt1all$Name y <- tt2all$Name n <- nrow(MA)

common <- rep(NA, n)

 for( j in 1:n ) {
   commonNames <- intersect(x[seq(j)], y[seq(j)])
   common[j] <- length(commonNames)
 }

plot(common, xlim=c(0,n), ylim=c(0,n), type="s", main=comparison) abline(v=50, col="red")

  1. Constructing matrix of ordered lists from either genes Name, or row position in
  2. topTable output
  1. X <- cbind(rownames(tt1all), rownames(tt2all))

X <- cbind(t1, t2) rownames(X) <- MA$genes$Name

RP.out <- RP(X, rep(1,2), num.perm=num.perm, logged=FALSE)

  1. Selection by cutoff

tG <- topGene(RP.out, cutoff = 0.01, method="pval", logged=FALSE,

       logbase=2, gene.names=rownames(X)) 

tG$Table1 tG$Table2

common <- intersect(paste(sign(tt1$logFC), tt2Ind), paste(sign(tt2$logFC), tt2Ind)) length(common) common

tt1[tt1$Name=="A18",] tt2[tt2$Name=="A18",]

rownames(tG$Table1) rownames(tG$Table2)

  1. Find where rankprod candidates match in x and y

xx <- match(rownames(tG$Table1), x) names(xx) <- rownames(tG$Table1) xx yy <- match(rownames(tG$Table1), y) names(yy) <- rownames(tG$Table1) yy

xx <- match(rownames(tG$Table2), x) names(xx) <- rownames(tG$Table2) xx yy <- match(rownames(tG$Table2), y) names(yy) <- rownames(tG$Table2) yy

  1. ----------------------- Rank Product first principles ----------------------- #

OrirankUp <- apply(X, 2, function(x){rank(as.numeric(x))}) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)

num.gene <- 1000 num.perm <- 1000

  1. From RP code

RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {

 temp.data <- Newdata(X, NULL, num.class=1)
 new.data1 <- temp.data$new.data1
 new.data2 = temp.data$new.data2
 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, 
                                 TRUE, num.class=1, rev.sorting = FALSE)$RP
 RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, 
                                   TRUE, num.class=1, rev.sorting = TRUE)$RP

}

temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]

  1. Original code: match(temp2, sort(temp2))

order.temp <- order(temp2)

  1. Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]

count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm

pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)

plot(pval[order(pval)]) abline(v=50, col="red")

pval[order(pval)]

order(pval)

plot(pfp[order(pfp)]) abline(v=50, col="red")

order(pval)[1:50]