RankProduct.R
Code snipits and programs written in R, S or S-PLUS library(RankProd) library(limma)
- ------------------------------ Generated dataset ---------------------------- #
set.seed(1) ncols <- 4 X <- round(matrix(rnorm(20), nc=ncols),2)
set.seed(2) num.perm <- 100 num.gene <- nrow(X) RP.out <- RP(X, rep(1,4), num.perm=num.perm) RP.out
- ------------------------------ First Principles ----------------------------- #
AveFC <- apply(X,1, mean) OrirankUp <- apply(X, 2, rank) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)
- Ripped from RP code
RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {
temp.data <- Newdata(X, NULL, num.class=1) new.data1 <- temp.data$new.data1 new.data2 = temp.data$new.data2 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = FALSE)$RP RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = TRUE)$RP
}
temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]
- Original code: match(temp2, sort(temp2))
order.temp <- order(temp2)
- Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]
count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm
pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)
- Check values
RP.out$pfp[,1] pfp RP.out$pval[,1] pval RP.out$RPs[,1] RPs RP.out$Orirank1 OrirankUp RP.out$AveFC[,1] AveFC
- --------- testing with letters (alpha numerically ranked)
X[] <- LETTERS[OrirankUp] X
OrirankUp <- apply(X, 2, rank) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)
- From RP code
RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {
temp.data <- Newdata(X, NULL, num.class=1) new.data1 <- temp.data$new.data1 new.data2 = temp.data$new.data2 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = FALSE)$RP RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = TRUE)$RP
}
temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]
- Original code: match(temp2, sort(temp2))
order.temp <- order(temp2)
- Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]
count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm
pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)
- Check values for character version of X
RP.out$pfp[,1] pfp RP.out$pval[,1] pval RP.out$RPs[,1] RPs RP.out$Orirank1 OrirankUp
- --------------------------- Simulated GGB datasets -------------------------- #
simulateGGB <- function(m=1000, nreps=5, pi0 =0.95,
params = c(2.74886, 1.36546, 4.12844), seed=1){
- p (genes) by n (slides) matrix
- m=p keeping fdr notation
set.seed(seed) p <- m n <- nreps * 2
m1 <- round(m * (1-pi0)) m0 <- p - m1 a.shape <- params[1] a0.shape <- params[2] scale <- params[3] DEscales <- rgamma(2*m1, shape=a0.shape, rate=scale) EEscales <- rgamma(m0 , shape=a0.shape, rate=scale) scales <- c(rep(DEscales, each=nreps), rep(EEscales, each=2*nreps)) X <- rgamma(n* p, a.shape, rate=scales) dim(X) <- c(n,p) X <- t(X) require(limma) RG <- new("RGList") RG$R <- X[,1:nreps] RG$G <- X[,((nreps+1):n)] RG$genes <- as.data.frame(cbind(Name = paste("A", seq(m), sep="")), stringsAsFactors = FALSE) MA <- MA.RG(RG) return(MA)
}
MA <- simulateGGB(params=c(12.53,0.82, 0.37), pi0 =1 - 0.05, seed=1) fit <- lmFit(MA, design=rep(1,ncol(MA))) fit <- eBayes(fit) tt1 <- topTable(fit, sort.by="t", number=50) tt1all <- topTable(fit, sort.by="t", number=nrow(MA)) t1 <- fit$t
MA <- simulateGGB(params=c(12.53,0.82, 0.37), pi0 =1 - 0.05, seed=2) fit <- lmFit(MA, design=rep(1,ncol(MA))) fit <- eBayes(fit) tt2 <- topTable(fit, sort.by="t", number=50) tt2all <- topTable(fit, sort.by="t", number=nrow(MA)) t2 <- fit$t
- Checking intersection between top 50 genes - 29 common
tt1Ind <- as.numeric(rownames(tt1)) tt2Ind <- as.numeric(rownames(tt2)) common <- intersect(tt1Ind, tt2Ind) length(common)
- Doesn't take into account the sign - 27 common
common <- intersect(paste(sign(tt1$logFC), tt2Ind), paste(sign(tt2$logFC), tt2Ind)) length(common)
x <- tt1all$Name
y <- tt2all$Name
n <- nrow(MA)
common <- rep(NA, n)
for( j in 1:n ) { commonNames <- intersect(x[seq(j)], y[seq(j)]) common[j] <- length(commonNames) }
plot(common, xlim=c(0,n), ylim=c(0,n), type="s", main=comparison) abline(v=50, col="red")
- Constructing matrix of ordered lists from either genes Name, or row position in
- topTable output
- X <- cbind(rownames(tt1all), rownames(tt2all))
X <- cbind(t1, t2) rownames(X) <- MA$genes$Name
RP.out <- RP(X, rep(1,2), num.perm=num.perm, logged=FALSE)
- Selection by cutoff
tG <- topGene(RP.out, cutoff = 0.01, method="pval", logged=FALSE,
logbase=2, gene.names=rownames(X))
tG$Table1 tG$Table2
common <- intersect(paste(sign(tt1$logFC), tt2Ind), paste(sign(tt2$logFC), tt2Ind)) length(common) common
tt1[tt1$Name=="A18",] tt2[tt2$Name=="A18",]
rownames(tG$Table1) rownames(tG$Table2)
- Find where rankprod candidates match in x and y
xx <- match(rownames(tG$Table1), x) names(xx) <- rownames(tG$Table1) xx yy <- match(rownames(tG$Table1), y) names(yy) <- rownames(tG$Table1) yy
xx <- match(rownames(tG$Table2), x) names(xx) <- rownames(tG$Table2) xx yy <- match(rownames(tG$Table2), y) names(yy) <- rownames(tG$Table2) yy
- ----------------------- Rank Product first principles ----------------------- #
OrirankUp <- apply(X, 2, function(x){rank(as.numeric(x))}) RPs <- apply(OrirankUp, 1, prod) ^ (1/ncol(X)) RPrank <- rank(RPs)
num.gene <- 1000 num.perm <- 1000
- From RP code
RP.perm.upin2 <- matrix(NA, num.gene, num.perm) RP.perm.downin2 <- matrix(NA, num.gene, num.perm) cat("Starting", num.perm, "permutations...", "\n") set.seed(2) for (p in 1:num.perm) {
temp.data <- Newdata(X, NULL, num.class=1) new.data1 <- temp.data$new.data1 new.data2 = temp.data$new.data2 RP.perm.upin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = FALSE)$RP RP.perm.downin2[, p] <- RankProd1(new.data1, new.data2, TRUE, num.class=1, rev.sorting = TRUE)$RP
}
temp1 <- as.vector(cbind(RPs, RP.perm.upin2)) temp2 <- rank(temp1)[1:num.gene]
- Original code: match(temp2, sort(temp2))
order.temp <- order(temp2)
- Original code: count.perm <- (sort(temp2) - c(1:num.gene))[order.temp]
count.perm <- temp2 - order.temp exp.count <- count.perm/num.perm
pval <- count.perm/(num.perm * num.gene) pfp <- exp.count/rank(RPs)
plot(pval[order(pval)]) abline(v=50, col="red")
pval[order(pval)]
order(pval)
plot(pfp[order(pfp)]) abline(v=50, col="red")
order(pval)[1:50]