|
# eliminating indirect hits based on bayesian networks ideas |
|
|
|
library(fgsea) |
|
library(data.table) |
|
|
|
eliminatePathways <- function(universe, pathways, pathway2name, isNonRandomPval, pvalThreshold=1e-3) { |
|
messagef <- function(...) message(sprintf(...)) |
|
pathways <- lapply(pathways, intersect, universe) |
|
mainPathways <- c() |
|
|
|
res <- list() |
|
|
|
pp <- names(pathway2name)[3] # for debug |
|
|
|
for (pp in names(pathway2name)) { |
|
messagef("Testing pathway '%s'", pathway2name[pp]) |
|
|
|
if (length(mainPathways) == 0) { |
|
# nothing to compare with |
|
mainPathways <- pp |
|
next |
|
} |
|
|
|
interesting <- TRUE |
|
pp1 <- mainPathways[1] # for debug |
|
# We want our new pathway to give us additional information compared to all others that we have already |
|
for (pp1 in mainPathways) { |
|
notExplainsPval <- min(isNonRandomPval(universe = pathways[[pp1]], |
|
p = pathways[[pp]]), |
|
isNonRandomPval(universe = setdiff(universe, pathways[[pp1]]), |
|
p= pathways[[pp]])) |
|
res <- c(res, list(data.frame( |
|
checking=pp, |
|
reference=pp1, |
|
pval=notExplainsPval))) |
|
|
|
if (notExplainsPval > pvalThreshold) { |
|
|
|
messagef("Not adding pathway '%s', because we already have '%s'", |
|
pathway2name[pp], |
|
pathway2name[pp1]) |
|
|
|
eqPval <- min( |
|
isNonRandomPval(universe = setdiff(universe, pathways[[pp]]), |
|
pathways[[pp1]]), |
|
isNonRandomPval(universe = pathways[[pp]], |
|
pathways[[pp1]])) |
|
|
|
res <- c(res, list(data.frame( |
|
checking=pp1, |
|
reference=pp, |
|
pval=eqPval))) |
|
|
|
if (eqPval <= pvalThreshold) { |
|
messagef("And they are not equivalent: %s", eqPval) |
|
} else { |
|
messagef("They are equivalent: %s", eqPval) |
|
} |
|
interesting = FALSE |
|
break |
|
} |
|
} |
|
|
|
if (interesting) { |
|
messagef("Adding pathway '%s', because we can", |
|
pathway2name[pp]) |
|
mainPathways <- c(mainPathways, pp) |
|
|
|
# OK it gives use new info, may be now we don't need some of already added pathways? |
|
|
|
# Now roles have changed, we test every interesting pathways agains out newly added one |
|
for (pp1 in setdiff(mainPathways, pp)) { |
|
notExplainsPval <- min( |
|
isNonRandomPval(universe = setdiff(universe, pathways[[pp]]), |
|
pathways[[pp1]]), |
|
isNonRandomPval(universe = pathways[[pp]], |
|
pathways[[pp1]])) |
|
res <- c(res, list(data.frame( |
|
checking=pp1, |
|
reference=pp, |
|
pval=notExplainsPval))) |
|
if (notExplainsPval > pvalThreshold) { |
|
messagef("Removing pathway %s, because we now have %s", |
|
pathway2name[pp1], |
|
pathway2name[pp]) |
|
mainPathways <- setdiff(mainPathways, pp1) |
|
} |
|
} |
|
} |
|
} |
|
|
|
list(mainPathways=mainPathways, |
|
table=rbindlist(res)) |
|
} |
|
|
|
fgseaIsNonRandomPval <- function(pathways, ranks, nperm) { |
|
isNonRandomPval <- function(universe, p) { |
|
pathway <- intersect(p, universe) |
|
if (length(pathway) == 0 || length(pathway) == length(universe)) { |
|
return(1) |
|
} |
|
fgsea(pathways = list(pathway), |
|
stats=ranks[names(ranks) %in% universe], |
|
nperm = nperm)$pval |
|
} |
|
} |
|
|
|
|
|
data("exampleRanks") |
|
data("examplePathways") |
|
|
|
fgseaRes <- fgsea(pathways = examplePathways, |
|
stats = exampleRanks, |
|
minSize=15, |
|
maxSize=500, |
|
nperm=10000) |
|
|
|
# checking pathways in the order of decreasing significance |
|
pathway2name <- fgseaRes[padj < 1e-2][order(pval), setNames(pathway, pathway)] |
|
|
|
# this process is slow, takes ~5 minutes |
|
date() |
|
elimRes <- eliminatePathways(universe=names(exampleRanks), |
|
pathways=examplePathways, |
|
pathway2name=pathway2name, |
|
isNonRandomPval=fgseaIsNonRandomPval(examplePathways, exampleRanks, nperm=5000)) |
|
date() |
|
|
|
elimRes$mainPathways |
|
|
|
# should display something like: |
|
# 5990980_Cell_Cycle |
|
# 5991023_Metabolism_of_carbohydrates |
|
# 5991130_Programmed_Cell_Death |
|
# 5991851_Mitotic_Prometaphase |
|
# 5991078_Metabolism_of_nucleotides |
|
# 5991662_Cholesterol_biosynthesis |
|
# 5992313_Chromatin_modifying_enzymes |
|
|
|
# elimeRes$table contains results of testing pathways against other pathways (~log) |
I am not sure for GSEA results...
But for GO enrichment analysis with goseq, I usually remove the too specific and too general terms for plots. I have written a R package called gogadget (gogadget: an R package for go analysis visualization and interpretation ), with a filter function.
But there are more tools available such as REVIGO or GO trimming.
Good luck!