Hi,
Has anyone used lwaldron/metaphlanToPhyloseq.R (
metaphlanToPhyloseq <- function( | |
metaphlandir, | |
metadat=NULL, | |
simplify=TRUE){ | |
## tax is a matrix or data.frame with the table of taxonomic abundances, rows are taxa, columns are samples | |
## metadat is an optional data.frame of specimen metadata, rows are samples, columns are variables | |
## if simplify=TRUE, use only the most detailed level of taxa names in the final object | |
## metaphlanToPhyloseq("~/Downloads/metaphlan_bugs_list") | |
.getMetaphlanTree <- function(removeGCF=TRUE, simplify=TRUE){ | |
if (!requireNamespace("ape")) { | |
stop("Please install the ape package to read Newick trees") | |
} | |
nwkfile <- bzfile(system.file("extdata/metaphlan2_selected.tree.reroot.nwk.bz2", | |
package="curatedMetagenomicData")) | |
tree <- ape::read.tree(nwkfile) | |
close(nwkfile) | |
if(removeGCF) | |
tree$tip.label <- sub("\|GCF_[0-9]+$", "", tree$tip.label) | |
if(simplify) | |
tree$tip.label <- gsub(".+\|", "", tree$tip.label) | |
return(tree) | |
} | |
.joinListOfMatrices <- function(obj) { | |
rnames <- Reduce(union, lapply(obj, rownames)) | |
cnames <- names(obj) | |
if (!all(isUnique(cnames))) { | |
stop("Column names are not unique.") | |
} | |
output <- matrix(0, | |
nrow = length(rnames), | |
ncol = length(cnames), | |
dimnames = list(rnames, cnames) | |
) | |
for (i in seq_along(obj)) { | |
output[match(rownames(obj[[i]]), rownames(output)), i] <- obj[[i]][, 1] | |
} | |
return(output) | |
} | |
fnames <- list.files(metaphlandir) | |
bug.l <- lapply(fnames, function(x){ | |
res <- read.delim(file.path(metaphlandir, x), stringsAsFactors = FALSE, row.names = 1) | |
colnames(res) <- x | |
return(res) | |
}) | |
names(bug.l) <- fnames | |
tax <- .joinListOfMatrices(bug.l) | |
xnames = rownames(tax) | |
shortnames = gsub(".+\|", "", xnames) | |
if(simplify){ | |
rownames(tax) = shortnames | |
} | |
x2 = strsplit(xnames, split="|", fixed=TRUE) | |
taxmat = matrix(NA, ncol=max(sapply(x2, length)), nrow=length(x2)) | |
colnames(taxmat) = c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species", "Strain")[1:ncol(taxmat)] | |
rownames(taxmat) = rownames(tax) | |
for (i in 1:nrow(taxmat)){ | |
taxmat[i, 1:length(x2[[i]])] <- x2[[i]] | |
} | |
taxmat = gsub("[a-z]__", "", taxmat) | |
taxmat = phyloseq::tax_table(taxmat) | |
otutab = phyloseq::otu_table(tax, taxa_are_rows=TRUE) | |
tree <- .getMetaphlanTree(simplify=simplify) | |
if(is.null(metadat)){ | |
metadat <- data.frame(file=fnames, row.names=fnames, stringsAsFactors = FALSE) | |
} | |
res = phyloseq::phyloseq(taxmat, otutab, phyloseq::sample_data(metadat), tree) | |
return(res) | |
} |
I'm unsure of what the input files should be and there is no help information on GitHub. I'm wanting to use it to input my metaphlan3 merged abundance table into phyloseq.
Thanks in advance!
Thank you for getting back to me. I've tried this and get this error:
This is the metaphlan csv :
I think my comment was of a previous version, I corrected the answer. In this function you will need more functions from
curatedMetagenomicData
regarding the tree.Ok, so I should run through the ‘curatedMetagenomicData’ pipeline with my data?
Just load before
curatedMetagenomicData
withlibrary(curatedMetagenomicData)
before run the functionmetaphlanToPhyloseq()
with your folder and metadata files.Thank you so much for your help!
I'm still getting an error.
I have a directory called metaphlan_profiling and it contains 22 .txt outputs from metaphlan
I read in the metadata information (I'm not sure if this should be a data frame)
I think run the function
I get this error :