plyranges:: summarise() not recognizing previous summarized column (counts)
1
0
Entering edit mode
3 months ago
Alewa ▴ 130

I'm calculating the proportion of CpGs methylated per chromosome, but I get an error and I'm not sure how to resolve it:

> cpgr %>% group_by(seqnames) %>% summarise(cnts = n(), prop = cnts/seqlengths(cpgr))


This is the full code:

library(BSgenome.Hsapiens.UCSC.hg19)
library(plyranges)

##############################
# Get CpG loci on the chromosomes of interest
##############################
# Chromosomes of interest: the first 24 sequences of the hg19 BSgenome object
# (presumably chr1-chr22, chrX and chrY -- confirm with names(Hsapiens)).
# Everything below is derived from `chrs`, so the selection lives in one place.
chrs <- names(Hsapiens)[seq_len(24L)]

# Reference lengths of the selected chromosomes, looked up by name.
length_chrs <- seqlengths(Hsapiens)[chrs]

# Start position of every "CG" match, one vector per chromosome,
# in the same order as `chrs`.
cgs <- lapply(chrs, function(chr) start(matchPattern("CG", Hsapiens[[chr]])))

# One GRanges per chromosome (width-2 ranges covering each "CG"), then
# concatenated into a single object.
cpgr <- do.call(
  c,
  lapply(seq_along(chrs), function(i) {
    GRanges(chrs[[i]], IRanges(cgs[[i]], width = 2))
  })
) %>%
  as_granges()

# Set chromosome lengths on the ranges from the hg19 reference.
seqlengths(cpgr) <- length_chrs

# Calculate the proportion of CpGs per chromosome.
# Attempt 1: count the ranges per chromosome and divide by the chromosome
# length inside a single summarise() call.
# NOTE(review): per the post title, `cnts` is not recognised by later
# expressions in the same summarise() call -- confirm against the plyranges
# documentation for summarise().
cpgr %>% group_by(seqnames) %>% summarise(cnts = n(), prop = cnts/seqlengths(cpgr)) # this fails

# Attempt 2: compute the counts first, add the reference lengths as a column,
# then call summarise() a second time to divide. The `chrom_lengths_ref`
# column added by mutate() is never used; the final step still divides by
# seqlengths(cpgr).
cpgr %>% group_by(seqnames) %>% summarise(cnts = n()) %>% mutate(chrom_lengths_ref = as.vector(unlist(length_chrs))) %>% summarise(prop = cnts/seqlengths(cpgr))  # this also fails

cpg R plyranges • 248 views
1
Entering edit mode
3 months ago
Alewa ▴ 130

solved, thanks!

# Calculate the proportion of CpGs per chromosome.
# Step 1: count the CpG ranges per chromosome (one row per `seqnames` group).
counts_cpgr <- cpgr %>%
  group_by(seqnames) %>%
  summarise(cnts = n())

# Step 2: convert to a plain data.frame and compute the proportion there;
# in this mutate() call `prop_cpg` can refer to `chrom_lengths_ref`, which is
# created just before it in the same call.
# Lengths are looked up by chromosome name rather than by position, so each
# row gets its own chromosome's length even if the row order or the set of
# chromosomes differs from `length_chrs`.
counts_cpgr %>%
  as.data.frame() %>%
  mutate(
    chrom_lengths_ref = unname(length_chrs[as.character(seqnames)]),
    prop_cpg = cnts / chrom_lengths_ref
  )