|
library(GenomicRanges) |
|
library(matrixStats) |
|
|
|
ranges1 <- GRanges(seqnames = c("chr1", "chr2", "chr3"), |
|
ranges = IRanges(start=c(1,100,1000), |
|
end = c(10,150,5000))) |
|
|
|
ranges2 <- GRanges(seqnames = c("chr1", "chr2", "chr3"), |
|
ranges = IRanges(start=c(11,110,2000), |
|
end = c(20,130,3000))) |
|
|
|
GetPercentOverlap <- function(query, |
|
subject){ |
|
|
|
#/ find overlap positions |
|
ovlap <- findOverlaps(query, subject) |
|
|
|
#/ subset the input ranges to those ranges that have overlaps with the second ranges object |
|
r1 <- ranges(query[ovlap@from] ) |
|
r2 <- ranges(subject[ovlap@to]) |
|
|
|
#/ new GRanges object only with the overlapping intervals |
|
gr<- |
|
GRanges(seqnames = as.character(seqnames(query[ovlap@from])), |
|
ranges = IRanges(start = matrixStats::rowMaxs(as.matrix(data.frame(r1=start(r1), r2=start(r2)))), |
|
end = matrixStats::rowMins(as.matrix(data.frame(r1=end(r1), r2=end(r2)))))) |
|
|
|
mcols(gr)$percentOverlap <- 100 * width(gr) / width(r1) |
|
|
|
#/ add original query ranges |
|
ranges(gr) <- ranges(r1) |
|
|
|
#/ add subject coordinates to mcols(gr) |
|
mcols(gr)$subject <- r2 |
|
gr |
|
} |
|
|
Thanks @ATpoint.
For the minOverlap takes the number of bases you wish to consider as a valid overlap. So if you have regions of uniform length (say 1000bp) and you wish to consider the overlap with 500bp as significant overlap, one must set the value to 500L.It's your above-mentioned script that helped me reach this conclusion