Hello,
I am currently trying to execute a paired analysis using edgeR. My data has the read counts for ten paired samples with only two possible treatments--having a disease or not having a disease. I am looking to compare individual samples to one another and calculate differential expression. For example, 1 to 1, 2 to 2, 3 to 3, etc. I based my script on example 4.1 in the edgeR user guide. However, when I execute the script as it is currently, I am getting strange FDR values that are not consistent with what I would expect. I have attached the script below, any help or advice on how to improve the script or make more meaningful comparisons would be greatly appreciated.
Thank you in advance
library(edgeR)
library(statmod)
library(readxl)
library(BiocManager)
countdata <- read.delim("Desktop/Counts.txt", check.names=FALSE, stringsAsFactors=FALSE)
head(countdata)
y <- DGEList(counts=countdata[,2:21], genes=countdata[,1])
o <- order(rowSums(y$counts), decreasing=TRUE)
y <- y[o,]
d <- duplicated(y$genes)
y <- y[!d,]
nrow(y)
keep <- filterByExpr(y)
table(keep)
y<- y[keep, ,keep.lib.sizes=FALSE]
nrow(y)
y$samples$lib.size <- colSums(y$counts)
row.names(y$counts)<-row.names(y$genes)
y <- calcNormFactors(y)
y$samples
plotMDS(y)
Patient <- factor(c(1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10))
Treatment <- factor(c("T","C","T","C","T","C","T","C","T","C","T","C","T","C","T","C","T","C","T","C"))
data.frame(Sample=colnames(y),Patient,Treatment)
design <- model.matrix(~Patient+Treatment)
rownames(design) <- colnames(y)
design
y <- estimateDisp(y, design, robust=TRUE)
y$common.dispersion
fit <- glmFit(y, design)
lrt <- glmLRT(fit)
TopTags(lrt) summary(decideTests(lrt))