I'm another wet-lab biologist who is diving into RNA-seq analysis in R. After a horrific learning curve I'm now getting better, but I need your help. I have RNA-seq data for bacteria-infected host cells at four time-points (2, 4, 6, 8 hpi), with uninfected controls at each time-point. I want DE genes between infected and uninfected samples at each time-point, but don't really care about comparisons between time-points. The problem is that I can only get 1-2 DEG, which I know is not correct. I suspect my problem is with my design matrix, but am not sure. I am wondering if someone could have a quick look at my (summarized) pipeline below and nudge me in the right direction?
Many thanks for your help.
# Import count matrix (after filtering of dodgy and low-read samples)
> counts <- read.csv(file = "counts.csv", sep = ",", header = TRUE, row.names = 1, stringsAsFactors = FALSE)
# Import meta table
> meta <- read.csv(file = "meta.csv", header = TRUE, stringsAsFactors = FALSE) > meta sampleName treatment time 2hpi_Infection_01 Infection_2 2hpi 2hpi_Infection_02 Infection_2 2hpi 2hpi_Infection_03 Infection_2 2hpi 2hpi_Mock_01 Mock_Infection_2 2hpi 2hpi_Mock_02 Mock_Infection_2 2hpi 2hpi_Mock_03 Mock_Infection_2 2hpi 4hpi_Infection_01 Infection_4 4hpi 4hpi_Infection_02 Infection_4 4hpi 4hpi_Infection_03 Infection_4 4hpi 4hpi_Mock_01 Mock_Infection_4 4hpi 4hpi_Mock_02 Mock_Infection_4 4hpi 4hpi_Mock_03 Mock_Infection_4 4hpi 6hpi_Infection_01 Infection_6 6hpi 6hpi_Infection_02 Infection_6 6hpi 6hpi_Infection_03 Infection_6 6hpi 6hpi_Mock_01 Mock_Infection_6 6hpi 6hpi_Mock_02 Mock_Infection_6 6hpi 6hpi_Mock_03 Mock_Infection_6 6hpi 8hpi_Infection_1 Infection_8 8hpi 8hpi_Infection_2 Infection_8 8hpi 8hpi_Infection_3 Infection_8 8hpi 8hpi_Mock_01 Mock_Infection_8 8hpi 8hpi_Mock_02 Mock_Infection_8 8hpi 8hpi_Mock_03 Mock_Infection_8 8hpi
# Create DGEList and perform TMM normalisation
> group <- as.factor(meta$treatment) > dge <- DGEList(counts, group = group) > dge$meta <- meta > dge <- calcNormFactors(dge)
# Create design matrix and contrasts. I'm not interested in comparisions between time-points so I
# separated everything out to make the comparisions simpler... (see below).
> design <- model.matrix(~ group + 0) > rownames(design) <- colnames(counts) > contrasts <- makeContrasts(Infected_vs_Mock_2 = groupInfection_2 - groupMock_Infection_2, Infected_vs_Mock_4 = groupCInfection_4 - groupMock_Infection_4, Infected_vs_Mock_6 = groupInfection_6 - groupMock_Infection_6, Infected_vs_Mock_8 = groupInfection_8 - groupMock_Infection_8, levels = design)
> y <- voomWithQualityWeights(counts = dge, design = design, normalize.method = "cyclicloess", plot = TRUE) > fit <- lmFit(y, design) > fit <- contrasts.fit(fit, contrasts) > fit <- eBayes(fit) > summary(decideTests(fit))
# Print list of differentially expressed genes
> top_2 <- topTable(fit, coef = "Infected_vs_Mock_2", adjust = "fdr", sort.by = "P", lfc = 2, p.value = 0.05, number = Inf) > top_4 <- topTable(fit, coef = "Infected_vs_Mock_4", adjust = "fdr", sort.by = "P", lfc = 2, p.value = 0.05, number = Inf) > top_6 <- topTable(fit, coef = "Infected_vs_Mock_6", adjust = "fdr", sort.by = "P", lfc = 2, p.value = 0.05, number = Inf) > top_8 <- topTable(fit, coef = "Infected_vs_Mock_8", adjust = "fdr", sort.by = "P", lfc = 2, p.value = 0.05, number = Inf)