Question

Limma differential expression analysis across subtypes

0

Entering edit mode

10 months ago

melissachua90 ▴ 60

I want to retrieve the ANOVA-based differential expression between KIRP1a, KIRP1b, KIRP1c, KIRP2a, KIRP2b, and KIRP2c groups to identify biomarkers for each group.

My code below returned deg that are statistically significant but my heatmap (z-scaled) did not show any observable differences across the subtypes.

Is there an issue with limma code?

group <- factor(clin.info$subtype, levels=c("1a", "1b", "1c", "2a", "2b", "2c"))
design <- model.matrix(~group)
rownames(design) <- rownames(clin.info)
colnames(design) <- c("KIRP1a", "KIRP1b", "KIRP1c", "KIRP2a", "KIRP2b", "KIRP2c")

mrna.dge <- DGEList(counts=mat, group=group, remove.zeros=T)
keep <- filterByExpr(mrna.dge, design, min.count=50)
mrna.dge <- mrna.dge[keep,,keep.lib.sizes=FALSE]
mrna.dge <- calcNormFactors(mrna.dge)

v <- voom(mrna.dge, design, plot=TRUE)
fit <- lmFit(v, design)
contrasts.matrix <- makeContrasts("KIRP1a-KIRP1b", "KIRP1a-KIRP1c", "KIRP1a-KIRP2a", "KIRP1a-KIRP2b", "KIRP1a-KIRP2c", "KIRP1b-KIRP1c", "KIRP1b-KIRP2a", "KIRP1b-KIRP2b", "KIRP1b-KIRP2c", "KIRP1c-KIRP2a", "KIRP1c-KIRP2b", "KIRP1c-KIRP2c", "KIRP2a-KIRP2b", "KIRP2a-KIRP2c", "KIRP2b-KIRP2c", levels=colnames(design))
fit2 <- contrasts.fit(fit, contrasts.matrix)
efit <- eBayes(fit2, trend=TRUE)

top.ranked.mrna <- topTable(efit, adjust.method="BH", p.value=0.05, number=Inf)

deg <- v$E[rownames(v$E) %in% rownames(top.ranked.mrna),]
deg <- deg[order(match(rownames(deg),rownames(top.ranked.mrna))),]

Input:

dput(mat[1:20,1:20])
structure(c(2583, 1360, 1722, 2590, 4478, 1564, 463, 2347, 2467, 
7020.91, 3227, 4716, 5240, 2279.04, 2240, 2618, 13148, 2202, 
6013, 1524, 4720, 2013, 2615, 2562, 4518, 1300, 636, 2504, 1937, 
2328.95, 5165, 6354, 4381, 957.21, 3274, 1999, 1467, 10893, 1729, 
1184, 2768, 2388, 3338, 2715, 5936, 2405, 1010, 4546, 3300, 1941, 
7047, 7309, 58, 1509.53, 5776, 2627, 26281, 4210, 2979, 772, 
1689, 1366, 1407, 3010, 4139, 672, 333, 1772, 872, 2005, 1704, 
4440, 1690, 914.06, 1779, 1266, 4715, 1801, 1760, 1228, 3618, 
1928, 2144, 3096, 4825, 1257, 1701, 2586, 4340, 2491.91, 2022, 
3102, 4289, 875.24, 2096, 1918, 1443, 2344, 2843, 4531, 2936, 
1338, 1957, 1180, 5419, 785, 1685, 2939, 1220, 1251.95, 8810, 
4346, 4017, 2240.73, 3202, 1269, 5575, 3407, 1276, 306, 2889, 
2877, 4046, 10246, 10136, 3848.9, 1638, 3368, 4955, 6752.8, 2340, 
2882, 8117, 11895.08, 3060, 8352, 42026, 5959, 2547, 11283, 3441, 
1990, 2177, 3708, 4412, 2445, 1296, 2567, 7109, 2101.96, 3357, 
3033, 23338, 773.54, 2634, 1730, 4076, 12475, 440, 3734, 2354, 
1971, 1954, 3901, 7452, 1621, 1663, 6607, 7042, 2396.97, 2750, 
8182, 32687, 1392.2, 1825, 5934, 5520, 4346, 4304, 3562, 9455, 
3503, 2853, 3180, 7491, 2593, 1132, 3667, 1569, 1982.91, 9183, 
11425, 101, 2704.9, 2063, 4284, 83201, 3977, 1541, 41, 1495, 
2128, 2410, 2823, 4972, 1885, 498, 2198, 1382, 2375.96, 2041, 
2392, 671, 2106.5, 2228, 3549, 1022, 3825, 1466, 530, 1776, 539, 
1314, 1323, 1405, 324, 617, 1799, 267, 607, 508, 3166, 1316, 
609, 1334, 977, 543, 1114, 698, 211, 5089, 1167, 1984, 2649, 
4904, 1999, 1415, 3950, 1005, 2335, 4258, 2551, 5704, 1262.89, 
2473, 1704, 13564, 5657, 1941, 1031, 409, 1626, 2055, 1867, 9169, 
2193, 1051, 5418, 10556, 1471.95, 1919, 2634, 2855, 2077.01, 
3088, 2278, 29390, 5413, 906, 4104, 1908, 1086, 1769, 2562, 5130, 
1483, 340, 2856, 1447, 5589.95, 2492, 3121, 2546, 1867.33, 1807, 
4363, 6303, 1725, 3403, 115, 1980, 3042, 2040, 6031, 8613, 2544, 
2492, 3244, 14574, 5972.64, 4345, 6637, 15937, 6983.94, 3255, 
1764, 20080, 3549, 948, 4593, 3357, 899, 1182, 2393, 3589, 1003, 
3797, 2896, 7163, 1927.93, 2049, 2486, 12389, 1235.82, 2010, 
2006, 1026, 2592, 2966, 5589, 3749, 2139, 2756, 5537, 5421, 2216, 
1638, 3535, 4765, 2116, 2186, 7265, 4608, 3230.19, 4147, 4101, 
5052, 6825, 1134, 2077, 1198, 1942, 3534, 4988, 6122, 4154, 3954, 
3603, 47622, 6097.76, 7273, 11667, 33619, 3647.39, 4561, 4815, 
23369, 12155, 1497, 3239, 4801, 1821, 1992, 5675, 6114, 2866, 
631, 2112, 1125, 7291.51, 7144, 13832, 12439, 730.17, 2610, 6179, 
9147, 13289, 2301, 7482), dim = c(20L, 20L), dimnames = list(
    c("A4GALT", "AACS", "AAGAB", "AAK1", "AAMP", "AASDHPPT", 
    "AASS", "AATF", "ABAT", "ABCA1", "ABCA2", "ABCA3", "ABCB1", 
    "ABCB6", "ABCB8", "ABCC1", "ABCC3", "ABCC4", "ABCC5", "ABCC6"
    ), c("TCGA.2K.A9WE.01", "TCGA.2Z.A9J1.01", "TCGA.2Z.A9J3.01", 
    "TCGA.2Z.A9J5.01", "TCGA.2Z.A9J6.01", "TCGA.2Z.A9J7.01", 
    "TCGA.2Z.A9J8.01", "TCGA.2Z.A9JD.01", "TCGA.2Z.A9JI.01", 
    "TCGA.2Z.A9JJ.01", "TCGA.2Z.A9JO.01", "TCGA.2Z.A9JQ.01", 
    "TCGA.4A.A93W.01", "TCGA.4A.A93X.01", "TCGA.4A.A93Y.01", 
    "TCGA.5P.A9JU.01", "TCGA.5P.A9JY.01", "TCGA.5P.A9KE.01", 
    "TCGA.A4.7288.01", "TCGA.A4.7583.01")))

dput(clin.info[,c("survival", "subtype")][1:20,])
structure(list(survival = c("lts", "lts", "lts", "lts", "lts", 
"lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", "lts", 
"lts", "non-lts", "lts", "lts", "lts", "lts"), subtype = c("2a", 
"1a", "1b", "1a", "1a", "1b", "2b", "2b", "2a", "1a", "2b", "1b", 
"1c", "2b", "2a", "2b", "1b", "2b", "2b", "1c")), row.names = c("TCGA.Y8.A8S1.01", 
"TCGA.Y8.A8S0.01", "TCGA.Y8.A8RZ.01", "TCGA.Y8.A8RY.01", "TCGA.Y8.A897.01", 
"TCGA.Y8.A896.01", "TCGA.Y8.A895.01", "TCGA.Y8.A894.01", "TCGA.WN.A9G9.01", 
"TCGA.V9.A7HT.01", "TCGA.UZ.A9Q1.01", "TCGA.UZ.A9Q0.01", "TCGA.UZ.A9PZ.01", 
"TCGA.UZ.A9PX.01", "TCGA.UZ.A9PV.01", "TCGA.UZ.A9PU.01", "TCGA.UZ.A9PS.01", 
"TCGA.UZ.A9PR.01", "TCGA.UZ.A9PP.01", "TCGA.UZ.A9PO.01"), class = "data.frame")

Limma output:

dput(top.ranked.mrna[1:10,])
structure(list(KIRP1a.KIRP1b = c(9.42405436283387, 7.29269231668844, 
8.03279697482522, 12.5001340476173, 9.0302871816407, 9.54742710274648, 
9.35606346377186, 8.06371195257238, 8.63419020629816, 7.47291627197918
), KIRP1a.KIRP1c = c(9.07802598184819, 7.41361282474248, 8.09071496745167, 
12.8736462678662, 8.80429972978358, 9.62195920637429, 9.69510203580979, 
8.24340426901214, 8.33141631803679, 7.48359160149223), KIRP1a.KIRP2a = c(9.12834408727055, 
7.50839449402164, 8.11386423736527, 12.4131030012162, 8.94927134233316, 
9.91067610512489, 9.46882490230897, 8.26136665592561, 8.35812659426814, 
7.66679317178358), KIRP1a.KIRP2b = c(9.06187311630055, 7.50765539090834, 
8.04214262996268, 12.7679191907113, 8.72555669327813, 9.94497527304781, 
9.53158560062783, 8.22551202099074, 8.13776410561987, 7.65052440627734
), KIRP1a.KIRP2c = c(8.91582557125284, 7.16956497018708, 7.77463425904563, 
12.8256782134105, 8.48504489384871, 9.77833159509732, 9.3617374140406, 
7.88867557505257, 7.85589557068256, 7.23014255475153), KIRP1b.KIRP1c = c(-0.346028380985675, 
0.120920508054047, 0.0579179926264444, 0.373512220248984, -0.225987451857122, 
0.0745321036278108, 0.339038572037933, 0.17969231643976, -0.302773888261363, 
0.0106753295130493), KIRP1b.KIRP2a = c(-0.295710275563315, 0.2157021773332, 
0.0810672625400497, -0.0870310464010746, -0.0810158393075476, 
0.363249002378413, 0.112761438537107, 0.197654703353227, -0.276063612030011, 
0.193876899804405), KIRP1b.KIRP2b = c(-0.362181246533318, 0.214963074219906, 
0.00934565513745401, 0.267785143093997, -0.304730488362573, 0.39754817030133, 
0.175522136855975, 0.161800068418357, -0.49642610067829, 0.177608134298163
), KIRP1b.KIRP2c = c(-0.508228791581026, -0.123127346501352, 
-0.258162715779598, 0.325544165793255, -0.545242287791997, 0.230904492350845, 
0.00567395026874198, -0.175036377519812, -0.778294635615595, 
-0.242773717227644), KIRP1c.KIRP2a = c(0.0503181054223593, 0.0947816692791525, 
0.0231492699136053, -0.460543266650059, 0.144971612549574, 0.288716898750602, 
-0.226277133500826, 0.0179623869134667, 0.0267102762313524, 0.183201570291355
), KIRP1c.KIRP2b = c(-0.0161528655476436, 0.0940425661658589, 
-0.0485723374889904, -0.105727077154987, -0.0787430365054509, 
0.323016066673519, -0.163516435181958, -0.0178922480214035, -0.193652212416926, 
0.166932804785113), KIRP1c.KIRP2c = c(-0.162200410595352, -0.244047854555399, 
-0.316080708406043, -0.0479680544557286, -0.319254835934875, 
0.156372388723035, -0.333364621769191, -0.354728693959573, -0.475520747354232, 
-0.253449046740693), KIRP2a.KIRP2b = c(-0.0664709709700029, -0.000739103113293629, 
-0.0717216074025957, 0.354816189495071, -0.223714649055025, 0.0342991679229167, 
0.0627606983188675, -0.0358546349348702, -0.220362488648279, 
-0.0162687655062419), KIRP2a.KIRP2c = c(-0.212518516017711, -0.338829523834552, 
-0.339229978319648, 0.41257521219433, -0.46422644848445, -0.132344510027568, 
-0.107087488268365, -0.372691080873039, -0.502231023585584, -0.436650617032048
), KIRP2b.KIRP2c = c(-0.146047545047708, -0.338090420721258, 
-0.267508370917052, 0.0577590226992585, -0.240511799429424, -0.166643677950484, 
-0.169848186587233, -0.336836445938169, -0.281868534937305, -0.420381851525806
), AveExpr = c(9.22388080135039, 7.41248431865506, 8.09276283396573, 
12.5851196377048, 8.9529137715443, 9.56456171475202, 9.47291271457627, 
8.35545167732154, 8.47121673307697, 7.5301125434927), F = c(3129.04299874973, 
2425.24335168622, 2336.21949550567, 2319.57416953256, 2234.9189889757, 
1974.76306004417, 1942.33446730215, 1924.09013396044, 1882.82748498747, 
1852.45090891657), P.Value = c(5.64642759477374e-188, 4.71742939207491e-177, 
1.87577900641969e-175, 3.79213023968196e-175, 1.47153620280131e-173, 
2.81110986605728e-168, 1.42721731315767e-167, 3.60211263264752e-167, 
3.01887496197671e-166, 1.48694306822952e-165), adj.P.Val = c(3.10271196332817e-184, 
1.29611372547258e-173, 3.43580188009206e-172, 5.20943891676309e-172, 
1.61721828687864e-170, 2.57450811899746e-165, 1.12036559082877e-164, 
2.47420111454976e-164, 1.84319087956245e-163, 7.9894313597801e-163
)), row.names = c("HNRNPK", "TARDBP", "HNRNPL", "ACTB", "HNRNPU", 
"HNRNPA2B1", "ARF1", "PTBP1", "NONO", "U2AF2"), class = "data.frame")

Heatmap:

enter image description here

differential-expression limma • 672 views

ADD COMMENT • link 10 months ago by melissachua90 ▴ 60

score 0 · Answer 1 · 2023-06-23

0

Entering edit mode

10 months ago

ATpoint 82k

Same underlying problem as here: Limma returned only positive logFC values

You're using code that cannot possibly run, so whatever comes out must be nonsense or based on old variables still in the environment.

group <- rep(c("KIRP1a", "KIRP1b", "KIRP1c", "KIRP2a", "KIRP2b", "KIRP2c"), each=2)
design <- model.matrix(~group)

# Will error because KIRP1a is the reference so this becomes the intercept
makeContrasts("KIRP1a-KIRP1b", "KIRP1a-KIRP1c", "KIRP1a-KIRP2a", "KIRP1a-KIRP2b", "KIRP1a-KIRP2c", "KIRP1b-KIRP1c", "KIRP1b-KIRP2a", "KIRP1b-KIRP2b", "KIRP1b-KIRP2c", "KIRP1c-KIRP2a", "KIRP1c-KIRP2b", "KIRP1c-KIRP2c", "KIRP2a-KIRP2b", "KIRP2a-KIRP2c", "KIRP2b-KIRP2c", levels=colnames(design))

With your design the makeContrasts() function will impossibly run. Also, making contrasts for pairwise analysis, but then not using them and going for ANOVA-like stats makes no sense.

Two options:

1) Use pairwise contrasts: That requires ~0+group as design, you make your makeContrasts() as in above code, and then extract the individual contrasts one by one from using topTable(). That will give one data.frame with results per contrast. You could extract DEGs from each object and combine them, make a heatmap with these.

2) Use ANOVA-like testing. For this you can do ~group, but remove the contrast.fit() part. Then you can use topTable without specifying a contrast or coef, so it gives these ANOVA-like testing results.

All this is covered in https://bioconductor.org/packages/release/bioc/vignettes/limma/inst/doc/usersguide.pdf -- please read the limma-voom part. I am not sure trend=TRUE makes a lot of sense for voom, it's at least not standard.

Also, don't use v$E for anything, there is no part in the vignette that recommends that. Instead, use edgeR::cpm(dge, log=TRUE) to make heatmaps after converting to Z-score, see for example: Scaling RNA-Seq data before clustering?

ADD COMMENT • link 10 months ago by ATpoint 82k

0

Entering edit mode

ATpoint

For option 1, what do you mean by extracting DEGs from each object and combining them? Would the code below make sense? Thank you!

mrna.dge <- DGEList(counts=mat, group=group, remove.zeros=T)
keep <- filterByExpr(mrna.dge, design, min.count=50)
mrna.dge <- mrna.dge[keep,,keep.lib.sizes=FALSE]
mrna.dge <- calcNormFactors(mrna.dge)

v <- voom(mrna.dge, design, plot=TRUE)
fit <- lmFit(v, design)
contrasts.matrix <- makeContrasts("KIRP1a-KIRP1b", "KIRP1a-KIRP1c", "KIRP1a-KIRP2a", "KIRP1a-KIRP2b", "KIRP1a-KIRP2c",
                                  "KIRP1b-KIRP1c", "KIRP1b-KIRP2a", "KIRP1b-KIRP2b", "KIRP1b-KIRP2c",
                                  "KIRP1c-KIRP2a", "KIRP1c-KIRP2b", "KIRP1c-KIRP2c",
                                  "KIRP2a-KIRP2b", "KIRP2a-KIRP2c",
                                  "KIRP2b-KIRP2c",
                                  levels=colnames(design))
fit2 <- contrasts.fit(fit, contrasts.matrix)
efit <- eBayes(fit2)

DEGs for each contrast:

KIRP1a.v.KIRP1b.DEG <- topTable(efit, coef=1, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP1c.DEG <- topTable(efit, coef=2, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2a.DEG <- topTable(efit, coef=3, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2b.DEG <- topTable(efit, coef=4, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1a.v.KIRP2c.DEG <- topTable(efit, coef=5, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP1c.DEG <- topTable(efit, coef=6, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2a.DEG <- topTable(efit, coef=7, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2b.DEG <- topTable(efit, coef=8, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1b.v.KIRP2c.DEG <- topTable(efit, coef=9, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2a.DEG <- topTable(efit, coef=10, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2b.DEG <- topTable(efit, coef=11, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP1c.v.KIRP2c.DEG <- topTable(efit, coef=12, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2a.v.KIRP2b.DEG <- topTable(efit, coef=13, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2a.v.KIRP2c.DEG <- topTable(efit, coef=14, adjust.method="BH", sort.by="p", p.value=0.05)
KIRP2b.v.KIRP2c.DEG <- topTable(efit, coef=15, adjust.method="BH", sort.by="p", p.value=0.05)

Combine the DEGs and reorder them:

top.ranked.mrna <- rbind(KIRP1a.v.KIRP1b.DEG, KIRP1a.v.KIRP1c.DEG, KIRP1a.v.KIRP2a.DEG, KIRP1a.v.KIRP2b.DEG, KIRP1a.v.KIRP2c.DEG,
                         KIRP1b.v.KIRP1c.DEG, KIRP1b.v.KIRP2a.DEG, KIRP1b.v.KIRP2b.DEG, KIRP1b.v.KIRP2c.DEG,
                         KIRP1c.v.KIRP2a.DEG, KIRP1c.v.KIRP2b.DEG, KIRP1c.v.KIRP2c.DEG, 
                         KIRP2a.v.KIRP2b.DEG, KIRP2a.v.KIRP2c.DEG)
top.ranked.mrna.ordered <- top.ranked.mrna[order(top.ranked.mrna$adj.P.Val),]

Split up/down regulated DEGs:

# Upregulated DEGs
deg.up <- top.ranked.mrna[top.ranked.mrna$adj.P.Val < 0.05 & top.ranked.mrna$logFC > 0, ]
deg.up <- deg.up[rownames(deg.up) %in% rownames(mat),]
deg.up <- deg.up[order(deg.up$adj.P.Val),]
deg.up.top <- deg.up[1:10,]

# Downregulated DEGs
deg.down <- top.ranked.mrna[top.ranked.mrna$adj.P.Val < 0.05 & top.ranked.mrna$logFC < 0, ]
deg.down <- deg.down[rownames(deg.down) %in% rownames(mat),]
deg.down <- deg.down[order(deg.down$adj.P.Val),]
deg.down.top <- deg.down[1:10,]

# All DEGs 
all.deg <- rbind(deg.up, deg.down)
all.deg <- mat[rownames(mat) %in% rownames(all.deg),]
all.deg <- cpm(all.deg, log = TRUE)[rownames(all.deg) %in% rownames(top.ranked.mrna.ordered),]
all.deg <- all.deg[order(match(rownames(all.deg),rownames(top.ranked.mrna.ordered))),]

# Subset of DEGs for heatmap
top.deg <- rbind(deg.up.top[1:20,], deg.down.top[1:20,])
top.deg <- all.deg[rownames(all.deg) %in% rownames(top.deg),]

Normalize for heatmap:

norm.deg <- zscore(top.deg, dist="chisq")

ADD REPLY • link 10 months ago by melissachua90 ▴ 60