Entering edit mode
7.4 years ago
emblake
▴
90
I have analyzed the following data and have a few questions:
DonorID Group
1 Donor1 T0
2 Donor1 IL2
3 Donor1 IL2.ZA
4 Donor1 IL2.OKT3
5 Donor1 IL2.OKT3.ZA
6 Donor2 T0
7 Donor2 IL2
8 Donor2 IL2.ZA
9 Donor2 IL2.OKT3
10 Donor2 IL2.OKT3.ZA
11 Donor3 T0
12 Donor3 IL2
13 Donor3 IL2.ZA
14 Donor3 IL2.OKT3
15 Donor3 IL2.OKT3.ZA
16 Donor4 T0
17 Donor4 IL2
18 Donor4 IL2.ZA
19 Donor4 IL2.OKT3
20 Donor4 IL2.OKT3.ZA
21 Donor5 T0
22 Donor5 IL2
23 Donor5 IL2.ZA
24 Donor5 IL2.OKT3
25 Donor5 IL2.OKT3.ZA
26 Donor6 T0
27 Donor6 IL2
28 Donor6 IL2.ZA
29 Donor6 IL2.OKT3
30 Donor6 IL2.OKT3.ZA
>read.counts <- read.table("gene_counts_sorted.txt", header=TRUE)
>row.names(read.counts) <- read.counts$Geneid
>read.counts <- read.counts[ , -c(1:6)]
>colnames(read.counts) <- substr(colnames(read.counts),start=1,stop=4)
>head(read.counts)
SM01 SM02 SM03 SM04 SM05 SM06 SM07 SM08 SM09 SM10 SM11
ENSG00000210049 3 0 0 0 0 3 2 0 1 0 1
ENSG00000211459 22691 12565 13539 12681 11088 19204 13729 9507 15937 8022 13655
ENSG00000210077 1 6 5 4 4 2 6 3 3 1 3
ENSG00000210082 110091 88215 94014 75575 62912 99244 105988 75145 128197 73856 87062
ENSG00000276345 0 7 1 2 0 45 47 40 68 35 2
ENSG00000209082 29 91 66 79 44 29 83 48 91 56 29
SM12 SM13 SM14 SM15 SM16 SM17 SM18 SM19 SM20 SM21 SM22
ENSG00000210049 0 0 0 0 2 1 1 0 0 1 2
ENSG00000211459 11671 16176 11632 10562 20922 10334 9286 10838 7985 23120 6394
ENSG00000210077 4 3 3 2 3 6 4 3 0 4 2
ENSG00000210082 82396 139216 99996 87296 130829 61860 55904 89196 60536 146520 42290
ENSG00000276345 2 1 0 0 41 23 26 41 29 1 0
ENSG00000209082 67 73 39 40 47 36 45 60 44 45 50
SM23 SM24 SM25 SM26 SM27 SM28 SM29 SM30
ENSG00000210049 1 0 0 4 0 0 2 2
ENSG00000211459 9203 10932 13105 34180 12067 10515 10435 14079
ENSG00000210077 3 1 7 4 7 5 3 3
ENSG00000210082 57311 85217 103404 188883 106017 94485 93028 111348
ENSG00000276345 1 0 3 1 2 0 3 0
ENSG00000209082 53 73 118 49 116 77 77 100
>sample.type <- factor(c ("T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA","T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA","T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA","T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA","T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA", "T0", "IL2", "IL2ZA", "IL2OKT3", "IL2OKT3ZA"))
>sample.type <- relevel(sample.type, "T0")
>donor <- factor(rep(c("Donor1","Donor2","Donor3","Donor4","Donor5","Donor6"),c(5,5,5,5,5,5)))
>y <- DGEList(counts = read.counts , group = sample.type)
>keep <- rowSums( cpm(y) > 1) >= 6
>y <- y[keep ,]
>y$samples$lib.size <- colSums(y$counts)
>y <- calcNormFactors(y , method = "TMM")
>plotMDS(y, method="bcv", col=as.numeric(y$samples$group))
>legend("center", legend=levels(y$samples$group))
>design <- model.matrix(~group+donor, data=y$samples)
>colnames(design) <- sub("^Group", "", colnames(design))
>design
(Intercept) groupIL2 groupIL2OKT3 groupIL2OKT3ZA groupIL2ZA donorDonor2 donorDonor3
SM01 1 0 0 0 0 0 0
SM02 1 1 0 0 0 0 0
SM03 1 0 0 0 1 0 0
SM04 1 0 1 0 0 0 0
SM05 1 0 0 1 0 0 0
SM06 1 0 0 0 0 1 0
SM07 1 1 0 0 0 1 0
SM08 1 0 0 0 1 1 0
SM09 1 0 1 0 0 1 0
SM10 1 0 0 1 0 1 0
SM11 1 0 0 0 0 0 1
SM12 1 1 0 0 0 0 1
SM13 1 0 0 0 1 0 1
SM14 1 0 1 0 0 0 1
SM15 1 0 0 1 0 0 1
SM16 1 0 0 0 0 0 0
SM17 1 1 0 0 0 0 0
SM18 1 0 0 0 1 0 0
SM19 1 0 1 0 0 0 0
SM20 1 0 0 1 0 0 0
SM21 1 0 0 0 0 0 0
SM22 1 1 0 0 0 0 0
SM23 1 0 0 0 1 0 0
SM24 1 0 1 0 0 0 0
SM25 1 0 0 1 0 0 0
SM26 1 0 0 0 0 0 0
SM27 1 1 0 0 0 0 0
SM28 1 0 0 0 1 0 0
SM29 1 0 1 0 0 0 0
SM30 1 0 0 1 0 0 0
donorDonor4 donorDonor5 donorDonor6
SM01 0 0 0
SM02 0 0 0
SM03 0 0 0
SM04 0 0 0
SM05 0 0 0
SM06 0 0 0
SM07 0 0 0
SM08 0 0 0
SM09 0 0 0
SM10 0 0 0
SM11 0 0 0
SM12 0 0 0
SM13 0 0 0
SM14 0 0 0
SM15 0 0 0
SM16 1 0 0
SM17 1 0 0
SM18 1 0 0
SM19 1 0 0
SM20 1 0 0
SM21 0 1 0
SM22 0 1 0
SM23 0 1 0
SM24 0 1 0
SM25 0 1 0
SM26 0 0 1
SM27 0 0 1
SM28 0 0 1
SM29 0 0 1
SM30 0 0 1
attr(,"assign")
[1] 0 1 1 1 1 2 2 2 2 2
attr(,"contrasts")
attr(,"contrasts")$group
[1] "contr.treatment"
attr(,"contrasts")$donor
[1] "contr.treatment"
>y <- estimateDisp(y, design, robust=TRUE)
>fit <- glmQLFit(y, design, robust=TRUE)
>my.contrasts <- makeContrasts(IL2OKT3vsIL2=groupIL2OKT3-groupIL2, IL2ZAvsIL2=groupIL2ZA-groupIL2, IL2OKT3ZAvsIL2OKT3=groupIL2OKT3ZA-groupIL2OKT3, IL2OKT3ZAvsIL2ZA=groupIL2OKT3ZA-groupIL2ZA, levels=design)
>res.IL2OKT3vsIL2 <- glmQLFTest(fit, contrast=my.contrasts[,"IL2OKT3vsIL2"])
>topTags(res.IL2OKT3vsIL2)
Coefficient: -1*groupIL2 1*groupIL2OKT3
logFC logCPM F PValue FDR
ENSG00000179388 9.4249435 1.00999138 132.28175 1.871660e-10 3.024040e-06
ENSG00000122877 5.7056899 1.69505046 86.08397 6.873421e-10 4.723965e-06
ENSG00000164400 5.2392202 0.01694888 73.55545 8.771365e-10 4.723965e-06
ENSG00000100600 1.2819733 4.82944185 69.21670 3.535977e-09 1.428270e-05
ENSG00000123358 3.8735521 2.08762833 67.99143 8.273365e-09 2.673455e-05
ENSG00000102096 0.7811110 9.04755438 49.25912 1.121108e-07 2.773154e-04
ENSG00000171777 1.2141784 3.72067108 46.33382 1.201466e-07 2.773154e-04
ENSG00000184489 0.8327849 4.06781417 41.09405 3.398334e-07 6.863361e-04
ENSG00000211899 1.4237289 3.31708910 37.53086 1.131164e-06 2.030692e-03
ENSG00000119508 4.8508194 0.48975178 36.55454 1.600683e-06 2.586224e-03
>IL2OKT3vsIL2_results <- topTags(res.IL2OKT3vsIL2, n=Inf)
>write.csv(IL2OKT3vsIL2_results, file="IL2OKT3vsIL2_results.csv")
>IL2OKT3vsIL2_results <- read.csv("~/path/to/IL2OKT3vsIL2_results.csv")
>colnames(IL2OKT3vsIL2_results)
[1] "X" "logFC" "logCPM" "F" "PValue" "FDR"
>names(IL2OKT3vsIL2_results) <- c("GeneID", "logFC", "logCPM", "F", "PValue", "FDR")
>tr.IL2OKT3vsIL2 <- glmTreat(fit, contrast=my.contrasts[,"IL2OKT3vsIL2"], lfc=log2(1.5))
>library(biomaRt)
>ensemblLatest <- useMart("ensembl")
>ensembl_86 <- useMart(biomart="ENSEMBL_MART_ENSEMBL", dataset = "hsapiens_gene_ensembl")
>flt <- listFilters(ensembl_86)
>annotation <- getBM(attributes = c("ensembl_gene_id", "hgnc_symbol", "description"), filters = "ensembl_gene_id", values = IL2OKT3vsIL2_results[,1], mart = ensembl_86)
>IL2OKT3vsIL2_ann_results <- merge(IL2OKT3vsIL2_results, annotation, by.x=1, by.y=1, sort=FALSE)
>logCPM <- cpm(y,prior.count = 2,log = TRUE)
>o <- order(tr.IL2OKT3vsIL2$table$PValue)
>logCPM <- logCPM[o[1:30],]
>logCPM <- t(scale(t(logCPM)))
>library(gplots)
>col.pan <- colorpanel(100,"blue","white","red")
>heatmap.2(logCPM,col=col.pan,Rowv = TRUE,scale = "none",trace = "none",dendrogram = "both",cexRow = 1, cexCol = 1.4,margin = c(10,9), lhei = c(2,10),lwid = c(2,6))
Before I go too far in the analysis comparing the remaining treatment groups, please see the following questions:
- In the MDS plot, how do I set the legend colors to match the sample colors? I'm not sure how to plot each point so that donor and treatment are properly identified.
- Is the design matrix properly setup to compare treatment groups while removing the baseline control ("T0" samples) and accounting for donor effect?
design <- model.matrix(~group+donor, data=y$samples)
andmy.contrasts <- makeContrasts(IL2OKT3vsIL2=groupIL2OKT3-groupIL2, IL2ZAvsIL2=groupIL2ZA-groupIL2, IL2OKT3ZAvsIL2OKT3=groupIL2OKT3ZA-groupIL2OKT3, IL2OKT3ZAvsIL2ZA=groupIL2OKT3ZA-groupIL2ZA, levels=design)
. I'm not interested in DE between donors, only treatments. - In the heatmap, how do I rename the rows and supply a legend to identify the donor?
Thanks in advance!!