Question: DESeq2 unpaired sample RNAseq analysis
0
gravatar for Adeler001
10 weeks ago by
Adeler0010
Adeler0010 wrote:

Hello I'm new to coding on Rstudio.

I'm doing a RNA seq analysis to test for differential gene expression using deseq2, by using unpaired samples. I just wanted to know if I altered the following script template correctly to indicate the following :

  1. the analysis will be using unpaired samples
  2. the order of variable comparison will be affected versus unaffected

please see below the R studio script I used:

 # Import count table
countdata <- read.table("family_13_01_revised_RNA-seq.counts_fixed.txt", header=TRUE, row.names=1)

# Remove .bam or .sam from filenames
colnames(countdata) <- gsub("\\.[sb]am$", "", colnames(countdata))

# Convert to matrix
countdata <- as.matrix(countdata)
head(countdata)

# Assign condition (affected versus unaffected)
condition <- factor(c("affected","affected","affected","unaffected","unaffected","unaffected"),levels=c("affected","unaffected"))
condition <- relevel(condition, ref = "unaffected")

#load DESeq2#
library(DESeq2)

# Create a coldata frame and instantiate the DESeqDataSet
coldata <- data.frame(row.names=colnames(countdata),condition)

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design = ~ condition)
dds

#pre-filtering to keep only rows that have at least 1 reads total
keep <- rowSums(counts(dds)) > 1
dds <- dds[keep,]

# Run the DESeq
dds <- DESeq(dds)

# Regularized log transformation for clustering/heatmaps
rld <- rlogTransformation(dds)
head(assay(rld))
hist(assay(rld))

# Colors for plots below
library(RColorBrewer)
(mycols <- brewer.pal(8, "Dark2")[1:length(unique(condition))])

# Sample distance heatmap
sampleDists <- as.matrix(dist(t(assay(rld))))
library(gplots)
png("qc-heatmap_baker.png", w=1000, h=1000, pointsize=20)
heatmap.2(as.matrix(sampleDists), key=F, trace="none",
          col=colorpanel(100, "black", "white"),
          ColSideColors=mycols[condition], RowSideColors=mycols[condition],
          margin=c(10, 10), main="Sample Distance Matrix")
dev.off()

# Principal components analysis
rld_pca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) {
  require(genefilter)
  require(calibrate)
  require(RColorBrewer)
  rv = rowVars(assay(rld))
  select = order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
  pca = prcomp(t(assay(rld)[select, ]))
  fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : "))
  if (is.null(colors)) {
    if (nlevels(fac) >= 3) {
      colors = brewer.pal(nlevels(fac), "Paired")
    }   else {
      colors = c("black", "red")
    }
  }
  pc1var <- round(summary(pca)$importance[2,1]*100, digits=1)
  pc2var <- round(summary(pca)$importance[2,2]*100, digits=1)
  pc1lab <- paste0("PC1 (",as.character(pc1var),"%)")
  pc2lab <- paste0("PC2 (",as.character(pc2var),"%)")
  plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...)
  with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx))
  legend(legendpos, legend=levels(fac), col=colors, pch=20)
  #     rldyplot(PC2 ~ PC1, groups = fac, data = as.data.frame(pca$rld),
  #            pch = 16, cerld = 2, aspect = "iso", col = colours, main = draw.key(key = list(rect = list(col = colours),
  #                                                                                         terldt = list(levels(fac)), rep = FALSE)))
}
png("qc-pca.png", 1000, 1000, pointsize=20)
rld_pca(rld, colors=mycols, intgroup="condition", xlim=c(-30, 30))
dev.off()


# Get differential expression results
res <- results(dds)
table(res$padj<0.05)

## Order by adjusted p-value
res <- res[order(res$padj), ]
## Merge with normalized count data
resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "Gene"
head(resdata)
#get significant results (FDR<0.05)
## Write results
write.csv(resdata, file="sig_diffexpr-results.csv")


## MA plot
DESeq2::plotMA(dds, ylim=c(-1,1), cex=1)

## Volcano plot with significant DE genes
volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) {
  with(res, plot(log2FoldChange, -log10(padj), pch=20, main=main, ...))
  with(subset(res, padj<sigthresh ), points(log2FoldChange, -log10(padj), pch=20, col="red", ...))
  with(subset(res, abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(padj), pch=20, col="orange", ...))
  with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), points(log2FoldChange, -log10(padj), pch=20, col="green", ...))
  if (labelsig) {
    require(calibrate)
    with(subset(res, padj<sigthresh & abs(log2FoldChange)>lfcthresh), textxy(log2FoldChange, -log10(padj), labs=Gene, cex=textcx, ...))
  }
  legend(legendpos, xjust=1, yjust=1, legend=c(paste("FDR<",sigthresh,sep=""), paste("|LogFC|>",lfcthresh,sep=""), "Both"), pch=20, col=c("red","orange","green"))
}
png("diffexpr-volcanoplot2.png", 1200, 1000, pointsize=20)
volcanoplot(resdata, lfcthresh=1, sigthresh=0.05, textcx=.5, xlim=c(-10.5, 14))
dev.off()
rna-seq deseq2 • 187 views
ADD COMMENTlink modified 10 weeks ago by RamRS24k • written 10 weeks ago by Adeler0010

Cross-posted: https://support.bioconductor.org/p/124388/

Think about that maybe the lack of answers on BioC might be due to the overflow of non-formatted code. Please use the code option (10101) to highlight code and data examples and think about limiting the shown code to the really important part. If this is about DEG, maybe remove heatmap, volcano, PCA and MA code as this is not of interest per se. Same goes for your question on BioC. Cross-posting is not recommended anyway.

ADD REPLYlink written 10 weeks ago by ATpoint25k

I have added formatting to OP's code. Like you say, providing the entire script doesn't help OP's case here.

ADD REPLYlink written 10 weeks ago by RamRS24k
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 1012 users visited in the last hour