Question

Tumor-Normal Matched Pairwise DE using DESeq2

1

Entering edit mode

4.9 years ago

kamalikaray1792 ▴ 20

Hi, I want to perform a pairwise differential expression analysis between the BRCA Luminal A tumor subtype and matched normal samples. I have used the following code to download and process the data from TCGA using the TCGAbiolinks package in R. How should I prepare this data to run a paired differential analysis in DESeq2/edgeR?

 library(TCGAbiolinks)
# Query, download and prepare gene-level count data for the TCGA-BRCA
# primary tumor samples.
# Fix: `data.type` was split by a stray quote ("Gene", Expression
# Quantification"), which does not parse; it now matches the normal-tissue
# query below.
# NOTE(review): recent GDC data releases may no longer serve the
# "HTSeq - Counts" workflow (replaced by "STAR - Counts") -- confirm against
# the installed TCGAbiolinks version before running.
query.BRCA.tumor <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Primary Tumor"
)
GDCdownload(query.BRCA.tumor)
prep.BRCA.tumor <- GDCprepare(
  query = query.BRCA.tumor,
  summarizedExperiment = TRUE
)

# Same query for the solid-tissue normal samples of the same project.
query.BRCA.normal <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Solid Tissue Normal"
)
GDCdownload(query.BRCA.normal)
prep.BRCA.normal <- GDCprepare(
  query = query.BRCA.normal,
  summarizedExperiment = TRUE
)

# Restrict the analysis to patients whose PAM50 subtype is Luminal A and who
# have both a normal and a tumor sample.
dataSubt_BRCA <- TCGAquery_subtype(tumor = "BRCA")
samplePam50_BRCA.LumA <-
  dataSubt_BRCA[dataSubt_BRCA$BRCA_Subtype_PAM50 %in% "LumA", ]

# Normal samples that belong to a LumA patient.
samples.normal <- subset(
  prep.BRCA.normal,
  select = colData(prep.BRCA.normal)$patient %in%
    samplePam50_BRCA.LumA$patient
)

# Tumor samples whose patient also has one of those normal samples.
samples.tumor <- subset(
  prep.BRCA.tumor,
  select = colData(prep.BRCA.tumor)$patient %in%
    colData(samples.normal)$patient
)

# Normal samples whose patient has a tumor sample (the matched normals).
Matched.Samples.Normal <- subset(
  samples.normal,
  select = colData(samples.normal)$patient %in%
    colData(samples.tumor)$patient
)

# Matched tumor samples.
# Fix: the original subset `prep.BRCA.tumor` with a logical mask computed on
# `samples.tumor`; the mask has one entry per column of `samples.tumor`, so it
# must be applied to `samples.tumor`, not to the full tumor object.
# NOTE(review): a patient may contribute more than one tumor aliquot; a paired
# design presumably needs exactly one tumor and one normal per patient --
# verify and de-duplicate if necessary.
Matched.Samples.LumA <- subset(
  samples.tumor,
  select = colData(samples.tumor)$patient %in%
    colData(samples.normal)$patient
)

   # Array-array correlation preprocessing of the matched normal and tumor
   # objects, run with the same cut-off and datatype for both.
   pre_Matched.normal <- TCGAanalyze_Preprocessing(
     object = Matched.Samples.Normal,
     cor.cut = 0.6,
     datatype = "HTSeq - Counts"
   )
   pre_Matched.LumA <- TCGAanalyze_Preprocessing(
     object = Matched.Samples.LumA,
     cor.cut = 0.6,
     datatype = "HTSeq - Counts"
   )

   # Bind normal and tumor columns side by side, normalize by GC content and
   # then normalize that result by gene length.
   # NOTE(review): DESeq2 expects un-normalized integer counts; these
   # normalized values are presumably intended for TCGAbiolinks' own DEA --
   # confirm before passing them to DESeq2.
   matched_data.norm <- TCGAanalyze_Normalization(
     tabDF = TCGAanalyze_Normalization(
       tabDF = cbind(pre_Matched.normal, pre_Matched.LumA),
       geneInfo = geneInfoHT,
       method = "gcContent"
     ),
     geneInfo = geneInfoHT,
     method = "geneLength"
   )

   # Quantile filtering of the normalized table with a 0.25 cut-off.
   matched_data_filt <- TCGAanalyze_Filtering(
     tabDF = matched_data.norm,
     method = "quantile",
     qnt.cut = 0.25
   )

    # Split the filtered table into normal (NT) and primary-tumor (TP) columns
    # using the sample-type code of each column barcode.
    samplesNT <- TCGAquery_SampleTypes(
      barcode = colnames(matched_data_filt),
      typesample = c("NT")
    )
    samplesTP <- TCGAquery_SampleTypes(
      barcode = colnames(matched_data_filt),
      typesample = c("TP")
    )

    # drop = FALSE keeps a matrix even if only one sample is selected.
    matched_NT_filt <- matched_data_filt[, samplesNT, drop = FALSE]
    matched_LumA_filt <- matched_data_filt[, samplesTP, drop = FALSE]

    # Fix: the original built MatchedNTdataFilt from `AllMatcheddataFilt` and
    # `samplesMatchedNT`, neither of which is defined anywhere in the script.
    # It is now derived from the filtered table's normal columns, ordered by
    # barcode as before.
    MatchedNTdataFilt <- matched_NT_filt[
      , order(colnames(matched_NT_filt)), drop = FALSE
    ]

DESeq2 TCGAbiolinks DE • 1.6k views

ADD COMMENT • link updated 4.7 years ago by Biostar 20 • written 4.9 years ago by kamalikaray1792 ▴ 20

2

Entering edit mode

You can use the Xena browser to download the raw counts, which can be used as input for DESeq2.

ADD REPLY • link 4.9 years ago by DareDevil ★ 4.5k

0

Entering edit mode

I already have the above data from TCGA. I would like to prepare this for paired analysis in DESeq2.