Entering edit mode
                    4.9 years ago
        kamalikaray1792
        
    
        ▴
    
    20
    Hi, I want to perform a pairwise differential analysis between BRCA tumor samples of subtype Luminal A and normal samples. I have used the following code to download and process the data from TCGA using the TCGAbiolinks package in R. How should I prepare this data to run a paired differential analysis in DESeq2/edgeR?
 library(TCGAbiolinks)
# ---- Download TCGA-BRCA HTSeq counts -------------------------------------
# NOTE(review): recent GDC data releases may expose "STAR - Counts" rather
# than "HTSeq - Counts"; confirm against the TCGAbiolinks/GDC version in use.

# Primary tumor samples. `data.type` must be the single string
# "Gene Expression Quantification" (the original split it into two pieces,
# which is a syntax error).
query.BRCA.tumor <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Primary Tumor"
)
GDCdownload(query.BRCA.tumor)
prep.BRCA.tumor <- GDCprepare(
  query = query.BRCA.tumor,
  summarizedExperiment = TRUE
)

# Solid tissue normal samples, queried with the same settings.
query.BRCA.normal <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  experimental.strategy = "RNA-Seq",
  sample.type = "Solid Tissue Normal"
)
GDCdownload(query.BRCA.normal)
prep.BRCA.normal <- GDCprepare(
  query = query.BRCA.normal,
  summarizedExperiment = TRUE
)

# ---- PAM50 subtype annotation --------------------------------------------
# Keep only the rows of the BRCA subtype table labelled Luminal A.
dataSubt_BRCA <- TCGAquery_subtype(tumor = "BRCA")
samplePam50_BRCA.LumA <- dataSubt_BRCA[
  dataSubt_BRCA$BRCA_Subtype_PAM50 %in% "LumA",
]
 # ---- Match tumor and normal samples by patient ---------------------------
 # Normal samples whose patient is classified as Luminal A.
 samples.normal <- subset(
   prep.BRCA.normal,
   select = colData(prep.BRCA.normal)$patient %in%
     samplePam50_BRCA.LumA$patient
 )

 # Tumor samples from patients that also have a retained normal sample.
 samples.tumor <- subset(
   prep.BRCA.tumor,
   select = colData(prep.BRCA.tumor)$patient %in%
     colData(samples.normal)$patient
 )

 # Normal samples that have at least one tumor sample from the same patient.
 Matched.Samples.Normal <- subset(
   samples.normal,
   select = colData(samples.normal)$patient %in%
     colData(samples.tumor)$patient
 )

 # Tumor samples that have a matched normal. The column mask is computed
 # from `samples.tumor`, so it must be applied to `samples.tumor` as well;
 # applying it to the full `prep.BRCA.tumor` (as before) uses a mask whose
 # length does not correspond to that object's columns.
 Matched.Samples.LumA <- subset(
   samples.tumor,
   select = colData(samples.tumor)$patient %in%
     colData(samples.normal)$patient
 )
   # ---- Preprocess, normalize and filter the matched samples ---------------
   # Preprocess each group separately with the same correlation cutoff.
   pre_Matched.normal <- TCGAanalyze_Preprocessing(
     object = Matched.Samples.Normal,
     datatype = "HTSeq - Counts",
     cor.cut = 0.6
   )
   pre_Matched.LumA <- TCGAanalyze_Preprocessing(
     object = Matched.Samples.LumA,
     datatype = "HTSeq - Counts",
     cor.cut = 0.6
   )

   # Combine normal and tumor columns (normal first), then normalize in two
   # passes: first with method "gcContent", then with method "geneLength".
   matched_data.norm <- TCGAanalyze_Normalization(
     tabDF = cbind(pre_Matched.normal, pre_Matched.LumA),
     method = "gcContent",
     geneInfo = geneInfoHT
   )
   matched_data.norm <- TCGAanalyze_Normalization(
     tabDF = matched_data.norm,
     method = "geneLength",
     geneInfo = geneInfoHT
   )

   # Quantile-based filtering with a 0.25 cutoff.
   matched_data_filt <- TCGAanalyze_Filtering(
     tabDF = matched_data.norm,
     qnt.cut = 0.25,
     method = "quantile"
   )
    # ---- Split the filtered matrix by sample type ---------------------------
    # Barcodes of the normal ("NT") and primary tumor ("TP") columns.
    samplesNT <- TCGAquery_SampleTypes(
      barcode = colnames(matched_data_filt),
      typesample = c("NT")
    )
    samplesTP <- TCGAquery_SampleTypes(
      barcode = colnames(matched_data_filt),
      typesample = c("TP")
    )

    # The original referenced `AllMatcheddataFilt` and `samplesMatchedNT`,
    # which are never defined in this script; use the objects created above.
    # `drop = FALSE` keeps a matrix even if only one column is selected.
    MatchedNTdataFilt <- matched_data_filt[, samplesNT, drop = FALSE]
    MatchedNTdataFilt <- MatchedNTdataFilt[
      , order(colnames(MatchedNTdataFilt)),
      drop = FALSE
    ]

    matched_NT_filt <- matched_data_filt[, samplesNT, drop = FALSE]
    matched_LumA_filt <- matched_data_filt[, samplesTP, drop = FALSE]
                    
                
                
You can use the Xena browser to download the raw counts, which can be used as input for DESeq2.
I already have the above data from TCGA. I would like to prepare this for paired analysis in DESeq2.
You can find it here. Make sure that your data are raw counts.