Tutorial: Clustering of DAVID gene enrichment results from gene expression studies
23
gravatar for Kevin Blighe
8 months ago by
Kevin Blighe30k
Kevin Blighe30k wrote:

Added in response to another question: Ideas to plot several enrichment results (BP) in one

f

To take this to completion, you will require:

  • differential expression results (here as topTable, generated from DESeq2 in this case)
  • Results from DAVID (here exported as MyEnrichment.tsv from DAVID)

.

#differential expression results
head(topTable)
       Gene    baseMean log2FoldChange   lfcSE       stat       pvalue
3098  HOXD4  141.097337      -4.386331 0.7718557  -5.682839 1.324773e-08
5539   IRX2  610.363722       8.930738 0.8609911  10.372625 3.303232e-25
5541   IRX1  406.924285       8.573095 1.0976594   7.810341 5.703335e-15
5686  FGF10   33.978556       7.482399 1.0559976   7.085622 1.384214e-12
7916 CHCHD2 2879.127421      -8.398716 0.6070995 -13.834167 1.585619e-43
9130   ESX1    9.426912      -5.839058 1.0965897  -5.324743 1.010960e-07
             padj
3098 2.454747e-06
5539 7.820952e-22
5541 3.472354e-12
5686 5.462264e-10
7916 3.378796e-39
9130 1.485693e-05

Filter out genes of interest

#Filter genes that pass absolute log (base2) fold change >= 4 and FDR (BH) Q<=0.0001
log2FCcutoff <- 4
BHcutoff <- 0.0001
sigGeneList <- subset(topTable, abs(log2FoldChange)>=log2FCcutoff & padj<=BHcutoff)[,1]

Perform enrichment in DAVID with 'sigGeneList' object

Go to DAVID's Functional Annotation tool and then:

  1. Upload a list of genes to enrich
  2. Select the identifier that you're using for your genes
  3. Select 'Gene List'
  4. Select the repositories against which you wish to perform enrichment (e.g. GO BP, MF, and CC)
  5. Select Functional Annotation Chart

1
images upload

A new window will be created. From there, click on Download File to export the enrichment results as MyEnrichment.tsv

Untitled

DAVIDfile <- "MyEnrichment.tsv"
DAVID <- read.table(DAVIDfile, sep="\t", header=TRUE)
colnames(DAVID)
 [1] "Category"        "Term"            "Count"           "X."             
 [5] "PValue"          "Genes"           "List.Total"      "Pop.Hits"       
 [9] "Pop.Total"       "Fold.Enrichment" "Bonferroni"      "Benjamini"      
[13] "FDR"

-----------------------------

Create a gene-by-annotation matrix

#Enrichment cut-off
enrichBcutoff <- 0.05
DAVID <- subset(DAVID, Benjamini<enrichBcutoff)
DAVID <- DAVID[,c(1,2,3,6)]

#Pull out only terms with WNT genes
#DAVID <- DAVID[grep("WNT", DAVID[,4]),]

#Create a new dataframe that has '1' for when the gene is part of a term, and '0' when not
annGSEA <- data.frame(row.names=sigGeneList)
for (j in 1:length(sigGeneList))
{
    #Create a matching pattern to ensure genes match exactly
    #   ^GENE,  --> Match at beginning of matching string
    #   , GENE$ --> Match at end of matching string
    #    GENE,  --> Match between first and last gene in matching string
    gene <- sigGeneList[j]
    pattern <- paste("^", gene, ", |, ", gene, "$| ", gene, ",", sep="")
    for (k in 1:nrow(DAVID))
    {
        if (any(grepl(pattern, DAVID$Genes[k])))
        {
            annGSEA[j,k] <- 1
        }
        else
        {
            annGSEA[j,k] <- 0
        }
    }
}
colnames(annGSEA) <- DAVID[,2]

#Remove terms with no overlapping genes
annGSEA <- annGSEA[,apply(annGSEA, 2, mean)!=0]

#Remove genes with no overlapping terms
annGSEA <- annGSEA[apply(annGSEA, 1, mean)!=0,]

--------------------

Generate heatmap and annotation

require(ComplexHeatmap)
require(circlize)

#Match the order of rownames in toptable with that of annGSEA
topTable <- topTable[which(topTable$Gene %in% rownames(annGSEA)),]
topTable <- topTable[match(rownames(annGSEA), topTable$Gene),]

#Set text and figure dimensions
geneLab=10
termLab=8

#Create heatmap annotations
    #Colour bar for -log (base 10) FDR Q value for DEGs, and fold changes
    dfMinusLog10FDRGenes <- data.frame(-log10(topTable[which(topTable[,1] %in% rownames(annGSEA)),"padj"]))
    dfMinusLog10FDRGenes[dfMinusLog10FDRGenes=="Inf"] <- 0
    dfFoldChangeGenes <- data.frame(topTable[which(topTable[,1] %in% rownames(annGSEA)),"log2FoldChange"])
    dfGeneAnno <- data.frame(dfMinusLog10FDRGenes, dfFoldChangeGenes)
    colnames(dfGeneAnno) <- c("DEG\nsignificance\nscore", "Regulation")
    dfGeneAnno[,2] <- ifelse(dfGeneAnno[,2]>0, "Up-regulated", "Down-regulated")
    colours <- list("Regulation"=c("Up-regulated"="royalblue", "Down-regulated"="yellow"))
    haGenes <- rowAnnotation(df=dfGeneAnno, col=colours, width=unit(1,"cm"))

    #Colour bar for -log (base 10) Benjamini enrichment Q value
    dfMinusLog10BenjaminiTerms <- data.frame(-log10(read.table(DAVIDfile, sep="\t", header=TRUE)[which(read.table(DAVIDfile, sep="\t", header=TRUE)$Term %in% colnames(annGSEA)),"Benjamini"]))
    colnames(dfMinusLog10BenjaminiTerms) <- "GO Term\nsignificance\nscore"
    haTerms <- HeatmapAnnotation(df=dfMinusLog10BenjaminiTerms,
        colname=anno_text(colnames(annGSEA), rot=40, just="right", offset=unit(1,"npc")-unit(2,"mm"), gp=gpar(fontsize=termLab)),
        annotation_height=unit.c(unit(1, "cm"), unit(8, "cm")))

pdf("GO.pdf", width=7, height=6)
    hmapGSEA <- Heatmap(annGSEA,

        name="My enrichment",

        split=dfGeneAnno[,2],

        col=c("0"="white", "1"="forestgreen"),

        rect_gp=gpar(col="grey85"),

        cluster_rows=T,
        show_row_dend=T,
        row_title="Statistically-significant genes",
        row_title_side="left",
        row_title_gp=gpar(fontsize=12, fontface="bold"),
        row_title_rot=0,
        show_row_names=TRUE,
        row_names_gp=gpar(fontsize=geneLab, fontface="bold"),
        row_names_side="left",
        row_names_max_width=unit(15, "cm"),
        row_dend_width=unit(10,"mm"),

        cluster_columns=T,
        show_column_dend=T,
        column_title="Enriched terms",
        column_title_side="top",
        column_title_gp=gpar(fontsize=12, fontface="bold"),
        column_title_rot=0,
        show_column_names=FALSE,
        #column_names_gp=gpar(fontsize=termLab, fontface="bold"),
        #column_names_max_height=unit(15, "cm"),

        show_heatmap_legend=FALSE,

        #width=unit(12.5, "cm"),

        clustering_distance_columns="euclidean",
        clustering_method_columns="ward.D2",
        clustering_distance_rows="euclidean",
        clustering_method_rows="ward.D2",

        bottom_annotation=haTerms)

    draw(hmapGSEA + haGenes, heatmap_legend_side="right", annotation_legend_side="right")
dev.off()
ADD COMMENTlink modified 3 months ago by krushnach80400 • written 8 months ago by Kevin Blighe30k

How to export in tsv file, when I click on download option it opens a new window with tex.

ADD REPLYlink written 7 months ago by bikash251030

Hello friend. Right-click on the 'Download' link, and then select the option to 'Save As' (or something equivalent in your own language. Let me know if that works.

ADD REPLYlink modified 7 months ago • written 7 months ago by Kevin Blighe30k

DAVID There is no option for saving as.

ADD REPLYlink written 7 months ago by bikash251030

Dear bikash2510, there is indeed a 'Save as'. Please take a look at the screenshot (note that I use my operating system in Portuguese - 'Salvar link como...' means 'Save link as...'. You have to right-click on Download File:

Captura_de_tela_de_2018_03_10_20_47_54

ADD REPLYlink written 7 months ago by Kevin Blighe30k

Error: unexpected ',' in "DAVIDfile <- "MyEnrichment.tsv","

I am getting this error.

ADD REPLYlink written 7 months ago by bikash251030

It may be a CSV file, if DAVID has changed recently. Please research the differences between file formats TSV and CSV, and then modify your function in R appropriately.

For example:

  • read.table(..., sep=",") #comma-separated
  • read.table(..., sep="\t") #tab-separated

You can also try read.csv(), a wrapper for read.table()

ADD REPLYlink written 7 months ago by Kevin Blighe30k

Facing error in topmatrix.

dat=read.delim("MyEnrichment.csv", header=TRUE)
colnames(dat)
 [1] "Category"        "Term"            "Count"           "X."              "PValue"         
 [6] "Genes"           "List.Total"      "Pop.Hits"        "Pop.Total"       "Fold.Enrichment"
[11] "Bonferroni"      "Benjamini"       "FDR"            
enrichBcutoff <- 0.05
dat <- subset(dat, Benjamini<enrichBcutoff)
dat <- dat[,c(1,2,3,6)]
annGSEA <- data.frame(row.names=rownames(topMatrix))
Error in rownames(topMatrix) : object 'topMatrix' not found
for (j in 1:length(rownames(topMatrix)))
ADD REPLYlink modified 7 months ago by Kevin Blighe30k • written 7 months ago by kalyanimeha30

What is the name of your object that contains P values an fold changes?

ADD REPLYlink written 7 months ago by Kevin Blighe30k

I didn't get your question, ("P-values" of Gene Entrez ID.) I am writing the same command that you have used, but getting some errror.

ADD REPLYlink written 7 months ago by kalyanimeha30

Oh, sorry, if you look at the very top of this page, you will see a section that mentions the additional data that you need:

  • normalised count values (here as exprsMatrix)
  • differential expression results (here as topTable, generated from DESeq2 in this case)
  • Results from DAVID (here exported as MyEnrichment.tsv from DAVID)

This plot is piecing together various parts of different data from different sources

ADD REPLYlink written 7 months ago by Kevin Blighe30k

Hi Kevin,

I am glad I found your thread, as it is basically what I am trying to do/present my data, but being new to R so I am finding it tricky.

I have Normalised gene expression data (mRNA) and I have managed to use DAVID to cluster the Genes, and have the Enrichment file.

I want to display the mean normalised values rather than significance. i.e A heatmap to display upregulated genes and another heatmap to display downregulated genes in the pathways.

I don't know how to statistically analyse the data using DESeq2, I have calculated the P-Value on Excel, so I only have the Mean of my normalised data set and the P-Value (no base mean, no log 2 fold change, no adjusted P-value), is it possible to generate a heatmap with just the data sets I have? I just want to have simple heatmaps, as I want to see which pathway/s are mostly dysregulated in response to my treatment, and then start to create other graphical representations of interesting pathways/genes.

I have tried to follow your example above, but R gives me an error for rldMatrix (saying it could not find the function). I have managed to write the codes for the rest, and it seems to be fine. Just finding the initial bit (data organisation) tricky.

Thank you for your help. Sehar :)

ADD REPLYlink modified 4 months ago • written 4 months ago by ssajid0

Hello Sehar. Well, the rldMatrix is just a data matrix of your expression values. What is the name of your object that contains your expression values? It should have samples as columns and genes as rows.

ADD REPLYlink written 4 months ago by Kevin Blighe30k
CT<-read.csv(file.choose(), header=TRUE)
x.sub<-subset(CT, Treated>1)
topMatrix<-x.sub
annGSEA<-data.frame(row.names=rownames(topMatrix))
for (j in 1:length(rownames(topMatrix)))
+ gene<-rownames(topMatrix)[j]
View(annGSEA)
DAVID<-read.csv(file.choose(),header=TRUE)
enrichBcutoff<-0.05
DAVID<-subset(DAVID, Benjamini<enrichBcutoff)
DAVID<-DAVID[,c(1,2,3,6)]
for (k in 1:nrow(DAVID))
+ if (any(grepl(pattern, DAVID$Genes[k])))
+ annGSEA[j,k] <- 1
Error in grepl(pattern, DAVID$Genes[k]) : object 'pattern' not found
annGSEA[j,k] <- 0
colnames(annGSEA) <- DAVID[,2]
Error in names(x) <- value : 
  'names' attribute [574] must be the same length as the vector [1]
View(annGSEA)
View(x.sub)
View(annGSEA)

Thank you for your reply.

the data set i am trying to use is x.sub, it only has 78 obs and 2 variables. however, i am not getting data into annGSEA, it remains empty, and I am getting a some errors (see above).

ADD REPLYlink modified 4 months ago by Kevin Blighe30k • written 4 months ago by ssajid0

Oh, you just have to follow the part entitled 'Create a gene-by-annotation matrix'. Your code above does not match what I have put in the tutorial.

Also, regarding rldMatrix, that is my fault. rldMatrix should be renamed exprsMatrix in the tutorial (I have now changed it).

ADD REPLYlink written 4 months ago by Kevin Blighe30k
>CT<-read.csv(file.choose(), header=TRUE)
> View(CT)

> x.sub<-subset(CT, Treated>1)
> upregulation<-1
> DAVID<-read.csv(file.choose(), header=TRUE)
> enrichBcutoff<-0.05
> DAVID<-subset(DAVID, Benjamini<enrichBcutoff)
> DAVID<-DAVID[,c(1,2,3,6)]
> annGSEA<-data.frame(row.names=rownames(x.sub))
> View(annGSEA)
> colnames(annGSEA)<-DAVID[,2]
Error in names(x) <- value : 
  'names' attribute [574] must be the same length as the vector [0]
> for (j in 1:length(rownames(x.sub)))
+ gene<-rownames(x.sub)[j]
> for (k in 1:nrow(DAVID))
+ if (any(grepl(pattern, DAVID$Genes[k])))
+ annGSEA[j,k] <- 0
Error in grepl(pattern, DAVID$Genes[k]) : object 'pattern' not found
> colnames(annGSEA) <- DAVID[,2]
Error in names(x) <- value : 
  'names' attribute [574] must be the same length as the vector [0]
> #Remove terms with no overlapping genes
>   annGSEA <- annGSEA[,apply(annGSEA, 2, mean)!=0]
> #Remove genes with no overlapping terms
>   annGSEA <- annGSEA[apply(annGSEA, 1, mean)!=0,]
> View(annGSEA)

I tried to follow the tutorial step by step, i am still getting some errors (above).

I know it is a big ask, but is it possible if I can send you my data files, just to check them?

ADD REPLYlink modified 4 months ago by genomax57k • written 4 months ago by ssajid0

If you can search for my name and find my email, then you can contact me :)

ADD REPLYlink written 4 months ago by Kevin Blighe30k

Hi Kevin,

I searched your name, and your email address is not visible.

ADD REPLYlink written 4 months ago by ssajid0

Sorry, I just looked at your code (here) and you do not appear to be following the tutorial line by line. Can you double check?

As an example, your code is missing the line

pattern <- paste("^", gene, ", |, ", gene, "$| ", gene, ",", sep="")
ADD REPLYlink written 4 months ago by Kevin Blighe30k

Hi Kevin,

Sorry to keep bothering you.

Can I email you the files onto your UCL email? I am still getting errors;

    + if (any(grepl(pattern, DAVID$Genes[k])))
+ annGSEA[j,k] <-1
Warning message:
In 1:row(DAVID) :
  numerical expression has 2296 elements: only the first used
> else
Error: unexpected 'else' in "else"
> annGSEA[j,k] <-0
> colnames(annGSEA)<- DAVID[,2]
Error in names(x) <- value : 
  'names' attribute [574] must be the same length as the vector [1]
ADD REPLYlink modified 4 months ago by genomax57k • written 4 months ago by ssajid0

Sure, if you like, ssajid

ADD REPLYlink written 4 months ago by Kevin Blighe30k

I have emailed you :) Thank you

ADD REPLYlink written 4 months ago by ssajid0

@Kevin you have a topMatrix object here , but i did clustering and taking some of the clustered gene set and giving that as input to david so what is my top matrix as im doing wgcna so there is no differential expression analysis involved so the only thing i have gene list from modules ..any suggestion

ADD REPLYlink written 4 months ago by krushnach80400

Dear krushnach80. Indeed, we do not actually require topMatrix here. All that we require is sigGeneList. In the code, rownames(topMatrix) can be replaced with sigGeneList. I will edit the oiriginal code now.

ADD REPLYlink written 4 months ago by Kevin Blighe30k

okay only gene list so my input which is my final filtered list of gene which im giving input can be my sigGenelist ,that what i understand

ADD REPLYlink written 4 months ago by krushnach80400

Yes, well, all that you need from the very beginning is topTable (which has test statistics) and the DAVID results, in TSV format. sigGeneList is generated from topTable.

ADD REPLYlink written 4 months ago by Kevin Blighe30k

okay but when i go for the clustering of multiple sample of different lineages so i don;t do any statistical test just VST or rlog for wgcna ..so i get modules which im doing ontology so in this case i don;t have any file for statistical test unless i do it for all the samples..

ADD REPLYlink written 4 months ago by krushnach80400

I see. The test statistics are only used for the heatmap annotation, so, you can avoid the generation of that. THey are also used in the lines:

#Filter genes that pass absolute log (base2) fold change >= 4 and FDR (BH) Q<=0.0001
log2FCcutoff <- 4
BHcutoff <- 0.0001
sigGeneList <- subset(topTable, abs(log2FoldChange)>=log2FCcutoff & padj<=BHcutoff)[,1]

You can also avoid those lines.

ADD REPLYlink written 4 months ago by Kevin Blighe30k

okay will give it a try thank for your quick response

ADD REPLYlink written 4 months ago by krushnach80400
1

You are welcome, kind Sir!

ADD REPLYlink written 4 months ago by Kevin Blighe30k

Very useful and a great way to look that data will multi feature. Will give it a try. Great resource to be bookmarked.

ADD REPLYlink written 4 months ago by vchris_ngs4.5k

Thanks, vchris_ngs. It is difficult to automate this process, which is why I just put the code here.

ADD REPLYlink written 4 months ago by Kevin Blighe30k

Hi@Kevin now since i have to do pairwise comparison im back it im doing every thing as you have mentioned after running the for loop i get this
when i see annGSEA i get this "disulfide bond Disulfide bond glycosylation site:N-linked (GlcNAc...) Glycoprotein signal peptide GO:0005615~extracellular space Polymorphism Signal hsa05310:Asthma topological domain:Extracellular GO:0005886~plasma membrane Receptor GO:0005887~integral component of plasma membrane Membrane as my colnames and my first column being my gene names so all my ontology columns are zero im not sure what im doing wrong..

ADD REPLYlink modified 3 months ago • written 3 months ago by krushnach80400

Can you paste the code that you use?

ADD REPLYlink written 3 months ago by Kevin Blighe30k

@kevin sorry i replied as answer not in your comment now i can't delete it...

ADD REPLYlink written 3 months ago by krushnach80400
log2FCcutoff <- 3
    BHcutoff <- 0.05
    sigGeneList <- subset(topTable, abs(log2FoldChange)>=log2FCcutoff & padj<=BHcutoff)[,1]




head(sigGeneList)


DAVIDfile <- "MyEnrichment.tsv"
DAVID <- read.table(DAVIDfile, sep="\t", header=TRUE)
colnames(DAVID)


enrichBcutoff <- 0.05
DAVID <- subset(DAVID, Benjamini<enrichBcutoff)
DAVID <- DAVID[,c(1,2,3,6)]



annGSEA <- data.frame(row.names=sigGeneList)
for (j in 1:length(sigGeneList))
{
  #Create a matching pattern to ensure genes match exactly
  #   ^GENE,  --> Match at beginning of matching string
  #   , GENE$ --> Match at end of matching string
  #    GENE,  --> Match between first and last gene in matching string
  gene <- sigGeneList[j]
  #pattern <- paste("^", gene, ", |, ", gene, "$| ", gene, ",", sep="")
  pattern <- paste("^", gene, ", |, ", gene, "$| ", gene, ",", sep="")

  for (k in 1:nrow(DAVID))
  {
    if (any(grep(pattern, DAVID$Genes[k])))
    {
      annGSEA[j,k] <- 1
    }
    else
    {
      annGSEA[j,k] <- 0
    }
  }
}
colnames(annGSEA) <- DAVID[,2]

#Remove terms with no overlapping genes
annGSEA <- annGSEA[,apply(annGSEA, 2, mean)!=0]

#Remove genes with no overlapping terms
annGSEA <- annGSEA[apply(annGSEA, 1, mean)!=0,]

this the code exact code im running ,my "MyEnrichment.tsv" contains the DAVID output which i get after giving gene list for enrichment with cutoff range .so im not sure what im doing wrong

ADD REPLYlink written 3 months ago by krushnach80400

This is the error now im getting

Error in grepl(pattern, DAVID$Genes[k]) :

which is now gone now im getting a new error

dfMinusLog10FDRGenes[dfMinusLog10FDRGenes=="Inf"] <- 0

Error in matrix(if (is.null(value)) logical() else value, nrow = nr, dimnames = list(rn,  : 
  length of 'dimnames' [2] not equal to array extent

The

dfMinusLog10FDRGenes

is empty ...

ADD REPLYlink written 3 months ago by krushnach80400
1

My dear Krushnach, could you possibly send me the data that you are using? - just the DAVID file and your list of genes.

I am aware that this function can be difficult to implement.

ADD REPLYlink modified 3 months ago • written 3 months ago by Kevin Blighe30k

okay i will send all the files im using for this purpose

ADD REPLYlink written 3 months ago by krushnach80400
1

Hi Krushnach, I was able to generate a figure from the file that you sent with the following code (below). I note that, in your data, the vast majority of statistically significant genes are up-regulated (positive fold changes). Indeed, with the filtering that I've used, there are no down-regulated genes in the resulting plot

ADD REPLYlink written 3 months ago by Kevin Blighe30k
1
topTable <- read.table("LSC_BLAST_deseq2.txt", header=TRUE, sep=",")
sigGeneList <- topTable[which(topTable$padj<=0.05 & abs(topTable$log2FoldChange)>=4),"Gene"]
length(sigGeneList)

sink("sigGeneList.list")
data.frame(sigGeneList)
sink()

Then, I use the genes in the sigGeneList.list file in order to generate a new file, DAVID.txt, via DAVID (https://david.ncifcrf.gov/).

DAVIDfile <- "DAVID.txt"
DAVID <- read.table(DAVIDfile, sep="\t", header=TRUE)
colnames(DAVID)

enrichBcutoff <- 0.05
DAVID <- subset(DAVID, Benjamini<enrichBcutoff)
DAVID <- DAVID[,c(1,2,3,6)]

annGSEA <- data.frame(row.names=sigGeneList)
for (j in 1:length(sigGeneList))
{
    gene <- sigGeneList[j]
    pattern <- paste("^", gene, ", |, ", gene, "$| ", gene, ",", sep="")
    for (k in 1:nrow(DAVID))
    {
        if (any(grepl(pattern, DAVID$Genes[k])))
        {
            annGSEA[j,k] <- 1
        }
        else
        {
            annGSEA[j,k] <- 0
        }
    }
}
colnames(annGSEA) <- DAVID[,2]

annGSEA <- annGSEA[,apply(annGSEA, 2, mean)!=0]

annGSEA <- annGSEA[apply(annGSEA, 1, mean)!=0,]


require(ComplexHeatmap)
require(circlize)

topTable <- topTable[which(topTable$Gene %in% rownames(annGSEA)),]
topTable <- topTable[match(rownames(annGSEA), topTable$Gene),]

geneLab=10
termLab=8

#Create heatmap annotations
    dfMinusLog10FDRGenes <- data.frame(-log10(topTable[which(topTable[,1] %in% rownames(annGSEA)),"padj"]))
    dfMinusLog10FDRGenes[dfMinusLog10FDRGenes=="Inf"] <- 0
    dfFoldChangeGenes <- data.frame(topTable[which(topTable[,1] %in% rownames(annGSEA)),"log2FoldChange"])
    dfGeneAnno <- data.frame(dfMinusLog10FDRGenes, dfFoldChangeGenes)
    colnames(dfGeneAnno) <- c("DEG\nsignificance\nscore", "Regulation")
    dfGeneAnno[,2] <- ifelse(dfGeneAnno[,2]>0, "Up-regulated", "Down-regulated")
    colours <- list("Regulation"=c("Up-regulated"="royalblue", "Down-regulated"="yellow"))
    haGenes <- rowAnnotation(df=dfGeneAnno, col=colours, width=unit(1,"cm"))

    dfMinusLog10BenjaminiTerms <- data.frame(-log10(read.table(DAVIDfile, sep="\t", header=TRUE)[which(read.table(DAVIDfile, sep="\t", header=TRUE)$Term %in% colnames(annGSEA)),"Benjamini"]))
    colnames(dfMinusLog10BenjaminiTerms) <- "GO Term\nsignificance\nscore"
    haTerms <- HeatmapAnnotation(df=dfMinusLog10BenjaminiTerms,
        colname=anno_text(colnames(annGSEA), rot=40, just="right", offset=unit(1,"npc")-unit(2,"mm"), gp=gpar(fontsize=termLab)),
        annotation_height=unit.c(unit(1, "cm"), unit(8, "cm")))

pdf("GO.pdf", width=7, height=12)
    hmapGSEA <- Heatmap(annGSEA,

        name="My enrichment",

        split=dfGeneAnno[,2],

        col=c("0"="white", "1"="forestgreen"),

        rect_gp=gpar(col="grey85"),

        cluster_rows=T,
        show_row_dend=T,
        row_title="Statistically-significant genes",
        row_title_side="left",
        row_title_gp=gpar(fontsize=12, fontface="bold"),
        row_title_rot=0,
        show_row_names=TRUE,
        row_names_gp=gpar(fontsize=geneLab, fontface="bold"),
        row_names_side="left",
        row_names_max_width=unit(15, "cm"),
        row_dend_width=unit(10,"mm"),

        cluster_columns=T,
        show_column_dend=T,
        column_title="Enriched terms",
        column_title_side="top",
        column_title_gp=gpar(fontsize=12, fontface="bold"),
        column_title_rot=0,
        show_column_names=FALSE,
        #column_names_gp=gpar(fontsize=termLab, fontface="bold"),
        #column_names_max_height=unit(15, "cm"),

        show_heatmap_legend=FALSE,

        #width=unit(12.5, "cm"),

        clustering_distance_columns="euclidean",
        clustering_method_columns="ward.D2",
        clustering_distance_rows="euclidean",
        clustering_method_rows="ward.D2",

        bottom_annotation=haTerms)

    draw(hmapGSEA + haGenes, heatmap_legend_side="right", annotation_legend_side="right")
dev.off()

krushnach

ADD REPLYlink written 3 months ago by Kevin Blighe30k

so where i was doing wrong..?can you let me know

ADD REPLYlink written 3 months ago by krushnach80400
1

It is possible that, in your gene list, only 1 or 2 genes was down-regulated. This will create a problem for the clustering because it will attempt to create a separate heatmap and dendrogram for the up- and down-regulated genes.

To test that hypothesis with your data, set cluster_rows=T=FALSE and cluster_columns=FALSE in your own example, i.e., in the Heatmap() function

ADD REPLYlink written 3 months ago by Kevin Blighe30k

okay yes in this comparison with logfold 1.5 and adjp < 0.05 i was getting only 82 genes i will try with other comparison and see if i can do the same or not

ADD REPLYlink modified 3 months ago • written 3 months ago by krushnach80400

okay even i was trying only with up regulated genes ..but i couldn't make it work

ADD REPLYlink written 3 months ago by krushnach80400
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 756 users visited in the last hour