Question

How to plot a boxplot with two variables in ggplot2

0

Entering edit mode

4.2 years ago

mohammedtoufiq91 ▴ 250

Hi,

I am trying to draw a boxplot using two different variables (one is the sample ID and the other is Timepoints). I was able to plot with one variable and it worked fine. However, with two variables I run into problems when reshaping the data with melt() and then plotting with the ggplot2 library. I would also like to add the Timepoints column to my boxplot for visualisation purposes. Please let me know how to fix this issue.

Thank you,

Toufiq

library(dplyr)
library(ggplot2)

## Import the data: read the table from Test.csv in the working directory,
## keeping text columns as character vectors rather than factors.
NegDCt <- read.csv("./Test.csv", stringsAsFactors = FALSE)

## Print a copy-pasteable snapshot of the first six rows.
dput(head(NegDCt, n = 6L))
structure(list(ID = c("A-B-001", "B-B-007", "C-B-013", "A-B1", 
"B-B6", "C-B12"), Timepoints = c("A1", "A2", "A3", "A1", "A2", 
"A3"), Gene_A = c(-1.3863, -0.3659, -0.478299, -0.3454, -0.7535, 
-1.4663), Gene_B = c(-6.5571, -5.7602, -6.862, -6.0573, -5.4221, 
-7.3554), Gene_C = c(-1.1796, -0.8521, -0.9214, -0.506, -0.705199, 
-1.3257), Gene_D = c(-3.472, -2.0541, -2.4454, -2.3377, -2.4972, 
-3.6144), Gene_E = c(-4.7636, -4.7523, -5.1025, -3.8265, -4.0595, 
-4.7439), Gene_F = c(-4.4834, -4.183, -4.3915, -3.475, -4.9245, 
-4.7773), Gene_G = c(-6.2742, -6.5278, -6.3474, -4.8904, -4.6567, 
-5.0217), Gene_H = c(-2.8588, -1.8725, -1.9748, -1.6696, -2.1437, 
-2.5732), Gene_I = c(-8.1266, -12.277, -13.7045, -7.4222, -11.1681, 
-8.7579), Gene_J = c(-3.6304, -3.3921, -3.3893, -2.9027, -3.3669, 
-4.3405), Gene_K = c(NA, NA, -9.8856, -8.0189, -9.0919, -9.9972
), Gene_L = c(-3.987, -3.6162, -4.5081, -3.327, -3.8329, -4.7276
), Gene_M = c(-5.9545, -4.8041, -4.9806, -3.698, -3.9695, -4.436
), Gene_N = c(-4.9379, -4.3993, -5.2013, -4.9426, -4.0267, -3.6957
)), row.names = c(NA, 6L), class = "data.frame")

## Drop the Timepoints column by name rather than by position, so the right
## column is removed even if the column order of the CSV changes. drop = FALSE
## guarantees the result stays a data frame.
NegDCt_v1 <- NegDCt[, setdiff(names(NegDCt), "Timepoints"), drop = FALSE]

dput(head(NegDCt_v1))
structure(list(ID = c("A-B-001", "B-B-007", "C-B-013", "A-B1", 
"B-B6", "C-B12"), Gene_A = c(-1.3863, -0.3659, -0.478299, -0.3454, 
-0.7535, -1.4663), Gene_B = c(-6.5571, -5.7602, -6.862, -6.0573, 
-5.4221, -7.3554), Gene_C = c(-1.1796, -0.8521, -0.9214, -0.506, 
-0.705199, -1.3257), Gene_D = c(-3.472, -2.0541, -2.4454, -2.3377, 
-2.4972, -3.6144), Gene_E = c(-4.7636, -4.7523, -5.1025, -3.8265, 
-4.0595, -4.7439), Gene_F = c(-4.4834, -4.183, -4.3915, -3.475, 
-4.9245, -4.7773), Gene_G = c(-6.2742, -6.5278, -6.3474, -4.8904, 
-4.6567, -5.0217), Gene_H = c(-2.8588, -1.8725, -1.9748, -1.6696, 
-2.1437, -2.5732), Gene_I = c(-8.1266, -12.277, -13.7045, -7.4222, 
-11.1681, -8.7579), Gene_J = c(-3.6304, -3.3921, -3.3893, -2.9027, 
-3.3669, -4.3405), Gene_K = c(NA, NA, -9.8856, -8.0189, -9.0919, 
-9.9972), Gene_L = c(-3.987, -3.6162, -4.5081, -3.327, -3.8329, 
-4.7276), Gene_M = c(-5.9545, -4.8041, -4.9806, -3.698, -3.9695, 
-4.436), Gene_N = c(-4.9379, -4.3993, -5.2013, -4.9426, -4.0267, 
-3.6957)), row.names = c(NA, 6L), class = "data.frame")


## Transpose the gene columns (everything except the first column, ID) and
## convert the result back to a data frame, then name its columns after the
## sample IDs of the original data frame.
NegDCt_v1_transpose <- as.data.frame(t(NegDCt_v1[, -1]))
colnames(NegDCt_v1_transpose) <- NegDCt$ID

## Check the colnames assigned are correct
colnames(NegDCt_v1_transpose) == NegDCt$ID
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

## Load library (library() stops with an error if reshape2 is missing, whereas
## require() would only return FALSE and let the script fail later).
library(reshape2)

## Reshape to long format with one row per gene/sample pair.
## The gene names exist only as row names, which melt() does not carry over,
## so they are first copied into a real "Gene" column that serves as the id
## variable. Note that row.names() of a quoted string is NULL, so the names
## must be taken from the data frame object itself.
## check.names = FALSE keeps sample IDs such as "A-B-001" unmangled.
NegDCt_v1_transpose_melt <- melt(
  data.frame(
    Gene = rownames(NegDCt_v1_transpose),
    NegDCt_v1_transpose,
    check.names = FALSE,
    stringsAsFactors = FALSE
  ),
  id.vars = "Gene",
  variable.name = "ID"
)

dput(head(NegDCt_v1_transpose_melt))
structure(list(Gene = c("Gene_A", "Gene_B", "Gene_C", "Gene_D", 
"Gene_E", "Gene_F"), ID = structure(c(1L, 1L, 1L, 1L, 1L, 1L), .Label = c("A-B-001", 
"B-B-007", "C-B-013", "A-B1", "B-B6", "C-B12", "A-B01", "B-B06", 
"C-B012", "A", "B", "C", "A-B-012", "B-b-001", "C-B-025", "A-b006", 
"B-b013", "C-b001"), class = "factor"), value = c(-1.3863, -6.5571, 
-1.1796, -3.472, -4.7636, -4.4834)), row.names = c(NA, 6L), class = "data.frame")

boxplot ggplot2 variables reshape2 R • 1.1k views

ADD COMMENT • link 4.2 years ago by mohammedtoufiq91 ▴ 250