Hi,
I have a question about plotting a PCA on genes. I have samples in rows, with associated annotations (Batch and Plate information), and genes in columns. I have already performed a PCA on the samples and am familiar with that. However, I would now like to plot a PCA on the genes and color it by Batch and Plate information. Please assist me with this. An example of the data is given below.
library(FactoMineR)
library("ggfortify")
library("FactoMineR")
library("factoextra")
# Read the example table from the working directory, keeping text columns
# as character vectors rather than factors.
Neg_Dct <- read.csv("./Test.csv", stringsAsFactors = FALSE)

# Show the table at the console.
Neg_Dct

# Emit a reproducible text representation of the data frame for sharing.
dput(Neg_Dct)
structure(list(ID = c("S1_1", "S1_2", "S1_3", "S1_4", "S1_5",
"S1_6", "S1_7", "S1_8", "S1_9", "S1_10", "S1_11", "S1_12", "S1_13",
"S1_14", "S1_15", "S1_16", "S1_17", "S1_18", "S1_19", "S1_20",
"S1_21", "S1_22", "S1_23", "S1_24", "S1_25"), Batch = c("Batch_1",
"Batch_1", "Batch_1", "Batch_2", "Batch_2", "Batch_2", "Batch_1",
"Batch_1", "Batch_1", "Batch_2", "Batch_2", "Batch_2", "Batch_1",
"Batch_1", "Batch_2", "Batch_2", "Batch_2", "Batch_1", "Batch_1",
"Batch_2", "Batch_2", "Batch_2", "Batch_1", "Batch_1", "Batch_1"
), Plate = c("Plate_1", "Plate_2", "Plate_1", "Plate_2", "Plate_1",
"Plate_2", "Plate_1", "Plate_2", "Plate_1", "Plate_2", "Plate_1",
"Plate_2", "Plate_1", "Plate_2", "Plate_1", "Plate_2", "Plate_1",
"Plate_2", "Plate_1", "Plate_2", "Plate_1", "Plate_1", "Plate_2",
"Plate_1", "Plate_2"), Gene_1 = c(2.74566, 2.41701, 1.28156,
2.5121, 1.99305, 1.80165, 3.17652, 3.5806, 1.79384, 1.28138,
1.89935, 2.5723, 2.14152, 1.40297, 1.88353, 2.05175, 1.97743,
1.92165, 2.05574, 0.522052, 2.40835, 0.991803, 1.67695, 2.72437,
1.22242), Gene_2 = c(9.21655, 9.21203, 10.876, NA, 10.222, 8.64146,
9.5818, NA, 9.38491, NA, 8.72181, 11.4064, 9.25437, NA, 9.79516,
9.10643, 8.64458, 9.695, 7.95871, 12.0041, 8.19037, 8.19788,
9.03349, 6.66333, 9.83651), Gene_3 = c(1.24291, 1.21489, 0.655026,
0.785034, 0.856161, 0.245615, 1.33763, 0.198923, 0.532218, 1.00889,
0.186925, 1.37128, 0.905605, -0.408769, 0.453087, 1.15631, 0.410721,
0.625778, 0.417797, 0.0365906, 0.99665, 0.244682, 0.862058, 0.609123,
0.35954), Gene_4 = c(3.17245, 3.03454, 3.12256, 3.29299, 3.66615,
2.9414, 3.32122, 3.58629, 2.7757, 3.07628, 2.55718, 3.29047,
2.71563, 2.5873, 3.09971, 3.59135, 3.01682, 2.23962, 3.17484,
2.94082, 3.336, 2.87547, 3.45458, 3.43779, 3.18705), Gene_5 = c(3.99519,
4.35548, 4.00643, 4.02169, 3.85387, 3.54247, 3.81633, 2.82153,
3.62866, 4.47204, 2.49623, 5.18278, 3.65158, 3.04052, 3.97154,
4.17206, 3.21916, 3.64224, 3.42668, 3.51722, 3.83094, 2.9252,
3.90967, 3.07917, 2.94932), Gene_6 = c(3.91788, 4.14593, 2.91717,
3.62728, 4.02476, 5.54273, 3.39246, 3.78731, 4.07341, 4.04612,
2.59587, 8.49205, 2.3215, 2.63217, 2.56598, 2.27716, 2.90267,
3.99974, 3.39793, 3.27778, 3.86779, 2.95839, 4.31887, 3.5999,
2.68092), Gene_7 = c(8.22592, 7.68958, 9.191, 7.00414, 7.82801,
6.87766, 7.70689, NA, 6.67957, 7.62084, 7.42038, 10.2766, 6.66582,
6.57067, 7.29663, 7.34165, 7.55647, 6.73911, 6.79017, 7.8713,
6.6699, 7.04384, 8.14006, 6.89507, 7.50916), Gene_8 = c(4.80301,
4.95246, 2.92675, 3.88674, 2.92132, 3.44228, 4.58713, 4.84111,
3.05712, 3.29154, 4.03648, 4.38404, 4.16168, 2.93601, 2.84335,
3.39719, 3.46238, 2.94928, 3.55869, 2.49082, 4.08837, 2.34332,
3.6339, 4.55228, 2.54387), Gene_9 = c(7.8863, 6.88329, 7.9638,
6.17448, 7.57994, 7.79241, 8.43658, 3.6191, 7.07619, 7.04978,
6.63005, 11.5974, 7.80057, 4.03935, 8.55976, 9.19217, 7.66456,
6.95455, 7.08865, 7.78014, 7.66009, 4.76624, 7.60779, 6.42455,
7.34804), Gene_10 = c(4.48595, 4.729, 4.60472, 4.62689, 4.63785,
4.76703, 4.44203, 5.78705, 3.77214, 4.47739, 3.76823, 5.44039,
4.13479, 4.51685, 3.32923, 4.72064, 3.90275, 4.29956, 4.77556,
4.10673, 4.03972, 4.24786, 4.99717, 5.28136, 4.38755), Gene_11 = c(NA,
NA, 12.6253, NA, 13.8811, NA, 13.5465, 7.47741, 9.59239, NA,
NA, NA, 12.4803, NA, 17.0783, 12.9235, 12.0078, 12.339, NA, 14.2595,
13.9659, 11.3511, 13.4548, 14.3382, NA), Gene_12 = c(3.2572,
3.66556, 2.91065, 2.70477, 3.31886, 2.70622, 3.32486, 2.1, 2.77425,
3.14639, 2.12945, 4.05497, 2.65452, 2.56421, 1.96161, 3.1535,
2.48488, 2.7353, 2.68683, 2.37054, 2.99183, 2.44665, 3.29592,
3.05663, 2.85759), Gene_13 = c(5.88783, 5.28863, 6.08389, 5.5062,
5.78523, 6.21819, 5.89286, 5.63686, 5.0966, 7.30503, 5.24945,
6.58622, 5.38063, 4.83921, 4.84982, 6.29604, 5.41094, 4.48755,
5.14973, 5.62539, 5.06188, 5.59829, 6.41845, 5.56475, 6.43989
), Gene_14 = c(5.25527, 5.82523, 5.69302, 4.79978, 5.31339, 5.32151,
5.05776, NA, 5.41897, 6.76026, 4.46106, 7.00544, 4.17885, 4.96509,
3.41261, 5.00248, 5.07504, 4.91208, 5.09344, 5.14337, 5.36001,
4.56528, 5.48949, 5.31362, 4.97557), Gene_15 = c(3.22585, 2.99779,
2.78224, 2.91718, 2.89006, 2.37089, 3.38242, 3.11585, 2.31141,
3.03489, 2.22517, 3.737, 2.94255, 2.26009, 2.65806, 3.36151,
2.32279, 2.28086, 2.43151, 1.95159, 3.16607, 2.81872, 2.98992,
3.07958, 2.33871), Gene_16 = c(14.4875, 12.8944, NA, NA, NA,
12.9755, 14.6313, NA, 13.3301, NA, NA, NA, NA, NA, 13.9834, NA,
12.0268, NA, NA, NA, 13.7213, NA, 12.0839, 13.398, NA), Gene_17 = c(5.01994,
6.1439, 5.99065, 6.6743, 5.97207, 6.35524, 4.66218, 7.08466,
5.20557, 6.98409, 4.98144, 6.68725, 4.74617, 5.45292, 3.87166,
5.4348, 5.54169, 5.92897, 5.66602, 5.65002, 4.49191, 5.75363,
5.85605, 5.44937, 5.93573), Gene_18 = c(8.7047, 8.42065, 9.97416,
10.1412, 8.75691, 8.60371, 8.41236, NA, 8.06832, 8.80413, 8.0637,
9.47628, 8.25909, NA, 6.94259, 8.91285, 7.49903, 7.57374, 8.05756,
8.55374, 8.24625, 7.9067, 8.26153, 7.31693, 7.92083), Gene_19 = c(4.24694,
4.36666, 4.60216, 4.64256, 5.0665, 4.46013, 4.14319, 3.74303,
4.09912, 3.85835, 4.00904, 3.81451, 4.36853, 3.59563, 4.61738,
4.81951, 4.05342, 3.33607, 4.2358, 4.38918, 4.43542, 4.30526,
3.56997, 3.71209, 4.24122), Gene_20 = c(3.61052, 2.098, 3.18373,
3.53195, 3.38654, 3.80873, 3.17707, 2.81883, 2.81772, 3.5725,
2.65038, 4.19567, 3.10054, 3.1625, 2.09974, 2.84097, 3.24043,
3.37526, 3.08749, 2.6041, 3.03967, 2.42457, 3.32517, 3.16862,
3.17476), Gene_21 = c(6.23736, 7.05642, 6.25908, 6.38647, 6.36372,
7.72727, 5.8454, 6.78528, 5.79207, 5.96044, 5.64355, 7.99332,
5.93347, 5.98171, 5.49377, 5.99234, 5.70238, 6.39785, 5.39863,
6.43829, 5.75369, 7.20047, 6.65913, 6.01353, 6.38686), Gene_22 = c(1.81799,
2.37183, 1.17051, 1.91855, 1.54116, 1.97239, 1.85643, 2.15549,
1.72797, 1.42215, 1.39719, 3.54795, 1.879, 1.06311, 1.15683,
1.5778, 1.35862, 2.09668, 1.42871, 0.867846, 1.26178, 0.404501,
1.25818, 2.19537, 1.13493), Gene_23 = c(2.78058, 2.53238, 3.14276,
3.08658, 3.28497, 3.35548, 2.76634, 2.50289, 2.56554, 3.36543,
2.33317, 3.42169, 2.70564, 2.97701, 2.33172, 2.96386, 2.85909,
2.51073, 2.76909, 2.65596, 2.59477, 2.89682, 3.27021, 2.80932,
3.04574), Gene_24 = c(2.40581, 1.87007, 2.38937, 2.96403, 2.75568,
2.28178, 1.71635, 1.88332, 1.84544, 2.54554, 1.68298, 2.56284,
2.35033, 1.85742, 1.66729, 2.3666, 2.0839, 2.1535, 2.22574, 1.83554,
2.22627, 2.26724, 2.62143, 2.3956, 2.62112), Gene_25 = c(2.69548,
2.11143, 2.51654, 2.48409, 2.74743, 2.50751, 2.72225, 1.94942,
2.27108, 2.65104, 1.99515, 2.95257, 2.31222, 1.95298, 2.35311,
2.7179, 2.10779, 2.2902, 2.29184, 1.99961, 2.4081, 2.10598, 2.35059,
2.18436, 2.1967), Gene_26 = c(10.475, 10.3477, 9.52332, 6.89309,
10.4247, 8.5301, 9.1303, 5.0003, 9.59585, 8.75312, 8.24006, 9.60621,
10.1721, 5.86301, 10.3064, 11.2975, 9.65141, 8.69041, 7.868,
10.0429, 10.4186, 8.22059, 10.6262, 10.5224, 10.4781), Gene_27 = c(3.88185,
4.57222, 4.11611, 3.97254, 4.70415, 4.95692, 4.11781, 6.1079,
3.00593, 4.07605, 3.46867, 4.65033, 3.99537, 4.5666, 4.47645,
4.6634, 3.89716, 3.08337, 4.05497, 4.09892, 4.39914, 3.98082,
4.28408, 5.25143, 4.32329), Gene_28 = c(2.87576, 3.64546, 2.06121,
3.21908, 2.41477, 3.0286, 1.91063, 5.77133, 2.85094, 1.85037,
2.26775, 2.67921, 3.6171, 2.14517, 1.46753, 1.88992, 2.5231,
1.33496, 2.39585, 1.73231, 4.29797, 1.23472, 2.02585, 3.7009,
1.98129), Gene_29 = c(6.60579, 5.17272, 5.91187, 5.90883, 6.07068,
5.22712, 6.14627, 4.50813, 5.45807, 5.21917, 6.04496, 6.72243,
4.70608, 3.599, 4.53333, 5.7917, 6.26182, 6.32342, 5.16135, 4.99482,
5.78061, 3.5228, 5.70011, 5.64474, 6.22901), Gene_30 = c(0.00127792,
0.153625, -0.998857, -0.536397, -0.648177, -1.08603, -0.0586834,
-0.954571, -0.709482, -0.813375, -1.05033, 0.0500174, -0.657315,
-1.40147, -0.806338, -0.400814, -0.866002, -0.642641, -0.973266,
-1.51081, -0.348259, -1.49061, -0.674944, -0.423562, -1.08939
)), class = "data.frame", row.names = c(NA, -25L))
Thank you,
Toufiq
@ATpoint and RamRS, thank you for the reply. The solution provided here is a PCA at the sample level. However, I am looking to plot a PCA on genes and label them by Batch and Plate information, with genes in rows and samples in columns. Basically, I want to see the cluster formation of genes on the PCA with Batch- and Plate-level information (for instance, as in the figure below). I have transposed the data so that genes are in rows and samples are in columns. Does the `melt` function of `library(reshape2)` help?

Transpose the matrix. Also, it helps to paste `head(data.frame.object)` - `dput` is not really readable.

@RamRS, please find the data below:
Transpose: