Question

R pivot wider error. listcols output error

0

Entering edit mode

21 months ago

KABILAN ▴ 40

I have a dataset like below,

df1<-structure(list(Uniprot_IDs = c("P0A799|PGK", "P0A853|TNAA", "P0CE47|EFTU1", 
"P0A6F3|GLPK", "P0A6F5|CH60", "P0A9B2|G3P1", "P0A853|TNAA", "P0A6P1|EFTS", 
"P0A6P1|EFTS", "P0A799|PGK"), `1_3ng` = c(12305960196.5721, 24169710612.0476, 
NA, 8553811608.70032, 13176265141.6301, 92994780469.5607, 11373139178.993, 
NA, 8062061247.94512, 3484150815.20598), `2_3ng` = c(11629654800, 
25162283400, 31864546300, 8157173240, 12812379370, 90007498700, 
10191440110, NA, 7911370530, 3406054010), `3_3ng` = c(12503938417.8663, 
25733015601.0117, 34727094361.2997, 8857104380.18179, NA, 93988723611.341, 
11653192532.4546, NA, 7933102839.01341, NA), `4_7-5ng` = c(NA, 
79582218995.1549, 77615759060.3497, 21749287984.8341, 33342436650.5148, 
101254055758.836, 30624750667.6451, 39438567251.7351, 10726988796.4798, 
7850501475.22747), `5_7-5ng` = c(NA, 78743355495.2545, 81948536416.9992, 
NA, 34617564902.3219, 99485017820.8478, NA, 40420212151.9563, 
14804870783.7792, 8280398872.03417), `6_7-5ng` = c(NA, 80272416055.8845, 
77019098847.8474, 23045479130.9574, 32885483296.8046, 90789109337.1181, 
30678346321.0037, 37073444001.0421, 13710097518.7425, 7916821420.64152
), `7_10ng` = c(22617928037.5148, 97473230025.8853, 91579176089.4265, 
28086665669.9634, 38033883000.8102, NA, 37181868033.5073, 44274304023.6936, 
NA, 9288965106.5049), `8_10ng` = c(22091136513.3736, NA, 90754802145.7813, 
26405368418.6503, 36442770423.3661, NA, 36789459227.7515, 42793252584.0984, 
15307787846.1716, 8834742124.86943), `9_10ng` = c(24125219176.3177, 
98420360686.1339, 99355131865.2305, 28271975548.9608, 39837381317.8216, 
NA, 39481996086.9157, 47261977623.5276, 16463020175.2068, 9931809132.696
), `10_15ng` = c(30252776887.1842, 141726904178.35, 130889671408.26, 
38206477283.6549, 56021084469.4745, 100336249543.662, 53295491175.4506, 
62883519160.5278, NA, 13994955303.4972), `11_15ng` = c(32859283128.8916, 
161633827056.573, NA, 45497410866.4248, 61586094337.2513, NA, 
60508117975.6097, 73276218943.4545, NA, 15400735421.5), `12_15ng` = c(34372085877.8071, 
165557046117.222, 153975644961.53, 46279635074.4959, 61867667358.3367, 
106133922907.254, 63526552497.161, 76374667334.5682, NA, 15329671283.3959
)), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-10L))

And a group data,

groups <- structure(list(samples = c("1_3ng", "2_3ng", "3_3ng", "4_7-5ng", 
"5_7-5ng", "6_7-5ng", "7_10ng", "8_10ng", "9_10ng", "10_15ng", 
"11_15ng", "12_15ng"), groups = c("GrA", "GrA", "GrA", "GrB", 
"GrB", "GrB", "GrC", "GrC", "GrC", "GrD", "GrD", "GrD")), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -12L))

And I have used the following codes for removing the groupwise missing rows from the dataset.

new_colnames <- c("name", "group")
colnames(groups)<-new_colnames

x <- colnames(df1[,1]) 
df2 <- setNames(df1,replace(names(df1),names(df1)==x,"rowid"))

com_data <- df2 %>%
  pivot_longer(!rowid, values_to="mass") %>%
  inner_join(groups, by="name") %>%
  group_by(name) %>%
  filter(!all(is.na(mass))) %>%
  ungroup() %>%
  add_count(rowid) %>%
  filter(n == max(n)) %>%
  select(!c(group, n)) %>%
  pivot_wider(names_from=name, values_from=mass)

But I am not getting the desired output, it was like below,

structure(list(rowid = c("P0A799|PGK", "P0A853|TNAA", "P0A6P1|EFTS"
), `1_3ng` = list(c(12305960196.5721, 3484150815.20598), c(24169710612.0476, 
11373139178.993), c(NA, 8062061247.94512)), `2_3ng` = list(c(11629654800, 
3406054010), c(25162283400, 10191440110), c(NA, 7911370530)), 
    `3_3ng` = list(c(12503938417.8663, NA), c(25733015601.0117, 
    11653192532.4546), c(NA, 7933102839.01341)), `4_7-5ng` = list(
        c(NA, 7850501475.22747), c(79582218995.1549, 30624750667.6451
        ), c(39438567251.7351, 10726988796.4798)), `5_7-5ng` = list(
        c(NA, 8280398872.03417), c(78743355495.2545, NA), c(40420212151.9563, 
        14804870783.7792)), `6_7-5ng` = list(c(NA, 7916821420.64152
    ), c(80272416055.8845, 30678346321.0037), c(37073444001.0421, 
    13710097518.7425)), `7_10ng` = list(c(22617928037.5148, 9288965106.5049
    ), c(97473230025.8853, 37181868033.5073), c(44274304023.6936, 
    NA)), `8_10ng` = list(c(22091136513.3736, 8834742124.86943
    ), c(NA, 36789459227.7515), c(42793252584.0984, 15307787846.1716
    )), `9_10ng` = list(c(24125219176.3177, 9931809132.696), 
        c(98420360686.1339, 39481996086.9157), c(47261977623.5276, 
        16463020175.2068)), `10_15ng` = list(c(30252776887.1842, 
    13994955303.4972), c(141726904178.35, 53295491175.4506), 
        c(62883519160.5278, NA)), `11_15ng` = list(c(32859283128.8916, 
    15400735421.5), c(161633827056.573, 60508117975.6097), c(73276218943.4545, 
    NA)), `12_15ng` = list(c(34372085877.8071, 15329671283.3959
    ), c(165557046117.222, 63526552497.161), c(76374667334.5682, 
    NA))), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA, 
-3L))

And the warning message was,

Warning message:
Values from `mass` are not uniquely identified; output will contain list-cols.
* Use `values_fn = list` to suppress this warning.
* Use `values_fn = {summary_fun}` to summarise duplicates.
* Use the following dplyr code to identify duplicates.
  {data} %>%
    dplyr::group_by(rowid, name) %>%
    dplyr::summarise(n = dplyr::n(), .groups = "drop") %>%
    dplyr::filter(n > 1L)

How to get the dataframe output which contain single values in each column instead of list-cols.

tidyr pivot R dataframe • 638 views

ADD COMMENT • link updated 21 months ago by Basti ★ 2.0k • written 21 months ago by KABILAN ▴ 40

1

Entering edit mode

How would you like to deal with duplicate values ?

ADD REPLY • link 21 months ago by Basti ★ 2.0k