data prerprocessing coding
2
0
Entering edit mode
3.3 years ago
sskimvd • 0

I want to move the row that matches the first row of data1 to the last row of data1.

data1

id  s1   s2   s3   s4

a    1    0    2     1

b    2    0    1     2

b   0    0    1     1

c    0   1    2     2

data2

id    here

a      a1

b      a2 

c      a3

after

id  s1   s2   s3   s4   here

a    1    0    2     1    a1

b    2    0    1     2    a2

b   0    0    1     1     a2

c    0   1    2     2     a3

help me

thank you

R • 676 views
ADD COMMENT
0
Entering edit mode

with join function:

$ join -1 1 -2 1 data1.txt data2.txt -t $'\t' 

id  s1  s2  s3  s4  here
a   1   0   2   1   a1
b   2   0   1   2   a2  
b   0   0   1   1   a2  
c   0   1   2   2   a3

with tsv-utils join function:

input:

$ cat data1.txt 
id  s1  s2  s3  s4
a   1   0   2   1
b   2   0   1   2
b   0   0   1   1
c   0   1   2   2

$ cat data2.txt 
id  here
a   a1
b   a2  
c   a3

output:

$ tsv-join -H -f data2.txt -k 1  -a 2   data1.txt 

id  s1  s2  s3  s4  here
a   1   0   2   1   a1
b   2   0   1   2   a2
b   0   0   1   1   a2
c   0   1   2   2   a3
ADD REPLY
0
Entering edit mode
ADD REPLY
2
Entering edit mode
3.3 years ago

Your data.

data_1 <- structure(list(id = c("a", "b", "b", "c"), s1 = c(1L, 2L, 0L, 
0L), s2 = c(0L, 0L, 0L, 1L), s3 = c(2L, 1L, 1L, 2L), s4 = c(1L, 
2L, 1L, 2L)), class = "data.frame", row.names = c(NA, -4L))

data_2 <- structure(list(id = c("a", "b", "c"), here = c("a1", "a2", "a3"
)), class = "data.frame", row.names = c(NA, -3L))

tidyverse solution

library("dplyr")

merged <- left_join(data_1, data_2, by="id")

> merged
  id s1 s2 s3 s4 here
1  a  1  0  2  1   a1
2  b  2  0  1  2   a2
3  b  0  0  1  1   a2
4  c  0  1  2  2   a3

data.table solution

library("data.table")

setDT(data_1, key="id")
setDT(data_2, key="id")

merged <- data_1[data_2]

> merged
   id s1 s2 s3 s4 here
1:  a  1  0  2  1   a1
2:  b  2  0  1  2   a2
3:  b  0  0  1  1   a2
4:  c  0  1  2  2   a3
ADD COMMENT
0
Entering edit mode
3.3 years ago
library(sqldf)

input:

> data_1
  id s1 s2 s3 s4
1  a  1  0  2  1
2  b  2  0  1  2
3  b  0  0  1  1
4  c  0  1  2  2

> data_2
  id here
1  a   a1
2  b   a2
3  c   a3

output:

> sqldf('select d1.*, d2."here"  from data_1 as d1  join data_2 as d2 where d1.id=d2.id')

  id s1 s2 s3 s4 here
1  a  1  0  2  1   a1
2  b  2  0  1  2   a2
3  b  0  0  1  1   a2
4  c  0  1  2  2   a3
ADD COMMENT

Login before adding your answer.

Traffic: 2740 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6