Closed:compare three test files
1
0
Entering edit mode
8.6 years ago
Kumar ▴ 170

I improved my script for comparing three files. It now compares file 1 to file 2 and file 1 to file 3, but it is not able to compare file 2 to file 3.

file 1

AT4G01510.1 1 6993 7241
AT1G01020.2 1 7320 8668
AT1G01050.1 1 31388 32672

file 2

AT1G43722.1 1 13306624 13307584
AT3G30520.1 1 13307748 13309520
AT5G59280.1 1 13330509 13331505

file 3

AT1G01010.1 1 3760 5627
AT1G01020.1 1 6918 7232
AT1G01020.1 1 8236 8666
#!/usr/bin/perl -w
# Command-line handling for the three-table comparison script.
#
# Required options (each takes a file name):
#   -s   described as "file1" in the usage text
#   -p   described as "file2" in the usage text
#   -t   described as "file3" in the usage text
use strict;
use warnings;
use Getopt::Std;

# getopts() stores each option's argument in the package variable
# $opt_<letter>, so the variables must be declared as package globals.
our ($opt_s, $opt_p, $opt_t);
getopts('s:p:t:');

# All three file names are mandatory: print the usage text and stop if any
# of them is missing (or false).
if (!$opt_s || !$opt_p || !$opt_t) {
    print "Usage: $0\n";
    print "-s file1 output file \n";
    print "-p file2 output file \n";
    print "-t file3 output file \n";
    exit;
}

# No further emptiness checks are needed here: the guard above has already
# exited when any of the three options is false.
my $tablefile1 = $opt_s;

# NOTE(review): the usage text calls -p "file2" and -t "file3", but -p is
# stored in $tablefile3 and -t in $tablefile2. The mapping is kept exactly as
# it was; confirm which one is intended before changing it.
my $tablefile3 = $opt_p;
my $tablefile2 = $opt_t;

# Load the first table into %hash.
#
# Each non-blank line is split on tabs into ID, chromosome, start and end.
# %hash maps a chromosome to a "/"-separated list of "ID:start:end" entries.
#
# Three-argument open with a lexical handle is used so that a file name
# containing characters such as "<", ">" or "|" is never treated as an
# open mode.
open(my $in1, '<', $tablefile1) or die "Can't open $tablefile1..exiting.\n";

my %hash = ();
while (my $line = <$in1>) {
    chomp $line;
    next if $line =~ /^\s*$/;

    my @cols = split(/\t/, $line);

    # A line with fewer than four fields cannot supply ID, chromosome,
    # start and end; skip it instead of storing undefined values.
    if (@cols < 4) {
        warn "Skipping line $. of $tablefile1: expected at least 4 tab-separated fields\n";
        next;
    }

    my ($id, $chr, $s, $e) = @cols;
    my $entry = "$id:$s:$e";

    # Append to the chromosome's existing list, or start a new one.
    $hash{$chr} = exists $hash{$chr} ? "$hash{$chr}/$entry" : $entry;
}
close $in1;

# Load the second table into the same %hash used for the first table.
#
# Each non-blank line is split on tabs into ID, chromosome, start and end,
# and stored as an "ID:start:end" entry appended (with "/") to the list
# kept under its chromosome.
#
# NOTE(review): entries from this table are appended to the same lists as the
# entries loaded earlier, and nothing in an entry records which table it came
# from.
#
# The error message reports $tablefile2, the file actually being opened.
open(my $in2, '<', $tablefile2) or die "Can't open $tablefile2..exiting.\n";

while (my $line = <$in2>) {
    chomp $line;
    next if $line =~ /^\s*$/;

    my @cols = split(/\t/, $line);

    # A line with fewer than four fields cannot supply ID, chromosome,
    # start and end; skip it instead of storing undefined values.
    if (@cols < 4) {
        warn "Skipping line $. of $tablefile2: expected at least 4 tab-separated fields\n";
        next;
    }

    my ($id, $chr, $s, $e) = @cols;
    my $entry = "$id:$s:$e";

    # Append to the chromosome's existing list, or start a new one.
    $hash{$chr} = exists $hash{$chr} ? "$hash{$chr}/$entry" : $entry;
}
close $in2;

# Stream the table named by $tablefile3 and report every stored entry in
# %hash (same chromosome) that overlaps the current line.
#
# Output is tab-separated on STDOUT, starting with a fixed header line.
#
# NOTE(review): %hash holds the entries of both previously loaded tables in a
# single list per chromosome, so the two loops below iterate over the same
# entries and cannot tell which table a hit came from. Comparing the two
# loaded tables with each other would require storing them separately.
open(my $in3, '<', $tablefile3) or die "Can't open $tablefile3..exiting.\n";
print "Chr\tfile1-ID\tStart\tEnd\tfile2-ID\tStart\tEnd\tStart-diff\tend-diff\tfile3-ID\tStart\tEnd\tstart-diff\tend-diff\n";

while (my $line = <$in3>) {
    chomp $line;
    next if $line =~ /^\s*$/;

    my @cols = split(/\t/, $line);

    # A line with fewer than four fields cannot supply ID, chromosome,
    # start and end; skip it instead of comparing undefined values.
    if (@cols < 4) {
        warn "Skipping line $. of $tablefile3: expected at least 4 tab-separated fields\n";
        next;
    }

    my ($id, $chr, $s, $e) = @cols;

    # Nothing stored for this chromosome: no comparison possible.
    my $info_str = $hash{$chr};
    next if (!$info_str);

    # Stored entries are "ID:start:end" strings joined with "/".
    my @pseudos = split(/\//, $info_str);

    # First pass: print the current line, the overlapping stored entry and
    # the start/end differences, without a line terminator.
    foreach my $pseudo (@pseudos) {
        my ($id1, $s1, $e1) = split(/:/, $pseudo);
        next unless is_overlap($s, $e, $s1, $e1);

        print "$chr\t$id\t$s\t$e\t$id1\t$s1\t$e1\t";
        my $d1 = $s1 - $s;
        my $d2 = $e1 - $e;
        print "$d1\t$d2\t";
    }

    # Second pass over the same entries: print the remaining columns and
    # terminate the output line.
    # NOTE(review): the ID printed here is the current line's $id, not the
    # stored entry's ID (the first field of the entry is discarded), while
    # the coordinates come from the stored entry. Kept as in the original;
    # confirm the intended column.
    foreach my $pseudo (@pseudos) {
        my (undef, $s2, $e2) = split(/:/, $pseudo);
        next unless is_overlap2($s, $e, $s2, $e2);

        print "$id\t$s2\t$e2\t";
        my $d3 = $s2 - $s;
        my $d4 = $e2 - $e;
        print "$d3\t$d4\n";
    }
}

close $in3;

# Decide whether interval ($s, $e) overlaps interval ($s1, $e1).
#
# Arguments: $s, $e   - start and end of the first interval
#            $s1, $e1 - start and end of the second interval
# Returns:   1 if any of the four tests below holds, 0 otherwise.
#
# The first two tests use a strict "<" against the end coordinate, so two
# non-empty intervals that merely touch at an end point are not reported.
sub is_overlap {
    my ($s, $e, $s1, $e1) = @_;

    # First interval starts inside the second.
    return 1 if $s1 <= $s && $s < $e1;

    # Second interval starts inside the first.
    return 1 if $s <= $s1 && $s1 < $e;

    # Second interval lies within the bounds of the first.
    return 1 if $s <= $s1 && $e1 <= $e;

    # First interval lies within the bounds of the second.
    return 1 if $s1 <= $s && $e <= $e1;

    return 0;
}

# Decide whether interval ($s, $e) overlaps interval ($s2, $e2).
#
# Arguments: $s, $e   - start and end of the first interval
#            $s2, $e2 - start and end of the second interval
# Returns:   1 when the intervals are judged to overlap, 0 otherwise.
#
# The tests are grouped by which interval starts first; together they are
# the same four conditions that is_overlap checks.
sub is_overlap2 {
    my ($s, $e, $s2, $e2) = @_;

    if ($s2 <= $s) {
        # Second interval starts at or before the first: overlap when the
        # first starts before the second ends, or the second reaches at
        # least to the end of the first.
        return 1 if $s < $e2 || $e2 >= $e;
    }

    if ($s2 >= $s) {
        # Second interval starts at or after the first: overlap when it
        # starts before the first ends, or ends no later than the first.
        return 1 if $s2 < $e || $e2 <= $e;
    }

    return 0;
}
alignment • 595 views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 2748 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6