Question: Problem with adding element in a array (Perl script)
0
gravatar for ArusjakGevorgyan
23 months ago by
Sweden
ArusjakGevorgyan20 wrote:

Hi everyone. I have a little problem with perl. My input file looks like this:

RAxML_result.EOG09371BVG.aa.fa.aln.phy.raxtree.test.1
IT_ine1
IM_nor1
OH_azt1
IE_sup1
IT_ras1
OD_mag1
OE_aff1
  

I want to take all my species and put them in my array. But my code doesn't work; I think that I'm not entering the if block where I tell the code when it should match a species. I will be grateful for your help.

#!/usr/bin/perl -w
# Reads the file named by the first command-line argument, echoes every line
# that survives the skip filters, and tries to collect a gene name in $gene
# and species names in @arr, printing both at the end.
    use warnings;
    use strict;  
    use 5.010;

my $comp = $ARGV[0];                                  
# Open the input for reading and "<input>.comp.csv" for writing; die on failure.
# Nothing in this script prints to OUT_HANDLE; all output goes to STDOUT.
if ( ! open ( FILE_HANDLE , "<" , $comp ) ) {      
        die "Error can't find the file: $comp because $!";}
if ( ! open ( OUT_HANDLE , ">" , $comp . ".comp.csv" ) ) { 
        # NOTE(review): ${$comp} dereferences $comp as if it were a reference,
        # which "use strict" rejects at run time; ${comp} was probably intended.
        die "Error can't make new file file: ${$comp}.comp.csv because $!";}

my @arr = ();   # species names are meant to be collected here
my $gene;       # gene name captured from the RAxML_result header line

# Process the input one line at a time.
while (my $lineContent = <FILE_HANDLE> ) {
   chomp $lineContent;
   if ( $lineContent =~ m /^\#/){            # skip lines starting with "#"
     next;
   }elsif($lineContent =~ m /\.raxtree$/){   # skip lines ending in ".raxtree"
    next;
   }elsif($lineContent =~ m /^WARNING/){     # skip lines starting with "WARNING"
    next;
   }elsif($lineContent =~ m /^f/){           # skip lines starting with "f"
    next;
   }else{
   print "$lineContent\n";                   # echo every line that was not skipped
 }
# Still inside the while loop: this foreach iterates over a one-element list
# holding the current line, so its body runs once per non-skipped line.
foreach my $line ($lineContent){
   if($line =~ m /^RAxML\_result\.(\w+)\.aa\.fa\.aln\.phy\.raxtree\.test\.\d/){
      $gene="$1\n";   # the newline is stored with the gene name
           # NOTE(review): this test is nested inside the header match above,
           # so it only ever runs on the RAxML_result header line; species
           # lines never reach it and @arr stays empty. It also requires
           # whitespace right after the leading word, which the header line
           # in the sample input does not have.
           if($line =~ m /^(\w+)\s/){
       push @arr,$1;
     }
   }
 }
}
print $gene;
print @arr;   # elements are printed back to back, with no separator
script array perl • 485 views
ADD COMMENTlink modified 23 months ago by Pierre Lindenbaum126k • written 23 months ago by ArusjakGevorgyan20
1

Hi, let me comment on your code to see what exactly you want to do:

#!/usr/bin/perl -w    # "-w" and "use warnings" both enable warnings; the lexically scoped "use warnings" is preferred
    use warnings;
    use strict;  
    use 5.010; # Requires Perl 5.10+ and enables its feature bundle; nothing below uses those features, so this can be removed

my $comp = $ARGV[0];      
# Opening the files works, with one caveat noted on the second die below.
if ( ! open ( FILE_HANDLE , "<" , $comp ) ) {      
        die "Error can't find the file: $comp because $!";}
if ( ! open ( OUT_HANDLE , ">" , $comp . ".comp.csv" ) ) { 
        # NOTE(review): ${$comp} dereferences $comp as if it were a reference,
        # which "use strict" rejects at run time; ${comp} was probably intended.
        die "Error can't make new file file: ${$comp}.comp.csv because $!";}

my @arr = ();
my $gene;  

# Here is where you iterate over the file, reading it line by line.
while (my $lineContent = <FILE_HANDLE> ) {
   chomp $lineContent;
   if ( $lineContent =~ m /^\#/){  # Skip comments
     next;
   }elsif($lineContent =~ m /\.raxtree$/){ # Skip lines ending in ".raxtree"
    next;
   }elsif($lineContent =~ m /^WARNING/){ # Skip lines that start with "WARNING"
    next;
   }elsif($lineContent =~ m /^f/){ # Skip lines that start with "f"?
    next;
   }else{
   print "$lineContent\n"; # Print lines that match none of the previous patterns. Why?
 }
# Only the if/elsif/else chain has ended here; the while loop is still open
# (it closes after the foreach). This foreach iterates over a one-element
# list, so it runs once for each line that was not skipped.
foreach my $line ($lineContent){
   # I guess you want to parse this line to get the gene name, save it in a variable, and then save the species lines that follow in an array.
   if($line =~ m /^RAxML\_result\.(\w+)\.aa\.fa\.aln\.phy\.raxtree\.test\.\d/){
      $gene="$1\n";
           # NOTE(review): this test sits inside the header match, so it only
           # runs on the RAxML_result line itself; species lines never get
           # here, which is why @arr stays empty.
           if($line =~ m /^(\w+)\s/){
       push @arr,$1;
     }
   }
 }
}
print $gene;
print @arr;

Guessing at what you want, I can suggest:

#!/usr/bin/perl

# Group the species names listed in a RAxML summary file by gene.
#
# Input  (first argument): a text file in which each
#        "RAxML_result.<gene>.aa.fa.aln.phy.raxtree.test.<n>" header line is
#        followed by one species name per line.
# Output ("<input>.comp.csv"): one line per gene, sorted by gene name, of the
#        form "<gene>: <species1>,<species2>,...".
#
# Dies if the argument is missing or if either file cannot be opened, and if
# closing the output file fails.

use warnings;
use strict;

my $comp = $ARGV[0];
defined $comp
    or die "Usage: $0 <input_file>\n";

my $out_path = $comp . ".comp.csv";

# Lexical filehandles instead of barewords: they are scoped to this file and
# cannot collide with handles of the same name elsewhere.
open(my $in_fh, "<", $comp)
    or die "Error can't find the file: $comp because $!";
open(my $out_fh, ">", $out_path)
    or die "Error can't make new file file: $out_path because $!";

my %speciesByGene = ();   # gene name => array ref of species, in file order
my $gene;                 # gene of the most recent header line, if any

while (my $line = <$in_fh>) {
    chomp $line;

    # A header line names the gene that the following species belong to.
    if ($line =~ m/^RAxML_result\.(.+)\.aa\.fa\.aln\.phy\.raxtree\.test\.\d+/) {
        $gene = $1;
        next;
    }

    # Skip comments, "*.raxtree" lines, warnings and lines starting with "f".
    next if $line =~ m/^\#/;
    next if $line =~ m/\.raxtree$/;
    next if $line =~ m/^WARNING/;
    next if $line =~ m/^f/;

    # Keep only the text before the first whitespace character; this also
    # drops a trailing carriage return from files with Windows line endings.
    $line =~ s/\s.*//;

    # Blank lines and lines that begin with whitespace leave nothing behind;
    # do not record them as empty species names.
    next if $line eq '';

    # A species line before any header has no gene to be filed under.
    if (!defined $gene) {
        warn "Skipping '$line' (line $. of $comp): no RAxML_result header seen yet\n";
        next;
    }

    push @{ $speciesByGene{$gene} }, $line;
}
close $in_fh;

# print results
foreach my $name (sort keys %speciesByGene) {
    print {$out_fh} "$name: ", join(",", @{ $speciesByGene{$name} }), "\n";
}

# Buffered write errors (e.g. a full disk) only surface when the handle is
# closed, so the result of close must be checked.
close $out_fh
    or die "Error can't finish writing $out_path because $!";

Output:

EOG09371BVG: IT_ine1,IM_nor1,OH_azt1,IE_sup1,IT_ras1,OD_mag1,OE_aff1
ADD REPLYlink written 23 months ago by JC9.5k

Thank you so much for your help and time.

ADD REPLYlink written 23 months ago by ArusjakGevorgyan20
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 879 users visited in the last hour