Sometimes the threads in perl would turn into zombie. And the whole program will just wait there. How to deal with this issue? What causes threads becoming zombie? Was it related to the pipe? How to avoid this?
The following is the codes that creates sample files.
#buildTest.pl
use strict;
use warnings;
sub generateChrs{
my ($outfile, $num, $range)=@_;
open OUTPUT, "|gzip>$outfile";
my @set=('A','T','C','G');
my $cnt=0;
while ($cnt<$num) {
# body...
my $pos=int(rand($range));
my $str = join '' => map $set[rand @set], 1 .. rand(200)+1;
print OUTPUT "$cnt\t$pos\t$str\n";
$cnt++
}
close OUTPUT;
}
sub new_chr{
my @chrs=1..22;
push @chrs,("X","Y","M", "Other");
return @chrs;
}
for my $chr (&new_chr){
generateChrs("$chr.gz",50000,100000)
}
The following codes will have zombie threads sometimes.
#paralRM.pl
use strict;
use threads;
use Thread::Semaphore;
my $s = Thread::Semaphore->new(10);
sub rmDup{
my $reads_chr=$_[0];
print "remove duplication $reads_chr START TIME: ",`date`;
return 0 if(!-s $reads_chr);
my $dup_removed_file=$reads_chr . ".rm.gz";
$s->down();
open READCHR, "gunzip -c $reads_chr |sort -n -k2 |" or die "Error: cannot open $reads_chr";
open OUTPUT, "|sort -k4 -n|gzip>$dup_removed_file";
my ($last_id, $last_pos, $last_reads)=split('\t',<READCHR>);
chomp($last_reads);
my $last_length=length($last_reads);
my $removalCnts=0;
while (<READCHR>) {
chomp;
my @line=split('\t',$_);
my ($id, $pos, $reads)=@line;
my $cur_length=length($reads);
if($last_pos==$pos){
#may dup
if($cur_length>$last_length){
($last_id, $last_pos, $last_reads)=@line;
$last_length=$cur_length;
}
$removalCnts++;
next;
}else{
#not dup
}
print OUTPUT join("\t",$last_id, $last_pos, $last_reads, $last_length, "\n");
($last_id, $last_pos, $last_reads)=@line;
$last_length=$cur_length;
}
print OUTPUT join("\t",$last_id, $last_pos, $last_reads, $last_length, "\n");
close OUTPUT;
close READCHR;
$s->up();
print "remove duplication $reads_chr END TIME: ",`date`;
#unlink("$reads_chr")
return $removalCnts;
}
sub parallelRMdup{
my @chrs=@_;
my %jobs;
my @removedCnts;
my @processing;
foreach my $chr(@chrs){
while (${$s}<=0) {
# body...
sleep 10;
}
$jobs{$chr}=async {
return &rmDup("$chr.gz")
}
push @processing, $chr;
};
#wait for all threads finish
foreach my $chr(@processing){
push @removedCnts, $jobs{$chr}->join();
}
}
sub new_chr{
my @chrs=1..22;
push @chrs,("X","Y","M", "Other");
return @chrs;
}
¶llelRMdup(&new_chr);
off-topic, better asked on stackoverflow.com