Trying to show the gaps of each seq on bio::Graphics after converting clustalw
0
0
Entering edit mode
6.4 years ago

I've been having trouble for a while with this and have been trying to get this on my own, but I can't. I'm trying to display the gaps using Bio::Graphics after retrieving my sequences from an alignment file.

The command is ./aln.pl test-bioaln.aln > ll.png My expected output would be a .png file with sequences that are aligned and show gaps as a different color.

#!/usr/bin/perl
use Bio::AlignIO;
use Bio::Graphics;
my $line = shift @ARGV;
my $in = Bio::AlignIO->new(-file=>$line,-format=>"clustalw");
while($aln = $in->next_aln()){
foreach $seqobj($aln->each_seq()){
    my $seq = $seqobj->seq;
    my $id = $seqobj->id;
    my $length = $seqobj->length;
    my $seqobj = Bio::SeqFeature::Generic->new(-start =>1, -end=>$length,-display_name=>$id);
    push (@seq, $seqobj);
}
foreach $seq(@seq){
    my @features = $seq->get_SeqFeatures;
    my %sorted_features;
    for my $f (@features) {
        my $tag = "\-";
        push @{$sorted_features{$tag}},$f;
    }

    my $panel = Bio::Graphics::Panel->new(
        -length    => $seq->length,
        -key_style => 'between',
        -width     => 800,
        -pad_left  => 10,
        -pad_right => 10,
    );
    $panel->add_track(generic => Bio::SeqFeature::Generic->new(-start=>1,
            -end=>$seq->length),
        -glyph  => 'generic',
        -bgcolor => 'blue',
        -label  => 1,
    );

    my @colors = qw(cyan orange blue purple green chartreuse magenta yellow aqua);
    my $idx    = 0;
    for my $tag (sort keys my %sorted_features) {
        my $features = $sorted_features{$tag};
        $panel->add_track($features,
            -glyph    =>  'generic',
            -bgcolor  =>  $colors[$idx++ % @colors],
            -fgcolor  => 'black',
            -font2color => 'red',
            -key      => "${tag}s",
            -bump     => +1,
            -height   => 8,
            -label    => 1,
            -description => 1,
        );
    }
    print $panel->png;

}
}
alignment • 1.8k views
ADD COMMENT
0
Entering edit mode
 CLUSTAL W (1.81) multiple sequence alignment


JD1:1:102:1601:ORFJ00027      ------------------------------atgtataaacaacaatattttatttct--c
94a:1:107:117:orf00001        ------------------------------atgtataaacaacaatattttatttct-ac
118a:1:106:158122218:orf00020 ------------------------------atgtataaacaacaatattttatttct-gc
B31:1:100:4091:ORFB0018       ------------------------------atgtataaacaacaatattttatttctggc
72a:1:105:32:orf00022         ------------------------------atgtataaacaacaatattttatttctggc
64b:1:110:473:orf00001        ------------------------------atgtataaacaacaatattttatttctggc
29805:1:108:171:orf00001      ------------------------------atgtataaacaacaatattttatttctggc
BOL26:1:111:60:orf00001       ------------------------------atgtataaacaacaatattttatttctggc
CA-11.2A:1:109:33:orf00001    ------------------------------atgtataaacaacaatattttatttctggc
WI91-23:1:112:493:orf00001    ------------------------------atgtataaacaacaatattttatttctggc
297:1:103:411:ORFB00012       ttggatagattttatacaaagaaggtaataatgtataaacaacaatattttatttctggc
N40:1:101:1716:ORFK00021      ------------------------------atgtataaacaacaatattttatttctggc
ZS7:1:113:22:orf00001         ------------------------------atgtataaacaacaatattttatttctggc
                                                            ******************************


JD1:1:102:1601:ORFJ00027      aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
94a:1:107:117:orf00001        aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
118a:1:106:158122218:orf00020 aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
B31:1:100:4091:ORFB0018       aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
72a:1:105:32:orf00022         aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
64b:1:110:473:orf00001        aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
29805:1:108:171:orf00001      aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
BOL26:1:111:60:orf00001       aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
CA-11.2A:1:109:33:orf00001    aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
WI91-23:1:112:493:orf00001    aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
297:1:103:411:ORFB00012       aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
N40:1:101:1716:ORFK00021      aaggtgcaaggtgttggttttagatttttcacagagcaaatagcaaataatatgaaacta
ZS7:1:113:22:orf00001         aaggtacaaggtgttggttttagattttttacagagcaaatagcaaataatatgaaacta
                          ***** *********************** ******************************


JD1:1:102:1601:ORFJ00027      aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
94a:1:107:117:orf00001        aaaggatttgtaaaaaatctaaacgatggaagggtagaaattgtagctttctttaatact
118a:1:106:158122218:orf00020 aaaggatttgtaaaaaatctaaacgatggaagggtagaaattgtagctttctttaatact
B31:1:100:4091:ORFB0018       aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
72a:1:105:32:orf00022         aaaggatttgtaaaaaatctaaacgatggaagggtagaaattgtagctttctttaatact
64b:1:110:473:orf00001        aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
29805:1:108:171:orf00001      aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
BOL26:1:111:60:orf00001       aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
CA-11.2A:1:109:33:orf00001    aaaggatttgtaaaaaatctaaacgatggaagggtagaaattgtagctttctttaatact
WI91-23:1:112:493:orf00001    aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
297:1:103:411:ORFB00012       aaaggatttgtaaaaaatctaaacgatggaagggtagaaattgtagctttctttaatact
N40:1:101:1716:ORFK00021      aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
ZS7:1:113:22:orf00001         aaaggatttgtaaaaaatctcaacgatggaagggtagaaattgtagctttctttaatact
                              ******************** ***************************************
ADD REPLY
0
Entering edit mode

So far I have fixed the script to attach the sequence features to each sequence. I'm also able to get the ranges of each gap in each sequence. Im not sure how I should attach each of the ranges into the seqfeature though.

my $hit = "\-"; #gap character
#my $range = Number::Range->new();
my $range_list;
my $line = shift @ARGV;
my $in = Bio::AlignIO->new(-file=>$file,-format=>"clustalw"); # get the alignment file from input
my $aln = $in->next_aln();
foreach  my $locseq ($aln->each_seq){
    $seq = $locseq->seq;
    $id = $locseq->display_id;
    $length = $locseq->length;
    $seqobj = Bio::Seq->new(-string=>$seq, -id=>$id,-length=>$length); #addthe seq, length, and id to each seqobj
    my $feat = Bio::SeqFeature::Generic->new( -start=>1, -end=>$length, -display_name=>$id, primary=> 'gap' ); # set the features of each seqobj
    $seqobj->add_SeqFeature($feat); # attach the features to the seqobj
    push(@seqobj,$seqobj);
    push (@seqs, $seq);
}
foreach $seq(@seqs){
    my @seqss = split('', $seq); # explode the sequence
    @matches = grep { $seqss[$_] ~~ $hit} 0 .. $#seqss; #get the positions of each gap character "-"
    my $range = Number::Range->new();
    $range->addrange(@matches);
    $range_list = join ', ',map join('-',@$_), $range->rangeList; #get the ranges of each gap character
    push(@ranges, $range_list);
}
$panel = Bio::Graphics::Panel->new(
                                         -length    => $length,
                                         -key_style => 'between',
                                         -width     => 800,
                                         -pad_left  => 10,
                                         -pad_right => 10,

                                         );
sub gap_it {
            my $gd    = shift;
            my $panel = shift;
            my ($gap_start,$gap_end) = $panel->location2pixel(10,20);
            my $top                  = $panel->top;
            my $bottom               = $panel->bottom;
            my $gray                 = $panel->translate_color('gray');
            $gd->filledRectangle($gap_start,$top,$gap_end,$bottom,$gray);
}
    $panel->add_track( arrow => Bio::SeqFeature::Generic->new(-start=>1,
                                                              -end=>$seqobj->length),
                     -bump => 0,
                     -double=>1,
                     -tick => 2);
    foreach $seqobj(@seqobj){
        for my $f ($seqobj->get_SeqFeatures) {
            my $tag = $f->primary_tag;
            push @{$sorted_features{$tag}},$f;
    }
}

    my @colors = qw(cyan orange blue purple green chartreuse magenta yellow aqua);
    my $idx    = 0;
    for my $tag (sort keys %sorted_features) {
        my $features = $sorted_features{$tag};
        $panel->add_track($features,
                       -glyph    =>  'segments',
                       -bgcolor  =>  $colors[$idx++ % @colors],
                       -fgcolor  => 'black',
                       -font2color => 'red',
                       -key      => "${tag}s",
                       -bump     => +1,
                       -height   => 8,
                       -label    => 1,
                       -description => 1,
                      );
    }
    print $panel->png;
}
ADD REPLY

Login before adding your answer.

Traffic: 2539 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6