Question

Perl program that parses the DNASIS restriction enzyme file

0

Entering edit mode

9.7 years ago

Sidney • 0

Code below is what I have so far. output not working at all.

# Declare and initialize variables
my %rebase_hash = ( );
my @file_data = ( );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ( );

#Read in the file
@file_data=get_file_data($rebasefile);

#Extract sequence data from file being used
$dna=extract_sequence(@file_data);

###Get rebase data into hash from dnasis
%rebase_hash=parseREBASE('rebase.txt');   

#Prompt user for input
do {
    print "Search for what restriction site for (or quit)?: ";
    $query = <STDIN>;
    chomp $query;

    # Exit if empty query
    if ($query =~ /^\s*$/ ) {
        exit;
    }

    # Perform the search in the DNA sequence
    if ( exists $rebase_hash{$query} ) {
        ($recognition_site, $regexp) = split  " ", $rebase_hash{$query};

        # Create the restriction map
        @locations = match_positions($regexp, $dna);

        # Report the restriction map to the user
        if (@locations) {
            print "Searching for $query $recognition_site $regexp\n";
            print "A restriction site for $query at locations:\n";
            print join(" ", @locations), "\n";
        } else {
            print "A restriction enzyme $query is not in the DNA:\n";
        }
    }
    print "\n";
} until ( $query =~ /quit/ );

 print parseREBASE;
exit;

# open_file
#
# - given filename, set filehandle
sub open_file {
    my($filename) = 'rebase.txt';
    $filename=$_;
    my $fh;
    unless(open($fh, $filename)) {
        print "Cannot open file $filename\n";
    }
    return $fh;
}

# A subroutine to get data from a file given its filename
# get_file_data
sub get_file_data {
    my($filename) = @_;

    # Initialize variables
    my @filedata = ( );
    unless( open(GET_FILE_DATA, $filename) ) {
        print STDERR "Cannot open file \"$filename\"\n\n";
        exit;
    }
    @filedata = <GET_FILE_DATA>;
    close GET_FILE_DATA;
    return @filedata;
}

sub extract_sequence {

    my(@fasta_file_data) = @_;
    my $sequence = '';

    foreach my $line (@fasta_file_data) {
        # discard fasta header line
        if($line =~ /^>/) {
            next;
        # keep line, add to sequence string
        } else {
            $sequence .= $line;
        }
    }

    # remove non-sequence data (in this case, whitespace) from $sequence string
    $sequence=~ s/\s//g;

    return $sequence;
}


sub IUB_to_regexp {
    my($iub) = @_;
    my $regular_expression = '';
    my %iub2character_class = (
    A => 'A',
    C => 'C',
    G => 'G',
    T => 'T',
    R => '[GA]',
    Y => '[CT]',
    M => '[AC]',
    K => '[GT]',
    S => '[GC]',
    W => '[AT]',
    B => '[CGT]',
    D => '[AGT]',
    H => '[ACT]',
    V => '[ACG]',
    N => '[ACGT]',
    );
    # Remove the ^ signs from the recognition sites
    $iub =~ s/\^//g;

    # Translate the iub sequence
    for ( my$i=0; $i < length($iub);$i++) {
        $regular_expression .= $iub2character_class{substr($iub,$i,1)};
    }
    return $regular_expression;
}
#Sub to match positions
sub match_positions {
    my($regexp, $sequence) = @_;    
    my @positions = ( );
    #Determine positions
    while ($sequence=~/$regexp/ig) {
        push( @positions, pos($sequence) - length($&)+ 1);
    }
    return @positions;
}

#Sub for parse rebase
sub parseREBASE {
my($rebasefile) = @_;

# Declare variables
my @rebasefile = ( );
my %rebase_hash = ( );
my $name;
my $site;
my $regexp;

# Read in the REBASE file
my $rebase_filehandle = open_file('rebase.txt');
while(<$rebase_filehandle>) {
    # Discard header lines
    (1../Rich Roberts/) and next;
    # Discard blank lines
    /^\s*$/ and next;

    chomp;
    $name = $_;
    $_ = <$rebase_filehandle>;
    chomp;
    $site = $_;
    $regexp = IUB_to_regexp($site);
    # Store the data into the hash
    $rebase_hash{$name} = "$site $regexp";
}
# Return the hash containing the reformatted REBASE data
return %rebase_hash;
}

perl • 3.2k views

ADD COMMENT • link updated 3.1 years ago by Ram 43k • written 9.7 years ago by Sidney • 0

1

Entering edit mode

A good way my teacher always taught while debugging a program is to use print statement after each and every line of the program

ADD REPLY • link updated 3.1 years ago by Ram 43k • written 9.7 years ago by Prakki Rama ★ 2.7k

0

Entering edit mode

Thanks I will def implement this in my coding to try and debug the output I am looking for.

ADD REPLY • link updated 3.1 years ago by Ram 43k • written 9.7 years ago by Sidney • 0