Question: Perl program that parses the DNASIS restriction enzyme file
0
gravatar for Sidney
5.1 years ago by
Sidney 0
United States
Sidney 0 wrote:

 

Code below is what I have so far. output not working at all.

    # Declare and initialize variables
    my %rebase_hash = ( );
    my @file_data = ( );
    my $query = '';
    my $dna = '';
    my $recognition_site = '';
    my $regexp = '';
    my @locations = ( );
    
    #Read in the file
    @file_data=get_file_data($rebasefile);
    
    #Extract sequence data from file being used
    $dna=extract_sequence(@file_data);
    
    ###Get rebase data into hash from dnasis
    %rebase_hash=parseREBASE('rebase.txt');   
    
    #Prompt user for input
    do {
    print "Search for what restriction site for (or quit)?: ";
    $query = <STDIN>;
    chomp $query;
    
    # Exit if empty query
    if ($query =~ /^\s*$/ ) {
    exit;
    }

    # Perform the search in the DNA sequence
    if ( exists $rebase_hash{$query} ) {
    ($recognition_site, $regexp) = split  " ", $rebase_hash{$query};
    
    # Create the restriction map
    @locations = match_positions($regexp, $dna);
    
    # Report the restriction map to the user
    if (@locations) {
    print "Searching for $query $recognition_site $regexp\n";
    print "A restriction site for $query at locations:\n";
    print join(" ", @locations), "\n";
    } else {
    print "A restriction enzyme $query is not in the DNA:\n";
    }
    }
    print "\n";
    } until ( $query =~ /quit/ );
    
     print parseREBASE;
    exit;

    # open_file
    #
    # - given filename, set filehandle
    sub open_file {
    my($filename) = 'rebase.txt';
    $filename=$_;
    my $fh;
    unless(open($fh, $filename)) {
    print "Cannot open file $filename\n";
    }
    return $fh;
    }
    
    # A subroutine to get data from a file given its filename
    # get_file_data
    sub get_file_data {
    my($filename) = @_;
    
    # Initialize variables
    my @filedata = ( );
    unless( open(GET_FILE_DATA, $filename) ) {
    print STDERR "Cannot open file \"$filename\"\n\n";
    exit;
    }
    @filedata = <GET_FILE_DATA>;
    close GET_FILE_DATA;
    return @filedata;
    }
    
    sub extract_sequence{

    my(@fasta_file_data) = @_;
    my $sequence = '';

    foreach my $line (@fasta_file_data) {
        # discard fasta header line
        if($line =~ /^>/) {
            next;
        # keep line, add to sequence string
        } else {
            $sequence .= $line;
        }
    }

    # remove non-sequence data (in this case, whitespace) from $sequence string
    $sequence=~ s/\s//g;

    return $sequence;
}
    
    
    sub IUB_to_regexp {
    my($iub) = @_;
    my $regular_expression = '';
    my %iub2character_class = (
    A => 'A',
    C => 'C',
    G => 'G',
    T => 'T',
    R => '[GA]',
    Y => '[CT]',
    M => '[AC]',
    K => '[GT]',
    S => '[GC]',
    W => '[AT]',
    B => '[CGT]',
    D => '[AGT]',
    H => '[ACT]',
    V => '[ACG]',
    N => '[ACGT]',
    );
    # Remove the ^ signs from the recognition sites
    $iub =~ s/\^//g;
    
    # Translate the iub sequence
    for ( my$i=0; $i < length($iub);$i++) {
    $regular_expression .= $iub2character_class{substr($iub,$i,1)};
    }
    return $regular_expression;
    }
    #Sub to match positions
    sub match_positions {
    my($regexp, $sequence) = @_;    
    my @positions = ( );
    #Determine positions
    while ($sequence=~/$regexp/ig) {
    push( @positions, pos($sequence) - length($&)+ 1);
    }
    return @positions;
    }
    
    #Sub for parse rebase
    sub parseREBASE {
    my($rebasefile) = @_;
    
    # Declare variables
    my @rebasefile = ( );
    my %rebase_hash = ( );
    my $name;
    my $site;
    my $regexp;
    
    # Read in the REBASE file
    my $rebase_filehandle = open_file('rebase.txt');
    while(<$rebase_filehandle>) {
    # Discard header lines
    (1../Rich Roberts/) and next;
    # Discard blank lines
    /^\s*$/ and next;
    
    chomp;
    $name = $_;
    $_ = <$rebase_filehandle>;
    chomp;
    $site = $_;
    $regexp = IUB_to_regexp($site);
    # Store the data into the hash
    $rebase_hash{$name} = "$site $regexp";
    }
    # Return the hash containing the reformatted REBASE data
    return %rebase_hash;
    }
   

 

 

 

perl • 2.3k views
ADD COMMENTlink modified 4.9 years ago by Dan D6.8k • written 5.1 years ago by Sidney 0
1

A good way my teacher always taught while debugging a program is to use print statement after each and every line of the program.

ADD REPLYlink modified 5.1 years ago • written 5.1 years ago by Prakki Rama2.3k

Thanks I will def implement this in my coding to try and debug the output I am looking for.

ADD REPLYlink written 5.1 years ago by Sidney 0
Please log in to add an answer.

Help
Access

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.
Powered by Biostar version 2.3.0
Traffic: 1691 users visited in the last hour