Perl program that parses the DNASIS restriction enzyme file
0
0
Entering edit mode
9.7 years ago
Sidney • 0

Code below is what I have so far. output not working at all.

# Declare and initialize variables
my %rebase_hash = ( );
my @file_data = ( );
my $query = '';
my $dna = '';
my $recognition_site = '';
my $regexp = '';
my @locations = ( );

#Read in the file
@file_data=get_file_data($rebasefile);

#Extract sequence data from file being used
$dna=extract_sequence(@file_data);

###Get rebase data into hash from dnasis
%rebase_hash=parseREBASE('rebase.txt');   

#Prompt user for input
do {
    print "Search for what restriction site for (or quit)?: ";
    $query = <STDIN>;
    chomp $query;

    # Exit if empty query
    if ($query =~ /^\s*$/ ) {
        exit;
    }

    # Perform the search in the DNA sequence
    if ( exists $rebase_hash{$query} ) {
        ($recognition_site, $regexp) = split  " ", $rebase_hash{$query};

        # Create the restriction map
        @locations = match_positions($regexp, $dna);

        # Report the restriction map to the user
        if (@locations) {
            print "Searching for $query $recognition_site $regexp\n";
            print "A restriction site for $query at locations:\n";
            print join(" ", @locations), "\n";
        } else {
            print "A restriction enzyme $query is not in the DNA:\n";
        }
    }
    print "\n";
} until ( $query =~ /quit/ );

 print parseREBASE;
exit;

# open_file
#
# - given filename, set filehandle
sub open_file {
    my($filename) = 'rebase.txt';
    $filename=$_;
    my $fh;
    unless(open($fh, $filename)) {
        print "Cannot open file $filename\n";
    }
    return $fh;
}

# A subroutine to get data from a file given its filename
# get_file_data
sub get_file_data {
    my($filename) = @_;

    # Initialize variables
    my @filedata = ( );
    unless( open(GET_FILE_DATA, $filename) ) {
        print STDERR "Cannot open file \"$filename\"\n\n";
        exit;
    }
    @filedata = <GET_FILE_DATA>;
    close GET_FILE_DATA;
    return @filedata;
}

sub extract_sequence {

    my(@fasta_file_data) = @_;
    my $sequence = '';

    foreach my $line (@fasta_file_data) {
        # discard fasta header line
        if($line =~ /^>/) {
            next;
        # keep line, add to sequence string
        } else {
            $sequence .= $line;
        }
    }

    # remove non-sequence data (in this case, whitespace) from $sequence string
    $sequence=~ s/\s//g;

    return $sequence;
}


sub IUB_to_regexp {
    my($iub) = @_;
    my $regular_expression = '';
    my %iub2character_class = (
    A => 'A',
    C => 'C',
    G => 'G',
    T => 'T',
    R => '[GA]',
    Y => '[CT]',
    M => '[AC]',
    K => '[GT]',
    S => '[GC]',
    W => '[AT]',
    B => '[CGT]',
    D => '[AGT]',
    H => '[ACT]',
    V => '[ACG]',
    N => '[ACGT]',
    );
    # Remove the ^ signs from the recognition sites
    $iub =~ s/\^//g;

    # Translate the iub sequence
    for ( my$i=0; $i < length($iub);$i++) {
        $regular_expression .= $iub2character_class{substr($iub,$i,1)};
    }
    return $regular_expression;
}
#Sub to match positions
sub match_positions {
    my($regexp, $sequence) = @_;    
    my @positions = ( );
    #Determine positions
    while ($sequence=~/$regexp/ig) {
        push( @positions, pos($sequence) - length($&)+ 1);
    }
    return @positions;
}

#Sub for parse rebase
sub parseREBASE {
my($rebasefile) = @_;

# Declare variables
my @rebasefile = ( );
my %rebase_hash = ( );
my $name;
my $site;
my $regexp;

# Read in the REBASE file
my $rebase_filehandle = open_file('rebase.txt');
while(<$rebase_filehandle>) {
    # Discard header lines
    (1../Rich Roberts/) and next;
    # Discard blank lines
    /^\s*$/ and next;

    chomp;
    $name = $_;
    $_ = <$rebase_filehandle>;
    chomp;
    $site = $_;
    $regexp = IUB_to_regexp($site);
    # Store the data into the hash
    $rebase_hash{$name} = "$site $regexp";
}
# Return the hash containing the reformatted REBASE data
return %rebase_hash;
}
perl • 3.2k views
ADD COMMENT
1
Entering edit mode

A good way my teacher always taught while debugging a program is to use print statement after each and every line of the program

ADD REPLY
0
Entering edit mode

Thanks I will def implement this in my coding to try and debug the output I am looking for.

ADD REPLY

Login before adding your answer.

Traffic: 1891 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6