Off topic: Parsing PDB File Using Perl Script 2.
0
0
Entering edit mode
10.3 years ago
 #count for all amino acids existing in the protein
  $count_of_alanine=0;
 $count_of_arginine=0;
  $count_of_asparagine=0;
  $count_of_aspartic_acid=0;
   $count_of_cysteine=0;
 $count_of_glutamic_acid=0;
   $count_of_glutamine=0;
   $count_of_glycine=0;
   $count_of_histidine=0;
  $count_of_isoleucine=0;
  $count_of_leucine=0;
  $count_of_lysine=0;
   $count_of_methionine=0;
   $count_of_phenylalanine=0;
   $count_of_proline=0;
  $count_of_serine=0;
  $count_of_threonine=0;
   $count_of_tryptophan=0;
  $count_of_tyrosine=0;
   $count_of_valine=0;
  #count for all amino acids which hav z coordinate between -15 and 15
  $count_of_alanine2=0;
   $count_of_arginine2=0;
   $count_of_asparagine2=0;
  $count_of_aspartic_acid2=0;
  $count_of_cysteine2=0;
  $count_of_glutamic_acid2=0;
  $count_of_glutamine2=0;
   $count_of_glycine2=0;
   $count_of_histidine2=0;
  $count_of_isoleucine2=0;
 $count_of_leucine2=0;
    $count_of_lysine2=0;
 $count_of_methionine2=0;
 $count_of_phenylalanine2=0;
 $count_of_proline2=0;
   $count_of_serine2=0;
 $count_of_threonine2=0;
  $count_of_tryptophan2=0;
  $count_of_tyrosine2=0;
  $count_of_valine2=0;
 #count for groups of amino acids
  $count_of_charged=0;
   $count_of_polar=0;
  $count_of_aromatic=0;
   $count_of_hydrophobic=0;
 $count_of_charged2=0;
  $count_of_polar2=0;
  $count_of_aromatic2=0;
   $count_of_hydrophobic2=0;
   $count_of_charged3=0;
   $count_of_polar3=0;
   $count_of_aromatic3=0;
  $count_of_hydrophobic3=0;
   $count_of_charged4=0;
    $count_of_polar4=0;
   $count_of_aromatic4=0;
   $count_of_hydrophobic4=0;
   $count_of_charged5=0;
    $count_of_polar5=0;
   $count_of_aromatic5=0;
   $count_of_hydrophobic5=0;
   $count_of_charged6=0;
   $count_of_polar6=0;
   $count_of_aromatic6=0;
   $count_of_hydrophobic6=0;


 # input file query
 print "\nEnter the input file: ";
 $inputFile = <STDIN>;
 chomp $inputFile;

unless (open(INPUTFILE, $inputFile)) {
print "Cannot read from '$inputFile'.\nProgram closing.\n";
<STDIN>;
exit;
}

# load the file into an array
chomp(@dataArray = <INPUTFILE>);

# close the file
close(INPUTFILE);


#####################
# PARSE INPUT FILE! #
#####################

# parse the input file saving only backbone atoms coordinates
# format: [string "ATOM"] [number] [atom] [aa] whateva [3 decimal numbers] whateva with two dots in between
for ($line = 0; $line < scalar @dataArray; $line++) {
if ($dataArray[$line]=~/^HEADER\s+(.*?)$/) {
$header = $1;    
     }
if ($dataArray[$line]=~/^TITLE\s+(.*?)$/) {
$parsing{$line} = $1;
     }
if ($dataArray[$line] =~ m/ATOM\s+(\d+)\s+(\w+)\s+(\w{3})\s+.+\s+(\S+\.\S+)\s+(\S+\.\S+)\s+(\S+\.\S+)\s+.+\..+\..+/ig) {


    if (($2 eq "N" || $2 eq "CA" || $2 eq "C") && ($6 >= -15 && $6 <= 15) && (($3 eq "ARG") || ($3 eq "ASP") || ($3 eq "GLU") || ($3 eq "LYS"))) {
        $parsedData7{$line} = $1."\t\t".$3."\t\t".$4."\t\t".$5."\t\t".$6;
}

    if ($2 eq "N" || $2 eq "CA" || $2 eq "C") {
        $parsedData{$line} = $1."\t\t".$3."\t\t".$4."\t\t".$5."\t\t".$6;

if($3 eq "ALA"){
$count_of_alanine++;
}
if($3 eq "ARG"){
$count_of_arginine++;
}
if($3 eq "ASN"){
$count_of_asparagine++;
}
if($3 eq "ASP"){
$count_of_aspartic_acid++;
}
if($3 eq "CYS"){
$count_of_cysteine++;
}
if($3 eq "GLU"){
$count_of_glutamic_acid++;
}
if($3 eq "GLN"){
$count_of_glutamine++;
}
if($3 eq "GLY"){
$count_of_glycine++;
}
if($3 eq "HIS"){
$count_of_histidine++;
}
if($3 eq "ILE"){
$count_of_isoleucine++;
}
if($3 eq "LEU"){
$count_of_leucine++;
}
if($2 eq "LYS"){
$count_of_lysine++;
}
if($3 eq "MET"){
$count_of_methionine++;
}
if($3 eq "PHE"){
$count_of_phenylalanine++;
}
if($3 eq "PRO"){
$count_of_proline++;
}
if($3 eq "SER"){
$count_of_serine++;
}
if($3 eq "THR"){
$count_of_threonine++;
}
if($3 eq "TRP"){
$count_of_tryptophan++;
}
if($3 eq "TYR"){
$count_of_tyrosine++;
}
if($3 eq "VAL"){
$count_of_valine++;
}
if($3 eq "ARG"||$3 eq "ASP"||$3 eq "GLU"||$3 eq "LYS"){
$count_of_charged++;
}
if($3 eq "ASN"||$3 eq "GLN"||$3 eq "GLY"||$3 eq "MET"||$3 eq "PRO"){
$count_of_polar++;
}
if($3 eq "PHE"||$3 eq "TRP"||$3 eq "TYR"||$3 eq "HIS"){
$count_of_aromatic++;
}
if($3 eq "ALA"||$3 eq "ILE"||$3 eq "LEU"||$3 eq "VAL"){
$count_of_hydrophobic++;
}
}
 if (($2 eq "N" || $2 eq "CA" || $2 eq "C") && ($6 >= -15 && $6 <= 15)) {
        $parsedData2{$line} = $1."\t\t".$3."\t\t".$4."\t\t".$5."\t\t".$6;

if($3 eq "ALA"){
$count_of_alanine2++;
}
if($3 eq "ARG"){
$count_of_arginine2++;
}
if($3 eq "ASN"){
$count_of_asparagine2++;
}
if($3 eq "ASP"){
$count_of_aspartic_acid2++;
}
if($3 eq "CYS"){
$count_of_cysteine2++;
}
if($3 eq "GLU"){
$count_of_glutamic_acid2++;
}
if($3 eq "GLN"){
$count_of_glutamine2++;
}
if($3 eq "GLY"){
$count_of_glycine2++;
}
if($3 eq "HIS"){
$count_of_histidine2++;
}
if($3 eq "ILE"){
$count_of_isoleucine2++;
}
if($3 eq "LEU"){
$count_of_leucine2++;
}
if($3 eq "LYS"){
$count_of_lysine2++;
}
if($3 eq "MET"){
$count_of_methionine2++;
}
if($3 eq "PHE"){
$count_of_phenylalanine2++;
}
if($3 eq "PRO"){
$count_of_proline2++;
}
if($3 eq "SER"){
$count_of_serine2++;
}
if($3 eq "THR"){
$count_of_threonine2++;
}
if($3 eq "TRP"){
$count_of_tryptophan2++;
}
if($3 eq "TYR"){
$count_of_tyrosine2++;
}
if($3 eq "VAL"){
$count_of_valine2++;
}
if($3 eq "ARG"||$3 eq "ASP"||$3 eq "GLU"||$3 eq "LYS"){
$count_of_charged2++;
}
if($3 eq "ASN"||$3 eq "GLN"||$3 eq "GLY"||$3 eq "MET"||$3 eq "PRO"){
$count_of_polar2++;
}
if($3 eq "PHE"||$3 eq "TRP"||$3 eq "TYR"||$3 eq "HIS"){
$count_of_aromatic2++;
}
if($3 eq "ALA"||$3 eq "ILE"||$3 eq "LEU"||$3 eq "VAL"){
$count_of_hydrophobic2++;
}
}

  }
  }


     # create the output file name
    $outputFile = "coordinates_".$inputFile;

  # open the output file
  open (OUTFILE, ">$outputFile");


# print the data lines
print OUTFILE $header, "\n";
foreach $line (sort {$a <=> $b} keys %parsing) {
print OUTFILE $parsing{$line}."\n";
}
print OUTFILE $title, "\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "------------ALL BACKBONE AMINO ACIDS IN THE MEMBRANE PROTEIN-----------\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "\n";        
print OUTFILE "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
 foreach $line (sort {$a <=> $b} keys %parsedData) {
print OUTFILE $parsedData{$line}."\n";
}
 print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "------------ALL AMINO ACIDS WITH Z COORDINATE > -15 && < 15------------\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
foreach $line (sort {$a <=> $b} keys %parsedData2) {
print OUTFILE $parsedData2{$line}."\n";
}   
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "------------ALL Charged AMINO ACIDS WITH Z COORDINATE > -15 && < 15------------\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
foreach $line (sort {$a <=> $b} keys %parsedData7) {
print OUTFILE $parsedData7{$line}."\n";
}   
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "------------ALL AMINO ACIDS WITH Z COORDINATE > -5 && < 5--------------\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
foreach $line (sort {$a <=> $b} keys %parsedData5) {
print OUTFILE $parsedData2{$line}."\n";
}
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "------ALL AMINO ACIDS WITH Z COORDINATE > -15 && < -5// >5 && <15------\n";
print OUTFILE "-----------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
foreach $line (sort {$a <=> $b} keys %parsedData6) {
print OUTFILE $parsedData2{$line}."\n";
}
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "------------------------------------------------------------------------\n";
print OUTFILE "-----AMINO ACID NUMBERS AND PERCENTAGE FOR THE BACKBONE AMINO ACIDS-----\n";
print OUTFILE "------------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Amino acid\tTotal Number(N)\n";
print OUTFILE "--------------------------------------------------\n";
print OUTFILE "Alanine\t\t", $count_of_alanine++, "\t\t\n";
print OUTFILE "Arginine\t", $count_of_arginine++, "\t\t\n";
print OUTFILE "Asparagine\t", $count_of_asparagine++, "\t\t\n";
print OUTFILE "Aspartic Acid\t", $count_of_aspartic_acid++, "\t\t\n";
print OUTFILE "Cysteine\t", $count_of_cysteine++, "\t\t\n";
print OUTFILE "Glutamic Acid\t",$count_of_glutamic_acid++, "\t\t\n";
print OUTFILE "Glutamine\t",$count_of_glutamine++, "\t\t\n";
print OUTFILE "Glycine\t\t",$count_of_glycine++, "\t\t\n";
print OUTFILE "Histidine\t",$count_of_histidine++, "\t\t\n";
print OUTFILE "Isoleucine\t",$count_of_isoleucine++, "\t\t\n";
print OUTFILE "Leucine\t\t",$count_of_leucine++, "\t\t\n";
print OUTFILE "Lysine\t\t",$count_of_lysine++, "\t\t\n";
print OUTFILE "Methionine\t",$count_of_methionine++, "\t\t\n";
print OUTFILE "Phenylalanine\t",$count_of_phenylalanine++, "\t\t\n";
print OUTFILE "Proline\t\t",$count_of_proline++, "\t\t\n";
print OUTFILE "Serine\t\t",$count_of_serine++, "\t\t\n";
print OUTFILE "Threonine\t",$count_of_threonine++, "\t\t\n";
print OUTFILE "Tryptophan\t",$count_of_tryptophan++, "\t\t\n";
print OUTFILE "Tyrosine\t",$count_of_tyrosine++, "\t\t\n";
print OUTFILE "Valine\t\t",$count_of_valine++, "\t\t\n";
print OUTFILE "--------------------------------------------------\n";
print OUTFILE "Total\t\t", scalar(keys %parsedData), "\t\t\n";
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "CHARGED AMINO ACIDS\t\t\t",$count_of_charged++,"\n";
print OUTFILE "POLAR AMINO ACIDS\t\t\t",$count_of_polar++,"\n";
print OUTFILE "AROMATIC AMINO ACIDS\t\t\t",$count_of_aromatic++,"\n";
print OUTFILE "HYDROPHOBIC AMINO ACIDS\t\t\t",$count_of_hydrophobic++,"\n";
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "------------------------------------------------------------------------\n";
print OUTFILE "----AMINO ACID NUMBERS AND PERCENTAGE FOR Z COORDINATE > -15 && < 15----\n";
print OUTFILE "------------------------------------------------------------------------\n";
print OUTFILE "\n";    
print OUTFILE "Amino acid\tTotal Number(N)\n";
print OUTFILE "--------------------------------------------------\n";
print OUTFILE "Alanine\t\t", $count_of_alanine2++, "\t\t\n";
print OUTFILE "Arginine\t", $count_of_arginine2++, "\t\t\n";
print OUTFILE "Asparagine\t", $count_of_asparagine2++, "\t\t\n";
print OUTFILE "Aspartic Acid\t", $count_of_aspartic_acid2++, "\t\t\n";
print OUTFILE "Cysteine\t", $count_of_cysteine2++, "\t\t\n";
print OUTFILE "Glutamic Acid\t",$count_of_glutamic_acid2++, "\t\t\n";
print OUTFILE "Glutamine\t",$count_of_glutamine2++, "\t\t\n";
print OUTFILE "Glycine\t\t",$count_of_glycine2++, "\t\t\n";
print OUTFILE "Histidine\t",$count_of_histidine2++, "\t\t\n";
print OUTFILE "Isoleucine\t",$count_of_isoleucine2++, "\t\t\n";
print OUTFILE "Leucine\t\t",$count_of_leucine2++, "\t\t\n";
print OUTFILE "Lysine\t\t",$count_of_lysine2++, "\t\t\n";
print OUTFILE "Methionine\t",$count_of_methionine2++, "\t\t\n";
print OUTFILE "Phenylalanine\t",$count_of_phenylalanine2++, "\t\t\n";
print OUTFILE "Proline\t\t",$count_of_proline2++, "\t\t\n";
print OUTFILE "Serine\t\t",$count_of_serine2++, "\t\t\n";
print OUTFILE "Threonine\t",$count_of_threonine2++, "\t\t\n";
print OUTFILE "Tryptophan\t",$count_of_tryptophan2++, "\t\t\n";
print OUTFILE "Tyrosine\t",$count_of_tyrosine2++, "\t\t\n";
print OUTFILE "Valine\t\t",$count_of_valine2++, "\t\t\n";
print OUTFILE "--------------------------------------------------\n";
print OUTFILE "Total\t\t", scalar(keys %parsedData2), "\t\t\n";
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "CHARGED AMINO ACIDS\t\t\t",$count_of_charged2++,"\n";
print OUTFILE "POLAR AMINO ACIDS\t\t\t",$count_of_polar2++,"\n";
print OUTFILE "AROMATIC AMINO ACIDS\t\t\t",$count_of_aromatic2++,"\n";
print OUTFILE "HYDROPHOBIC AMINO ACIDS\t\t\t",$count_of_hydrophobic2++,"\n";
print OUTFILE "\n";
print OUTFILE "\n";
print OUTFILE "CHARGED AMINO ACIDS\t\t\t",$count_of_charged4++,"\n";
print OUTFILE "POLAR AMINO ACIDS\t\t\t",$count_of_polar4++,"\n";
print OUTFILE "AROMATIC AMINO ACIDS\t\t\t",$count_of_aromatic4++,"\n";
print OUTFILE "HYDROPHOBIC AMINO ACIDS\t\t\t",$count_of_hydrophobic4++,"\n";
print OUTFILE "\n";
print OUTFILE "\n";



# close the output file
close (OUTFILE);

# end message
print "The coordinates of '$inputFile' were saved into '$outputFile'.\n";

# end the program
exit;
pdb perl script • 2.6k views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 2008 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6