Entering edit mode
10.3 years ago
hmarajkrishnan
•
0
# Start every tally at zero. The counters are package globals that the
# parsing loop increments and the report section prints.

# One counter per amino acid, covering the whole protein.
$_ = 0 for (
    $count_of_alanine,       $count_of_arginine,   $count_of_asparagine,
    $count_of_aspartic_acid, $count_of_cysteine,   $count_of_glutamic_acid,
    $count_of_glutamine,     $count_of_glycine,    $count_of_histidine,
    $count_of_isoleucine,    $count_of_leucine,    $count_of_lysine,
    $count_of_methionine,    $count_of_phenylalanine,
    $count_of_proline,       $count_of_serine,     $count_of_threonine,
    $count_of_tryptophan,    $count_of_tyrosine,   $count_of_valine,
);

# One counter per amino acid, restricted to atoms whose z coordinate lies
# between -15 and 15.
$_ = 0 for (
    $count_of_alanine2,       $count_of_arginine2,   $count_of_asparagine2,
    $count_of_aspartic_acid2, $count_of_cysteine2,   $count_of_glutamic_acid2,
    $count_of_glutamine2,     $count_of_glycine2,    $count_of_histidine2,
    $count_of_isoleucine2,    $count_of_leucine2,    $count_of_lysine2,
    $count_of_methionine2,    $count_of_phenylalanine2,
    $count_of_proline2,       $count_of_serine2,     $count_of_threonine2,
    $count_of_tryptophan2,    $count_of_tyrosine2,   $count_of_valine2,
);

# Counters for groups of amino acids (charged, polar, aromatic, hydrophobic).
# The unsuffixed set and the sets suffixed 2 to 6 are separate tallies of the
# same four groups.
$_ = 0 for (
    $count_of_charged,  $count_of_polar,  $count_of_aromatic,  $count_of_hydrophobic,
    $count_of_charged2, $count_of_polar2, $count_of_aromatic2, $count_of_hydrophobic2,
    $count_of_charged3, $count_of_polar3, $count_of_aromatic3, $count_of_hydrophobic3,
    $count_of_charged4, $count_of_polar4, $count_of_aromatic4, $count_of_hydrophobic4,
    $count_of_charged5, $count_of_polar5, $count_of_aromatic5, $count_of_hydrophobic5,
    $count_of_charged6, $count_of_polar6, $count_of_aromatic6, $count_of_hydrophobic6,
);
# input file query
# Prompts for a file name on STDIN, then loads every line of that file into
# @dataArray (line endings removed). $inputFile keeps the name for later use.
print "\nEnter the input file: ";
$inputFile = <STDIN>;
# STDIN can already be at end-of-file, in which case the read yields undef.
$inputFile = '' unless defined $inputFile;
chomp $inputFile;
# Two-argument open ignored surrounding blanks in the name; trimming them
# here keeps such input working with the three-argument form used below.
$inputFile =~ s/^\s+|\s+$//g;
# Three-argument open treats the typed text purely as a file name, so
# characters such as '>' or '|' cannot change the open mode or run a command.
my $input_fh;
unless (open($input_fh, '<', $inputFile)) {
    print "Cannot read from '$inputFile': $!\nProgram closing.\n";
    # wait for Enter so the message stays visible before the program ends
    <STDIN>;
    # non-zero status tells a calling shell that nothing was processed
    exit 1;
}
# load the file into an array
chomp(@dataArray = <$input_fh>);
# close the file
close($input_fh);
#####################
# PARSE INPUT FILE! #
#####################
# Walks @dataArray once and fills the package globals read by the report:
#   $header       text of the HEADER line (the last one seen wins)
#   %parsing      line index => text of each TITLE line
#   %parsedData   line index => row for every backbone atom (N, CA, C)
#   %parsedData2  same rows, only atoms with -15 <= z <= 15
#   %parsedData7  same rows, only charged residues with -15 <= z <= 15
#   $count_of_*   tallies, incremented once per matching backbone atom line
#                 (so a residue with N, CA and C lines contributes three)
# Expected atom line format:
#   [string "ATOM"] [number] [atom] [aa] anything [3 decimal numbers]
#   anything containing two dots

# Half-width of the z window used for the "2" counters and hashes.
our $membrane_half_width = 15;

# Atom names that count as backbone atoms.
our %is_backbone_atom = map { ( $_ => 1 ) } qw(N CA C);

# Residue code => [ whole-protein counter, z-window counter ].
our %residue_counters = (
    ALA => [ \$count_of_alanine,       \$count_of_alanine2 ],
    ARG => [ \$count_of_arginine,      \$count_of_arginine2 ],
    ASN => [ \$count_of_asparagine,    \$count_of_asparagine2 ],
    ASP => [ \$count_of_aspartic_acid, \$count_of_aspartic_acid2 ],
    CYS => [ \$count_of_cysteine,      \$count_of_cysteine2 ],
    GLU => [ \$count_of_glutamic_acid, \$count_of_glutamic_acid2 ],
    GLN => [ \$count_of_glutamine,     \$count_of_glutamine2 ],
    GLY => [ \$count_of_glycine,       \$count_of_glycine2 ],
    HIS => [ \$count_of_histidine,     \$count_of_histidine2 ],
    ILE => [ \$count_of_isoleucine,    \$count_of_isoleucine2 ],
    LEU => [ \$count_of_leucine,       \$count_of_leucine2 ],
    LYS => [ \$count_of_lysine,        \$count_of_lysine2 ],
    MET => [ \$count_of_methionine,    \$count_of_methionine2 ],
    PHE => [ \$count_of_phenylalanine, \$count_of_phenylalanine2 ],
    PRO => [ \$count_of_proline,       \$count_of_proline2 ],
    SER => [ \$count_of_serine,        \$count_of_serine2 ],
    THR => [ \$count_of_threonine,     \$count_of_threonine2 ],
    TRP => [ \$count_of_tryptophan,    \$count_of_tryptophan2 ],
    TYR => [ \$count_of_tyrosine,      \$count_of_tyrosine2 ],
    VAL => [ \$count_of_valine,        \$count_of_valine2 ],
);

# Residue code => group name.
# NOTE(review): CYS, SER and THR belong to no group, exactly as in the
# original conditions -- confirm that this is the intended classification.
our %group_of_residue = (
    ( map { ( $_ => 'charged' ) }     qw(ARG ASP GLU LYS) ),
    ( map { ( $_ => 'polar' ) }       qw(ASN GLN GLY MET PRO) ),
    ( map { ( $_ => 'aromatic' ) }    qw(PHE TRP TYR HIS) ),
    ( map { ( $_ => 'hydrophobic' ) } qw(ALA ILE LEU VAL) ),
);

# Group name => [ whole-protein counter, z-window counter ].
our %group_counters = (
    charged     => [ \$count_of_charged,     \$count_of_charged2 ],
    polar       => [ \$count_of_polar,       \$count_of_polar2 ],
    aromatic    => [ \$count_of_aromatic,    \$count_of_aromatic2 ],
    hydrophobic => [ \$count_of_hydrophobic, \$count_of_hydrophobic2 ],
);

# Record whatever one input line contributes to the globals listed above.
#   $line_no  index of the line in @dataArray (used as the hash key)
#   $text     the line itself, without its line ending
# Returns 1 when the line was stored as a backbone atom, nothing otherwise.
sub _parse_pdb_line {
    my ($line_no, $text) = @_;

    if ($text =~ /^HEADER\s+(.*?)$/) {
        $header = $1;
    }
    if ($text =~ /^TITLE\s+(.*?)$/) {
        $parsing{$line_no} = $1;
    }

    # Anchored at the start of the line: without the anchor the pattern also
    # matched the "ATOM" inside "HETATM", so those records were treated as
    # protein atoms.
    return unless $text =~ m/^ATOM\s+(\d+)\s+(\w+)\s+(\w{3})\s+.+\s+(\S+\.\S+)\s+(\S+\.\S+)\s+(\S+\.\S+)\s+.+\..+\..+/i;
    # Copy the captures straight away so later matches cannot clobber them.
    my ($serial, $atom, $residue, $x, $y, $z) = ($1, $2, $3, $4, $5, $6);
    return unless $is_backbone_atom{$atom};

    my $row   = join "\t\t", $serial, $residue, $x, $y, $z;
    my $group = $group_of_residue{$residue};
    my $in_window = ($z >= -$membrane_half_width && $z <= $membrane_half_width);

    $parsedData{$line_no} = $row;
    if ($in_window) {
        $parsedData2{$line_no} = $row;
        if (defined $group && $group eq 'charged') {
            $parsedData7{$line_no} = $row;
        }
    }

    # Slot 0 holds the whole-protein counters, slot 1 the z-window counters.
    # Lysine is looked up by residue name like every other amino acid; the
    # original compared the atom name with "LYS", so it was never counted.
    my @slots = $in_window ? (0, 1) : (0);
    for my $slot (@slots) {
        ${ $residue_counters{$residue}[$slot] }++ if exists $residue_counters{$residue};
        ${ $group_counters{$group}[$slot] }++     if defined $group;
    }
    return 1;
}

# parse the input file saving only backbone atoms coordinates
for my $line_index (0 .. $#dataArray) {
    _parse_pdb_line($line_index, $dataArray[$line_index]);
}
# create the output file name
# The "coordinates_" prefix is inserted in front of the last path component,
# so an input given with a directory part ("data/1abc.pdb") is written to
# "data/coordinates_1abc.pdb". Prefixing the whole path pointed into a
# "coordinates_data" directory instead. A bare file name is unaffected.
($outputFile = $inputFile) =~ s{([^/\\]*)\z}{coordinates_$1};

# open the output file
# Checked three-argument open on a lexical handle: if the file cannot be
# created the program says why and stops, instead of printing to a closed
# handle and then reporting success.
my $out_fh;
unless (open($out_fh, '>', $outputFile)) {
    print "Cannot write to '$outputFile': $!\nProgram closing.\n";
    # wait for Enter so the message stays visible before the program ends
    <STDIN>;
    exit 1;
}

# print the data lines
my $header_text = defined $header ? $header : '';
print {$out_fh} $header_text, "\n";
foreach my $title_line (sort { $a <=> $b } keys %parsing) {
    print {$out_fh} $parsing{$title_line}, "\n";
}
# The original printed a $title scalar here that nothing in this script
# assigns (TITLE text is kept in %parsing and written by the loop above).
# The resulting blank line is kept so the report layout does not change.
print {$out_fh} "\n";

# ---- coordinate listings -------------------------------------------------
# Each entry: [ banner line, hash of line index => formatted row ].
# Rows are written from the same hash whose keys are iterated; the last two
# sections used to walk %parsedData5 / %parsedData6 but print %parsedData2.
# NOTE(review): no code visible in this script fills %parsedData5 or
# %parsedData6, so those two sections print only their headings.
my $section_rule  = "-----------------------------------------------------------------------\n";
my $column_header = "Atom Number\tAmino acid\tX coordinate\tY Coordinate\tZ Coordinate\n";
my @coordinate_sections = (
    [ "------------ALL BACKBONE AMINO ACIDS IN THE MEMBRANE PROTEIN-----------\n", \%parsedData ],
    [ "------------ALL AMINO ACIDS WITH Z COORDINATE > -15 && < 15------------\n", \%parsedData2 ],
    [ "------------ALL Charged AMINO ACIDS WITH Z COORDINATE > -15 && < 15------------\n", \%parsedData7 ],
    [ "------------ALL AMINO ACIDS WITH Z COORDINATE > -5 && < 5--------------\n", \%parsedData5 ],
    [ "------ALL AMINO ACIDS WITH Z COORDINATE > -15 && < -5// >5 && <15------\n", \%parsedData6 ],
);
for my $section (@coordinate_sections) {
    my ($banner, $rows) = @{$section};
    print {$out_fh} $section_rule, $banner, $section_rule, "\n", $column_header;
    for my $row_key (sort { $a <=> $b } keys %{$rows}) {
        print {$out_fh} $rows->{$row_key}, "\n";
    }
    print {$out_fh} "\n\n";
}

# ---- count tables --------------------------------------------------------
# Labels carry their own trailing tabs so the columns line up as before.
my $count_rule = "------------------------------------------------------------------------\n";
my $table_rule = "--------------------------------------------------\n";
my @residue_labels = (
    "Alanine\t\t",     "Arginine\t",   "Asparagine\t",    "Aspartic Acid\t",
    "Cysteine\t",      "Glutamic Acid\t", "Glutamine\t",  "Glycine\t\t",
    "Histidine\t",     "Isoleucine\t", "Leucine\t\t",     "Lysine\t\t",
    "Methionine\t",    "Phenylalanine\t", "Proline\t\t",  "Serine\t\t",
    "Threonine\t",     "Tryptophan\t", "Tyrosine\t",      "Valine\t\t",
);
my @group_labels = (
    "CHARGED AMINO ACIDS\t\t\t",
    "POLAR AMINO ACIDS\t\t\t",
    "AROMATIC AMINO ACIDS\t\t\t",
    "HYDROPHOBIC AMINO ACIDS\t\t\t",
);

# Writes the four group totals followed by two blank lines.
# Counts are printed as plain numbers ("+ 0"); the original printed them
# with a post-increment, which also modified every counter it displayed.
my $print_group_counts = sub {
    my @counts = @_;
    for my $i (0 .. $#group_labels) {
        print {$out_fh} $group_labels[$i], $counts[$i] + 0, "\n";
    }
    print {$out_fh} "\n\n";
};

# Each entry: [ banner, per-residue counts in @residue_labels order,
#               total number of rows, group counts in @group_labels order ].
my @count_tables = (
    [
        "-----AMINO ACID NUMBERS AND PERCENTAGE FOR THE BACKBONE AMINO ACIDS-----\n",
        [
            $count_of_alanine,       $count_of_arginine,   $count_of_asparagine,
            $count_of_aspartic_acid, $count_of_cysteine,   $count_of_glutamic_acid,
            $count_of_glutamine,     $count_of_glycine,    $count_of_histidine,
            $count_of_isoleucine,    $count_of_leucine,    $count_of_lysine,
            $count_of_methionine,    $count_of_phenylalanine,
            $count_of_proline,       $count_of_serine,     $count_of_threonine,
            $count_of_tryptophan,    $count_of_tyrosine,   $count_of_valine,
        ],
        scalar(keys %parsedData),
        [ $count_of_charged, $count_of_polar, $count_of_aromatic, $count_of_hydrophobic ],
    ],
    [
        "----AMINO ACID NUMBERS AND PERCENTAGE FOR Z COORDINATE > -15 && < 15----\n",
        [
            $count_of_alanine2,       $count_of_arginine2,   $count_of_asparagine2,
            $count_of_aspartic_acid2, $count_of_cysteine2,   $count_of_glutamic_acid2,
            $count_of_glutamine2,     $count_of_glycine2,    $count_of_histidine2,
            $count_of_isoleucine2,    $count_of_leucine2,    $count_of_lysine2,
            $count_of_methionine2,    $count_of_phenylalanine2,
            $count_of_proline2,       $count_of_serine2,     $count_of_threonine2,
            $count_of_tryptophan2,    $count_of_tyrosine2,   $count_of_valine2,
        ],
        scalar(keys %parsedData2),
        [ $count_of_charged2, $count_of_polar2, $count_of_aromatic2, $count_of_hydrophobic2 ],
    ],
);
for my $table (@count_tables) {
    my ($banner, $residue_counts, $total, $group_counts) = @{$table};
    print {$out_fh} $count_rule, $banner, $count_rule, "\n",
        "Amino acid\tTotal Number(N)\n", $table_rule;
    for my $i (0 .. $#residue_labels) {
        print {$out_fh} $residue_labels[$i], $residue_counts->[$i] + 0, "\t\t\n";
    }
    print {$out_fh} $table_rule, "Total\t\t", $total, "\t\t\n", "\n\n";
    $print_group_counts->(@{$group_counts});
}

# Group totals from the "4" counters, written without a heading as in the
# original report.
# NOTE(review): nothing visible in this script increments these counters.
$print_group_counts->(
    $count_of_charged4, $count_of_polar4, $count_of_aromatic4, $count_of_hydrophobic4
);

# close the output file
# Buffered write errors (for example a full disk) only surface at close.
unless (close($out_fh)) {
    print "Cannot finish writing '$outputFile': $!\nProgram closing.\n";
    <STDIN>;
    exit 1;
}
# end message
print "The coordinates of '$inputFile' were saved into '$outputFile'.\n";
# end the program
exit;