I have protein sequences that i split at every 'R' and convert them to peptides. Currently it searches for perfect and non perfect alignments. What i wanted to accomplish is:
1) Rather than searching one by one, is there a way to batch all these peptides and send them?
2) Find perfect match only
3) If i don't find the peptide, i want to log to the output file that the peptide sequence was not found.
This is my code so far:
from Bio import SeqIO
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
mysplitseqs=[]
minPeptideLength=5
myEntrezQuery = "Homo sapiens[Organism]"
E_VALUE_THRESH = 0.04
with open("outfile.txt","w") as f:
for seq_record in SeqIO.parse("my.fasta", "fasta"):
mysplitseqs= seq_record.seq.split('R')
f.writestrseq_record.id) + "\n")
for peptide in mysplitseqs:
if len(peptide) >minPeptideLength:
result_handle = NCBIWWW.qblast("blastp", "nr", peptide,"-e 10000",entrez_query=myEntrezQuery)
blast_record = NCBIXML.read(result_handle)
for alignment in blast_record.alignments:
for hsp in alignment.hsps:
if hsp.expect < E_VALUE_THRESH:
print '****Alignment****'
print 'sequence:', alignment.title
print 'length:', alignment.length
print 'e value:', hsp.expect
print hsp.query[0:75] + '...'
print hsp.match[0:75] + '...'
print hsp.sbjct[0:75] + '...'
f.write('****Alignment****\n')
f.write("sequence: "+ str(alignment.title) + "\n")
f.write("length: "+ str(alignment.length) + "\n")
f.write("e-value: "+ str(hsp.expect) + "\n")
f.write("Query: "+ str(hsp.query[0:75]) + "\n")
f.write("Match: "+ str(hsp.match[0:75]) + "\n")
f.write("Subject: "+ str(hsp.sbjct[0:75]) + "\n")
Any help is really appreciated.