Hi All,
I am trying to make an overview of the most frequently found organisms in a BLAST query using Biopython.
How do I get the names of the organisms found in the search?
from Bio.Blast import NCBIXML
# Open the BLAST XML output in a context manager so the file is closed
# once every record has been processed.
with open("../data/blastOutput/test_2.xml") as result_handle:
    # NCBIXML.parse yields one record at a time, so the records can be
    # consumed directly without building an intermediate list.
    blast_records = NCBIXML.parse(result_handle)
    for item in blast_records:
        # NOTE(review): the parsed record object may not provide a `match`
        # attribute (it is usually found on HSP objects) -- confirm.
        print("\n", item.match, "\n")
If you'd like to remain in Biopython, you could extract this information from the accession or title of each alignment:
from Bio.Blast import NCBIXML
import collections
# Read the first record from the BLAST XML output. The context manager
# closes the file as soon as that record has been parsed.
with open("blastoutput.xml") as result:
    records = NCBIXML.parse(result)
    # Only the first record produced by the parser is analysed.
    item = next(records)

# Collects one organism entry per alignment of the record.
organisms = []
def get_organism(title):
    """Return the first word of the description in an alignment title.

    The title is expected to look like ``"<identifier> <description>"``,
    for example ``"gi|12345|ref|NM_000001.1| Homo sapiens ..."``. The
    identifier is the first whitespace-delimited token; the word right
    after it is returned (the genus, when the description starts with
    the species name).

    The lookup does not depend on how many ``|``-separated fields the
    identifier has, so titles without a ``gi`` number (``"ref|ACC| ..."``,
    ``"gnl|BL_ORD_ID|0 ..."``) are handled as well.

    Args:
        title: Alignment title string.

    Returns:
        The first word following the identifier token.

    Raises:
        IndexError: If the title has no word after the identifier.
    """
    # NOTE(review): a description that starts with a prefix such as
    # "PREDICTED:" yields that prefix, not a genus -- confirm whether
    # such prefixes need to be skipped for your data.
    # Only the first two tokens are needed; the rest stays unsplit.
    tokens = title.split(None, 2)
    if len(tokens) < 2:
        raise IndexError(
            "no description word found in alignment title: {!r}".format(title)
        )
    return tokens[1]
# Add one organism entry per alignment of the record, then print how
# often each organism occurs.
organisms.extend(get_organism(hit.title) for hit in item.alignments)
organism_counts = collections.Counter(organisms)
print(organism_counts)