Entrez loop errors
1
0
Entering edit mode
4.6 years ago

Hey all I am trying to create a data base but I am having some issues. My below code is running but it is only pulling the last enzyme glucose-6-phosphate dehydrogenase into the table (and its subunits, etc.) and I am not sure why. Any help would be appreciated, thanks!

import sqlite3
#creating connection to my.db
conn = sqlite3.connect ('my.db')
c = conn.cursor()

#creation of genes table
c.execute("""CREATE TABLE genes (id INTEGER PRIMARY KEY AUTOINCREMENT, symbol TEXT, name TEXT, description TEXT, n_sequence TEXT)""")
#creation of pathways table
c.execute("""CREATE TABLE pathways (id INT, name TEXT, description TEXT)""")
#creation of enzymes table
c.execute("""CREATE TABLE enzymes (id INT, name TEXT)""")
#creation of association table between genes and enzymes
c.execute("""CREATE TABLE genes_and_enzymes (gene_id INT, enzyme_id INT)""")
#creation of association table between enzymes and pathways
c.execute("""CREATE TABLE enzymes_in_pathways (enzyme_id INT, pathway_id INT)""")



#commitment for new tables
conn.commit()

#next 3 lines are inserting rows into pathways and their description
c.execute("""INSERT INTO pathways (id, name, description) VALUES (1, "glycolysis", "the catalysis of glucose");""")

c.execute("""INSERT INTO pathways (id, name, description) VALUES (2, "TCA", "cyclic pathway");""")

c.execute("""INSERT INTO pathways (id, name, description) VALUES (3, "PPP", "generates NAD(P)H and pentose sugars");""")

#committing pathways insert
conn.commit()
#inserting enzymes into table
enzymes_list = [ (1, "hexokinase"), 
                (2, "phosphoglucose isomerase"),
                (3, "Aldolase"), 
                (4, "pyruvate kinase"), 
                (5, "citrate synthase"), 
                (6, "malate dehydrogenase"), 
                (7, "isocitrate dehydrogenase"), 
                (8, "fumurase"), 
                (9, "transketolase"), 
                (10, "transaldolase"), 
                (11, "gluconolactonase"), 
                (12, "glucose-6-phosphate dehydrogenase")]
for x in enzymes_list:
    c.execute("""INSERT INTO enzymes (id, name) VALUES (?, ?);""", x)
    conn.commit()

c.execute("SELECT * FROM pathways WHERE name = 'glycolysis';")

#printing the fetched row to ensure its presence
print(c.fetchone())

#importing our Bio import
from Bio import Entrez
from Bio import SeqIO
import time
Entrez.email = 'xxxxx@xxx.xx' #Letting NCBI know who I am
coli_enzymes = ["hexokinase", 
                "phosphoglucose isomerase",
                "Aldolase", 
                "pyruvate kinase", 
                "citrate synthase", 
                "malate dehydrogenase", 
                "isocitrate dehydrogenase", 
                "fumurase", 
                "transketolase", 
                "transaldolase", 
                "gluconolactonase", 
                "glucose-6-phosphate dehydrogenase"]

for enzyme in coli_enzymes:
    handle = Entrez.esearch(db = 'nuccore', 
                            term='E. coli [ORGN] ' +enzyme, 
                            sort='relevance', 
                            idtype='symbol')
    time.sleep(3)

for i in Entrez.read(handle)['IdList']:
    handle = Entrez.efetch(db = 'nuccore', id=i, rettype='gb', retmode='text', retmax=1)
    record_coli = SeqIO.read(handle, "genbank")
    seq_coli = record.seq
    c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""", 
                (str(i), 
                str(enzyme), 
                str(record_coli.description), 
                str(seq)))
    conn.commit()
    time.sleep(3)



human_enzymes = ["hexokinase", 
                "phosphoglucose isomerase",
                "Aldolase", 
                "pyruvate kinase", 
                "citrate synthase", 
                "malate dehydrogenase", 
                "isocitrate dehydrogenase", 
                "fumurase", 
                "transketolase", 
                "transaldolase", 
                "gluconolactonase", 
                "glucose-6-phosphate dehydrogenase"]

for enz in human_enzymes:
    handle = Entrez.esearch(db = 'nuccore', 
                            term='homo sapiens [ORGN] ' +enz, 
                            sort='relevance', 
                            idtype='symbol')
    time.sleep(3)
    print(['IdList'])

for d in Entrez.read(handle)['IdList']:
    handle = Entrez.efetch(db = 'nuccore', id=d, rettype='gb', retmode='text', retmax=1)
    record_human = SeqIO.read(handle, "genbank")
    seq_human = record.seq
    c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""", 
                (str(d), 
                str(enz), 
                str(record_human.description), 
                str(seq_human)))
    conn.commit()
    time.sleep(3)

drosophila_enzymes = ["hexokinase", 
                "phosphoglucose isomerase",
                "Aldolase", 
                "pyruvate kinase", 
                "citrate synthase", 
                "malate dehydrogenase", 
                "isocitrate dehydrogenase", 
                "fumurase", 
                "transketolase", 
                "transaldolase", 
                "gluconolactonase", 
                "glucose-6-phosphate dehydrogenase"]

for y in drosophila_enzymes:
    handle = Entrez.esearch(db = 'nuccore', 
                            term='drosophila melanogaster [ORGN] ' +y, 
                            sort='relevance', 
                            idtype='symbol')
    time.sleep(3)

for z in Entrez.read(handle)['IdList']:
    handle = Entrez.efetch(db = 'nuccore', id=z, rettype='gb', retmode='text', retmax=1)
    record_fly = SeqIO.read(handle, "genbank")
    seq_fly = record.seq
    c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""", 
                (str(d), 
                str(y), 
                str(record_fly.description), 
                str(seq_fly)))
    conn.commit()
    time.sleep(3)
SQLite3 Entrez Biopython • 884 views
ADD COMMENT
2
Entering edit mode
4.6 years ago
for enzyme in coli_enzymes:
    handle = Entrez.esearch(db = 'nuccore', 
                            term='E. coli [ORGN] ' +enzyme, 
                            sort='relevance', 
                            idtype='symbol')
    time.sleep(3)

for i in Entrez.read(handle)['IdList']:
    handle = Entrez.efetch(db = 'nuccore', id=i, rettype='gb', retmode='text', retmax=1)
    record_coli = SeqIO.read(handle, "genbank")
    seq_coli = record.seq
    c.execute("""INSERT INTO genes (symbol, name, description, n_sequence) VALUES (?, ?, ?, ?);""", 
                (str(i), 
                str(enzyme), 
                str(record_coli.description), 
                str(seq)))
    conn.commit()
    time.sleep(3)

You are first iterate over coli_enzymes and overwrite the value of handle each time. After finishing the loop you continue with the code. I think the next loop must be within the first loop and not outside. Same problem appears several times in your code.

ADD COMMENT

Login before adding your answer.

Traffic: 2918 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6