Question

Problem in generating interactions graphs through python script

0

Entering edit mode

2.1 years ago

anasjamshed ▴ 140

I have 16000 genes in text file and i want to make ppi graph through python by utilizing a string database. It works fine with a few hundred genes but when I try to make a graph of 2000 genes,it giving error.

My code:

### The required libraries and packages ###
import networkx as nx
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

# List of genes to search for
list1= open("genes.txt").read()
# split line by "," into list of strings
geneList = list1.rstrip().split("\n")
#Convert genes into proteins
proteins = '%0d'.join(geneList)
#Define the URL
url = 'https://string-db.org/api/tsv/network?identifiers=' + proteins + '&species=9606'
r = requests.get(url)
lines = r.text.split('\n') # pull the text from the response object and split based on new lines
data = [l.split('\t') for l in lines] # split each line into its components based on tabs
# convert to dataframe using the first row as the column names; drop empty, final row
df = pd.DataFrame(data[1:-1], columns = data[0]) 
#print(df)

# dataframe with the preferred names of the two proteins and the score of the interaction
interactions = df[['preferredName_A', 'preferredName_B', 'score']] 

print(interactions)

G=nx.Graph(name='Gene Interaction Graph')
interactions = np.array(interactions)
for i in range(len(interactions)):
    interaction = interactions[i]
    a = interaction[0] # protein a node
    b = interaction[1] # protein b node
    w = float(interaction[2]) # score as weighted edge where high scores = low weight
    G.add_weighted_edges_from([(a,b,w)]) # add weighted edge to graph

pos = nx.spring_layout(G) # position the nodes using the spring layout
plt.figure(figsize=(18,18),facecolor=[0.9,0.7,0.7,0.5])
nx.draw_networkx(G)
plt.axis('off')
plt.show()

Error:

KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14700/807706410.py in <module>
     23 
     24 # dataframe with the preferred names of the two proteins and the score of the interaction
---> 25 interactions = df[['preferredName_A', 'preferredName_B', 'score']]
     26 
     27 print(interactions)

~\anaconda3\lib\site-packages\pandas\core\frame.py in __getitem__(self, key)
   3509             if is_iterator(key):
   3510                 key = list(key)
-> 3511             indexer = self.columns._get_indexer_strict(key, "columns")[1]
   3512 
   3513         # take() does not accept boolean indexers

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _get_indexer_strict(self, key, axis_name)
   5780             keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
   5781 
-> 5782         self._raise_if_missing(keyarr, indexer, axis_name)
   5783 
   5784         keyarr = self.take(indexer)

~\anaconda3\lib\site-packages\pandas\core\indexes\base.py in _raise_if_missing(self, key, indexer, axis_name)
   5840                 if use_interval_msg:
   5841                     key = list(key)
-> 5842                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   5843 
   5844             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())

KeyError: "None of [Index(['preferredName_A', 'preferredName_B', 'score'], dtype='object')] are in the [columns]"

Can anyone help me to solve this?

String Python • 556 views

ADD COMMENT • link updated 2.0 years ago by Ram 44k • written 2.1 years ago by anasjamshed ▴ 140