Problem generating interaction graphs with a Python script
Entering edit mode
20 months ago
anasjamshed ▴ 120

I have 16,000 genes in a text file and I want to build a PPI (protein-protein interaction) graph in Python using the STRING database. It works fine with a few hundred genes, but when I try to build a graph of 2,000 genes, it gives an error.

My code:

### The required libraries and packages ###
import networkx as nx
import requests
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import cm

# Read the gene identifiers (one per line); the context manager makes sure
# the file handle is closed again.
with open("genes.txt") as gene_file:
    list1 = gene_file.read()
# Split the text on line breaks into a list of identifiers, dropping blank
# lines and stray whitespace (e.g. '\r' left over from Windows line endings).
geneList = [gene.strip() for gene in list1.splitlines() if gene.strip()]
# Join the identifiers with '%0d' (a URL-encoded carriage return), which is
# the separator this script uses for the identifier list in the query string.
proteins = '%0d'.join(geneList)
# Define the URL
# NOTE(review): the base URL literal is empty here -- presumably the STRING
# API network endpoint belongs in it; confirm before running.
# NOTE(review): every identifier is placed in the GET URL, so a long gene list
# yields a very long URL. That is presumably why large lists fail; consider
# sending the parameters in a POST body instead -- verify against the STRING
# API documentation.
url = '' + proteins + '&species=9606'
r = requests.get(url)
# Fail loudly on an HTTP error instead of trying to parse an error page as TSV.
r.raise_for_status()
# Pull the text from the response object and split it into non-empty lines.
lines = [ln.rstrip('\r') for ln in r.text.split('\n') if ln.strip()]
data = [l.split('\t') for l in lines]  # split each line into its tab-separated fields
# Convert to a dataframe using the first row as the column names. Empty lines
# were already filtered out above, so no real data row is dropped here.
df = pd.DataFrame(data[1:], columns=data[0] if data else [])

# The response must contain the interaction columns; if it does not (for
# example because the server answered with an error message), raise a
# KeyError that shows what was actually received.
missing = [
    col for col in ['preferredName_A', 'preferredName_B', 'score']
    if col not in df.columns
]
if missing:
    raise KeyError(
        f"Response is missing expected columns {missing} "
        f"(HTTP status {r.status_code}); response starts with: {r.text[:200]!r}"
    )

# dataframe with the preferred names of the two proteins and the score of the interaction
interactions = df[['preferredName_A', 'preferredName_B', 'score']]


# Undirected graph that will hold one weighted edge per interaction row.
G = nx.Graph(name='Gene Interaction Graph')
# Work on a plain array: column 0 and column 1 are the two protein nodes,
# column 2 is the interaction score.
interactions = np.array(interactions)
# Add every row as an edge; the score, converted to float, becomes the
# edge weight.
G.add_weighted_edges_from(
    (row[0], row[1], float(row[2])) for row in interactions
)

# Compute node positions with the spring layout.
pos = nx.spring_layout(G)


KeyError                                  Traceback (most recent call last)
~\AppData\Local\Temp/ipykernel_14700/ in <module>
     24 # dataframe with the preferred names of the two proteins and the score of the interaction
---> 25 interactions = df[['preferredName_A', 'preferredName_B', 'score']]
     27 print(interactions)

~\anaconda3\lib\site-packages\pandas\core\ in __getitem__(self, key)
   3509             if is_iterator(key):
   3510                 key = list(key)
-> 3511             indexer = self.columns._get_indexer_strict(key, "columns")[1]
   3513         # take() does not accept boolean indexers

~\anaconda3\lib\site-packages\pandas\core\indexes\ in _get_indexer_strict(self, key, axis_name)
   5780             keyarr, indexer, new_indexer = self._reindex_non_unique(keyarr)
-> 5782         self._raise_if_missing(keyarr, indexer, axis_name)
   5784         keyarr = self.take(indexer)

~\anaconda3\lib\site-packages\pandas\core\indexes\ in _raise_if_missing(self, key, indexer, axis_name)
   5840                 if use_interval_msg:
   5841                     key = list(key)
-> 5842                 raise KeyError(f"None of [{key}] are in the [{axis_name}]")
   5844             not_found = list(ensure_index(key)[missing_mask.nonzero()[0]].unique())

KeyError: "None of [Index(['preferredName_A', 'preferredName_B', 'score'], dtype='object')] are in the [columns]"

Can anyone help me to solve this?

String Python • 471 views

Login before adding your answer.

Traffic: 2170 users visited in the last hour
Help About
Access RSS

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6