Off topic:Python function to superimpose protein on each other
0
0
Entering edit mode
10 weeks ago

I have made the function called sup to superimpose 2 protein-like PDB superimpose class :

from NumPy import *
from NumPy.linalg import svd, det

# Test data
# Coordinate vectors along with columns

a=matrix([[ 18.92238689,  9.18841188,  8.70764463,  9.38130981, 8.53057997],
[ 1.12391951,  0.8707568 ,  1.01214183,  0.59383894, 0.65155349],
[ 0.46106398,  0.62858099, -0.02625641,  0.35264203, 0.53670857]], 'f')

b=matrix([[ 1.68739355,  1.38774297,  2.1959675 , 1.51248281,  1.70793414],
[ 8.99726755,  8.73213223,  8.86804272, 8.31722197,  8.9924607 ],
[ 1.1668153 ,  1.1135669 ,  1.02279055, 1.06534992,  0.54881902]], 'f')

# Code

def center(m):
# Returns centered m
# Calculate center of mass of x
center_of_mass_m=m.sum(1)/m.shape
# Center m
centered_m=m-center_of_mass_m
return centered_m

def sup(x, y):
# Nr of atoms
N=x.shape

########################

# Center x and y
x=center(x)
y=center(y)

# correlation matrix
r=y*x.T

# SVD of correlation matrix
v, s, wt=svd(r)

w=wt.T
vt=v.T

# Rotation matrix
u=w*vt

# Check for roto-reflection
if det(u)<0:
z=diag((1,1,-1))
u=w*z*vt
s=-s

# Calculate RMSD
e0=sum(x.A*x.A+y.A*y.A)
print("E0 ", e0)
rmsd=sqrt((e0-2*sum(s))/N)

print('RMSD (svd) ', rmsd)

print("u estimated ")
print(u)

# Calculate RMSD from the coordinates
d=x-u*y
d=d.A*d.A
rmsd=sqrt(sum(d)/N)
print('RMSD (real) ', rmsd)

if __name__=="__main__":

# Run some stuff

# 1. Test data
sup(a,b)

# 2. Protein

# Parse PDB file
from Bio.PDB import *
p=PDBParser()
s=p.get_structure("XXX", "2NIC.pdb")

# Chain A in model 0
chain1=s["A"]
# Chain A in model 1
chain2=s["A"]

def get_coordinates(chain):
# Get the CA coordinates of a cahin and return 3xn matrix
coords=[]
for res in chain:
try:
# Extract CA coordinate
a=res["CA"]
c=a.get_coord()
coords.append(c)
except:
# No CA atom - skip
pass
# Turn coordinate list in 3xn numpy matrix
coords=matrix(coords) # nx3
coords=coords.T       # 3xn
return coords

# Get the 3xn coordinate matrices for the chains
a=get_coordinates(chain1)
b=get_coordinates(chain2)

# Superimpose and done
sup(a,b)


Now I need to make another by utilizing this sup function to download 2 PDB files from the PDB database and then superimposes the 2nd to the first one based on the backbone atoms(N, C alpha, C,0) of the amino acids specified the list of residue numbers. The function should save the rotated and translated structure in a separate PDB file

I am trying this script :

import numpy as np
import Bio
from Bio.PDB import *

# Select what residues numbers you wish to align # and put them in a list
start_id = 1
end_id   = 70
atoms_to_be_aligned = range(start_id, end_id + 1)

def superimpose():
pdb_code_1= "1d3z"
pdb_filename_1 = "%s.pdb" % pdb_code_1
pdb_out_filename = "%s_aligned.pdb" % pdb_code_1
pdb_code_2= "1ubq"
pdb_filename_2 = "%s.pdb" % pdb_code_2
# Start the parser
pdb_parser = Bio.PDB.PDBParser(QUIET = True)
# Get the structures
ref_structure = pdb_parser.get_structure("reference", pdb_filename_1)
sample_structure = pdb_parser.get_structure("sample", pdb_filename_2)
# Use the first model in the pdb-files for alignment
# Change the number 0 if you want to align to another structure
ref_model    = ref_structure
sample_model = sample_structure
# Make a list of the atoms (in the structures) you wish to align.
# In this case we use CA atoms whose index is in the specified range
list_res_1 = []
list_res_2 = []
ref_atoms = []
sample_atoms = []
# Iterate of all chains in the model in order to find all residues
for ref_chain in ref_model:
# Iterate of all residues in each model in order to find proper atoms
for ref_res in ref_chain:
# Check if residue number ( .get_id() ) is in the list
if ref_res.get_id() in atoms_to_be_aligned:
# Append CA atom to list
ref_atoms.append(ref_res['O'])
# Do the same for the sample structure
for sample_chain in sample_model:
for sample_res in sample_chain:
if sample_res.get_id() in atoms_to_be_aligned:
sample_atoms.append(sample_res['O'])
# Now we initiate the superimposer:
#super_imposer = Bio.PDB.Superimposer()
sup(ref_atoms, sample_atoms)
sup(sample_model.get_atoms())
# Print RMSD:
print (list_res_1,list_res2)

superimpose()


But it is giving me an error :

AttributeError                            Traceback (most recent call last)
<ipython-input-30-58f50b4768f7> in <module>()
49                             print (list_res_1,list_res2)
50
---> 51 superimpose()

<ipython-input-30-58f50b4768f7> in superimpose()
44                             # Now we initiate the superimposer:
45                             #super_imposer = Bio.PDB.Superimposer()
---> 46                             sup(ref_atoms, sample_atoms)
47                             sup(sample_model.get_atoms())
48                             # Print RMSD:

<ipython-input-11-e7d76316a1c0> in sup(x, y)
27 def sup(x, y):
28     # Nr of atoms
---> 29     N=x.shape
30
31     ########################

AttributeError: 'list' object has no attribute 'shape'


Can anyone help me plz?

PDB Protein • 155 views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 2236 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.