Entering edit mode
3.2 years ago
anasjamshed
▴
120
I have made the function called sup to superimpose 2 protein-like PDB superimpose class :
from NumPy import *
from NumPy.linalg import svd, det
# Test data
# Coordinate vectors along with columns
a=matrix([[ 18.92238689, 9.18841188, 8.70764463, 9.38130981, 8.53057997],
[ 1.12391951, 0.8707568 , 1.01214183, 0.59383894, 0.65155349],
[ 0.46106398, 0.62858099, -0.02625641, 0.35264203, 0.53670857]], 'f')
b=matrix([[ 1.68739355, 1.38774297, 2.1959675 , 1.51248281, 1.70793414],
[ 8.99726755, 8.73213223, 8.86804272, 8.31722197, 8.9924607 ],
[ 1.1668153 , 1.1135669 , 1.02279055, 1.06534992, 0.54881902]], 'f')
# Code
def center(m):
# Returns centered m
# Calculate center of mass of x
center_of_mass_m=m.sum(1)/m.shape[1]
# Center m
centered_m=m-center_of_mass_m
return centered_m
def sup(x, y):
# Nr of atoms
N=x.shape[1]
########################
# Center x and y
x=center(x)
y=center(y)
# correlation matrix
r=y*x.T
# SVD of correlation matrix
v, s, wt=svd(r)
w=wt.T
vt=v.T
# Rotation matrix
u=w*vt
# Check for roto-reflection
if det(u)<0:
z=diag((1,1,-1))
u=w*z*vt
s[2]=-s[2]
# Calculate RMSD
e0=sum(x.A*x.A+y.A*y.A)
print("E0 ", e0)
rmsd=sqrt((e0-2*sum(s))/N)
print('RMSD (svd) ', rmsd)
print("u estimated ")
print(u)
# Calculate RMSD from the coordinates
d=x-u*y
d=d.A*d.A
rmsd=sqrt(sum(d)/N)
print('RMSD (real) ', rmsd)
if __name__=="__main__":
# Run some stuff
# 1. Test data
sup(a,b)
# 2. Protein
# Parse PDB file
from Bio.PDB import *
p=PDBParser()
s=p.get_structure("XXX", "2NIC.pdb")
# Chain A in model 0
chain1=s[0]["A"]
# Chain A in model 1
chain2=s[0]["A"]
def get_coordinates(chain):
# Get the CA coordinates of a cahin and return 3xn matrix
coords=[]
for res in chain:
try:
# Extract CA coordinate
a=res["CA"]
c=a.get_coord()
coords.append(c)
except:
# No CA atom - skip
pass
# Turn coordinate list in 3xn numpy matrix
coords=matrix(coords) # nx3
coords=coords.T # 3xn
return coords
# Get the 3xn coordinate matrices for the chains
a=get_coordinates(chain1)
b=get_coordinates(chain2)
# Superimpose and done
sup(a,b)
Now I need to make another by utilizing this sup function to download 2 PDB files from the PDB database and then superimposes the 2nd to the first one based on the backbone atoms(N, C alpha, C,0) of the amino acids specified the list of residue numbers. The function should save the rotated and translated structure in a separate PDB file
I am trying this script :
import numpy as np
import Bio
from Bio.PDB import *
# Select what residues numbers you wish to align # and put them in a list
start_id = 1
end_id = 70
atoms_to_be_aligned = range(start_id, end_id + 1)
def superimpose():
pdb_code_1= "1d3z"
pdb_filename_1 = "%s.pdb" % pdb_code_1
pdb_out_filename = "%s_aligned.pdb" % pdb_code_1
pdb_code_2= "1ubq"
pdb_filename_2 = "%s.pdb" % pdb_code_2
# Start the parser
pdb_parser = Bio.PDB.PDBParser(QUIET = True)
# Get the structures
ref_structure = pdb_parser.get_structure("reference", pdb_filename_1)
sample_structure = pdb_parser.get_structure("sample", pdb_filename_2)
# Use the first model in the pdb-files for alignment
# Change the number 0 if you want to align to another structure
ref_model = ref_structure[0]
sample_model = sample_structure[0]
# Make a list of the atoms (in the structures) you wish to align.
# In this case we use CA atoms whose index is in the specified range
list_res_1 = []
list_res_2 = []
ref_atoms = []
sample_atoms = []
# Iterate of all chains in the model in order to find all residues
for ref_chain in ref_model:
# Iterate of all residues in each model in order to find proper atoms
for ref_res in ref_chain:
# Check if residue number ( .get_id() ) is in the list
if ref_res.get_id()[1] in atoms_to_be_aligned:
# Append CA atom to list
ref_atoms.append(ref_res['O'])
# Do the same for the sample structure
for sample_chain in sample_model:
for sample_res in sample_chain:
if sample_res.get_id()[1] in atoms_to_be_aligned:
sample_atoms.append(sample_res['O'])
# Now we initiate the superimposer:
#super_imposer = Bio.PDB.Superimposer()
sup(ref_atoms, sample_atoms)
sup(sample_model.get_atoms())
# Print RMSD:
print (list_res_1,list_res2)
superimpose()
But it is giving me an error :
AttributeError Traceback (most recent call last)
<ipython-input-30-58f50b4768f7> in <module>()
49 print (list_res_1,list_res2)
50
---> 51 superimpose()
<ipython-input-30-58f50b4768f7> in superimpose()
44 # Now we initiate the superimposer:
45 #super_imposer = Bio.PDB.Superimposer()
---> 46 sup(ref_atoms, sample_atoms)
47 sup(sample_model.get_atoms())
48 # Print RMSD:
<ipython-input-11-e7d76316a1c0> in sup(x, y)
27 def sup(x, y):
28 # Nr of atoms
---> 29 N=x.shape[1]
30
31 ########################
AttributeError: 'list' object has no attribute 'shape'
Can anyone help me plz?