Off topic:Python function to superimpose protein on each other
0
1
Entering edit mode
3.2 years ago
anasjamshed ▴ 120

I have made the function called sup to superimpose 2 protein-like PDB superimpose class :

from NumPy import *
from NumPy.linalg import svd, det


# Test data
# Coordinate vectors along with columns 

a=matrix([[ 18.92238689,  9.18841188,  8.70764463,  9.38130981, 8.53057997],
         [ 1.12391951,  0.8707568 ,  1.01214183,  0.59383894, 0.65155349],
         [ 0.46106398,  0.62858099, -0.02625641,  0.35264203, 0.53670857]], 'f')

b=matrix([[ 1.68739355,  1.38774297,  2.1959675 , 1.51248281,  1.70793414],
         [ 8.99726755,  8.73213223,  8.86804272, 8.31722197,  8.9924607 ],
         [ 1.1668153 ,  1.1135669 ,  1.02279055, 1.06534992,  0.54881902]], 'f')

# Code

def center(m):
   # Returns centered m
   # Calculate center of mass of x
   center_of_mass_m=m.sum(1)/m.shape[1]
   # Center m
   centered_m=m-center_of_mass_m
   return centered_m


def sup(x, y):
    # Nr of atoms
    N=x.shape[1]

    ########################

    # Center x and y
    x=center(x)
    y=center(y)

    # correlation matrix
    r=y*x.T

    # SVD of correlation matrix
    v, s, wt=svd(r)

    w=wt.T
    vt=v.T

    # Rotation matrix
    u=w*vt

    # Check for roto-reflection
    if det(u)<0:
        z=diag((1,1,-1))
        u=w*z*vt
        s[2]=-s[2]

    # Calculate RMSD
    e0=sum(x.A*x.A+y.A*y.A)
    print("E0 ", e0)
    rmsd=sqrt((e0-2*sum(s))/N)

    print('RMSD (svd) ', rmsd)

    print("u estimated ")
    print(u)

    # Calculate RMSD from the coordinates 
    d=x-u*y
    d=d.A*d.A
    rmsd=sqrt(sum(d)/N)
    print('RMSD (real) ', rmsd)

if __name__=="__main__":

    # Run some stuff

    # 1. Test data
    sup(a,b)

    # 2. Protein

    # Parse PDB file
    from Bio.PDB import *
    p=PDBParser()
    s=p.get_structure("XXX", "2NIC.pdb")

    # Chain A in model 0
    chain1=s[0]["A"]
    # Chain A in model 1
    chain2=s[0]["A"]

    def get_coordinates(chain):
        # Get the CA coordinates of a cahin and return 3xn matrix
        coords=[]
        for res in chain:
            try:
                # Extract CA coordinate
                a=res["CA"]
                c=a.get_coord()
                coords.append(c)
            except:
                # No CA atom - skip
                pass
        # Turn coordinate list in 3xn numpy matrix
        coords=matrix(coords) # nx3
        coords=coords.T       # 3xn
        return coords

    # Get the 3xn coordinate matrices for the chains
    a=get_coordinates(chain1)
    b=get_coordinates(chain2)

    # Superimpose and done
    sup(a,b)

Now I need to make another by utilizing this sup function to download 2 PDB files from the PDB database and then superimposes the 2nd to the first one based on the backbone atoms(N, C alpha, C,0) of the amino acids specified the list of residue numbers. The function should save the rotated and translated structure in a separate PDB file

I am trying this script :

import numpy as np
import Bio
from Bio.PDB import * 

# Select what residues numbers you wish to align # and put them in a list
start_id = 1
end_id   = 70
atoms_to_be_aligned = range(start_id, end_id + 1)

def superimpose():
    pdb_code_1= "1d3z"
    pdb_filename_1 = "%s.pdb" % pdb_code_1
    pdb_out_filename = "%s_aligned.pdb" % pdb_code_1
    pdb_code_2= "1ubq"
    pdb_filename_2 = "%s.pdb" % pdb_code_2
    # Start the parser
    pdb_parser = Bio.PDB.PDBParser(QUIET = True)
    # Get the structures
    ref_structure = pdb_parser.get_structure("reference", pdb_filename_1)
    sample_structure = pdb_parser.get_structure("sample", pdb_filename_2)
    # Use the first model in the pdb-files for alignment
    # Change the number 0 if you want to align to another structure
    ref_model    = ref_structure[0]
    sample_model = sample_structure[0]
    # Make a list of the atoms (in the structures) you wish to align.
    # In this case we use CA atoms whose index is in the specified range
    list_res_1 = []
    list_res_2 = []
    ref_atoms = []  
    sample_atoms = []
    # Iterate of all chains in the model in order to find all residues
    for ref_chain in ref_model:
        # Iterate of all residues in each model in order to find proper atoms
        for ref_res in ref_chain:
            # Check if residue number ( .get_id() ) is in the list
            if ref_res.get_id()[1] in atoms_to_be_aligned:
                # Append CA atom to list
                ref_atoms.append(ref_res['O'])
                # Do the same for the sample structure
                for sample_chain in sample_model:
                    for sample_res in sample_chain:
                        if sample_res.get_id()[1] in atoms_to_be_aligned:
                            sample_atoms.append(sample_res['O'])
                            # Now we initiate the superimposer:
                            #super_imposer = Bio.PDB.Superimposer()
                            sup(ref_atoms, sample_atoms)
                            sup(sample_model.get_atoms())
                            # Print RMSD:
                            print (list_res_1,list_res2)

superimpose()

But it is giving me an error :

AttributeError                            Traceback (most recent call last)
<ipython-input-30-58f50b4768f7> in <module>()
     49                             print (list_res_1,list_res2)
     50 
---> 51 superimpose()

<ipython-input-30-58f50b4768f7> in superimpose()
     44                             # Now we initiate the superimposer:
     45                             #super_imposer = Bio.PDB.Superimposer()
---> 46                             sup(ref_atoms, sample_atoms)
     47                             sup(sample_model.get_atoms())
     48                             # Print RMSD:

<ipython-input-11-e7d76316a1c0> in sup(x, y)
     27 def sup(x, y):
     28     # Nr of atoms
---> 29     N=x.shape[1]
     30 
     31     ########################

AttributeError: 'list' object has no attribute 'shape'

Can anyone help me plz?

PDB Protein • 1.4k views
ADD COMMENT
This thread is not open. No new answers may be added
Traffic: 2219 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6