Question

Trying to separate different chains of a PDB file into separate files. Biopython gives an error

3

Entering edit mode

4.9 years ago

westin.kosater ▴ 80

Here is my code

pdb_parser = PDBParser()
pdb_writer = PDBIO()
structure = pdb_parser.get_structure('X', '2i0q.pdb')

# Write every chain of the structure to its own file, named "<chain id>.pdb"
for chain in structure.get_chains():
    pdb_writer.set_structure(chain)
    out_name = chain.get_id() + ".pdb"
    pdb_writer.save(out_name)

Whenever I run it, my interpreter gives me this error

  File "/home/wes/.local/lib/python3.6/site-packages/Bio/PDB/Residue.py", line 83, in get_unpacked_list
    undisordered_atom_list = (undisordered_atom_list + atom.disordered_get_list())

AttributeError: 'Atom' object has no attribute 'disordered_get_list'

Does anyone have any insights into this?

python biopython • 6.2k views

ADD COMMENT • link updated 3.5 years ago by manu.llanos ▴ 10 • written 4.9 years ago by westin.kosater ▴ 80

score 1 · Answer 1 · 2019-07-24

Hello, I had the same problem with several different PDB files. As I did not find an answer on the internet (just your post), I wrote a Python function - split_PDBfile_by_chains() - in the following class. It seems to work fine.

import os

from Bio.PDB import *

class Flat_File:
    """A flat text file (PDB or CSV) held in memory as a list of lines.

    Attributes:
        id: identifier passed to the constructor or set by download_pdb().
            After split_PDBfile_by_chains() it is replaced by a list of
            (pdb_id, chain_id) tuples, one per file written.
        path: path of the file. After split_PDBfile_by_chains() it is
            replaced by the list of paths of the files written.
        lines: lines of the file as loaded by read_file().
    """

    # Record names whose lines carry the chain identifier at index 21
    # (column 22 of the PDB format).
    _CHAIN_RECORDS = ('ATOM  ', 'ANISOU', 'TER   ', 'HETATM')

    def __init__(self, id = str(), path = '.'):
        self.id = id
        self.path = path
        self.lines = list()

    # methods : downloading and reading flat files (PDB file or CSV)
    def download_pdb(self, pdb_id, output_dir = '.'):
        """Download a PDB file with the Biopython PDBList class.

        Sets self.id to pdb_id and self.path to the downloaded file path,
        and returns that path.

        NOTE: the file name is built here as 'pdb<pdb_id in lower case>.ent',
        which is the name Biopython is expected to give the download.
        """
        pdb_file = PDBList()
        pdb_file.retrieve_pdb_file(pdb_id, pdir = output_dir, file_format = 'pdb')
        file_name = "pdb" + pdb_id.lower() + ".ent"
        self.id = pdb_id
        # os.path.join inserts the separator that plain concatenation missed
        # (output_dir='.' used to give '.pdbxxxx.ent').
        self.path = os.path.join(output_dir, file_name)
        return self.path

    def read_file(self, path = '.'):
        """Read a flat file and store its lines in self.lines.

        This function is used for both CSV and PDB files. When path is left
        at its default ('.'), the current self.path is read; otherwise
        self.path is updated to path first.
        """
        if path != '.':
            self.path = path
        with open(self.path, "r") as f:
            self.lines = f.readlines()

    def split_PDBfile_by_chains(self, output_dir = '.', chains = 'all', all_sections = True):
        """Split the loaded PDB lines into one PDB file per chain.

        The lines in self.lines are divided into three parts: everything
        before the first ATOM/HETATM/TER record, the contiguous run of
        ATOM/ANISOU/TER/HETATM records that follows (grouped by the chain
        identifier at index 21), and everything after that run. Only that
        first contiguous run is split; any later coordinate records are part
        of the trailing section.

        Parameters:
            output_dir: directory in which the files are written.
            chains: 'all' to write every chain found (in order of first
                appearance), or an iterable of chain ids (e.g. ['A', 'B']),
                which is processed in sorted order.
            all_sections: when true, the leading and trailing sections are
                copied into every output file around the chain records.

        Each file is named 'pdb<id>_<chain>.ent'. Afterwards self.id is a
        list of (pdb_id, chain_id) tuples and self.path the list of written
        paths; because self.id stops being a string, the method is meant to
        be called once per instance.

        Raises:
            KeyError: if a requested chain id is not present. This is
                checked before any file is written or any attribute changed.
        """
        pdblines = self.lines
        total = len(pdblines)

        # Leading section: stop at the first coordinate-like record.
        i = 0
        while i < total and not pdblines[i].startswith(('ATOM', 'HETATM', 'TER')):
            i += 1
        initial_sections = pdblines[:i]

        # Coordinate section: group consecutive records by chain identifier.
        dict_chains = dict()
        while i < total and pdblines[i][0:6] in self._CHAIN_RECORDS:
            line = pdblines[i]
            dict_chains.setdefault(line[21], []).append(line)
            i += 1

        # Trailing section: whatever remains.
        final_sections = pdblines[i:]

        # Chains selection :
        if chains == 'all':
            chains_id_list = list(dict_chains)
        else:
            chains_id_list = sorted(chains)
            missing = [c for c in chains_id_list if c not in dict_chains]
            if missing:
                raise KeyError('chain(s) not found in PDB data: ' + repr(missing))

        pdb_id = self.id
        self.id = list()
        self.path = list()

        # Write the different files
        for chain_id in chains_id_list:
            sub_file_name = 'pdb' + pdb_id + '_' + chain_id + '.ent'
            sub_file_path = os.path.join(output_dir, sub_file_name)
            with open(sub_file_path, 'w') as f:
                if all_sections:
                    f.writelines(initial_sections)
                f.writelines(dict_chains[chain_id])
                if all_sections:
                    f.writelines(final_sections)
            self.id.append((pdb_id, chain_id))
            self.path.append(sub_file_path)

score 1 · Answer 2 · 2020-01-13

Same issue here. I believe it is a bug.

on Python 3.7.5, Biopython 1.74 and 1.76 both have the same issue.
on Python 3.6.9, Biopython 1.72, works just fine.

Currently I have to downgrade my version to save the PDB.

  # The PDB tools live in the Bio.PDB subpackage; every call below uses the
  # PDB.<name> form, so the subpackage itself must be imported.
  from Bio import PDB
  pdb_id, chain_id = '1ATP', 'E'
  work_dir = './'
  # Download the entry in PDB format into work_dir
  PDB.PDBList(verbose=False).retrieve_pdb_file(pdb_id, pdir=work_dir, file_format='pdb')
  biopdb_name = '{0}/pdb{1}.ent'.format(work_dir, pdb_id.lower())

  ## Read the PDB file and extract the chain from structure[0]
  model = PDB.PDBParser(PERMISSIVE=1,QUIET=1).get_structure(pdb_id, biopdb_name)[0]

  # Save only the selected chain as '<work_dir>/<pdb_id>_<chain_id>.pdb'
  io = PDB.PDBIO()
  io.set_structure(model[chain_id])
  io.save('{0}/{1}_{2}.pdb'.format(work_dir, pdb_id, chain_id))

score 1 · Answer 3 · 2020-11-07

1

Entering edit mode

3.5 years ago

manu.llanos ▴ 10

I had the same problem with Biopython 1.74, but I upgraded to 1.78 and it's working now