Source code for rna_predict.pdbtools
# coding: utf-8
import itertools
import numpy as np
import os
import urllib2
import warnings
import Bio.PDB
from Bio.PDB.PDBExceptions import PDBConstructionWarning
from . import utils
from .sysconfig import SysConfig
PDB_DIRECTORY = SysConfig.SYSCONFIG_LOCATION + os.sep + "pdbs"
[docs]def get_pdb_by_code(pdb_code, pdb_directory=PDB_DIRECTORY):
"""
Get PDB file by code. Download if necessary.
:param pdb_code: PDB code to download
:param pdb_directory: directory lookup and store the PDB file, defaults to the rna_predict PDB cache directory
:return: PDB filename
"""
# make sure directory names have a trailing slash
pdb_directory = os.path.normpath(pdb_directory) + os.sep
utils.mkdir_p(pdb_directory)
pdb_file = pdb_directory + pdb_code + '.pdb'
if not os.path.exists(pdb_file):
response = urllib2.urlopen("http://www.rcsb.org/pdb/files/%s.pdb" % pdb_code.upper())
with open(pdb_file, "w") as f:
f.write(response.read())
return pdb_file
[docs]def parse_pdb(pdb_code, pdb_file):
"""
Parse PDB file using Biopyhon.
:param pdb_code: internal id
:param pdb_file: PDB filename
:return: PDB structure object
"""
with warnings.catch_warnings():
warnings.simplefilter("ignore", PDBConstructionWarning)
return Bio.PDB.PDBParser().get_structure(pdb_code, pdb_file)
[docs]def filter_atoms(atoms, heavy_only=False, p_only=False):
"""
Filter list of atoms.
:param atoms: list of atoms
:param heavy_only: only keep heavy atoms
:param p_only: only keep P atoms
:return: filtered list of atoms
"""
ret = []
for atom in atoms:
if p_only and atom.name != "P":
continue
elif heavy_only and atom.name.startswith("H"):
continue
ret.append(atom)
return ret
[docs]def get_center_of_res(res):
"""
Calculate the center of a residue.
:param res: residue object
:return: center coordinates
"""
coords = []
# loop over all atoms
for atom in filter_atoms(res, heavy_only=True):
coords.append(atom.coord)
return np.mean(coords, axis=0)
[docs]def align_structure(ref_pdb, moving_pdb, assign_b_factors=True):
"""
Align one PDB structure to another.
:param ref_pdb: reference PDB structure object
:param moving_pdb: moving PDB structure object
:param assign_b_factors: place distance values in the b-factor field
:return: tuple (res_dists, atom_dists, rmsd, transformation_matrix)
"""
chain_ref = ref_pdb[0].child_list[0]
chain_sample = moving_pdb[0].child_list[0]
ref_atoms = []
ref_res = []
sample_atoms = []
sample_res = []
for res1 in chain_ref:
if res1.id not in chain_sample:
print "skipping %s" % res1
continue
res2 = chain_sample[res1.id]
ref_res.append(res1)
sample_res.append(res2)
for atom1 in res1:
if atom1.id not in res2:
print " skipping %s" % atom1
continue
atom2 = res2[atom1.id]
ref_atoms.append(atom1)
sample_atoms.append(atom2)
super_imposer = Bio.PDB.Superimposer()
super_imposer.set_atoms(ref_atoms, sample_atoms)
super_imposer.apply(moving_pdb)
# in case we want to assign b-factors to the moving structure, set them all to 0 before.
if assign_b_factors:
for res in chain_sample:
for atom in res:
atom.set_bfactor(0)
dists_atom = []
for atom1, atom2 in itertools.izip(ref_atoms, sample_atoms):
dist = np.linalg.norm(atom1.coord - atom2.coord)
dists_atom.append(dist)
if assign_b_factors:
atom2.set_bfactor(dist)
print super_imposer.rotran
print super_imposer.rms
dists_res = []
for res1, res2 in itertools.izip(ref_res, sample_res):
dist = np.linalg.norm(get_center_of_res(res1) - get_center_of_res(res2))
dists_res.append(dist)
return dists_res, dists_atom, super_imposer.rms, super_imposer.rotran