from ast import literal_eval
from luna.mol.entry import REGEX_RESNUM_ICODE
from luna.util.config import Config
from luna.util.exceptions import IllegalArgumentError
# Every interaction label that counts as an aromatic stacking. A binding mode
# section named "Aromatic stacking" applies to all of these types.
AROMATIC_STACKINGS = [
    "pi-stacking",
    "face-to-face pi-stacking",
    "face-to-edge pi-stacking",
    "face-to-slope pi-stacking",
    "edge-to-edge pi-stacking",
    "edge-to-face pi-stacking",
    "edge-to-slope pi-stacking",
    "displaced face-to-face pi-stacking",
    "displaced face-to-edge pi-stacking",
    "displaced face-to-slope pi-stacking",
]
class InteractionFilter:
    """Filter interactions based on their components.

    Parameters
    ----------
    ignore_self_inter : bool
        If True, ignore interactions involving atoms of the same compound.
    ignore_intra_chain : bool
        If True, ignore intra-chain interactions (e.g., interactions between
        residues in the same protein chain).
    ignore_inter_chain : bool
        If True, ignore interactions between different chains.
    ignore_res_res : bool
        If True, ignore residue-residue interactions.
    ignore_res_nucl : bool
        If True, ignore residue-nucleotide interactions.
    ignore_res_hetatm : bool
        If True, ignore residue-ligand interactions.
    ignore_nucl_nucl : bool
        If True, ignore nucleotide-nucleotide interactions.
    ignore_nucl_hetatm : bool
        If True, ignore nucleotide-ligand interactions.
    ignore_hetatm_hetatm : bool
        If True, ignore ligand-ligand interactions.
    ignore_h2o_h2o : bool
        If True, ignore water-water interactions.
    ignore_any_h2o : bool
        If True, ignore all interactions involving water.
    ignore_multi_comps : bool
        If True, ignore interactions established by atom groups composed of
        multiple compounds (e.g.: amides formed by peptide bonds involve two
        residues).
    ignore_mixed_class : bool
        If True, ignore interactions established by atom groups comprising
        mixed compound classes (e.g. residues and ligands bound by a covalent
        bond).
    """

    def __init__(self, ignore_self_inter=True, ignore_intra_chain=True, ignore_inter_chain=True,
                 ignore_res_res=True, ignore_res_nucl=True, ignore_res_hetatm=True,
                 ignore_nucl_nucl=True, ignore_nucl_hetatm=True, ignore_hetatm_hetatm=True,
                 ignore_h2o_h2o=True, ignore_any_h2o=False, ignore_multi_comps=False,
                 ignore_mixed_class=False):
        self.ignore_self_inter = ignore_self_inter
        self.ignore_intra_chain = ignore_intra_chain
        self.ignore_inter_chain = ignore_inter_chain
        self.ignore_res_res = ignore_res_res
        self.ignore_res_nucl = ignore_res_nucl
        self.ignore_res_hetatm = ignore_res_hetatm
        self.ignore_nucl_nucl = ignore_nucl_nucl
        self.ignore_nucl_hetatm = ignore_nucl_hetatm
        self.ignore_hetatm_hetatm = ignore_hetatm_hetatm
        self.ignore_h2o_h2o = ignore_h2o_h2o
        self.ignore_any_h2o = ignore_any_h2o
        self.ignore_multi_comps = ignore_multi_comps
        self.ignore_mixed_class = ignore_mixed_class

    @classmethod
    def new_pli_filter(cls, ignore_res_hetatm=False, ignore_hetatm_hetatm=False, ignore_any_h2o=False,
                       ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-ligand interactions.

        The named parameters are the flags this preset switches off by
        default; any other constructor flag can be overridden via ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_hetatm=ignore_res_hetatm, ignore_hetatm_hetatm=ignore_hetatm_hetatm,
                   ignore_any_h2o=ignore_any_h2o, ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_ppi_filter(cls, ignore_res_res=False, ignore_inter_chain=False, ignore_intra_chain=False,
                       ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-protein interactions.

        The named parameters are the flags this preset switches off by
        default; any other constructor flag can be overridden via ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_res=ignore_res_res, ignore_inter_chain=ignore_inter_chain,
                   ignore_intra_chain=ignore_intra_chain, ignore_any_h2o=ignore_any_h2o,
                   ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_pni_filter(cls, ignore_res_nucl=False, ignore_inter_chain=False, ignore_intra_chain=False,
                       ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-nucleotide interactions.

        The named parameters are the flags this preset switches off by
        default; any other constructor flag can be overridden via ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_nucl=ignore_res_nucl, ignore_inter_chain=ignore_inter_chain,
                   ignore_intra_chain=ignore_intra_chain, ignore_any_h2o=ignore_any_h2o,
                   ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_nni_filter(cls, ignore_nucl_nucl=False, ignore_inter_chain=False, ignore_intra_chain=False,
                       ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for nucleotide-nucleotide interactions.

        The named parameters are the flags this preset switches off by
        default; any other constructor flag can be overridden via ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_nucl_nucl=ignore_nucl_nucl, ignore_inter_chain=ignore_inter_chain,
                   ignore_intra_chain=ignore_intra_chain, ignore_any_h2o=ignore_any_h2o,
                   ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_nli_filter(cls, ignore_nucl_hetatm=False, ignore_hetatm_hetatm=False, ignore_any_h2o=False,
                       ignore_self_inter=False, **kwargs):
        """Initialize the default filter for nucleotide-ligand interactions.

        The named parameters are the flags this preset switches off by
        default; any other constructor flag can be overridden via ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_nucl_hetatm=ignore_nucl_hetatm, ignore_hetatm_hetatm=ignore_hetatm_hetatm,
                   ignore_any_h2o=ignore_any_h2o, ignore_self_inter=ignore_self_inter, **kwargs)

    def _rejects_polymer_pair(self, ignore_pair_class, is_same_chain):
        """Tell if a residue-residue or nucleotide-nucleotide pair must be ignored.

        ``ignore_pair_class`` is the class-wide flag (``ignore_res_res`` or
        ``ignore_nucl_nucl``); otherwise the intra/inter-chain flag matching
        ``is_same_chain`` decides.
        """
        if ignore_pair_class:
            return True
        if is_same_chain:
            return self.ignore_intra_chain
        return self.ignore_inter_chain

    def is_valid_pair(self, src_grp, trgt_grp):
        """Evaluate if a pair of atom groups are valid according to the flags defined in this class.

        Parameters
        ----------
        src_grp, trgt_grp : :class:`luna.mol.groups.AtomGroup`

        Returns
        -------
         : bool
            False if any active flag rejects the pair, True otherwise.
        """
        # Interactions involving the same atom group are always ignored:
        # loops in the graph are not permitted and do not make any sense.
        if src_grp == trgt_grp:
            return False

        # Interactions between atoms and the group to which they belong are
        # always ignored. For example, the centroid of an aromatic ring cannot
        # interact with an atom that belongs to the ring. It is a type of loop.
        if src_grp.contain_group(trgt_grp) or trgt_grp.contain_group(src_grp):
            return False

        # If one of the groups contains atoms from different compounds.
        has_multi_comps = (len(src_grp.compounds) > 1 or len(trgt_grp.compounds) > 1)
        if self.ignore_multi_comps and has_multi_comps:
            return False

        # If one of the groups contains compounds from different classes as,
        # for instance, residue and ligand. It means that compounds from
        # different classes are covalently bonded to each other.
        has_any_mixed = (src_grp.is_mixed() or trgt_grp.is_mixed())
        if self.ignore_mixed_class and has_any_mixed:
            return False

        # Ignore interactions involving the same compounds if required.
        # As each group may have atoms from different compounds, we check if
        # there is at least one common compound between the two groups.
        # If two or more compounds exist in a group, these compounds are
        # covalently bonded and should be considered the same compound.
        # For example, a carbohydrate can be expressed in a PDB as its
        # subparts (e.g.: GLC + GLC = CBI). The same applies to any group
        # formed after covalently bonding a residue to a hetatm (ligand or
        # non-standard amino acid represented as hetatm).
        is_same_compounds = len(src_grp.compounds.intersection(trgt_grp.compounds)) >= 1
        if self.ignore_self_inter and is_same_compounds:
            return False

        # Both groups must contain the same chains and only one chain. The
        # second condition removes groups containing residues of different
        # chains as may occur due to disulfide bonds. This flag is only used
        # as a filter for intra-interactions in protein/RNA/DNA chains.
        src_chains = src_grp.get_chains()
        is_same_chain = src_chains == trgt_grp.get_chains() and len(src_chains) == 1

        # Residue-residue interactions: class-wide, intra-chain, or
        # inter-chain filtering.
        is_res_res = (src_grp.is_residue() and trgt_grp.is_residue())
        if is_res_res and self._rejects_polymer_pair(self.ignore_res_res, is_same_chain):
            return False

        # Residue-nucleic acid interactions.
        is_res_nucl = ((src_grp.is_residue() and trgt_grp.is_nucleotide())
                       or (src_grp.is_nucleotide() and trgt_grp.is_residue()))
        if self.ignore_res_nucl and is_res_nucl:
            return False

        # Residue-ligand interactions.
        is_res_hetatm = ((src_grp.is_residue() and trgt_grp.is_hetatm())
                         or (src_grp.is_hetatm() and trgt_grp.is_residue()))
        if self.ignore_res_hetatm and is_res_hetatm:
            return False

        # Nucleic acid-nucleic acid interactions: class-wide, intra-chain, or
        # inter-chain (RNA/DNA chains) filtering.
        is_nucl_nucl = (src_grp.is_nucleotide() and trgt_grp.is_nucleotide())
        if is_nucl_nucl and self._rejects_polymer_pair(self.ignore_nucl_nucl, is_same_chain):
            return False

        # Nucleic acid-ligand interactions.
        is_nucl_hetatm = ((src_grp.is_nucleotide() and trgt_grp.is_hetatm())
                          or (src_grp.is_hetatm() and trgt_grp.is_nucleotide()))
        if self.ignore_nucl_hetatm and is_nucl_hetatm:
            return False

        # Ligand-ligand interactions.
        is_hetatm_hetatm = (src_grp.is_hetatm() and trgt_grp.is_hetatm())
        if self.ignore_hetatm_hetatm and is_hetatm_hetatm:
            return False

        # Interactions of other compound types with water. Keeping them
        # enables the identification of water-bridged interactions.
        # E.g.: residue-water, ligand-water = residue -- water -- ligand.
        is_any_h2o = (src_grp.is_water() or trgt_grp.is_water())
        if self.ignore_any_h2o and is_any_h2o:
            return False

        # Interactions involving two waters. If kept, they produce
        # water-bridged interactions of multiple levels.
        # E.g.: residue -- h2o -- h2o -- ligand.
        is_h2o_h2o = (src_grp.is_water() and trgt_grp.is_water())
        if self.ignore_h2o_h2o and is_h2o_h2o:
            return False

        return True
class BindingModeCondition:
    r"""Define binding mode conditions to filter interactions.

    Parameters
    ----------
    condition : str
        A string defining which chains, compounds, or atoms should be accepted.
        If ``condition`` is the wildcard '*', then all chains, compounds, and
        atoms will be considered valid. Otherwise, ``condition`` should have
        the format '<CHAIN ID>/<COMPOUND NAME>/<COMPOUND NUMBER>/<ATOM>'.
        Wildcards are accepted for each one of these fields.
        For example:

            * '\*/HIS/\*/\*': represents all histidines' atoms from all chains.
            * 'A/CBL/\*/\*' represents all ligands named CBL from chain A.
            * 'B/HIS/\*/N\*' represents all histidines' nitrogens from chain B.

    Attributes
    ----------
    accept_all : bool
        If True, all chains, compounds, and atoms will be considered valid.
    accept_all_chains : bool
        If True, all chains will be considered valid.
    accept_all_comps : bool
        If True, all compound names will be considered valid.
    accept_all_comp_nums : bool
        If True, all compound numbers (residue sequence number in the PDB
        format) will be considered valid.
    accept_all_atoms : bool
        If True, all atoms will be considered valid.
    chain_id : str or None
        If provided, accept only chains whose id matches ``chain_id``.
    comp_name : str or None
        If provided, accept only compounds whose name matches ``comp_name``.
    comp_num : int or None
        If provided, accept only compounds whose sequence number matches
        ``comp_num``.
    comp_icode : str or None
        If provided, accept only compounds whose insertion code matches
        ``comp_icode``.
    atom : str or None
        If provided, accept only atoms whose name matches ``atom``.

    Raises
    ------
    IllegalArgumentError
        If the compound number field is neither a wildcard nor a valid
        integer optionally followed by an insertion code.
    """

    def __init__(self, condition):
        self.accept_all = False
        self.accept_all_chains = False
        self.accept_all_comps = False
        self.accept_all_comp_nums = False
        self.accept_all_atoms = False

        self.chain_id = None
        self.comp_name = None
        self.comp_num = None
        self.comp_icode = None
        self.atom = None

        # Keep the string exactly as provided for __repr__.
        self._condition_repr = condition

        # NOTE(review): the condition is upper-cased before parsing while
        # is_valid() compares against chain ids, names, and insertion codes
        # as they are, so lowercase chain ids presumably can never match --
        # confirm whether that is intended.
        self._parse_condition(condition.upper())

    def _parse_condition(self, condition):
        """Populate the attributes from an (upper-cased) condition string."""
        # Accept everything.
        if condition == "*":
            self.accept_all = True
            return

        # Exactly four '/'-separated fields are expected; any other count
        # makes the unpacking below raise ValueError.
        chain, comp_name, comp_num, atom = condition.split("/")

        if chain == "*":
            self.accept_all_chains = True
        else:
            self.chain_id = chain

        if comp_name == "*":
            self.accept_all_comps = True
        else:
            self.comp_name = comp_name

        if comp_num == "*":
            self.accept_all_comp_nums = True
        else:
            # Separate the compound number from its insertion code.
            matched = REGEX_RESNUM_ICODE.match(comp_num)
            if not matched:
                raise IllegalArgumentError("The compound number and its insertion code (if applicable) '%s' is invalid. "
                                           "It must be an integer followed by one insertion code character when applicable."
                                           % comp_num)

            comp_num = matched.group(1)
            try:
                # Explicit check instead of `assert`, which is stripped
                # when Python runs with -O.
                if not float(comp_num).is_integer():
                    raise ValueError(comp_num)
                number = int(comp_num)
            except ValueError as err:
                raise IllegalArgumentError("The informed compound number '%s' is invalid. It must be an integer."
                                           % str(comp_num)) from err

            self.comp_num = number
            # An empty (or missing) insertion code is stored as None.
            self.comp_icode = matched.group(2) or None

        if atom == "*":
            self.accept_all_atoms = True
        else:
            self.atom = atom

    def is_valid(self, atm_grp):
        """Check if an atom group is valid or not based on this condition.

        Parameters
        ----------
        atm_grp : :class:`luna.mol.groups.AtomGroup`

        Returns
        -------
         : bool
            True if at least one atom in ``atm_grp`` satisfies every field
            of this condition.
        """
        # Accept everything.
        if self.accept_all:
            return True

        # Accept everything.
        if self.accept_all_chains and self.accept_all_comps and self.accept_all_comp_nums and self.accept_all_atoms:
            return True

        # Try to identify the first valid atom/compound in the atom group.
        for atm in atm_grp.atoms:
            comp = atm.parent

            # Each field starts as valid only when its wildcard was given.
            is_chain_valid = self.accept_all_chains
            is_comp_valid = self.accept_all_comps
            is_comp_num_valid = self.accept_all_comp_nums
            is_atom_valid = self.accept_all_atoms

            if self.chain_id is not None and self.chain_id == comp.parent.id:
                is_chain_valid = True

            if self.comp_name is not None and self.comp_name == comp.resname:
                is_comp_valid = True

            if self.comp_num is not None and self.comp_num == comp.id[1]:
                # A blank insertion code is compared as None.
                icode = None if comp.id[2].strip() == "" else comp.id[2]
                if self.comp_icode == icode:
                    is_comp_num_valid = True

            if self.atom is not None:
                if self.atom.endswith("*"):
                    # A trailing wildcard (e.g. 'N*') verifies element equality.
                    if self.atom.rstrip("*") == atm.element:
                        is_atom_valid = True
                elif self.atom == atm.name:
                    # Otherwise, verify atom name equality.
                    is_atom_valid = True

            if is_chain_valid and is_comp_valid and is_comp_num_valid and is_atom_valid:
                return True

        return False

    def __repr__(self):
        return "<BindingModeCondition: %s>" % self._condition_repr
class BindingModeFilter:
    """Filter interactions based on a set of binding mode conditions.

    Parameters
    ----------
    config : dict of {str : iterable}
        A dict defining binding modes and how interactions should be validated.
        Each key represents an interaction type and values are an iterable of
        `BindingModeCondition` instances.
    """

    def __init__(self, config):
        self.config = config

    @classmethod
    def from_config_file(cls, config_file):
        """Initialize from a configuration file.

        Parameters
        ----------
        config_file : str
            The configuration file pathname.

        Returns
        -------
         : `BindingModeFilter`

        Examples
        --------
        It follows an example of a configuration file::

            ; To configurate an interaction type, create a new line and define the interaction: [New interaction].
            ; Then you can define whether or not all interactions must be accepted by setting 'accept_all' to True or False.
            ; If you want to specify binding modes, use the variable 'accept_only', which expects a list of strings
            ; in the format: <CHAIN ID>/<COMPOUND NAME>/<COMPOUND NUMBER>/<ATOM>
            ; Wildcards are accepted for the expected fields.
            ; For example, "*/HIS/*/*" represents all histidines' atoms from all chains.
            ;              "A/CBL/*/*" represents all ligands named CBL from chain A.
            ;              "B/HIS/*/N*" represents all histidines' nitrogens from chain B.

            [Hydrogen bond]
            accept_only=["A/LYS/245/*", "*/HIS/*/*"]

            [Hydrophobic]
            accept_all=True

            [Cation-pi]
            accept_only=["*"]
            accept_all=False

            [Weak hydrogen bond]
            accept_all=False
            accept_only=["*/THR/434/O*"]

            [Face-to-edge pi-stacking]
            accept_all=False

            [Aromatic stacking]
            accept_all=True

            [*]
            accept_all=False
        """
        filtering_config = {}
        config = Config(config_file)
        for section in config.sections():
            params = config.get_section_map(section)
            patterns = cls._accepted_patterns(params)
            # Interaction types are matched case-insensitively in is_valid().
            filtering_config[section.lower()] = [BindingModeCondition(pattern) for pattern in patterns]

        return cls(filtering_config)

    @staticmethod
    def _accepted_patterns(params):
        """Return the condition strings defined by one configuration section.

        'accept_all' and 'accept_only' are resolved independently, so a
        section may define either of them or both: a literal True in
        'accept_all' wins; otherwise 'accept_only' is used when present.
        """
        accept_all = False
        if "accept_all" in params:
            accept_all = literal_eval(params["accept_all"])

        if accept_all is True:
            return ["*"]

        # Any other truthy 'accept_all' value is not treated as accept-all,
        # but it still disables 'accept_only' (same guard as before).
        if accept_all or "accept_only" not in params:
            return []

        patterns = literal_eval(params["accept_only"])
        # A bare wildcard makes every other pattern redundant.
        if "*" in patterns:
            return ["*"]
        return patterns

    def is_valid(self, inter):
        """Check if an interaction is valid or not based on this binding mode configuration.

        Parameters
        ----------
        inter : :class:`luna.interaction.type.InteractionType`

        Returns
        -------
         : bool
            True if the source or target group of ``inter`` satisfies at least
            one condition configured for its interaction type.
        """
        inter_type = inter.type.lower()

        # Lookup order: exact type, the generic "aromatic stacking" entry
        # (for any stacking subtype), then the catch-all "*".
        if inter_type in self.config:
            key = inter_type
        elif "aromatic stacking" in self.config and inter_type in AROMATIC_STACKINGS:
            key = "aromatic stacking"
        elif "*" in self.config:
            key = "*"
        else:
            return False

        return any(condition.is_valid(inter.src_grp) or condition.is_valid(inter.trgt_grp)
                   for condition in self.config[key])