"""Source code for luna.interaction.filter."""

from ast import literal_eval

from luna.mol.entry import REGEX_RESNUM_ICODE
from luna.util.config import Config
from luna.util.exceptions import IllegalArgumentError


# Lower-cased names of every interaction type that counts as an aromatic
# stacking. The generic name comes first, followed by the face-*, edge-* and
# displaced face-* variants.
AROMATIC_STACKINGS = [
    "pi-stacking",
    "face-to-face pi-stacking",
    "face-to-edge pi-stacking",
    "face-to-slope pi-stacking",
    "edge-to-edge pi-stacking",
    "edge-to-face pi-stacking",
    "edge-to-slope pi-stacking",
    "displaced face-to-face pi-stacking",
    "displaced face-to-edge pi-stacking",
    "displaced face-to-slope pi-stacking",
]


class InteractionFilter:
    """ Filter interactions based on their components.

    Note that every pairwise ``ignore_*`` flag defaults to True, except
    ``ignore_any_h2o``, ``ignore_multi_comps``, and ``ignore_mixed_class``.
    Use one of the ``new_*_filter`` class methods to obtain a filter with
    the flags of a given interaction class already switched off.

    Parameters
    ----------
    ignore_self_inter : bool
        If True, ignore interactions involving atoms of the same compound.
    ignore_intra_chain : bool
        If True, ignore intra-chain interactions (e.g., interactions between residues in the same protein chain).
    ignore_inter_chain : bool
        If True, ignore interactions between different chains.
    ignore_res_res : bool
        If True, ignore residue-residue interactions.
    ignore_res_nucl : bool
        If True, ignore residue-nucleotide interactions.
    ignore_res_hetatm : bool
        If True, ignore residue-ligand interactions.
    ignore_nucl_nucl : bool
        If True, ignore nucleotide-nucleotide interactions.
    ignore_nucl_hetatm : bool
        If True, ignore nucleotide-ligand interactions.
    ignore_hetatm_hetatm : bool
        If True, ignore ligand-ligand interactions.
    ignore_h2o_h2o : bool
        If True, ignore water-water interactions.
    ignore_any_h2o : bool
        If True, ignore all interactions involving water.
    ignore_multi_comps : bool
        If True, ignore interactions established by atom groups composed of multiple compounds
        (e.g.: amides formed by peptide bonds involve two residues).
    ignore_mixed_class : bool
        If True, ignore interactions established by atom groups comprising mixed compound classes
        (e.g. residues and ligands bound by a covalent bond).
    """

    def __init__(self, ignore_self_inter=True, ignore_intra_chain=True, ignore_inter_chain=True,
                 ignore_res_res=True, ignore_res_nucl=True, ignore_res_hetatm=True,
                 ignore_nucl_nucl=True, ignore_nucl_hetatm=True, ignore_hetatm_hetatm=True,
                 ignore_h2o_h2o=True, ignore_any_h2o=False, ignore_multi_comps=False, ignore_mixed_class=False):

        self.ignore_self_inter = ignore_self_inter
        self.ignore_intra_chain = ignore_intra_chain
        self.ignore_inter_chain = ignore_inter_chain
        self.ignore_res_res = ignore_res_res
        self.ignore_res_nucl = ignore_res_nucl
        self.ignore_res_hetatm = ignore_res_hetatm
        self.ignore_nucl_nucl = ignore_nucl_nucl
        self.ignore_nucl_hetatm = ignore_nucl_hetatm
        self.ignore_hetatm_hetatm = ignore_hetatm_hetatm
        self.ignore_h2o_h2o = ignore_h2o_h2o
        self.ignore_any_h2o = ignore_any_h2o
        self.ignore_multi_comps = ignore_multi_comps
        self.ignore_mixed_class = ignore_mixed_class

    @classmethod
    def new_pli_filter(cls, ignore_res_hetatm=False, ignore_hetatm_hetatm=False, ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-ligand interactions.

        The listed flags default to False here; any other constructor
        argument can be overridden through ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_hetatm=ignore_res_hetatm, ignore_hetatm_hetatm=ignore_hetatm_hetatm, ignore_any_h2o=ignore_any_h2o,
                   ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_ppi_filter(cls, ignore_res_res=False, ignore_inter_chain=False, ignore_intra_chain=False, ignore_any_h2o=False,
                       ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-protein interactions.

        The listed flags default to False here; any other constructor
        argument can be overridden through ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_res=ignore_res_res, ignore_inter_chain=ignore_inter_chain, ignore_intra_chain=ignore_intra_chain,
                   ignore_any_h2o=ignore_any_h2o, ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_pni_filter(cls, ignore_res_nucl=False, ignore_inter_chain=False, ignore_intra_chain=False, ignore_any_h2o=False,
                       ignore_self_inter=False, **kwargs):
        """Initialize the default filter for protein-nucleotide interactions.

        The listed flags default to False here; any other constructor
        argument can be overridden through ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_res_nucl=ignore_res_nucl, ignore_inter_chain=ignore_inter_chain, ignore_intra_chain=ignore_intra_chain,
                   ignore_any_h2o=ignore_any_h2o, ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_nni_filter(cls, ignore_nucl_nucl=False, ignore_inter_chain=False, ignore_intra_chain=False,
                       ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for nucleotide-nucleotide interactions.

        The listed flags default to False here; any other constructor
        argument can be overridden through ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_nucl_nucl=ignore_nucl_nucl, ignore_inter_chain=ignore_inter_chain, ignore_intra_chain=ignore_intra_chain,
                   ignore_any_h2o=ignore_any_h2o, ignore_self_inter=ignore_self_inter, **kwargs)

    @classmethod
    def new_nli_filter(cls, ignore_nucl_hetatm=False, ignore_hetatm_hetatm=False, ignore_any_h2o=False, ignore_self_inter=False, **kwargs):
        """Initialize the default filter for nucleotide-ligand interactions.

        The listed flags default to False here; any other constructor
        argument can be overridden through ``kwargs``.

        Returns
        -------
         : `InteractionFilter`
        """
        return cls(ignore_nucl_hetatm=ignore_nucl_hetatm, ignore_hetatm_hetatm=ignore_hetatm_hetatm, ignore_any_h2o=ignore_any_h2o,
                   ignore_self_inter=ignore_self_inter, **kwargs)

    def _rejects_chain_relation(self, is_same_chain):
        """Tell if the intra-/inter-chain flags reject a residue-residue or nucleotide-nucleotide pair."""
        if is_same_chain:
            return self.ignore_intra_chain
        return self.ignore_inter_chain

    def is_valid_pair(self, src_grp, trgt_grp):
        """Evaluate if a pair of atom groups are valid according to the flags defined in this class.

        Parameters
        ----------
        src_grp, trgt_grp : :class:`luna.mol.groups.AtomGroup`
            The two atom groups that would establish the interaction.

        Returns
        -------
         : bool
            False if the pair is a loop (same group, or one group contains
            the other) or if any active ``ignore_*`` flag applies to it.
            True otherwise.
        """

        # It will always ignore interactions involving the same atom groups.
        # Loops in the graph are not permitted and do not make any sense.
        if src_grp == trgt_grp:
            return False

        # It will always ignore interactions involving atoms and the group to which they belong.
        # For example, the centroid of an aromatic ring cannot interact with an atom
        # that belongs to the ring. It is a type of loop.
        if src_grp.contain_group(trgt_grp) or trgt_grp.contain_group(src_grp):
            return False

        # If one of the groups contains atoms from different compounds.
        has_multi_comps = (len(src_grp.compounds) > 1 or len(trgt_grp.compounds) > 1)
        if self.ignore_multi_comps and has_multi_comps:
            return False

        # If one of the groups contains compounds from different classes as, for instance, residue and ligand.
        # It means that compounds from different classes are covalently bonded to each other.
        has_any_mixed = (src_grp.is_mixed() or trgt_grp.is_mixed())
        if self.ignore_mixed_class and has_any_mixed:
            return False

        # It ignores interactions involving the same compounds if required.
        # As each group may have atoms from different compounds, we can check if there is at least
        # one common compound between the two groups. Remember that if two or more compounds exist in a group,
        # it means that these compounds are covalently bonded and should be considered the same compound.
        # For example: a carbohydrate can be expressed in a PDB as its subparts:
        #      E.g.: GLC + GLC = CBI
        # The same applies to any group formed after covalently bonding a residue to a hetatm (ligand or non-standard
        # amino acid represented as hetatm).
        is_same_compounds = len(src_grp.compounds.intersection(trgt_grp.compounds)) >= 1
        if self.ignore_self_inter and is_same_compounds:
            return False

        # Check if two groups contain the same chains and if both of them contain only one chain. The second condition
        # removes groups containing residues of different chains as may occur due to disulfide bonds.
        # Note, however, that this flag will be used only as a filter for intra-interactions in protein/RNA/DNA chains.
        src_chains = src_grp.get_chains()
        is_same_chain = src_chains == trgt_grp.get_chains() and len(src_chains) == 1

        # Filters for residue-residue interactions if required: either all of them,
        # or only the intra-chain / inter-chain ones.
        is_res_res = (src_grp.is_residue() and trgt_grp.is_residue())
        if is_res_res and (self.ignore_res_res or self._rejects_chain_relation(is_same_chain)):
            return False

        # It ignores residue-nucleic acid interactions if required.
        is_res_nucl = ((src_grp.is_residue() and trgt_grp.is_nucleotide())
                       or (src_grp.is_nucleotide() and trgt_grp.is_residue()))
        if self.ignore_res_nucl and is_res_nucl:
            return False

        # It ignores residue-ligand interactions if required.
        is_res_hetatm = ((src_grp.is_residue() and trgt_grp.is_hetatm())
                         or (src_grp.is_hetatm() and trgt_grp.is_residue()))
        if self.ignore_res_hetatm and is_res_hetatm:
            return False

        # Filters for nucleic acid-nucleic acid interactions if required: either all of them,
        # or only the intra-chain / inter-chain ones (RNA/DNA chains).
        is_nucl_nucl = (src_grp.is_nucleotide() and trgt_grp.is_nucleotide())
        if is_nucl_nucl and (self.ignore_nucl_nucl or self._rejects_chain_relation(is_same_chain)):
            return False

        # It ignores nucleic acid-ligand interactions if required.
        is_nucl_hetatm = ((src_grp.is_nucleotide() and trgt_grp.is_hetatm())
                          or (src_grp.is_hetatm() and trgt_grp.is_nucleotide()))
        if self.ignore_nucl_hetatm and is_nucl_hetatm:
            return False

        # It ignores ligand-ligand interactions if required.
        is_hetatm_hetatm = (src_grp.is_hetatm() and trgt_grp.is_hetatm())
        if self.ignore_hetatm_hetatm and is_hetatm_hetatm:
            return False

        # It ignores interactions of other compound types with water.
        # Keeping them enables the possibility of identifying water-bridged interactions.
        # Eg: residue-water, ligand-water = residue -- water -- ligand.
        is_any_h2o = (src_grp.is_water() or trgt_grp.is_water())
        if self.ignore_any_h2o and is_any_h2o:
            return False

        # It ignores interactions involving two waters if required.
        # If kept, it will produce water-bridged interactions of multiple levels.
        # Eg: residue -- h2o -- h2o -- ligand, residue -- residue -- h2o -- h2o -- ligand.
        is_h2o_h2o = (src_grp.is_water() and trgt_grp.is_water())
        if self.ignore_h2o_h2o and is_h2o_h2o:
            return False

        return True


class BindingModeCondition:

    r"""Define binding mode conditions to filter interactions.

    Parameters
    ----------

    condition : str
        A string defining which chains, compounds, or atoms should be accepted.
        If ``condition`` is the wildcard '*', then all chains, compounds, and atoms will be considered valid.
        Otherwise, ``condition`` should have the format '<CHAIN ID>/<COMPOUND NAME>/<COMPOUND NUMBER>/<ATOM>'.
        Wildcards are accepted for each one of these fields.
        The condition is converted to upper case before being parsed.
        For example:

            * '\*/HIS/\*/\*': represents all histidines' atoms from all chains.
            * 'A/CBL/\*/\*' represents all ligands named CBL from chain A.
            * 'B/HIS/\*/N\*' represents all histidines' nitrogens from chain B.

    Attributes
    ----------
    accept_all : bool
        If True, all chains, compounds, and atoms will be considered valid.
    accept_all_chains : bool
        If True, all chains will be considered valid.
    accept_all_comps : bool
        If True, all compound names will be considered valid.
    accept_all_comp_nums : bool
        If True, all compound numbers (residue sequence number in the PDB format) will be considered valid.
    accept_all_atoms : bool
        If True, all atoms will be considered valid.
    chain_id : str or None
        If provided, accept only chains whose id matches ``chain_id``.
    comp_name : str or None
        If provided, accept only compounds whose name matches ``comp_name``.
    comp_num : int or None
        If provided, accept only compounds whose sequence number matches ``comp_num``.
    comp_icode : str or None
        If provided, accept only compounds whose insertion code matches ``comp_icode``.
    atom : str or None
        If provided, accept only atoms whose name matches ``atom``.
        A value ending with '*' is matched against the atom element instead.

    Raises
    ------
    IllegalArgumentError
        If the compound number field is neither a wildcard nor an integer
        optionally followed by an insertion code.
    ValueError
        If ``condition`` is not '*' and does not have exactly four
        '/'-separated fields.
    """

    def __init__(self, condition):
        self.accept_all = False
        self.accept_all_chains = False
        self.accept_all_comps = False
        self.accept_all_comp_nums = False
        self.accept_all_atoms = False

        self.chain_id = None
        self.comp_name = None
        self.comp_num = None
        self.comp_icode = None
        self.atom = None

        # Keep the condition exactly as informed by the user for __repr__.
        self._condition_repr = condition

        self._parse_condition(condition.upper())

    def _parse_condition(self, condition):
        """Fill in this object's attributes from an upper-cased condition string."""
        # Accept everything.
        if condition == "*":
            self.accept_all = True
            return

        chain, comp_name, comp_num, atom = condition.split("/")

        if chain == "*":
            self.accept_all_chains = True
        else:
            self.chain_id = chain

        if comp_name == "*":
            self.accept_all_comps = True
        else:
            self.comp_name = comp_name

        if comp_num == "*":
            self.accept_all_comp_nums = True
        else:
            # Separate ligand number from insertion code.
            matched = REGEX_RESNUM_ICODE.match(comp_num)
            if not matched:
                raise IllegalArgumentError("The compound number and its insertion code (if applicable) '%s' is invalid. "
                                           "It must be an integer followed by one insertion code character when applicable."
                                           % comp_num)

            comp_num = matched.group(1)
            # Validate with an explicit conversion instead of ``assert``,
            # which is stripped when Python runs with -O.
            try:
                comp_num = int(comp_num)
            except ValueError:
                raise IllegalArgumentError("The informed compound number '%s' is invalid. It must be an integer." % str(comp_num))

            self.comp_num = comp_num
            # An empty (or missing) insertion code is stored as None.
            self.comp_icode = matched.group(2) or None

        if atom == "*":
            self.accept_all_atoms = True
        else:
            self.atom = atom

    def is_valid(self, atm_grp):
        """Check if an atom group is valid or not based on this condition.

        The group is valid as soon as one of its atoms satisfies the chain,
        compound name, compound number, and atom requirements at once.

        Parameters
        ----------
        atm_grp : :class:`luna.mol.groups.AtomGroup`

        Returns
        -------
         : bool
        """

        # Accept everything.
        if self.accept_all:
            return True

        # Accept everything.
        if self.accept_all_chains and self.accept_all_comps and self.accept_all_comp_nums and self.accept_all_atoms:
            return True

        # Tries to identify the first valid compound in the atom group.
        for atm in atm_grp.atoms:
            comp = atm.parent

            # Each field starts as valid only if its wildcard was informed.
            is_chain_valid = self.accept_all_chains
            is_comp_valid = self.accept_all_comps
            is_comp_num_valid = self.accept_all_comp_nums
            is_atom_valid = self.accept_all_atoms

            if self.chain_id is not None and self.chain_id == comp.parent.id:
                is_chain_valid = True

            if self.comp_name is not None and self.comp_name == comp.resname:
                is_comp_valid = True

            if self.comp_num is not None and self.comp_num == comp.id[1]:
                # A blank insertion code in the compound id is compared as None.
                icode = None if comp.id[2].strip() == "" else comp.id[2]

                if self.comp_icode == icode:
                    is_comp_num_valid = True

            if self.atom is not None:
                # Verify element equality.
                if self.atom.endswith("*"):
                    elem = self.atom.rstrip("*")

                    if elem == atm.element:
                        is_atom_valid = True

                # Verify atom name equality.
                elif self.atom == atm.name:
                    is_atom_valid = True

            if is_chain_valid and is_comp_valid and is_comp_num_valid and is_atom_valid:
                return True

        return False

    def __repr__(self):
        return "<BindingModeCondition: %s>" % self._condition_repr


class BindingModeFilter:

    """Filter interactions based on a set of binding mode conditions.

    Parameters
    ----------
    config : dict of {str : iterable}
        A dict defining binding modes and how interactions should be validated.
        Each key represents an interaction type and values are an iterable of `BindingModeCondition` instances.
        Keys are looked up with the lower-cased interaction type. The special keys
        'aromatic stacking' (any type listed in ``AROMATIC_STACKINGS``) and '*'
        (any other type) are used as fallbacks, in this order.
    """

    def __init__(self, config):
        self.config = config

    @staticmethod
    def _parse_accepted_values(params):
        """Resolve the options of one configuration section into condition strings.

        Parameters
        ----------
        params : dict of {str : str}
            The raw options of a section. Only 'accept_all' and 'accept_only'
            are read, and both are parsed with :func:`ast.literal_eval`.

        Returns
        -------
         : list or tuple of str
            ``["*"]`` if everything is accepted, the parsed 'accept_only'
            sequence otherwise, or an empty list if nothing is accepted.
        """
        accept_all = False
        if "accept_all" in params:
            accept_all = literal_eval(params["accept_all"])
            if accept_all is True:
                return ["*"]

        # This must be an independent ``if``: chaining it as ``elif`` would
        # silently discard 'accept_only' whenever 'accept_all' is also present,
        # even when set to False.
        if "accept_only" in params and not accept_all:
            values = literal_eval(params["accept_only"])

            # A single bare string would otherwise be handled character by character.
            if isinstance(values, str):
                values = [values]

            # One wildcard makes any other condition redundant.
            if "*" in values:
                return ["*"]
            return values

        return []

    @classmethod
    def from_config_file(cls, config_file):
        """Initialize from a configuration file.

        Each section name is lower-cased and used as an interaction type.
        If 'accept_all' is True, every interaction of that type is accepted.
        Otherwise, the conditions in 'accept_only' (if any) are used.
        Sections providing neither accept no interaction.

        Parameters
        ----------
        config_file : str
            The configuration file pathname.

        Returns
        -------
         : `BindingModeFilter`

        Examples
        --------

        It follows an example of a configuration file::

            ; To configure an interaction type, create a new line and define the interaction: [New interaction].
            ; Then you can define whether or not all interactions must be accepted by setting 'accept_all' to True or False.

            ; If you want to specify binding modes, use the variable 'accept_only', which expects a list of strings
            ; in the format: <CHAIN ID>/<COMPOUND NAME>/<COMPOUND NUMBER>/<ATOM>
            ; Wildcards are accepted for the expected fields.
            ; For example, "*/HIS/*/*" represents all histidines' atoms from all chains.
            ;               "A/CBL/*/*" represents all ligands named CBL from chain A.
            ;               "B/HIS/*/N*" represents all histidines' nitrogens from chain B.

            [Hydrogen bond]
            accept_only=["A/LYS/245/*", "*/HIS/*/*"]

            [Hydrophobic]
            accept_all=True

            [Cation-pi]
            accept_only=["*"]
            accept_all=False

            [Weak hydrogen bond]
            accept_all=False
            accept_only=["*/THR/434/O*"]

            [Face-to-edge pi-stacking]
            accept_all=False

            [Aromatic stacking]
            accept_all=True

            [*]
            accept_all=False
        """

        filtering_config = {}

        config = Config(config_file)

        for inter_type in config.sections():
            params = config.get_section_map(inter_type)
            values = cls._parse_accepted_values(params)

            filtering_config[inter_type.lower()] = [BindingModeCondition(condition) for condition in values]

        return cls(filtering_config)

    def is_valid(self, inter):
        """Check if an interaction is valid or not based on this binding mode configuration.

        Parameters
        ----------
        inter : :class:`luna.interaction.type.InteractionType`

        Returns
        -------
         : bool
            True if the source or the target atom group satisfies at least one
            condition configured for the interaction type. False otherwise,
            including when no configuration applies to the interaction type.
        """
        inter_type = inter.type.lower()

        if inter_type in self.config:
            pass

        # Generic fallback for all the pi-stacking variants.
        elif "aromatic stacking" in self.config and inter_type in AROMATIC_STACKINGS:
            inter_type = "aromatic stacking"

        # Generic fallback for any interaction type.
        elif "*" in self.config:
            inter_type = "*"

        else:
            return False

        return any(condition.is_valid(inter.src_grp) or condition.is_valid(inter.trgt_grp)
                   for condition in self.config[inter_type])