Source code for luna.mol.features
from collections import defaultdict
# Open Babel
from openbabel.openbabel import OBMol, OBSmartsPattern
from openbabel.pybel import Molecule as PybelMol
# RDKit
from rdkit.Chem import Mol as RDMol
from luna.util import stringcase as case
from luna.util.exceptions import MoleculeObjectTypeError
from luna.wrappers.base import MolWrapper
import logging
logger = logging.getLogger()
[docs]class ChemicalFeature():
"""Define chemical features as for example pharmacophore properties.
Parameters
----------
name : str
The chemical feature name.
"""
def __init__(self, name):
self.name = name
[docs] def format_name(self, case_func="sentencecase"):
"""Convert chemical feature names to another string case.
Parameters
----------
name : str
The name of a string case function from :py:mod:`luna.util.stringcase`.
"""
func = getattr(case, case_func)
return func(self.name)
# Special methods
def __repr__(self):
return "<Feature=%s>" % self.name
def __eq__(self, other):
"""Overrides the default implementation"""
if isinstance(self, other.__class__):
return self.name == other.name
return False
def __ne__(self, other):
"""Overrides the default implementation"""
return not self.__eq__(other)
def __lt__(self, other):
return self.name < other.name
def __hash__(self):
return hash(self.name)
[docs]class OBMolChemicalFeature:
"""Mimic :class:`rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeature` for Open Babel.
Parameters
----------
family : str
The family name, which is the term used by RDKit for chemical features.
atom_ids : list
List of atom identifiers.
"""
def __init__(self, family, atom_ids):
self.family = family
self.atom_ids = atom_ids
[docs] def GetAtomIds(self):
"""Get the IDs of the atoms that participate in the feature."""
return self.atom_ids
[docs] def GetFamily(self):
"""Get the family to which the feature belongs (e.g., donor, acceptor, etc.)"""
return self.family
[docs]class FeatureExtractor:
"""Perceive chemical features from molecules.
Parameters
----------
feature_factory : :class:`~rdkit.Chem.rdMolChemicalFeatures.MolChemicalFeatureFactory`
An RDKit feature factory.
Examples
--------
First, let's read a molecule (glutamine).
>>> from luna.wrappers.base import MolWrapper
>>> mol = MolWrapper.from_smiles("N[C@@H](CCC(N)=O)C(O)=O").unwrap()
Now, create a feature factory and instantiate a new `FeatureExtractor` object.
>>> from luna.util.default_values import ATOM_PROP_FILE
>>> from rdkit.Chem import ChemicalFeatures
>>> from luna.mol.features import FeatureExtractor
>>> feature_factory = ChemicalFeatures.BuildFeatureFactory(ATOM_PROP_FILE)
>>> feature_extractor = FeatureExtractor(feature_factory)
Finally, you can extract features by group or atom.
>>> features = feature_extractor.get_features_by_atoms(mol)
>>> features = feature_extractor.get_features_by_groups(mol)
"""
def __init__(self, feature_factory):
self.feature_factory = feature_factory
[docs] def get_features_by_atoms(self, mol_obj, atm_map=None):
"""Perceive chemical features from the molecule ``mol_obj`` by atom.
Parameters
----------
mol_obj : :class:`~luna.wrappers.base.MolWrapper`, :class:`rdkit.Chem.rdchem.Mol`, or :class:`openbabel.pybel.Molecule`
The molecule.
atm_map : dict
A dictionary to map an atom's index to a different value.
Returns
-------
atm_features : dict of {int : list of `ChemicalFeature`}
Chemical features by atoms that are represented by their index or by a value from ``atm_map``.
"""
if isinstance(mol_obj, MolWrapper):
mol_obj = mol_obj.unwrap()
if isinstance(mol_obj, RDMol):
perceived_features = self.feature_factory.GetFeaturesForMol(mol_obj)
elif isinstance(mol_obj, OBMol):
perceived_features = self._get_features_from_obmol(mol_obj)
elif isinstance(mol_obj, PybelMol):
perceived_features = self._get_features_from_obmol(mol_obj.OBMol)
else:
logger.exception("Objects of type '%s' are not currently accepted." % mol_obj.__class__)
raise MoleculeObjectTypeError("Objects of type '%s' are not currently accepted." % mol_obj.__class__)
atm_features = defaultdict(set)
for f in perceived_features:
for atm_idx in f.GetAtomIds():
tmp_atm_idx = atm_idx
if atm_map is not None:
if atm_idx in atm_map:
tmp_atm_idx = atm_map[atm_idx]
else:
logger.warning("There is no corresponding mapping to the index '%d'. It will be ignored." % atm_idx)
feature = ChemicalFeature(f.GetFamily())
atm_features[tmp_atm_idx].add(feature)
return atm_features
[docs] def get_features_by_groups(self, mol_obj, atm_map=None):
"""Perceive chemical features from the molecule ``mol_obj`` by atom groups.
Parameters
----------
mol_obj : :class:`~luna.wrappers.base.MolWrapper`, :class:`rdkit.Chem.rdchem.Mol`, or :class:`openbabel.pybel.Molecule`
The molecule.
atm_map : dict
A dictionary to map an atom's index to a different value.
Returns
-------
grp_features : dict of {str : dict}
Chemical features by groups. Each dictionary value is defined as follows:
* ``atm_ids`` (list): list of atoms represented by their index or by a value from ``atm_map`` ;
* ``features`` (list of `ChemicalFeature`): list of chemical features.
"""
if isinstance(mol_obj, MolWrapper):
mol_obj = mol_obj.unwrap()
if isinstance(mol_obj, RDMol):
perceived_features = self.feature_factory.GetFeaturesForMol(mol_obj)
elif isinstance(mol_obj, OBMol):
perceived_features = self._get_features_from_obmol(mol_obj)
elif isinstance(mol_obj, PybelMol):
perceived_features = self._get_features_from_obmol(mol_obj.OBMol)
else:
logger.exception("Objects of type '%s' are not currently accepted." % mol_obj.__class__)
raise MoleculeObjectTypeError("Objects of type '%s' are not currently accepted." % mol_obj.__class__)
grp_features = {}
for f in perceived_features:
atm_ids = sorted(list(f.GetAtomIds()))
if atm_map is not None:
tmp_atm_ids = []
for atm_id in atm_ids:
if atm_id in atm_map:
tmp_atm_ids.append(atm_map[atm_id])
else:
logger.warning("There is no corresponding mapping to the index '%d'. It will be ignored." % atm_id)
atm_ids = tmp_atm_ids
key = ','.join([str(x) for x in atm_ids])
if key in grp_features:
grp_obj = grp_features[key]
else:
grp_obj = {"atm_ids": atm_ids, "features": []}
features = set(grp_obj["features"])
feature = ChemicalFeature(f.GetFamily())
features.add(feature)
grp_obj["features"] = list(features)
grp_features[key] = grp_obj
return grp_features
def _get_features_from_obmol(self, ob_mol):
grp_features = defaultdict(set)
for key, smarts in self.feature_factory.GetFeatureDefs().items():
grp_type = key.split(".")[0]
ob_smart = OBSmartsPattern()
ob_smart.Init(str(smarts))
ob_smart.Match(ob_mol)
matches = [x for x in ob_smart.GetMapList()]
if matches:
for match in matches:
cur_ids = set(match)
exists = False
remove_ids = []
for ids in grp_features[grp_type]:
ids = set(ids)
# If there is any other group of the same type that already contains the current atoms.
if cur_ids.issubset(ids):
exists = True
# It just need to find one bigger group.
break
# If the current group contains atoms from already added group atoms.
elif ids.issubset(cur_ids):
exists = True
# Find all smaller groups to remove them.
remove_ids.append(tuple(ids))
if exists is False:
grp_features[grp_type].add(tuple(cur_ids))
elif remove_ids:
for ids in remove_ids:
grp_features[grp_type].remove(ids)
grp_features[grp_type].add(tuple(cur_ids))
return [OBMolChemicalFeature(family, atom_ids) for family in grp_features for atom_ids in grp_features[family]]