# Source code for luna.mol.standardiser

from enum import Enum, auto

from luna.wrappers.base import BondType


import logging

# Module-wide logger used by the standardisation rules for their debug messages.
# Calling getLogger() without a name returns the *root* logger.
# NOTE(review): libraries usually prefer logging.getLogger(__name__); confirm
# whether the project's logging setup relies on the root logger before changing.
logger = logging.getLogger()


# Element symbols that the standardisation rules treat as metals.
# The order is kept exactly as it was so that METAL_ATOM stays textually identical.
METALS = [
    # Alkali metals.
    "Li", "Na", "K", "Rb", "Cs", "Fr",
    # Alkaline earth metals.
    "Be", "Mg", "Ca", "Sr", "Ba", "Ra",
    # First-row transition metals.
    "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn",
    # Post-transition metals (period 3/4).
    "Al", "Ga",
    # Second-row transition metals.
    "Y", "Zr", "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd",
    # Post-transition metals (period 5).
    "In", "Sn",
    # Third-row transition metals.
    "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg",
    # Post-transition metals (period 6).
    "Tl", "Pb", "Bi",
]

# SMARTS atom expression "[Li,Na,...,Bi]": a single atom that is any one of the
# metals above. It is interpolated into the SMARTS patterns of the rules.
METAL_ATOM = f"[{','.join(METALS)}]"


class ResidueStandard(Enum):
    """An enumeration of protonation states for standard residues.

    The member names follow the Amber residue naming convention. Values are
    assigned with :func:`enum.auto`, so only the member identity (not the
    integer value) should be relied upon.
    """

    # TODO: add other residues to the standard

    # HID as in Amber: histidine with hydrogen on the delta nitrogen.
    HID = auto()
    # HIE as in Amber: histidine with hydrogen on the epsilon nitrogen.
    HIE = auto()
    # HIP as in Amber: histidine with hydrogens on both nitrogens; this is positively charged.
    HIP = auto()
    # CYS as in Amber: protonated cysteine.
    CYS = auto()
    # CYM as in Amber: deprotonated cysteine.
    CYM = auto()
# TODO: create a new MetalStandardiser or LigandStandardiser for common ligands.
class ResiduesStandardiser:
    """Standardize residues.

    Parameters
    ----------
    break_metal_bonds : bool
        If True, break covalent bonds with metals and correct the topology
        of the involved atoms.
    his_type : {:class:`~ResidueStandard.HID`, :class:`~ResidueStandard.HIE`, :class:`~ResidueStandard.HIP`}
        Define which histidine protonation state to use.
        Currently, this option is not used yet.

    Attributes
    ----------
    found_metals : dict
        Reset to an empty dict at the start of every ``standardise()`` call.
    removed_atoms : list
        Reset to an empty list at the start of every ``standardise()`` call.
    """

    # TODO: create filters for specific patterns like HIS tautomers or CYS:S- vs CYS:SH
    # TODO: some residues still have no WARNING for unexpected atoms bound to the atoms N, O, S.
    # TODO: I need to verify for HIS tautomers and metals around it, which will influence the Hydrogen placement.
    # TODO: Metals will influence where the Hydrogen should be placed in TYR.

    def __init__(self, break_metal_bonds=False, his_type=ResidueStandard.HIE):
        self.break_metal_bonds = break_metal_bonds
        # TODO: not being used
        self.his_type = his_type

        # Declared here so the attributes exist even before the first call to
        # standardise(), which re-initialises both of them on every run.
        self.found_metals = {}
        self.removed_atoms = []
[docs] def standardise(self, atom_pairs): """Standardize residues. Parameters ---------- atom_pairs : iterable of tuple of (:class:`~luna.wrappers.base.MolWrapper`, :class:`~luna.MyBio.PDB.Atom.Atom`) The atoms to standardise. """ pdb_map = {atm_obj.get_idx(): pdb_atm for atm_obj, pdb_atm in atom_pairs} self.found_metals = {} self.removed_atoms = [] for atm_obj, pdb_atm in atom_pairs: # Atom: N # Sanity check for all N nitrogens with invalid bond types. # # E.g.: 3QQL:A:GLY:11. if pdb_atm.name == "N": # Non-N-terminal N where the carbonyl O is involved in a covalent bond with an atom not comprised # by the metal rules. So, it's better not to update anything here. if atm_obj.matches_smarts(f"N-,=[C;X4,X3]([#6])[OX2;$(O([C;X4,X3])[!#1]);!$(O{METAL_ATOM})]"): logger.debug("While checking for inconsistencies in the atom N of the residue %s, it was found an unexpected " "atom covalently bound to the neighboring atom O. So, N will not be amended." % pdb_atm.parent) continue # Non-N-terminal N where the carbonyl O is involved in a covalent bond with a metal. # However, 'break_metal_bonds' was set to False and there isn't anything to be done so. elif atm_obj.matches_smarts(f"N-,=[C;X4,X3]([#6])[OX2]{METAL_ATOM}") and self.break_metal_bonds is False: logger.debug("While checking for inconsistencies in the atom N of the residue %s, it was found a metal " "covalently bound to its atom O. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) continue # Any non-N-terminal N not from PRO. if (pdb_atm.parent.resname != "PRO" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) # Any N-terminal N not from PRO. 
elif (pdb_atm.parent.resname != "PRO" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 1 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=1, implicit_h_count=3) # Non-N-terminal N from PRO. elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=0, in_ring=True) # N-terminal N from PRO. elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 1 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=1, implicit_h_count=2, in_ring=True) # Atom: C. # Sanity check for all C carbons with invalid bond types. elif pdb_atm.name == "C": fix_atom = False # If it is bonded to 3 heavy atoms. if atm_obj.get_neighbors_number(True) == 3: # Main chain carbonyl O involved in a metallic coordination. # # It identifies only the form generated by Open Babel, where the double bond between O and C becomes # a single bond, and it adds an additional single bond with a Metal. # # Note that if the list of atoms is sorted in a way that O comes first than C in this loop, then the # double bond would have already been amended and the bond with the metal would have already been removed. # # E.g.: 6JWU:A:GLU:42. # 1DKA:A:PRO:99. 
# if atm_obj.matches_smarts(f"[C;X4,X3]([$([OX2]{METAL_ATOM})])([#6])(-,=[N,O])"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: fix_atom = True else: logger.debug("While checking for inconsistencies in the atom C of the residue %s, it was found a metal " "covalently bound to its atom O. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the neighboring oxygen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix the C. elif (atm_obj.matches_smarts(f"[C;X4,X3]([OX2;$(O([C;X4,X3])[!#1]);!$(O{METAL_ATOM})])([#6])(-,=[N,O])") is False and atm_obj.matches_smarts("[CX3](=[OX1])([#6])[NX3]") is False and atm_obj.matches_smarts("[CX3](=[OX1])([#6])[O;H1,H0&-1]") is False): fix_atom = True # Alert for unexpected atoms bound to the oxygen O. elif atm_obj.matches_smarts(f"[C;X4,X3]([OX2;$(O([C;X4,X3])[!#1]);!$(O{METAL_ATOM})])([#6])(-,=[N,O])"): logger.debug("While checking for inconsistencies in the atom C of the residue %s, it was found an unexpected " "atom covalently bound to its atom O. So, C will not be amended." % pdb_atm.parent) if fix_atom: bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "O": bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0) # If it is bonded only to 2 heavy atoms, then it is a C-terminal with a missing OXT or there are missing residues. elif atm_obj.get_neighbors_number(True) == 2: logger.debug("The residue %s seems not to have any successor residue. " "It may be the last in the chain sequence or there are missing residues." % pdb_atm.parent) # Main chain carbonyl O involved in a metallic coordination. 
# # It identifies only the form generated by Open Babel, where the double bond between O and C becomes # a single bond, and it adds an additional single bond with a Metal. # # Note that if the list of atoms is sorted in a way that O comes first than C in this loop, then the # double bond would have already been amended and the bond with the metal would have already been removed. # # E.g.: 6JWU:A:GLU:42. # 1DKA:A:THR:98. # if atm_obj.matches_smarts(f"[CX4]([$([OX2]{METAL_ATOM})])([#6])"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: fix_atom = True else: logger.debug("While checking for inconsistencies in the atom C of the residue %s, it was found a metal " "covalently bound to its atom O. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the neighboring oxygen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix the C. elif (atm_obj.matches_smarts(f"[CX4]([OX2;$(O([CX4])[!#1]);!$(O{METAL_ATOM})])([#6])") is False and atm_obj.matches_smarts("[CX3](=[OX1])([#6])") is False): fix_atom = True # Alert for unexpected atoms bound to the oxygen O. elif atm_obj.matches_smarts(f"[CX4]([OX2;$(O([CX4])[!#1]);!$(O{METAL_ATOM})])([#6])"): logger.debug("While checking for inconsistencies in the atom C of the residue %s, it was found an unexpected " "atom covalently bound to its atom O. So, C will not be amended." % pdb_atm.parent) if fix_atom: bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "O": bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1) # Atom: CA # Sanity check for all CA with invalid bond types. # E.g.: 3QQL:A:GLY:11. 
elif pdb_atm.name == "CA": # Any CA not from GLY/PRO. if (pdb_atm.parent.resname not in ["GLY", "PRO"] and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1, in_ring=True) # CA from GLY. elif (pdb_atm.parent.resname == "GLY" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # Atom: CB # Sanity check for all CB carbons with invalid bond types. # E.g.: 1THA:B:GLN:63. elif pdb_atm.name == "CB": # Any CB not from ALA, ILE, PRO, THR, VAL. if (pdb_atm.parent.resname not in ["ALA", "ILE", "PRO", "THR", "VAL"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # CB from ILE, THR, VAL. 
elif (pdb_atm.parent.resname in ["ILE", "THR", "VAL"] and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) # CB from ALA. elif (pdb_atm.parent.resname == "ALA" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) # CB from PRO. elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2, in_ring=True) # Atom: CG # Sanity check for all CG carbons with invalid bond types. elif pdb_atm.name == "CG": # E.g.: 1THA:B:GLN:63, 6COD:A:LYS:121. 
if (pdb_atm.parent.resname in ["ARG", "GLN", "GLU", "LYS", "MET"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2, in_ring=True) elif (pdb_atm.parent.resname in ["ASN", "ASP"] and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): fix_atom = False # ASN/ASP with metallic coordination perceived as covalent bond. # # It identifies only the form generated by Open Babel, where the double bond between OD1 and CG becomes # a single bond, and it adds an additional single bond with a Metal. # # Note that if the list of atoms is sorted in a way that OD1 comes first than CG in this loop, then the # double bond would have already been amended and the bond with the metal would have already been removed. # # E.g.: 6JWU:B:ASP:210, 4FVR:A:ASN:678 # if (atm_obj.matches_smarts(f"[CX4](N)([#6])[OX2]{METAL_ATOM}") or atm_obj.matches_smarts(f"[C;X4,X3](-,=[O])([#6])[OX2]({METAL_ATOM})")): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: fix_atom = True else: logger.debug("While checking for inconsistencies in the atom CG of the residue %s, it was found a metal " "covalently bound to its atom OD1. 
However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the neighboring oxygen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix the CG. elif (atm_obj.matches_smarts("[CX4](N)([#6])[OX2][!#1]") is False and atm_obj.matches_smarts("[C;X4,X3](-,=[O])([#6])[OX2]([!#1])") is False): fix_atom = True # Alert for unexpected atoms bound to the neighboring nitrogen/oxygen. elif atm_obj.matches_smarts("[C;X4,X3](-,=[N,O])([#6])[OX2][!#1]"): logger.debug("While checking for inconsistencies in the atom CG of the residue %s, it was found an unexpected " "atom covalently bound to a neighboring nitrogen/oxygen. So, CG will not be amended." % pdb_atm.parent) if fix_atom: bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Redundant rules: OD1 and OD2 section already fixes the bonds with CG. if partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "OD1": # If the OD1 atom is bound to the below metals, update the CG-OD1 bond to CG=OD1. if partner_obj.matches_smarts(f"[OX2]({METAL_ATOM})"): bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) # Redundant rules: OD1 and OD2 section already fixes the bonds with CG. elif partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "OD2": # If the second oxygen (OD1) is bound to some atom not comprised by the expected metals, but OD2 is, # then update the CG-OD2 bond to CG=OD2. 
if partner_obj.matches_smarts(f"[OX2]({METAL_ATOM})[CX4]([#6])" f"[OX2;$(O([CX4])[!#1]);!$(O{METAL_ATOM})]"): bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) # Any other atom bound to CG else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0) elif (pdb_atm.parent.resname == "LEU" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) elif (pdb_atm.parent.resname in ["HIS", "PHE", "TRP", "TYR"] and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Single bonds with CB (all) if pdb_map[partner_obj.get_idx()].name == "CB": bond_types.append((bond_obj, BondType.SINGLE)) # Single bond with ND1 (only in HIS) elif pdb_map[partner_obj.get_idx()].name == "ND1": bond_types.append((bond_obj, BondType.SINGLE, True)) # Double bonds with CD1 (all, except HIS) elif pdb_map[partner_obj.get_idx()].name == "CD1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Bonds with CD2 elif pdb_map[partner_obj.get_idx()].name == "CD2": # Double bond as in HIS. if pdb_atm.parent.resname == "HIS": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds (all, except HIS) else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) # Atom: CG1 # Sanity check for all CG1 carbons with invalid bond types. 
elif pdb_atm.name == "CG1": # CG1 from ILE. if (pdb_atm.parent.resname == "ILE" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # CG1 from VAL. elif (pdb_atm.parent.resname == "VAL" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) # Atom: CG2 # Sanity check for all CG2 carbons with invalid bond types. elif (pdb_atm.name == "CG2" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) # Atom: CD # Sanity check for all CD carbons with invalid bond types. 
elif pdb_atm.name == "CD": if (pdb_atm.parent.resname in ["ARG", "LYS"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # E.g.: 4FVR:A:PRO:792 elif (pdb_atm.parent.resname == "PRO" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() is False or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2, in_ring=True) # E.g.: 1THA:B:GLN:63 # E.g.: 6JWU:A:GLU:42 containing metallic cordination. elif (pdb_atm.parent.resname in ["GLN", "GLU"] and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): fix_atom = False # GLN/GLU with metallic coordination perceived as covalent bond. # # It identifies only the form generated by Open Babel, where the double bond between OE1 and CD becomes # a single bond, and it adds an additional single bond with a Metal. # # Note that if the list of atoms is sorted in a way that OE1 comes first than CD in this loop, then the # double bond would have already been amended and the bond with the metal would have already been removed. # if (atm_obj.matches_smarts(f"[CX4](N)([#6])[OX2]{METAL_ATOM}") or atm_obj.matches_smarts(f"[C;X4,X3](-,=[O])([#6])[OX2]({METAL_ATOM})")): # If it is necessary to break covalent bonds with metals. 
if self.break_metal_bonds: fix_atom = True else: logger.debug("While checking for inconsistencies in the atom CD of the residue %s, it was found a metal " "covalently bound to its atom OE1. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the neighboring oxygen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix the CD. elif (atm_obj.matches_smarts("[CX4](N)([#6])[OX2][!#1]") is False and atm_obj.matches_smarts("[C;X4,X3](-,=[O])([#6])[OX2][!#1]") is False): fix_atom = True # Alert for unexpected atoms bound to the neighboring nitrogen/oxygen. elif atm_obj.matches_smarts("[C;X4,X3](-,=[N,O])([#6])[OX2][!#1]"): logger.debug("While checking for inconsistencies in the atom CD of the residue %s, it was found an unexpected " "atom covalently bound to a neighboring nitrogen/oxygen. So, CD will not be amended." % pdb_atm.parent) if fix_atom: bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Redundant rules: OE1 and OE2 section already fixes the bonds with CD. if partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "OE1": # If the OE1 atom is bound to the below metals, update the CD-OE1 bond to CD=OE1. if partner_obj.matches_smarts(f"[OX2]({METAL_ATOM})"): bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) # Redundant rules: OE1 and OE2 section already fixes the bonds with CD. elif partner_obj.get_idx() in pdb_map and pdb_map[partner_obj.get_idx()].name == "OE2": # If the second oxygen (OE1) is bound to some atom not comprised by the expected metals, but OE2 is, # then update the CD-OE2 bond to CD=OE2. 
if partner_obj.matches_smarts(f"[OX2]({METAL_ATOM})[CX4]([#6])" f"[OX2;$(O([CX4])[!#1]);!$(O{METAL_ATOM})]"): bond_types.append((bond_obj, BondType.DOUBLE)) else: bond_types.append((bond_obj, BondType.SINGLE)) # Any other atom bound to CD else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0) # Atom: CD1 # Sanity check for all CD1 carbons with invalid bond types. elif pdb_atm.name == "CD1": if (pdb_atm.parent.resname in ["ILE", "LEU"] and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) elif (pdb_atm.parent.resname in ["PHE", "TRP", "TYR"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bonds with CG if pdb_map[partner_obj.get_idx()].name == "CG": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CE1 and NE1 else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) # Atom: CD2 # Sanity check for all CD2 carbons with invalid bond types. 
elif pdb_atm.name == "CD2": if (pdb_atm.parent.resname == "LEU" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) elif (pdb_atm.parent.resname in ["HIS", "PHE", "TYR"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Bonds with CG if pdb_map[partner_obj.get_idx()].name == "CG": # Double bond with CG (only in HIS) if pdb_atm.parent.resname == "HIS": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CG (all, except HIS) else: bond_types.append((bond_obj, BondType.SINGLE, True)) # Double bonds with CE2 (all, except HIS) elif pdb_map[partner_obj.get_idx()].name == "CE2": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with NE2 (only HIS) elif pdb_map[partner_obj.get_idx()].name == "NE2": bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) elif (pdb_atm.parent.resname == "TRP" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bonds with CE2 if pdb_map[partner_obj.get_idx()].name == "CE2": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CG and CE3 else: 
bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) # Atom: CE # Sanity check for all CE carbons with invalid bond types. elif pdb_atm.name == "CE": # CE from LYS. if (pdb_atm.parent.resname == "LYS" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # CE from MET. elif (pdb_atm.parent.resname == "MET" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=3) # Atom: CE1 # Sanity check for all CE1 carbons with invalid bond types. 
elif pdb_atm.name == "CE1": if (pdb_atm.parent.resname in ["HIS", "PHE", "TYR"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Single bonds with CD1 (all, except HYS) if pdb_map[partner_obj.get_idx()].name == "CD1": bond_types.append((bond_obj, BondType.SINGLE, True)) # Double bonds with CZ (all, except HYS) elif pdb_map[partner_obj.get_idx()].name == "CZ": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Double bonds with ND1 (only HYS) elif pdb_map[partner_obj.get_idx()].name == "ND1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with NE2 (only HYS) elif pdb_map[partner_obj.get_idx()].name == "NE2": # CE1 - NE2 bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) # Atom: CE2 # Sanity check for all CE2 carbons with invalid bond types. 
elif pdb_atm.name == "CE2": if (pdb_atm.parent.resname in ["PHE", "TYR"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bonds with CD2 if pdb_map[partner_obj.get_idx()].name == "CD2": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CZ elif pdb_map[partner_obj.get_idx()].name == "CZ": bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) elif (pdb_atm.parent.resname == "TRP" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with CD2 if pdb_map[partner_obj.get_idx()].name == "CD2": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with NE1 and CZ2 else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) # Atom: CZ # Sanity check for all CZ carbons with invalid bond types. 
elif pdb_atm.name == "CZ": if (pdb_atm.parent.resname == "PHE" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with CE1 if pdb_map[partner_obj.get_idx()].name == "CE1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bond with CE2 else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) elif (pdb_atm.parent.resname == "TYR" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with CE1 if pdb_map[partner_obj.get_idx()].name == "CE1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bond with CE2 elif pdb_map[partner_obj.get_idx()].name == "CE2": bond_types.append((bond_obj, BondType.SINGLE, True)) # Single bond with OH else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) elif (pdb_atm.parent.resname == "ARG" and atm_obj.get_neighbors_number(True) == 3 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with NH2 if pdb_map[partner_obj.get_idx()].name == "NH2": bond_types.append((bond_obj, 
BondType.DOUBLE)) # Single bonds with NE and NH1 else: bond_types.append((bond_obj, BondType.SINGLE)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0) # Atom: CE3, CZ2, CZ3, CH2 # Sanity check for TRP carbons with invalid bond types. elif (pdb_atm.name in ["CE3", "CZ2", "CZ3", "CH2"] and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bonds if ((pdb_atm.name == "CE3" and pdb_map[partner_obj.get_idx()].name == "CZ3") or (pdb_atm.name == "CZ3" and pdb_map[partner_obj.get_idx()].name == "CE3") or (pdb_atm.name == "CZ2" and pdb_map[partner_obj.get_idx()].name == "CH2") or (pdb_atm.name == "CH2" and pdb_map[partner_obj.get_idx()].name == "CZ2")): bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) # Atom: ND1 # Sanity check for HIS:ND1 with invalid bond types. elif pdb_atm.name == "ND1": fix_atom = False # HIS with metallic coordination perceived as covalent bond. # # Sometimes, the aromatic ring in HIS becomes a simple ring, therefore we should check for any # type of nitrogen in a ring, i.e, aromatic or aliphatic. Note that the aromatic ring # will be fixed after removing the bond with the metal. # # E.g.: 1USN:A:HIS:179 # 1IUZ:A:HIS:37 # if atm_obj.matches_smarts(f"[#7;R]-{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) fix_atom = True else: logger.debug("While checking for inconsistencies in the atom ND1 of the residue %s, it was found a metal " "covalently bound to it. 
However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the nitrogen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix ND1. elif (atm_obj.matches_smarts("[#7;R]([#6])([#6])[!#1]") is False and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 2 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): fix_atom = True # Alert for unexpected atoms bound to the nitrogen ND1. elif atm_obj.matches_smarts("[#7;R]([#6])([#6])[!#1]"): logger.debug("While checking for inconsistencies in the atom ND1 of the residue %s, it was found an unexpected " "atom covalently bound to it. So, ND1 will not be amended." % pdb_atm.parent) if fix_atom: bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with CE1 if pdb_map[partner_obj.get_idx()].name == "CE1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CG else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) elif (pdb_atm.name == "ND1" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 2 or atm_obj.get_h_count() != 0 or atm_obj.is_aromatic() is False)): bond_types = [] for bond_obj in atm_obj.get_bonds(): partner_obj = bond_obj.get_partner_atom(atm_obj) if partner_obj.get_idx() in pdb_map: # Double bond with CE1 if pdb_map[partner_obj.get_idx()].name == "CE1": bond_types.append((bond_obj, BondType.DOUBLE, True)) # Single bonds with CG else: bond_types.append((bond_obj, BondType.SINGLE, True)) self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=0, is_aromatic=True) # Atom: ND2 # Sanity check for ASN:ND2 with invalid 
bond types. elif (pdb_atm.name == "ND2" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): # ASN with metallic coordination perceived as covalent bond. # # It identifies two forms: # - the firt consists of the usual form generated by Open Babel, where the double bond between # OD1 and CG becomes a single bond, and it adds an additional single bond with a Metal. # # - the second form may appear after correcting CG first, so the missing double bond would have already been # fixed. # # E.g.: 4FVR:A:ASN:678 # if atm_obj.matches_smarts(f"N[C;X4,X3]([#6])-,=[OX2]{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) else: logger.debug("While checking for inconsistencies in the atom ND2 of the residue %s, it was found a metal " "covalently bound to the atom OD1. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the oxygen is bound to something else not comprised in the previous rules, it is better not to update anything. # Otherwise, fix the ND2. elif atm_obj.matches_smarts("N[CX4]([#6])[OX2][!#1]") is False: self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # Alert for unexpected atoms bound to the oxygen OD1. elif atm_obj.matches_smarts("N[CX4]([#6])[OX2][!#1]"): logger.debug("While checking for inconsistencies in the atom ND2 of the residue %s, it was found an unexpected " "atom covalently bound to the oxygen OD1. So, ND2 will not be amended." % pdb_atm.parent) # Atom: NE # Sanity check for ARG:NE with invalid bond types. 
elif (pdb_atm.name == "NE" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) # Atom: NE1 # Sanity check for TRP:NE1 with invalid bond types. elif (pdb_atm.name == "NE1" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [(bond_obj, BondType.SINGLE, True) for bond_obj in atm_obj.get_bonds()] self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) # Atom: NE2 # Sanity check for NE2 nitrogens with invalid bond types. elif pdb_atm.name == "NE2": if pdb_atm.parent.resname == "HIS": # HIS with metallic coordination perceived as covalent bond. # # Sometimes, the aromatic ring in HIS becomes a simple ring, therefore we check for any # type of nitrogen in a ring, i.e, aromatic or aliphatic. Note that the aromatic ring # will be fixed after removing the bond with the metal. # # E.g.: 1USN:A:HIS:151 # 1USN:A:HIS:166 if atm_obj.matches_smarts(f"[#7;R]{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) bond_types = [(bond_obj, BondType.SINGLE, True) for bond_obj in atm_obj.get_bonds()] self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) else: logger.debug("While checking for inconsistencies in the atom NE2 of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." 
% pdb_atm.parent) # If the nitrogen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix NE2. elif (atm_obj.matches_smarts("[#7;R]([#6])([#6])[!#1]") is False and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 1 or atm_obj.is_aromatic() is False)): bond_types = [(bond_obj, BondType.SINGLE, True) for bond_obj in atm_obj.get_bonds()] self._fix_atom(atm_obj, bond_types=bond_types, charge=0, implicit_h_count=1, is_aromatic=True) # Alert for unexpected atoms bound to the nitrogen NE2. elif atm_obj.matches_smarts("[#7;R]([#6])([#6])[!#1]"): logger.debug("While checking for inconsistencies in the atom NE2 of the residue %s, it was found an unexpected " "atom covalently bound to it. So, NE2 will not be amended." % pdb_atm.parent) elif (pdb_atm.parent.resname == "GLN" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): # GLN with metallic coordination perceived as covalent bond. # # It identifies two forms: # - the firt consists of the usual form generated by Open Babel, where the double bond between # OE1 and CD becomes a single bond, and it adds an additional single bond with a Metal. # # - the second form may appear after correcting CD first, so the missing double bond would have already been # fixed. # if atm_obj.matches_smarts(f"N[C;X4,X3]([#6])-,=[OX2]{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) else: logger.debug("While checking for inconsistencies in the atom NE2 of the residue %s, it was found a metal " "covalently bound to the atom OE1. 
However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the oxygen is bound to something else not comprised in the previous rules, it is better not to update anything. # Otherwise, fix the NE2. elif atm_obj.matches_smarts("N[CX4]([#6])[OX2][!#1]") is False: self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # Alert for unexpected atoms bound to the oxygen OE1. elif atm_obj.matches_smarts("N[CX4]([#6])[OX2][!#1]"): logger.debug("While checking for inconsistencies in the atom NE2 of the residue %s, it was found an unexpected " "atom covalently bound to the oxygen OE1. So, NE2 will not be amended." % pdb_atm.parent) # Atom: NZ # Sanity check for LYS:NZ with invalid bond types. elif (pdb_atm.name == "NZ" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 1 or atm_obj.get_degree() != 4 or atm_obj.get_h_count() != 3 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=1, implicit_h_count=3) # Atom: NH1 # Sanity check for ARG:NH1 with invalid bond types. elif (pdb_atm.name == "NH1" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 3 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=2) # Atom: NH2 # Sanity check for ARG:NH2 with invalid bond types. elif (pdb_atm.name == "NH2" and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 4 or atm_obj.get_charge() != 1 or atm_obj.get_degree() != 3 or atm_obj.get_h_count() != 2 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.DOUBLE], charge=1, implicit_h_count=2) # Atom: O, OD1, and OE1 # Sanity check for OD1/OE1 oxygens with invalid bond types. 
elif pdb_atm.name in ["O", "OD1", "OE1"]: # Any O oxygen or ASN/ASP/GLN/GLU OD1/OE1 with metallic coordination perceived as covalent bond. # # It identifies two forms: # - the firt consists of the usual form generated by Open Babel where the double bonds between O and C, # OD1 and CG, or OE1 and CD become single bonds, while it adds an additional single bond with a Metal. # # - the second form may appear after correcting C/CG/CD first, so the missing double bond would have already been # fixed, but it makes the oxygens to have incorrect valence and degree, so we still need to fix them. # And of course, we still need to remove the bond with the metal. # # E.g.: 6JWU:A:GLU:42, 6JWU:B:ASP:210, 4FVR:A:ASN:678. # if (atm_obj.matches_smarts(f"[OX2]({METAL_ATOM})-,=[C;X4,X3]([#6])(-,=[N])") or atm_obj.matches_smarts(f"[OX2]({METAL_ATOM})-,=[C;X4,X3]([#6])(-,=[O])")): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) self._fix_atom(atm_obj, bond_types=[BondType.DOUBLE], charge=0, implicit_h_count=0) else: logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." % (pdb_atm.name, pdb_atm.parent)) # If the oxygen is bound to something else not comprised in the previous rules, it is better not to update anything. # Otherwise, fix the oxygen. # # Note that it doesn't check out the situation of the other oxygen (OXT or OD2/OE2 in ASP/GLU), because O/OD1/OE1 will # always have a double bond no matter the other oxygen has or not a bond with metals or other atoms. # # E.g.: 3QQL:A:GLY:11 contain incorrectly perceived bonds. 
# elif (atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 2 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.DOUBLE) is False or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.DOUBLE], charge=0, implicit_h_count=0) # Alert for unexpected atoms bound to the oxygens. elif atm_obj.matches_smarts(f"[OX2;$(O([C;X4,X3])[!#1]);!$(O{METAL_ATOM})]-,=[C;X4,X3]"): logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found an unexpected " "atom covalently bound to it. So, %s will not be amended." % (pdb_atm.name, pdb_atm.parent, pdb_atm.name)) # Atom: OXT, OD2, and OE2 # Sanity check for OXT/OD2/OE2 oxygens with invalid bond types. elif pdb_atm.name in ["OXT", "OD2", "OE2"]: # ASP/GLU or any OXT with metallic coordination perceived as covalent bond. # # It considers monodentate (only OXT/OD2/E2) and bidentate (both oxygens in ASP/GLU or main chain O and OXT) # interactions with metals. # # For the second oxygen (O/OD1/OE1), we also consider that it may appear after correcting C/CG/CD first, so the # missing double bond would have already been fixed. # # E.g.: 6JWU:B:ASP:210. # if (atm_obj.matches_smarts(f"[OX2]({METAL_ATOM})[C;X4,X3]([#6])-,=[OX2;$(O{METAL_ATOM})]") or atm_obj.matches_smarts(f"[OX2]({METAL_ATOM})[CX3]([#6])=[OX1]")): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=-1, implicit_h_count=0) else: logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." 
% (pdb_atm.name, pdb_atm.parent)) # If the second oxygen (OD1/OE1) is bound to something else not comprised in the previous rules, # then it won't be fixed and, therefore, we should update OD2/OE2 as follows: remove the bond with # the metal and then substitute the single bond with CG/CD for a double bond. Note that OD2/OE2 # will play the role of OD1/OE1 after the update because that oxygen won't be amended. # # Note, we also consider that it may appear after correcting C/CG/CD first, so the missing double # bond would have already been fixed. elif atm_obj.matches_smarts(f"[OX2]({METAL_ATOM})-,=[C;X4,X3]([#6])[OX2;$(O([C;X4,X3])[!#1])]"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) self._fix_atom(atm_obj, bond_types=[BondType.DOUBLE], charge=0, implicit_h_count=0) else: logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." % (pdb_atm.name, pdb_atm.parent)) # Capture cases where the second oxygen is bound to a metal atom, but not OXT/OD2/OE2. elif atm_obj.matches_smarts(f"[OX1]=[CX3]([#6])[OX2;$(O{METAL_ATOM})]"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=-1, implicit_h_count=0) else: logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found a metal " "covalently bound to the other carboxyl oxygen. However, nothing will be done because " "'break_metal_bonds' was set to False." 
% (pdb_atm.name, pdb_atm.parent)) # It fixes invalid oxygens, excluing cases where OD2/OE2 is bound to an atom not comprised in the # previous rules and cases where OD2/OE2 has a double bond with CG/CD and the second oxygen (OD1/OE1) # is bound to two atoms, one of which is an unexpected atom not considered in our standardization function elif (atm_obj.matches_smarts("[OX2]([!#1])[C;X4,X3]") is False and atm_obj.matches_smarts(f"[OX1]=[CX3]([#6])[OX2;$(O([CX3])[!#1]);!$(O{METAL_ATOM})]") is False and atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 1 or atm_obj.get_charge() != -1 or atm_obj.get_degree() != 1 or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=-1, implicit_h_count=0) # Alert for unexpected atoms bound to the oxygens. elif atm_obj.matches_smarts("[OX2]([!#1])[C;X4,X3]"): logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found an unexpected " "atom covalently bound to it. So, %s will not be amended." % (pdb_atm.name, pdb_atm.parent, pdb_atm.name)) # Alert for unexpected atoms bound to the oxygens. elif atm_obj.matches_smarts(f"[OX1]=[CX3]([#6])[OX2;$(O([CX3])[!#1]);!$(O{METAL_ATOM})]"): logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found an unexpected " "atom covalently bound to the other carboxyl oxygen. So, %s was treated as a carbonyl oxygen." % (pdb_atm.name, pdb_atm.parent, pdb_atm.name)) # Atom: OG, OG1, and OH # Sanity check for OG/OG1/OH oxygens with invalid bond types. elif pdb_atm.name in ["OG", "OG1", "OH"]: # TYR/SER/THR with metallic coordination perceived as covalent bond. # # Bertini et al. 2007. Biological Inorganic Chemistry: Structure and Reactivity. # # E.g: 1TFD:A:TYR:188 # if atm_obj.matches_smarts(f"[OX2]{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. 
if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) else: logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." % (pdb_atm.name, pdb_atm.parent)) # If the oxygen is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix OG/OG1/OH. elif atm_obj.get_neighbors_number(True) == 1 and (atm_obj.get_valence() != 2 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 1 or atm_obj.is_in_ring() or atm_obj.is_aromatic()): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=1) # Alert for unexpected atoms bound to the hydroxyl oxygens. elif atm_obj.matches_smarts(f"[OX2;$(O([#6])[!#1]);!$(O{METAL_ATOM})]"): logger.debug("While checking for inconsistencies in the atom %s of the residue %s, it was found an unexpected " "atom covalently bound to it. So, %s will not be amended." % (pdb_atm.name, pdb_atm.parent, pdb_atm.name)) # Atom: SD # Sanity check for MET:SD with invalid bond types. # # Although MET can be involved in the coordination of metals, due to its two single bonds, it will never be incorrectly # perceived as being covalently bound to metals. Therefore, we do not need to check for it. # elif (pdb_atm.name == "SD" and atm_obj.get_neighbors_number(True) == 2 and (atm_obj.get_valence() != 2 or atm_obj.get_charge() != 0 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.get_h_count() != 0 or atm_obj.is_in_ring() or atm_obj.is_aromatic())): self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=0) # Atom: SG # Sanity check for CYS:SG with invalid bond types. 
elif pdb_atm.name == "SG": # CYS with metallic coordination perceived as covalent bond. # # Harding et al. 2010. Metals in protein structures: a review of their principal features. # Bertini et al. 2007. Biological Inorganic Chemistry: Structure and Reactivity. # # E.g: 1IUZ:A:CYS:84 # if atm_obj.matches_smarts(f"[SX2]{METAL_ATOM}"): # If it is necessary to break covalent bonds with metals. if self.break_metal_bonds: self._remove_metallic_bond(atm_obj) self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=-1, implicit_h_count=0) else: logger.debug("While checking for inconsistencies in the atom SG of the residue %s, it was found a metal " "covalently bound to it. However, nothing will be done because 'break_metal_bonds' " "was set to False." % pdb_atm.parent) # If the sulfur is bound to something else not comprised in the previous rule, it is better not to # update anything. Otherwise, fix SG. elif (atm_obj.matches_smarts("[SX2](C)[!#1]") is False and (atm_obj.get_valence() != 2 or atm_obj.get_charge() != 0 or atm_obj.get_degree() != 2 or atm_obj.has_only_bond_type(BondType.SINGLE) is False or atm_obj.is_in_ring() or atm_obj.is_aromatic())): # Get the number of implicit hydrogen count based on the bonds SG has, i.e., if it is the usual cysteine, then # it has 1 implicit hydrogen, but if it establishes a disulfide bond, then it has no hydrogen. implicit_h_count = 0 if atm_obj.get_neighbors_number(True) == 2 else 1 self._fix_atom(atm_obj, bond_types=[BondType.SINGLE], charge=0, implicit_h_count=implicit_h_count) # Alert for unexpected atoms bound to the hydroxyl oxygens. elif atm_obj.matches_smarts("[SX2](C)[!#1]"): logger.debug("While checking for inconsistencies in the atom SG of the residue %s, it was found an unexpected " "atom covalently bound to it. So, SG will not be amended." % pdb_atm.parent) # Only remove metals with no bond to any residue. 
for metal_obj in self.found_metals.values(): if len(metal_obj.get_bonds()) == 0: self.removed_atoms.append(metal_obj.get_id()) atm_obj.parent.unwrap().DeleteAtom(metal_obj)
def _fix_atom(self, atm_obj, charge=None, remove_explict_h=True, implicit_h_count=None, bond_types=None,
              is_aromatic=False, in_ring=False):
    """Reset the properties of an atom to the provided standard values.

    The steps are applied in a fixed order: explicit hydrogens are removed,
    bonds are converted, the ring and aromaticity flags are set, and finally
    the implicit hydrogen count and the charge are updated.

    Parameters
    ----------
    atm_obj : atom wrapper
        The atom to be amended.
    charge : int, optional
        New charge. If None, the charge is left untouched.
    remove_explict_h : bool
        If True (the default), delete the hydrogens explicitly bound to
        ``atm_obj`` before anything else.
    implicit_h_count : int, optional
        New number of implicit hydrogens. If None, it is left untouched.
    bond_types : list, optional
        Bond specification forwarded to ``_fix_bonds``.
        If None, no bond is modified.
    is_aromatic : bool
        Aromaticity flag to assign to the atom.
    in_ring : bool
        Ring membership flag to assign to the atom. It is ignored when
        ``is_aromatic`` is True, as aromatic atoms are always flagged as
        ring atoms.
    """
    if remove_explict_h:
        self._remove_explict_hydrogens(atm_obj)

    if bond_types is not None:
        self._fix_bonds(atm_obj, bond_types)

    # Aromaticity implies ring membership, whatever 'in_ring' says.
    atm_obj.set_in_ring(True if is_aromatic else in_ring)
    atm_obj.set_as_aromatic(is_aromatic)

    if implicit_h_count is not None:
        atm_obj.unwrap().SetImplicitHCount(implicit_h_count)

    if charge is not None:
        atm_obj.set_charge(charge)


def _remove_explict_hydrogens(self, atm_obj):
    """Delete every hydrogen atom explicitly bound to ``atm_obj``.

    Parameters
    ----------
    atm_obj : atom wrapper
        The atom whose hydrogen neighbors will be removed from its parent
        molecule.
    """
    neighbors = (bond.get_partner_atom(atm_obj) for bond in atm_obj.get_bonds())

    # Collect the hydrogens first and delete them only afterwards, so the
    # molecule is not modified while its bonds are still being traversed.
    hydrogens = [nb for nb in neighbors if nb.get_symbol() == "H"]

    for hydrogen in hydrogens:
        atm_obj.parent.unwrap().DeleteAtom(hydrogen.unwrap())


def _fix_bonds(self, atm_obj, bond_types):
    """Update the type (and optionally the aromaticity) of bonds.

    Parameters
    ----------
    atm_obj : atom wrapper
        The atom whose bonds are updated when a single bond type is given.
    bond_types : list or None
        One of:

        * None or an empty list: nothing is done;
        * a list with a single ``BondType``: all bonds of ``atm_obj`` are
          converted to that type;
        * a list of tuples ``(bond, new_type)`` or
          ``(bond, new_type, is_aromatic)``: each listed bond receives its
          own type and, when the third element is present, its
          aromaticity flag.
    """
    # Nothing to convert.
    if not bond_types or len(bond_types) == 0:
        return

    # A lone BondType means "apply it to every bond of the atom".
    same_type_for_all = len(bond_types) == 1 and isinstance(bond_types[0], BondType)
    if same_type_for_all:
        shared_type = bond_types[0]
        for bond in atm_obj.get_bonds():
            bond.set_bond_type(shared_type)
        return

    # Otherwise, each entry describes one specific bond.
    for spec in bond_types:
        bond, new_type = spec[0:2]
        bond.set_bond_type(new_type)

        # The optional third element tells whether the bond is aromatic.
        if len(spec) == 3:
            bond.set_as_aromatic(spec[2])


def _remove_metallic_bond(self, atm_obj):
    """Break the bonds between ``atm_obj`` and any metal atom.

    Each metal found is stored in ``self.found_metals`` (keyed by its id),
    and the bond itself is deleted from the parent molecule of ``atm_obj``.

    Parameters
    ----------
    atm_obj : atom wrapper
        The atom whose bonds with metals will be removed.
    """
    partner_and_bond = ((bond.get_partner_atom(atm_obj), bond) for bond in atm_obj.get_bonds())

    # Gather the metallic bonds before deleting anything, so the molecule
    # is not modified while its bonds are still being traversed.
    metallic = [(partner, bond) for partner, bond in partner_and_bond
                if partner.get_symbol() in METALS]

    for metal, bond in metallic:
        # Keep track of the metal; the bond itself is dropped right away.
        self.found_metals[metal.get_id()] = metal
        atm_obj.parent.unwrap().DeleteBond(bond.unwrap())