
// com.actelion.research.chem.MoleculeStandardizer (artifact page header: Maven / Gradle / Ivy)
package com.actelion.research.chem;
import com.actelion.research.chem.coords.CoordinateInventor;
/**
* MoleculeStandardizer
* @author Modest von Korff, Thomas Sander
* @version 1.0
* Apr 5, 2012 MvK: Start implementation
* Oct 2020 MvK,TLS: improved performance and adapted standardization according to following publication:
* Bento, A. P., Hersey, A., Félix, E., Landrum, G., Gaulton, A., Atkinson, F., ... & Leach, A. R. (2020).
* An open source chemical structure curation pipeline using RDKit. Journal of Cheminformatics, 12(1), 1-16.
* Exceptions: - S=O is not transformed into S(+)-O(-)
* - If charges (e.g. quaternary nitrogen) cannot be balanced, then Na(+) or Cl(-) are added to neutralize as last resort
*/
public class MoleculeStandardizer {
/** Mode flag (bit 0): reduce the molecule to its parent structure, see {@code standardize()}. */
public static final int MODE_GET_PARENT = 1 << 0;

/** Mode flag (bit 1): allow Na(+) / Cl(-) counter-ions to be added for charge neutralization. */
public static final int MODE_ADD_NA_AND_CL = 1 << 1;
/**
 * Decodes an idcode into a molecule and standardizes that molecule.
 * Normally there is no need to standardize a molecule obtained from an idcode,
 * because molecules should be standardized before their canonical encoding is
 * generated. The exception is the creation of a parent structure with
 * MODE_GET_PARENT, potentially combined with MODE_ADD_NA_AND_CL.
 * @param idcode the encoded molecule; passed unchanged to IDCodeParser.getCompactMolecule()
 * @param coordinates encoded atom coordinates belonging to the idcode; if null the result may change.
 * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL
 * @return the molecule decoded from the idcode after standardize() was applied to it
 * @throws Exception declared because standardize() declares it
 */
public static StereoMolecule getStandardized(String idcode, String coordinates, int mode) throws Exception {
	IDCodeParser parser = new IDCodeParser();
	StereoMolecule decoded = parser.getCompactMolecule(idcode, coordinates);

	// standardization happens in place on the freshly decoded molecule
	standardize(decoded, mode);

	return decoded;
}
/**
 * Standardizes a molecule in place and fixes some structural errors.
 * Typically, this is done before canonicalization.
 * It includes the following changes:
 * - different forms of functional groups (e.g. nitro) are normalized to a preferred one
 * - charged acidic or basic atoms are (de-)protonated to remove charges and neutralize the molecule, if possible
 * - alkali / alkaline earth / halogen atoms that are charged despite being covalently bound get uncharged
 * - trivalent, uncharged oxygens get a positive charge
 * - unusual amide tautomeric structures, if not in a ring, are inverted
 * - uncharged isocyano groups get proper charges to validate valences
 * - wrongly charged azido groups get proper charges to validate valences
 * - uncharged, quaternary nitrogens get a positive charge
 * If mode includes MODE_GET_PARENT, then only the largest, normalized fragment is kept
 * and isotope information is stripped.
 * If mode includes MODE_ADD_NA_AND_CL, then molecules that are still charged after normalization,
 * e.g. quaternary ammonium, are neutralized by adding the right amount of Na+ or Cl- ions.
 * @param mol the molecule to standardize; it is modified directly
 * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL
 * @throws Exception
 */
public static void standardize(StereoMolecule mol, int mode) throws Exception {
	boolean parentRequested = (mode & MODE_GET_PARENT) != 0;
	if (parentRequested) {
		mol.stripSmallFragments();
		mol.stripIsotopInfo();
	}

	// structural repairs first, then bond normalization
	repairAndUnify(mol);
	mol.normalizeAmbiguousBonds();

	// canonizeCharge() reports the charge it could not balance; only then extra work is needed
	int unbalancedCharge = mol.canonizeCharge(true, true);
	if (unbalancedCharge != 0) {
		neutralizeCharges(mol, mode, unbalancedCharge);
	}

	mol.validateAtomQueryFeatures();
	mol.validateBondQueryFeatures();
}
/**
 * Runs the structural repair and unification helpers of this class on the
 * given molecule, in a fixed order, after making sure that the helper arrays
 * up to Molecule.cHelperRings are available.
 * Judging by the helper names, the steps are: uncharging of covalently bound
 * alkali/halogen atoms, charging of trivalent oxygen, repair of bad amide
 * tautomers, repair of quaternary nitrogen, and unification of isocyano and
 * azido groups.
 * NOTE(review): no Na+ or Cl- is added by the calls visible in this method;
 * counter-ion handling appears to live in neutralizeCharges() - confirm.
 * @param mol the molecule to repair; it is handed to every helper in turn
 */
private static void repairAndUnify(StereoMolecule mol) {
// the helpers below are called without further preparation, so request ring information once up front
mol.ensureHelperArrays(Molecule.cHelperRings);
repairCovalentBoundChargedAlkaliAndHalogen(mol);
chargeTrivalentOxygen(mol);
repairBadAmideTautomer(mol);
repairQuaternaryNitrogen(mol);
unifyIsoCyano(mol);
unifyAzido(mol);
}
/**
 * Tries to compensate a non-zero net charge of the molecule.
 * Visible steps: atoms that AtomFunctionAnalyzer.isAcidicOxygen() accepts get
 * charge -1; atoms with atomic number 17 and charge -1 are added and lower
 * totalCharge; atoms with atomic number 11 and charge +1 are added while
 * totalCharge is negative; finally new coordinates are invented.
 * NOTE(review): the headers of both for-loops below are damaged in this copy
 * of the file (text between a '&lt;' and a '&gt;' seems to have been lost), so the
 * loop conditions, the rest of the second loop and the opening of the block
 * closed by the second-to-last brace cannot be read here. Restore them from
 * the original source before compiling.
 * @param mol the molecule whose atom charges are changed and to which ions are added
 * @param mode NOTE(review): not referenced in the visible lines; presumably tested
 *             against MODE_ADD_NA_AND_CL in the lost text - confirm
 * @param totalCharge the net charge still to be compensated; updated locally with every change
 */
private static void neutralizeCharges(StereoMolecule mol, int mode, int totalCharge) {
mol.ensureHelperArrays(Molecule.cHelperNeighbours);
// NOTE(review): loop condition truncated; presumably bounded by the atom count and totalCharge > 0 - confirm
for (int atom=0; atom0; atom++) {
if (AtomFunctionAnalyzer.isAcidicOxygen(mol, atom)) {
// a negative charge on the acidic oxygen offsets one positive charge
mol.setAtomCharge(atom, -1);
totalCharge--;
}
}
// NOTE(review): everything between this loop header and a later "... > 0) {" is missing;
// the statements that follow look like the body of a loop adding Cl- - confirm
for (int atom=0; atom 0) {
int ind = mol.addAtom(17);
mol.setAtomCharge(ind, -1);
totalCharge--;
}
// add Na+
while (totalCharge < 0) {
int ind = mol.addAtom(11);
mol.setAtomCharge(ind, 1);
totalCharge++;
}
// invent coordinates with the keep-marked-atom-coords and remove-hydrogen modes combined
new CoordinateInventor(CoordinateInventor.MODE_KEEP_MARKED_ATOM_COORDS
+ CoordinateInventor.MODE_REMOVE_HYDROGEN).invent(mol);
}
}
/**
* Remove wrong charges on halogen and (earth)alkali atoms, if they are
* covalently bound.
* @param mol
*/
private static void repairCovalentBoundChargedAlkaliAndHalogen(StereoMolecule mol) {
for (int atom=0; atom
© 2015 - 2025 Weber Informatics LLC | Privacy Policy