/*
 * com.actelion.research.chem.MoleculeStandardizer
 * Part of openchemlib, the Open Source Chemistry Library
 * (published as the "openchemlib" artifact for Maven / Gradle / Ivy).
 */
package com.actelion.research.chem;
import com.actelion.research.chem.coords.CoordinateInventor;
/**
* MoleculeStandardizer
* @author Modest von Korff, Thomas Sander
* @version 1.0
* Apr 5, 2012 MvK: Start implementation
* Oct 2020 MvK,TLS: improved performance and adapted standardization according to following publication:
* Bento, A. P., Hersey, A., Félix, E., Landrum, G., Gaulton, A., Atkinson, F., ... & Leach, A. R. (2020).
* An open source chemical structure curation pipeline using RDKit. Journal of Cheminformatics, 12(1), 1-16.
* Exceptions: - S=O is not transformed into S(+)-O(-)
* - If charges (e.g. quaternary nitrogen) cannot be balanced, then Na(+) or Cl(-) are added to neutralize as last resort
*/
public class MoleculeStandardizer {
/**
 * Mode bit: when set, standardize() first calls stripSmallFragments() and
 * stripIsotopInfo() on the molecule, i.e. it works on the parent structure.
 */
public static final int MODE_GET_PARENT = 1;
/**
 * Mode bit: when set, a molecule that is still charged after normalization
 * is neutralized by adding Na+ or Cl- counter-ions.
 * May be combined with MODE_GET_PARENT using bitwise OR.
 */
public static final int MODE_ADD_NA_AND_CL = 2;
/**
 * Decodes an idcode into a molecule and standardizes the result.
 * <p>
 * Normally a molecule is standardized <em>before</em> its canonical encoding
 * is generated, so there should be no need to standardize from an idcode.
 * The exception is the creation of a parent structure with
 * MODE_GET_PARENT, possibly combined with MODE_ADD_NA_AND_CL.
 *
 * @param idcode the idcode of the molecule to decode
 * @param coordinates encoded coordinates belonging to the idcode; if null the result may change
 * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL
 * @return the decoded molecule after it was passed through standardize()
 * @throws Exception as declared by standardize()
 */
public static StereoMolecule getStandardized(String idcode, String coordinates, int mode) throws Exception {
    IDCodeParser parser = new IDCodeParser();
    StereoMolecule decoded = parser.getCompactMolecule(idcode, coordinates);

    standardize(decoded, mode);

    return decoded;
}
/**
 * Standardizes a molecule in place and fixes some structural errors.
 * Typically, this is done before canonicalization.
 * The following changes are applied:
 * <ul>
 * <li>different forms of functional groups (e.g. nitro) are normalized to a preferred one</li>
 * <li>charged acidic or basic atoms are (de-)protonated to remove charges and
 *     to neutralize the molecule, if possible</li>
 * <li>alkali, alkaline earth and halogen atoms that are charged despite being
 *     covalently bound get uncharged</li>
 * <li>trivalent, uncharged oxygens get a positive charge</li>
 * <li>unusual amide tautomeric structures, if not in a ring, are inverted</li>
 * <li>uncharged isocyano groups get proper charges to validate valences</li>
 * <li>wrongly charged azido groups get proper charges to validate valences</li>
 * <li>uncharged, quaternary nitrogens get a positive charge</li>
 * </ul>
 * If mode includes MODE_GET_PARENT, then only the largest, normalized fragment is kept
 * and isotope information is stripped.
 * If mode includes MODE_ADD_NA_AND_CL, then molecules that are still charged after
 * normalization, e.g. quaternary ammonium, are neutralized by adding the right amount
 * of Na+ or Cl- ions.
 *
 * @param mol the molecule to standardize; it is modified in place
 * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL
 * @throws Exception declared for callers; the visible body does not throw explicitly
 */
public static void standardize(StereoMolecule mol, int mode) throws Exception {
    boolean parentOnly = (mode & MODE_GET_PARENT) != 0;
    if (parentOnly) {
        mol.stripSmallFragments();
        mol.stripIsotopInfo();
    }

    // Structural repairs first, then bond normalization, then charge handling.
    repairAndUnify(mol);
    mol.normalizeAmbiguousBonds();

    // A non-zero result is the charge that charge canonization could not remove.
    int netCharge = mol.canonizeCharge(true, true);
    if (netCharge != 0) {
        neutralizeCharges(mol, mode, netCharge);
    }

    mol.validateAtomQueryFeatures();
    mol.validateBondQueryFeatures();
}
/**
 * Runs the structural repair and unification steps on the molecule, in a fixed order:
 * covalently bound but charged alkali/halogen atoms, trivalent oxygen,
 * bad amide tautomers, quaternary nitrogen, isocyano groups and azido groups.
 * Every step receives the same molecule instance and is expected to modify it in place.
 * <p>
 * NOTE(review): an earlier version of this comment also mentioned unifying carboxylic,
 * sulfonic and phosphoric acid groups and phenolic oxygens, and adding Na+ or Cl-.
 * None of the calls below does that by name; adding counter-ions is the job of
 * neutralizeCharges(). Confirm against the helper implementations.
 * @param mol the molecule to repair
 */
private static void repairAndUnify(StereoMolecule mol) {
// The helpers below are called with ring information available.
mol.ensureHelperArrays(Molecule.cHelperRings);
repairCovalentBoundChargedAlkaliAndHalogen(mol);
chargeTrivalentOxygen(mol);
repairBadAmideTautomer(mol);
repairQuaternaryNitrogen(mol);
unifyIsoCyano(mol);
unifyAzido(mol);
}
private static void neutralizeCharges(StereoMolecule mol, int mode, int totalCharge) {
mol.ensureHelperArrays(Molecule.cHelperNeighbours);
for (int atom=0; atom0; atom++) {
if (AtomFunctionAnalyzer.isAcidicOxygen(mol, atom)) {
mol.setAtomCharge(atom, -1);
totalCharge--;
}
}
for (int atom=0; atom 0) {
int ind = mol.addAtom(17);
mol.setAtomCharge(ind, -1);
totalCharge--;
}
// add Na+
while (totalCharge < 0) {
int ind = mol.addAtom(11);
mol.setAtomCharge(ind, 1);
totalCharge++;
}
new CoordinateInventor(CoordinateInventor.MODE_KEEP_MARKED_ATOM_COORDS
+ CoordinateInventor.MODE_REMOVE_HYDROGEN).invent(mol);
}
}
/**
* Remove wrong charges on halogen and (earth)alkali atoms, if they are
* covalently bound.
* @param mol
*/
private static void repairCovalentBoundChargedAlkaliAndHalogen(StereoMolecule mol) {
for (int atom=0; atom