All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.MoleculeStandardizer Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
package com.actelion.research.chem;

import com.actelion.research.chem.coords.CoordinateInventor;

/**
 * MoleculeStandardizer
 * @author Modest von Korff, Thomas Sander
 * @version 1.0
 * Apr 5, 2012 MvK: Start implementation
 * Oct 2020 MvK,TLS: improved performance and adapted standardization according to following publication:
 * Bento, A. P., Hersey, A., Félix, E., Landrum, G., Gaulton, A., Atkinson, F., ... & Leach, A. R. (2020).
 * An open source chemical structure curation pipeline using RDKit. Journal of Cheminformatics, 12(1), 1-16.
* Exceptions: - S=O is not transformed into S(+)-O(-)
* - If charges (e.g. quaternary nitrogen) cannot be balanced, then Na(+) or Cl(-) are added to neutralize as last resort
*/
public class MoleculeStandardizer {
    /** Mode flag: reduce the molecule to its parent structure before standardizing. */
    public static final int MODE_GET_PARENT = 1;
    /** Mode flag: neutralize charges that remain after normalization by adding Na+ or Cl- ions. */
    public static final int MODE_ADD_NA_AND_CL = 2;

    /**
     * Builds a molecule from an idcode and standardizes it.
     * Normally this should not be necessary, because molecules are expected to be
     * standardized before their canonical encoding is generated. The exception is
     * deriving the parent structure with MODE_GET_PARENT, possibly combined with
     * MODE_ADD_NA_AND_CL.
     * @param idcode encoded molecule, handed to IDCodeParser.getCompactMolecule()
     * @param coordinates encoded coordinates; if null the result may change
     * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL
     * @return the parsed molecule after standardize(StereoMolecule, int) has been applied to it
     * @throws Exception declared because standardize(StereoMolecule, int) declares it
     */
    public static StereoMolecule getStandardized(String idcode, String coordinates, int mode) throws Exception {
        IDCodeParser parser = new IDCodeParser();
        StereoMolecule parsed = parser.getCompactMolecule(idcode, coordinates);

        // Standardization works in place on the freshly parsed molecule.
        standardize(parsed, mode);
        return parsed;
    }

    /**
     * Standardises a molecule and fixes some structural errors.
     * Typically this is done before canonicalization.
     * It includes the following changes:
* - different forms of functional groups (e.g. nitro) are normalized to a preferred one
* - charged acidic or basic atoms are (de-)protonated to remove charges and neutralize the molecule, if possible.
* - alkali/alkaline-earth/halogen atoms, if charged despite being covalently bound, get uncharged
* - trivalent, uncharged oxygens get a positive charge
* - unusual amide tautomeric structures, if not in a ring, are inverted
* - uncharged isocyano groups get proper charges to validate valences
* - wrongly charged azido groups get proper charges to validate valences
* - uncharged, quaternary nitrogens get a positive charge
* If mode includes MODE_GET_PARENT, then only the largest, normalized fragment is kept. * If mode includes MODE_ADD_NA_AND_CL, then molecules, that are still charged after normalization, * e.g. quarternary ammonium, are neutralized by adding the right amount of Na+ or Cl- ions. * @param mol * @param mode 0 or any combination of MODE_GET_PARENT and MODE_ADD_NA_AND_CL * @throws Exception */ public static void standardize(StereoMolecule mol, int mode) throws Exception { if((mode & MODE_GET_PARENT) != 0) { mol.stripSmallFragments(); mol.stripIsotopInfo(); } repairAndUnify(mol); mol.normalizeAmbiguousBonds(); int remainingCharge = mol.canonizeCharge(true, true); if (remainingCharge != 0) neutralizeCharges(mol, mode, remainingCharge); mol.validateAtomQueryFeatures(); mol.validateBondQueryFeatures(); } /** * Repairs wrongly uncharged quaternary nitrogen. Unifies carbonyl acid groups, * sulfonic acid, phosphoric acid, phenolic oxygen. Means: negative charges are removed. * Adds Na+ or Cl- for final charge equilibration. * @param mol * @return true if an atom was added. 
*/ private static void repairAndUnify(StereoMolecule mol) { mol.ensureHelperArrays(Molecule.cHelperRings); repairCovalentBoundChargedAlkaliAndHalogen(mol); chargeTrivalentOxygen(mol); repairBadAmideTautomer(mol); repairQuaternaryNitrogen(mol); unifyIsoCyano(mol); unifyAzido(mol); } private static void neutralizeCharges(StereoMolecule mol, int mode, int totalCharge) { mol.ensureHelperArrays(Molecule.cHelperNeighbours); for (int atom=0; atom0; atom++) { if (AtomFunctionAnalyzer.isAcidicOxygen(mol, atom)) { mol.setAtomCharge(atom, -1); totalCharge--; } } for (int atom=0; atom 0) { int ind = mol.addAtom(17); mol.setAtomCharge(ind, -1); totalCharge--; } // add Na+ while (totalCharge < 0) { int ind = mol.addAtom(11); mol.setAtomCharge(ind, 1); totalCharge++; } new CoordinateInventor(CoordinateInventor.MODE_KEEP_MARKED_ATOM_COORDS + CoordinateInventor.MODE_REMOVE_HYDROGEN).invent(mol); } } /** * Remove wrong charges on halogen and (earth)alkali atoms, if they are * covalently bound. * @param mol */ private static void repairCovalentBoundChargedAlkaliAndHalogen(StereoMolecule mol) { for (int atom=0; atom




© 2015 - 2025 Weber Informatics LLC | Privacy Policy