org.openmolecules.chem.conf.gen.RigidFragmentProvider Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright 2013-2020 Thomas Sander, openmolecules.org
*
* Redistribution and use in source and binary forms, with or without modification,
* are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors
* may be used to endorse or promote products derived from this software without
* specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
* SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @author Thomas Sander
*/
package org.openmolecules.chem.conf.gen;
import com.actelion.research.calc.ThreadMaster;
import com.actelion.research.chem.Canonizer;
import com.actelion.research.chem.Coordinates;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.conf.Conformer;
import com.actelion.research.chem.forcefield.mmff.BadAtomTypeException;
import com.actelion.research.chem.forcefield.mmff.ForceFieldMMFF94;
import org.openmolecules.chem.conf.so.ConformationSelfOrganizer;
import org.openmolecules.chem.conf.so.SelfOrganizedConformer;
import java.util.ArrayList;
/**
* An instance of this class is used by any ConformerGenerator to hand out one or multiple 3D-coordinate sets
* for rigid fragments within a molecules. RigidFragments are those substructures obtained, when breaking a
* molecule apart at its non-ring single bonds, which are called 'rotatable bonds'.
* A RigidFragment consists of core atoms, extended atoms and outer shell atoms.
* Core atoms are all atoms making up the original substructure. Extended atoms are all atoms of the original
* molecule, which directly connect to any core atom. Extended atoms are important, because they define the exit
* vectors of the core atom fragment. Outer shell atoms are another layer of atoms connected the extended atoms.
* They are needed for the self-organizer to create context depending 3D-coordinates that considers steric and
* atom type dependent geometries.
* RigidFragments are fragments rather than molecules. There are no implicit hydrogen atoms and some atoms
* have unoccupied valences. Nitrogen atoms may carry the flat-nitrogen query feature, which is considered
* by the self organizer when creating 3D-coordinates.
* Important: Stereo centers in the molecule may not be a stereo center in the fragment anymore.
* Nevertheless, the self-organized coordinates must reflect the correct configuration of the originating molecule.
* Therefore, atom parities are copied from molecule to fragment and defined valid, which is possible, because
* parities are based on atom indexes and the atom order is kept in-tact, when assembling the fragment.
* Caching: When generating conformers of many molecules, then many RigidFragments occurr repeatedly.
* Therefore, caching of fragment's coordinates speeds up conformer generation significantly. However, we need
* to consider various issues:
* - As key for the cache we use the fragment's idcode (all atoms). To not loose hydrogen atoms, we convert all
* plain hydrogen atoms into deuterium.
* - In the cache we just store 3D-coordinate sets and their likelyhoods based on self-organizer scores or MMFF-energies
* - When locating the same fragment in a different molecule, then the atom order may be different. Thus, we need to normalize.
* For this purpose we use the graphIndex of the Canonizer used to create the fragment's idcode and store coordinates in
* canonical atom order.
* - For molecule stereo centers, which are gone in the fragment, copying of molecule parities to the fragment ensures
* proper 3D-coordinates. For the Canonizer graphIndex to reflect the original configuration, we need to make sure,
* that up/down-bonds are copied (parities won't do), which are now overspecifying the non-stereo center.
* And we use the Canonizer mode TIE_BREAK_FREE_VALENCE_ATOMS to distinguish symmetrical fragment atoms if they have
* free valences and, thus, could be differently substituted in molecule matches. This way we locate all potential
* stereo centers in fragments. If a given molecule does not specify a parity for any of the potential stereo
* centers, then this fragment is not cached. Otherwise a later hit with a defined stereo center at that position
* might get coordinates for the wrong stereo configuration.
* - If a fragment contains stereo centers, then only one of the two possible enantiomers is cached. The other one is
* constructed by z-coordinate inversion.
* - If optimizeRigidFragments is true, then the MMFF94s+ forcefield is used to minimize fragments before caching/using
* them. A different force field may be used by overriding RigidFragmentProvider and all of its forceField...
* methods and passing an overridden instance to the constructor(s) of the ConformerGenerator to be used.
*/
public class RigidFragmentProvider {
private static int MAX_CONFORMERS = 16;
private static int MAX_ATOMS_FOR_CACHING = 32;
// Random seed for initializing the SelfOrganizer.
private long mRandomSeed;
private boolean mOptimizeFragments;
private RigidFragmentCache mCache;
private ThreadMaster mThreadMaster;
public RigidFragmentProvider(long randomSeed, RigidFragmentCache cache, boolean optimizeRigidFragments) {
mRandomSeed = randomSeed;
mCache = cache;
mOptimizeFragments = optimizeRigidFragments;
if (optimizeRigidFragments)
forceFieldInitialize();
}
/**
* If the conformer generation must be stopped from outside, for instance because of user
* intervention or because of a defined timeout, then provide a ThreadMaster with this method.
* @param tm
*/
public void setThreadMaster(ThreadMaster tm) {
mThreadMaster = tm;
}
public void setCache(RigidFragmentCache cache) {
mCache = cache;
}
public RigidFragment createFragment(StereoMolecule mol, int[] fragmentNo, int fragmentIndex) {
int coreAtomCount = 0;
int atomCount = 0;
int extendedAtomCount = 0;
// mark all atoms with specified fragmentNo and two layers around it
boolean[] includeAtom = new boolean[mol.getAllAtoms()];
boolean[] isOuterShellAtom = new boolean[mol.getAllAtoms()];
boolean[] isCoreFragment = new boolean[mol.getAllAtoms()];
for (int atom=0; atom conformerList = null;
double[] likelihood = null;
Canonizer canonizer = null;
String key = null;
boolean invertedEnantiomer = false;
boolean useCache = (mCache != null && atomCount <= MAX_ATOMS_FOR_CACHING);
// Generate stereo parities for all potential stereo features in the fragment.
// If one or more potential stereo features are unknown, then the fragment doesn't qualify to be cached.
if (useCache) {
// By distinguishing equal ranking atoms, if they have free valences, we detect all possible stereo features
canonizer = new Canonizer(fragment, Canonizer.TIE_BREAK_FREE_VALENCE_ATOMS);
// we don't cache fragments with unspecified stereo configurations
for (int atom=0; atom();
for (Coordinates[] coords:cacheEntry.coordinates) {
for (int j = 0; j();
SelfOrganizedConformer bestConformer = selfOrganizer.getNextConformer();
conformerList.add(bestConformer);
SelfOrganizedConformer conformer = selfOrganizer.getNextConformer();
while (conformer != null) {
conformerList.add(conformer);
conformer = selfOrganizer.getNextConformer();
}
// Calculate fraction values of the population from strain values, which somewhat resemble energies in kcal/mol
double ENERGY_FOR_FACTOR_10 = 1.36; // The ConformerSelfOrganizer and MMFF use kcal/mol; 1.36 kcal/mol is factor 10
double minStrain = Double.MAX_VALUE;
for(Conformer conf:conformerList)
minStrain = Math.min(minStrain, ((SelfOrganizedConformer)conf).getTotalStrain());
// Strain values resemble energies in kcal/mol, but not as reliable. Therefore we are less strict and allow factor 1000
double strainLimit = minStrain + 3.0 * ENERGY_FOR_FACTOR_10;
for (int i=conformerList.size()-1; i>=0; i--)
if (conformerList.get(i).getTotalStrain()>strainLimit)
conformerList.remove(i);
likelihood = new double[conformerList.size()];
double likelihoodSum = 0;
int index = 0;
for(int i=0; ienergyLimit) {
conf.setEnergy(Double.NaN);
validEnergyCount--;
}
}
// If we have no valid MMFF energy values, we keep the likelihoods from the self organizer, otherwise...
if (validEnergyCount != 0) {
double[] population = new double[validEnergyCount];
double populationSum = 0;
index = 0;
for(int i=conformerList.size()-1; i>=0; i--) {
Conformer conf = conformerList.get(i);
if (Double.isNaN(conf.getEnergy()))
conformerList.remove(i);
else {
population[index] = Math.pow(10, (minEnergy - conf.getEnergy()) / ENERGY_FOR_FACTOR_10);
populationSum += population[index];
index++;
}
}
likelihood = new double[validEnergyCount];
for (int i=0; i 1) {
energy_out[1] = ff.getTotalEnergy();
}
}
ff.minimise();
if(energy_out!=null) {
if (energy_out.length > 1) {
energy_out[0] = ff.getTotalEnergy();
}
}
//MMFFMolecule m_optimized = ff.getMMFFMolecule();
Conformer conf_optimized = new Conformer(conformer);
copyFFMolCoordsToConformer(conf_optimized,ff,n_atoms);
return conf_optimized;
}
catch (BadAtomTypeException bate) {
if(energy_out!=null) {
if (energy_out.length > 1) {
energy_out[0]= Double.NaN;
energy_out[1]= Double.NaN;
}
}
return new Conformer(conformer);
}
catch(Exception ex) {
ex.printStackTrace();
if(energy_out!=null) {
if (energy_out.length > 1) {
energy_out[0]= Double.NaN;
energy_out[1]= Double.NaN;
}
}
return new Conformer(conformer);
}
}
private static void copyFFMolCoordsToConformer(Conformer mol, ForceFieldMMFF94 ff, int n_atoms) {
//int n_atoms = mol.getMolecule().getAllAtoms();
for (int atom=0; atom