All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openmolecules.chem.conf.gen.RigidFragmentProvider Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright 2013-2020 Thomas Sander, openmolecules.org
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 *    this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the copyright holder nor the names of its contributors
 *    may be used to endorse or promote products derived from this software without
 *    specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package org.openmolecules.chem.conf.gen;

import com.actelion.research.calc.ThreadMaster;
import com.actelion.research.chem.Canonizer;
import com.actelion.research.chem.Coordinates;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.chem.conf.Conformer;
import com.actelion.research.chem.forcefield.mmff.BadAtomTypeException;
import com.actelion.research.chem.forcefield.mmff.ForceFieldMMFF94;
import org.openmolecules.chem.conf.so.ConformationSelfOrganizer;
import org.openmolecules.chem.conf.so.SelfOrganizedConformer;

import java.util.ArrayList;

/**
 * An instance of this class is used by any ConformerGenerator to hand out one or multiple 3D-coordinate sets
 * for rigid fragments within a molecules. RigidFragments are those substructures obtained, when breaking a
 * molecule apart at its non-ring single bonds, which are called 'rotatable bonds'.
 * A RigidFragment consists of core atoms, extended atoms and outer shell atoms.
 * Core atoms are all atoms making up the original substructure. Extended atoms are all atoms of the original
 * molecule, which directly connect to any core atom. Extended atoms are important, because they define the exit
 * vectors of the core atom fragment. Outer shell atoms are another layer of atoms connected the extended atoms.
 * They are needed for the self-organizer to create context depending 3D-coordinates that considers steric and
 * atom type dependent geometries.
* RigidFragments are fragments rather than molecules. There are no implicit hydrogen atoms and some atoms * have unoccupied valences. Nitrogen atoms may carry the flat-nitrogen query feature, which is considered * by the self organizer when creating 3D-coordinates.
* Important: Stereo centers in the molecule may not be a stereo center in the fragment anymore. * Nevertheless, the self-organized coordinates must reflect the correct configuration of the originating molecule. * Therefore, atom parities are copied from molecule to fragment and defined valid, which is possible, because * parities are based on atom indexes and the atom order is kept in-tact, when assembling the fragment.
* Caching: When generating conformers of many molecules, then many RigidFragments occurr repeatedly. * Therefore, caching of fragment's coordinates speeds up conformer generation significantly. However, we need * to consider various issues:
* - As key for the cache we use the fragment's idcode (all atoms). To not loose hydrogen atoms, we convert all * plain hydrogen atoms into deuterium.
* - In the cache we just store 3D-coordinate sets and their likelyhoods based on self-organizer scores or MMFF-energies
* - When locating the same fragment in a different molecule, then the atom order may be different. Thus, we need to normalize. * For this purpose we use the graphIndex of the Canonizer used to create the fragment's idcode and store coordinates in * canonical atom order.
* - For molecule stereo centers, which are gone in the fragment, copying of molecule parities to the fragment ensures * proper 3D-coordinates. For the Canonizer graphIndex to reflect the original configuration, we need to make sure, * that up/down-bonds are copied (parities won't do), which are now overspecifying the non-stereo center. * And we use the Canonizer mode TIE_BREAK_FREE_VALENCE_ATOMS to distinguish symmetrical fragment atoms if they have * free valences and, thus, could be differently substituted in molecule matches. This way we locate all potential * stereo centers in fragments. If a given molecule does not specify a parity for any of the potential stereo * centers, then this fragment is not cached. Otherwise a later hit with a defined stereo center at that position * might get coordinates for the wrong stereo configuration.
* - If a fragment contains stereo centers, then only one of the two possible enantiomers is cached. The other one is * constructed by z-coordinate inversion.
* - If optimizeRigidFragments is true, then the MMFF94s+ forcefield is used to minimize fragments before caching/using * them. A different force field may be used by overriding RigidFragmentProvider and all of its forceField... * methods and passing an overridden instance to the constructor(s) of the ConformerGenerator to be used. */ public class RigidFragmentProvider { private static int MAX_CONFORMERS = 16; private static int MAX_ATOMS_FOR_CACHING = 32; // Random seed for initializing the SelfOrganizer. private long mRandomSeed; private boolean mOptimizeFragments; private RigidFragmentCache mCache; private ThreadMaster mThreadMaster; public RigidFragmentProvider(long randomSeed, RigidFragmentCache cache, boolean optimizeRigidFragments) { mRandomSeed = randomSeed; mCache = cache; mOptimizeFragments = optimizeRigidFragments; if (optimizeRigidFragments) forceFieldInitialize(); } /** * If the conformer generation must be stopped from outside, for instance because of user * intervention or because of a defined timeout, then provide a ThreadMaster with this method. * @param tm */ public void setThreadMaster(ThreadMaster tm) { mThreadMaster = tm; } public void setCache(RigidFragmentCache cache) { mCache = cache; } public RigidFragment createFragment(StereoMolecule mol, int[] fragmentNo, int fragmentIndex) { int coreAtomCount = 0; int atomCount = 0; int extendedAtomCount = 0; // mark all atoms with specified fragmentNo and two layers around it boolean[] includeAtom = new boolean[mol.getAllAtoms()]; boolean[] isOuterShellAtom = new boolean[mol.getAllAtoms()]; boolean[] isCoreFragment = new boolean[mol.getAllAtoms()]; for (int atom=0; atom conformerList = null; double[] likelihood = null; Canonizer canonizer = null; String key = null; boolean invertedEnantiomer = false; boolean useCache = (mCache != null && atomCount <= MAX_ATOMS_FOR_CACHING); // Generate stereo parities for all potential stereo features in the fragment. // If one or more potential stereo features are unknown, then the fragment doesn't qualify to be cached. if (useCache) { // By distinguishing equal ranking atoms, if they have free valences, we detect all possible stereo features canonizer = new Canonizer(fragment, Canonizer.TIE_BREAK_FREE_VALENCE_ATOMS); // we don't cache fragments with unspecified stereo configurations for (int atom=0; atom(); for (Coordinates[] coords:cacheEntry.coordinates) { for (int j = 0; j(); SelfOrganizedConformer bestConformer = selfOrganizer.getNextConformer(); conformerList.add(bestConformer); SelfOrganizedConformer conformer = selfOrganizer.getNextConformer(); while (conformer != null) { conformerList.add(conformer); conformer = selfOrganizer.getNextConformer(); } // Calculate fraction values of the population from strain values, which somewhat resemble energies in kcal/mol double ENERGY_FOR_FACTOR_10 = 1.36; // The ConformerSelfOrganizer and MMFF use kcal/mol; 1.36 kcal/mol is factor 10 double minStrain = Double.MAX_VALUE; for(Conformer conf:conformerList) minStrain = Math.min(minStrain, ((SelfOrganizedConformer)conf).getTotalStrain()); // Strain values resemble energies in kcal/mol, but not as reliable. Therefore we are less strict and allow factor 1000 double strainLimit = minStrain + 3.0 * ENERGY_FOR_FACTOR_10; for (int i=conformerList.size()-1; i>=0; i--) if (conformerList.get(i).getTotalStrain()>strainLimit) conformerList.remove(i); likelihood = new double[conformerList.size()]; double likelihoodSum = 0; int index = 0; for(int i=0; ienergyLimit) { conf.setEnergy(Double.NaN); validEnergyCount--; } } // If we have no valid MMFF energy values, we keep the likelihoods from the self organizer, otherwise... if (validEnergyCount != 0) { double[] population = new double[validEnergyCount]; double populationSum = 0; index = 0; for(int i=conformerList.size()-1; i>=0; i--) { Conformer conf = conformerList.get(i); if (Double.isNaN(conf.getEnergy())) conformerList.remove(i); else { population[index] = Math.pow(10, (minEnergy - conf.getEnergy()) / ENERGY_FOR_FACTOR_10); populationSum += population[index]; index++; } } likelihood = new double[validEnergyCount]; for (int i=0; i 1) { energy_out[1] = ff.getTotalEnergy(); } } ff.minimise(); if(energy_out!=null) { if (energy_out.length > 1) { energy_out[0] = ff.getTotalEnergy(); } } //MMFFMolecule m_optimized = ff.getMMFFMolecule(); Conformer conf_optimized = new Conformer(conformer); copyFFMolCoordsToConformer(conf_optimized,ff,n_atoms); return conf_optimized; } catch (BadAtomTypeException bate) { if(energy_out!=null) { if (energy_out.length > 1) { energy_out[0]= Double.NaN; energy_out[1]= Double.NaN; } } return new Conformer(conformer); } catch(Exception ex) { ex.printStackTrace(); if(energy_out!=null) { if (energy_out.length > 1) { energy_out[0]= Double.NaN; energy_out[1]= Double.NaN; } } return new Conformer(conformer); } } private static void copyFFMolCoordsToConformer(Conformer mol, ForceFieldMMFF94 ff, int n_atoms) { //int n_atoms = mol.getMolecule().getAllAtoms(); for (int atom=0; atom




© 2015 - 2024 Weber Informatics LLC | Privacy Policy