com.actelion.research.chem.reaction.mapping.SimilarityGraphBasedReactionMapper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* @author Thomas Sander
*/
package com.actelion.research.chem.reaction.mapping;
import com.actelion.research.chem.*;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.util.ByteArrayComparator;
import java.nio.charset.StandardCharsets;
import java.util.Arrays;
public class SimilarityGraphBasedReactionMapper {
public static final boolean DEBUG = false;
public static final boolean PRINT_SCORES = false;
// When building the mapping graph, next neighbors are added based on directed environment similarity.
// To determine this similarity, directed radial substructures are precalculated for every atom from
// the perspective of every of one its neighbour atoms (fromAtom). The smallest fragment just contains
// the neighbor and the atom itself. The next fragment is build from the first by adding all direct
// neighbours to the atom (except fromAtom neighbours).
private static final int SIMILARITY_SHIFT = 8; // bit shift to allow for lower significant features to distinguish equal environment similarities
private static final int NO_PI_PENALTY = 1;
private static final int SKELETON_PENALTY = 2;
private static final int MAX_ENVIRONMENT_RADIUS = 8;
private static final int MAX_ENVIRONMENT_SIMILARITY = MAX_ENVIRONMENT_RADIUS << SIMILARITY_SHIFT;
private static final int MAX_SKELETON_SIMILARITY = (MAX_ENVIRONMENT_RADIUS - SKELETON_PENALTY) << SIMILARITY_SHIFT;
// Fine-tune environment similarity score modifiers
private static final int PI_AND_HETERO_PLUS = 1; // only for skeleton similarity
private static final int STEREO_MATCH_PLUS = 64;
private static final int ENDO_RING_PLUS = 128;
private StereoMolecule mReactant,mProduct;
private int mMapNo,mGraphMapNoCount,mMappableAtomCount,mAtomPairSequenceCount;
private int[] mReactantMapNo,mProductMapNo,mReactantRingMembership,mProductRingMembership;
private float mScore;
private ByteArrayComparator mSimilarityComparator;
private byte[][][][] mReactantConnAtomEnv,mProductConnAtomEnv; // indexes: atom,connIndex,radius,idcode bytes
// private byte[][][][] mReactantNoPiAtomEnv,mProductNoPiAtomEnv; // indexes: atom,connIndex,radius,idcode bytes
private byte[][][][] mReactantSkelAtomEnv,mProductSkelAtomEnv; // indexes: atom,connIndex,radius,idcode bytes
/**
* Entirely maps the given reaction by setting all reactant's and product's mapping numbers.
* If the reaction already contains manually mapped atoms, then these are untouched and used
* as seed atoms when building the mapping graph.
* @param rxn reaction with proper atom coordinates
*/
public void map(Reaction rxn) {
mergeReactantsAndProducts(rxn);
map(mReactant, mProduct, new int[mReactant.getAtoms()], new int[mProduct.getAtoms()]);
copyMapNosToReaction(rxn, mReactantMapNo, mProductMapNo, mGraphMapNoCount);
}
/**
* Entirely maps the given reaction by setting all reactant's and product's mapping numbers.
* @param reactant all reactants merged into one molecule; may contain seed mapping
* @param product all products merged into one molecule; may contain seed mapping
* @param reactantMapNo array to receive the created mapping numbers
* @param productMapNo array to receive the created mapping numbers
*/
public void map(StereoMolecule reactant, StereoMolecule product, int[] reactantMapNo, int[] productMapNo) {
mReactant = reactant;
mProduct = product;
mReactantMapNo = new int[reactantMapNo.length];
mProductMapNo = new int[productMapNo.length];
mReactantConnAtomEnv = classifyNeighbourAtomEnvironment(mReactant, false, false);
// mReactantNoPiAtomEnv = classifyNeighbourAtomEnvironment(mReactant, false, true);
mReactantSkelAtomEnv = classifyNeighbourAtomEnvironment(mReactant, true, false);
mProductConnAtomEnv = classifyNeighbourAtomEnvironment(mProduct, false, false);
// mProductNoPiAtomEnv = classifyNeighbourAtomEnvironment(mProduct, false, true);
mProductSkelAtomEnv = classifyNeighbourAtomEnvironment(mProduct, true, false);
initializeRingMembership();
mSimilarityComparator = new ByteArrayComparator();
mScore = -1e10f;
RootAtomPairSource rootAtomPairSource = new RootAtomPairSource(reactant, product, mReactantMapNo, mProductMapNo);
mAtomPairSequenceCount = 0;
while (rootAtomPairSource.hasNextPairSequence()) {
mAtomPairSequenceCount++;
mMapNo = rootAtomPairSource.getManualMapCount();
mMappableAtomCount = rootAtomPairSource.getMappableAtomCount();
RootAtomPair pair = rootAtomPairSource.nextPair();
while (pair != null) {
if (PRINT_SCORES) { System.out.println(); System.out.println("@ SMapper.map() pair: "+pair.reactantAtom+","+pair.productAtom+" mMapNo:"+mMapNo+" sequence:"+mAtomPairSequenceCount); }
mapFromRootAtoms(pair);
if (PRINT_SCORES) { System.out.print("@ rMapNo:"); for (int mapNo:mReactantMapNo) System.out.print(" "+mapNo); System.out.println(); }
if (PRINT_SCORES) { System.out.print("@ pMapNo:"); for (int mapNo:mProductMapNo) System.out.print(" "+mapNo); System.out.println(); }
pair = rootAtomPairSource.nextPair();
}
mGraphMapNoCount = mMapNo;
float score;
if (mMapNo < mMappableAtomCount) {
ReactionCenterMapper centerMapper = new ReactionCenterMapper(mReactant, mProduct, mReactantMapNo, mProductMapNo, mMapNo);
score = centerMapper.completeAndScoreMapping();
mMapNo += centerMapper.getMappedAtomCount();
}
else {
MappingScorer scorer = new MappingScorer(mReactant, mProduct);
score = scorer.scoreMapping(scorer.createReactantToProductAtomMap(mReactantMapNo, mProductMapNo));
}
if (PRINT_SCORES) System.out.println("@ score:"+score);
if (DEBUG) {
Reaction rxn = new Reaction();
rxn.addReactant(new StereoMolecule(reactant));
rxn.addProduct(new StereoMolecule(product));
for (int atom=0; atom fragment.getConnAtoms(atomMap[atom]))
fragment.setAtomQueryFeature(atomMap[atom], Molecule.cAtomQFMoreNeighbours, true);
if (skeletonOnly || isWithoutPi)
for (int bond=0; bond 1)
return false;
// reductive ester cleavage
if (mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getConnAtoms(reactantConn) == 2
&& connAtomsOfAtomicNo(mReactant, reactantAtom, 8) > connAtomsOfAtomicNo(mProduct, productAtom, 8))
return false;
if (mProduct.getAtomicNo(productConn) == 8
&& mProduct.getConnAtoms(productConn) == 2
&& connAtomsOfAtomicNo(mReactant, reactantAtom, 8) < connAtomsOfAtomicNo(mProduct, productAtom, 8))
return false;
// If the number of neighbours at an atom behind an oxygen changes, then the oxygen itself may in question
if (mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getConnAtoms(reactantConn) == 2
&& mProduct.getConnAtoms(productConn) == 2) {
// int reactantConnConn = mReactant.getConnAtom(reactantConn, mReactant.getConnAtom(reactantConn, 0) == reactantAtom ? 1 : 0);
// int productConnConn = mProduct.getConnAtom(productConn, mProduct.getConnAtom(productConn, 0) == productAtom ? 1 : 0);
// if (mReactant.getConnAtoms(reactantConnConn) != mProduct.getConnAtoms(productConnConn))
// return false;
int reactantConnIndex = mReactant.getConnAtom(reactantConn, 0) == reactantAtom ? 0 : 1;
int productConnIndex = mProduct.getConnAtom(productConn, 0) == productAtom ? 0 : 1;
if (mSimilarityComparator.compare(mReactantSkelAtomEnv[reactantConn][reactantConnIndex][3],
mProductSkelAtomEnv[productConn][productConnIndex][3]) != 0)
return false;
}
/* strangely this causes lots of false mappings
// attached hydroxies may not be the same
if (mReactant.getAtomPi(reactantAtom) == 0
&& mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getAtomPi(reactantConn) == 0
&& mReactant.getConnAtoms(reactantConn) == 1
&& mProduct.getConnAtoms(productConn) > 1)
return false;
if (mProduct.getAtomPi(productAtom) == 0
&& mProduct.getAtomicNo(productConn) == 8
&& mProduct.getAtomPi(productConn) == 0
&& mProduct.getConnAtoms(productConn) == 1
&& mReactant.getConnAtoms(reactantConn) > 1)
return false;
*/
// imine formation
if (mReactant.getAtomicNo(reactantConn) == 7
&& connAtomsOfAtomicNo(mReactant, reactantAtom, 7) < connAtomsOfAtomicNo(mProduct, productAtom, 7))
return false;
if (mProduct.getAtomicNo(productConn) == 7
&& connAtomsOfAtomicNo(mReactant, reactantAtom, 7) > connAtomsOfAtomicNo(mProduct, productAtom, 7))
return false;
// if a 3-membered ring is formed or broken
if (mReactant.getBondRingSize(mReactant.getBond(reactantAtom, reactantConn)) == 3
^ mProduct.getBondRingSize(mProduct.getBond(productAtom, productConn)) == 3)
return false;
return true;
}
private boolean passesSimilarityDependentRules(int reactantAtom, int reactantConn, int productAtom, int productConn, int similarity, boolean isStereoMatch) {
if (mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getAtomPi(reactantConn) == 0
&& hasOxo(mReactant, reactantAtom)
&& hasOxo(mProduct, productAtom)
&& similarity != MAX_ENVIRONMENT_SIMILARITY)
return false;
// Since carboxylates,sulfonates, etc are potential leaving groups, we dont follow a bond to oxygen,
// if on the other side is a keto on the reactant side and similarity is not high
if (mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getConnAtoms(reactantConn) == 2
&& hasOxo(mReactant, getNextNeighbour(mReactant, reactantAtom, reactantConn))
&& similarity < (3 << SIMILARITY_SHIFT))
return false;
// A bond from oxygen to keto (C=O, S=O, C=N, etc) won't be followed,
// unless the first target fragment shells are identical
if (mReactant.getAtomicNo(reactantAtom) == 8
&& (hasOxo(mReactant, reactantConn)
|| hasOxo(mProduct, productConn))
&& similarity < (2 << SIMILARITY_SHIFT))
return false;
if (!isStereoMatch
&& (mReactant.getAtomicNo(reactantConn) != 6
|| !hasNonCarbonNeighbour(mReactant, reactantAtom)))
return false;
// potential replacement of -O-R (R:H,C,any) at non-carbon atoms
if (mReactant.getAtomicNo(reactantConn) == 8
&& mReactant.getAtomicNo(reactantAtom) != 6
&& similarity != MAX_ENVIRONMENT_SIMILARITY)
return false;
if (mReactant.getAtomicNo(reactantAtom) == 5
&& mReactant.getAtomicNo(reactantConn) == 6
&& similarity < (3 << SIMILARITY_SHIFT))
return false;
return true;
}
/**
* Returns true, if we have no stereo situation or if the stereo center (or EZ-bond) is retained
* by continuing the graph with the candidate.
* @param reactantParent
* @param reactantAtom
* @param reactantConn
* @param productParent
* @param productAtom
* @param productConn
* @return
*/
private boolean matchesStereo(int reactantParent, int reactantAtom, int reactantConn, int productParent, int productAtom, int productConn) {
if (mReactant.getConnAtoms(reactantAtom) == 3
&& (mReactant.getAtomParity(reactantAtom) == Molecule.cAtomParity1 || mReactant.getAtomParity(reactantAtom) == Molecule.cAtomParity2)
&& mProduct.getConnAtoms(productAtom) == 3
&& (mProduct.getAtomParity(productAtom) == Molecule.cAtomParity1 || mProduct.getAtomParity(productAtom) == Molecule.cAtomParity2)) {
boolean reactantInversion = (reactantParent > reactantConn);
int lastNeighbour = -1;
for (int i=0; i reactantConn && lastNeighbour < reactantParent)
|| (lastNeighbour < reactantConn && lastNeighbour > reactantParent))
reactantInversion = !reactantInversion;
break;
}
}
boolean productInversion = (productParent > productConn);
for (int i=0; i productConn && lastNeighbour < productParent)
|| (lastNeighbour < productConn && lastNeighbour > productParent))
productInversion = !productInversion;
break;
}
}
return (reactantInversion == productInversion)
== (mReactant.getAtomParity(reactantAtom) == mProduct.getAtomParity(productAtom));
}
return true;
}
public static boolean hasOxo(StereoMolecule mol, int atom) {
for (int i=0; i 6)
return true;
return false;
}
public static boolean hasOxo(StereoMolecule mol, int atom, int notThisAtom) {
for (int i=0; i 6
&& mol.getConnAtom(atom, i) != notThisAtom)
return true;
return false;
}
public static boolean hasNonCarbonNeighbour(StereoMolecule mol, int atom) {
for (int i=0; i