All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.reaction.mapping.SimilarityGraphBasedReactionMapper Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem.reaction.mapping;

import com.actelion.research.chem.*;
import com.actelion.research.chem.reaction.Reaction;
import com.actelion.research.util.ByteArrayComparator;

import java.nio.charset.StandardCharsets;
import java.util.Arrays;

public class SimilarityGraphBasedReactionMapper {
	public static final boolean DEBUG = false;
	public static final boolean PRINT_SCORES = false;

	// When building the mapping graph, next neighbors are added based on directed environment similarity.
	// To determine this similarity, directed radial substructures are precalculated for every atom from
	// the perspective of every of one its neighbour atoms (fromAtom). The smallest fragment just contains
	// the neighbor and the atom itself. The next fragment is build from the first by adding all direct
	// neighbours to the atom (except fromAtom neighbours).
	private static final int SIMILARITY_SHIFT = 8;  // bit shift to allow for lower significant features to distinguish equal environment similarities
	private static final int NO_PI_PENALTY = 1;
	private static final int SKELETON_PENALTY = 2;
	private static final int MAX_ENVIRONMENT_RADIUS = 8;
	private static final int MAX_ENVIRONMENT_SIMILARITY = MAX_ENVIRONMENT_RADIUS << SIMILARITY_SHIFT;
	private static final int MAX_SKELETON_SIMILARITY = (MAX_ENVIRONMENT_RADIUS - SKELETON_PENALTY) << SIMILARITY_SHIFT;

	// Fine-tune environment similarity score modifiers
	private static final int PI_AND_HETERO_PLUS = 1;    // only for skeleton similarity
	private static final int STEREO_MATCH_PLUS = 64;
	private static final int ENDO_RING_PLUS = 128;

	private StereoMolecule mReactant,mProduct;
	private int mMapNo,mGraphMapNoCount,mMappableAtomCount,mAtomPairSequenceCount;
	private int[] mReactantMapNo,mProductMapNo,mReactantRingMembership,mProductRingMembership;
	private float mScore;
	private ByteArrayComparator mSimilarityComparator;
	private byte[][][][] mReactantConnAtomEnv,mProductConnAtomEnv;   // indexes: atom,connIndex,radius,idcode bytes
//	private byte[][][][] mReactantNoPiAtomEnv,mProductNoPiAtomEnv;   // indexes: atom,connIndex,radius,idcode bytes
	private byte[][][][] mReactantSkelAtomEnv,mProductSkelAtomEnv;   // indexes: atom,connIndex,radius,idcode bytes

	/**
	 * Entirely maps the given reaction by setting all reactant's and product's mapping numbers.
	 * If the reaction already contains manually mapped atoms, then these are untouched and used
	 * as seed atoms when building the mapping graph.
	 * @param rxn reaction with proper atom coordinates
	 */
	public void map(Reaction rxn) {
		mergeReactantsAndProducts(rxn);
		map(mReactant, mProduct, new int[mReactant.getAtoms()], new int[mProduct.getAtoms()]);
		copyMapNosToReaction(rxn, mReactantMapNo, mProductMapNo, mGraphMapNoCount);
		}

	/**
	 * Entirely maps the given reaction by setting all reactant's and product's mapping numbers.
	 * @param reactant all reactants merged into one molecule; may contain seed mapping
	 * @param product all products merged into one molecule; may contain seed mapping
	 * @param reactantMapNo array to receive the created mapping numbers
	 * @param productMapNo array to receive the created mapping numbers
	 */
	public void map(StereoMolecule reactant, StereoMolecule product, int[] reactantMapNo, int[] productMapNo) {
		mReactant = reactant;
		mProduct = product;

		mReactantMapNo = new int[reactantMapNo.length];
		mProductMapNo = new int[productMapNo.length];

		mReactantConnAtomEnv = classifyNeighbourAtomEnvironment(mReactant, false, false);
//		mReactantNoPiAtomEnv = classifyNeighbourAtomEnvironment(mReactant, false, true);
		mReactantSkelAtomEnv = classifyNeighbourAtomEnvironment(mReactant, true, false);
		mProductConnAtomEnv = classifyNeighbourAtomEnvironment(mProduct, false, false);
//		mProductNoPiAtomEnv = classifyNeighbourAtomEnvironment(mProduct, false, true);
		mProductSkelAtomEnv = classifyNeighbourAtomEnvironment(mProduct, true, false);

		initializeRingMembership();

		mSimilarityComparator = new ByteArrayComparator();
		mScore = -1e10f;

		RootAtomPairSource rootAtomPairSource = new RootAtomPairSource(reactant, product, mReactantMapNo, mProductMapNo);

		mAtomPairSequenceCount = 0;

		while (rootAtomPairSource.hasNextPairSequence()) {
			mAtomPairSequenceCount++;
			mMapNo = rootAtomPairSource.getManualMapCount();
			mMappableAtomCount = rootAtomPairSource.getMappableAtomCount();

			RootAtomPair pair = rootAtomPairSource.nextPair();
			while (pair != null) {
if (PRINT_SCORES)  { System.out.println(); System.out.println("@ SMapper.map() pair: "+pair.reactantAtom+","+pair.productAtom+" mMapNo:"+mMapNo+" sequence:"+mAtomPairSequenceCount); }
				mapFromRootAtoms(pair);
if (PRINT_SCORES)  { System.out.print("@ rMapNo:"); for (int mapNo:mReactantMapNo) System.out.print(" "+mapNo); System.out.println(); }
if (PRINT_SCORES)  { System.out.print("@ pMapNo:"); for (int mapNo:mProductMapNo) System.out.print(" "+mapNo); System.out.println(); }
				pair = rootAtomPairSource.nextPair();
				}

			mGraphMapNoCount = mMapNo;

			float score;
			if (mMapNo < mMappableAtomCount) {
				ReactionCenterMapper centerMapper = new ReactionCenterMapper(mReactant, mProduct, mReactantMapNo, mProductMapNo, mMapNo);
				score = centerMapper.completeAndScoreMapping();
				mMapNo += centerMapper.getMappedAtomCount();
				}
			else {
				MappingScorer scorer = new MappingScorer(mReactant, mProduct);
				score = scorer.scoreMapping(scorer.createReactantToProductAtomMap(mReactantMapNo, mProductMapNo));
				}
if (PRINT_SCORES) System.out.println("@ score:"+score);

if (DEBUG) {
	Reaction rxn = new Reaction();
	rxn.addReactant(new StereoMolecule(reactant));
	rxn.addProduct(new StereoMolecule(product));
	for (int atom=0; atom fragment.getConnAtoms(atomMap[atom]))
								fragment.setAtomQueryFeature(atomMap[atom], Molecule.cAtomQFMoreNeighbours, true);
						if (skeletonOnly || isWithoutPi)
							for (int bond=0; bond 1)
			return false;

		// reductive ester cleavage
		if (mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getConnAtoms(reactantConn) == 2
		 && connAtomsOfAtomicNo(mReactant, reactantAtom, 8) > connAtomsOfAtomicNo(mProduct, productAtom, 8))
			return false;
		if (mProduct.getAtomicNo(productConn) == 8
		 && mProduct.getConnAtoms(productConn) == 2
		 && connAtomsOfAtomicNo(mReactant, reactantAtom, 8) < connAtomsOfAtomicNo(mProduct, productAtom, 8))
			return false;

		// If the number of neighbours at an atom behind an oxygen changes, then the oxygen itself may in question
		if (mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getConnAtoms(reactantConn) == 2
		 && mProduct.getConnAtoms(productConn) == 2) {
//			int reactantConnConn = mReactant.getConnAtom(reactantConn, mReactant.getConnAtom(reactantConn, 0) == reactantAtom ? 1 : 0);
//			int productConnConn = mProduct.getConnAtom(productConn, mProduct.getConnAtom(productConn, 0) == productAtom ? 1 : 0);
//			if (mReactant.getConnAtoms(reactantConnConn) != mProduct.getConnAtoms(productConnConn))
//				return false;

			int reactantConnIndex = mReactant.getConnAtom(reactantConn, 0) == reactantAtom ? 0 : 1;
			int productConnIndex = mProduct.getConnAtom(productConn, 0) == productAtom ? 0 : 1;
			if (mSimilarityComparator.compare(mReactantSkelAtomEnv[reactantConn][reactantConnIndex][3],
											  mProductSkelAtomEnv[productConn][productConnIndex][3]) != 0)
				return false;
			}

/* strangely this causes lots of false mappings
		// attached hydroxies may not be the same
		if (mReactant.getAtomPi(reactantAtom) == 0
		 && mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getAtomPi(reactantConn) == 0
		 && mReactant.getConnAtoms(reactantConn) == 1
		 && mProduct.getConnAtoms(productConn) > 1)
			return false;
		if (mProduct.getAtomPi(productAtom) == 0
		 && mProduct.getAtomicNo(productConn) == 8
		 && mProduct.getAtomPi(productConn) == 0
		 && mProduct.getConnAtoms(productConn) == 1
		 && mReactant.getConnAtoms(reactantConn) > 1)
			return false;
*/
		// imine formation
		if (mReactant.getAtomicNo(reactantConn) == 7
		 && connAtomsOfAtomicNo(mReactant, reactantAtom, 7) < connAtomsOfAtomicNo(mProduct, productAtom, 7))
			return false;
		if (mProduct.getAtomicNo(productConn) == 7
		 && connAtomsOfAtomicNo(mReactant, reactantAtom, 7) > connAtomsOfAtomicNo(mProduct, productAtom, 7))
			return false;

		// if a 3-membered ring is formed or broken
		if (mReactant.getBondRingSize(mReactant.getBond(reactantAtom, reactantConn)) == 3
		  ^ mProduct.getBondRingSize(mProduct.getBond(productAtom, productConn)) == 3)
			return false;

		return true;
		}

	private boolean passesSimilarityDependentRules(int reactantAtom, int reactantConn, int productAtom, int productConn, int similarity, boolean isStereoMatch) {
		if (mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getAtomPi(reactantConn) == 0
		 && hasOxo(mReactant, reactantAtom)
		 && hasOxo(mProduct, productAtom)
		 && similarity != MAX_ENVIRONMENT_SIMILARITY)
			return false;

		// Since carboxylates,sulfonates, etc are potential leaving groups, we dont follow a bond to oxygen,
		// if on the other side is a keto on the reactant side and similarity is not high
		if (mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getConnAtoms(reactantConn) == 2
		 && hasOxo(mReactant, getNextNeighbour(mReactant, reactantAtom, reactantConn))
		 && similarity < (3 << SIMILARITY_SHIFT))
			return false;

		// A bond from oxygen to keto (C=O, S=O, C=N, etc) won't be followed,
		// unless the first target fragment shells are identical
		if (mReactant.getAtomicNo(reactantAtom) == 8
		 && (hasOxo(mReactant, reactantConn)
		  || hasOxo(mProduct, productConn))
		 && similarity < (2 << SIMILARITY_SHIFT))
			return false;

		if (!isStereoMatch
		 && (mReactant.getAtomicNo(reactantConn) != 6
		  || !hasNonCarbonNeighbour(mReactant, reactantAtom)))
			return false;

		// potential replacement of -O-R (R:H,C,any) at non-carbon atoms
		if (mReactant.getAtomicNo(reactantConn) == 8
		 && mReactant.getAtomicNo(reactantAtom) != 6
		 && similarity != MAX_ENVIRONMENT_SIMILARITY)
			return false;

		if (mReactant.getAtomicNo(reactantAtom) == 5
		 && mReactant.getAtomicNo(reactantConn) == 6
		 && similarity < (3 << SIMILARITY_SHIFT))
			return false;

		return true;
		}

	/**
	 * Returns true, if we have no stereo situation or if the stereo center (or EZ-bond) is retained
	 * by continuing the graph with the candidate.
	 * @param reactantParent
	 * @param reactantAtom
	 * @param reactantConn
	 * @param productParent
	 * @param productAtom
	 * @param productConn
	 * @return
	 */
	private boolean matchesStereo(int reactantParent, int reactantAtom, int reactantConn, int productParent, int productAtom, int productConn) {
		if (mReactant.getConnAtoms(reactantAtom) == 3
		 && (mReactant.getAtomParity(reactantAtom) == Molecule.cAtomParity1 || mReactant.getAtomParity(reactantAtom) == Molecule.cAtomParity2)
		 && mProduct.getConnAtoms(productAtom) == 3
		 && (mProduct.getAtomParity(productAtom) == Molecule.cAtomParity1 || mProduct.getAtomParity(productAtom) == Molecule.cAtomParity2)) {
			boolean reactantInversion = (reactantParent > reactantConn);
			int lastNeighbour = -1;
			for (int i=0; i reactantConn && lastNeighbour < reactantParent)
					 || (lastNeighbour < reactantConn && lastNeighbour > reactantParent))
						reactantInversion = !reactantInversion;
					break;
					}
				}
			boolean productInversion = (productParent > productConn);
			for (int i=0; i productConn && lastNeighbour < productParent)
					 || (lastNeighbour < productConn && lastNeighbour > productParent))
						productInversion = !productInversion;
					break;
					}
				}

			return (reactantInversion == productInversion)
				== (mReactant.getAtomParity(reactantAtom) == mProduct.getAtomParity(productAtom));
			}

		return true;
		}

	public static boolean hasOxo(StereoMolecule mol, int atom) {
		for (int i=0; i 6)
				return true;
		return false;
		}

	public static boolean hasOxo(StereoMolecule mol, int atom, int notThisAtom) {
		for (int i=0; i 6
			 && mol.getConnAtom(atom, i) != notThisAtom)
				return true;
		return false;
		}

	public static boolean hasNonCarbonNeighbour(StereoMolecule mol, int atom) {
		for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy