All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.mcs.MCS Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
package com.actelion.research.chem.mcs;

import com.actelion.research.chem.*;
import com.actelion.research.util.datamodel.IntVec;

import java.util.*;

/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
public class MCS
{
	public static final int PAR_CLEAVE_RINGS=0; 
	public static final int PAR_KEEP_RINGS=1;
	public static final int PAR_KEEP_AROMATIC_RINGS=2; 
	private static final boolean DEBUG = false;
	private static final int CAPACITY = (60+60)*10; 
	private StereoMolecule mol;
	private StereoMolecule frag;
	private HashSet hsIndexFragCandidates;
	private HashSet hsIndexFragGarbage;
	private HashSet hsIndexFragSolution;
	private SSSearcher sss;
	private ComparatorBitsSet comparatorBitsSet;
	private StereoMolecule molMCS;
	private boolean considerAromaticRings;
	private boolean considerRings;
	// The largest valid solutions.
	private List liMCSSolutions;
	private HashMap> hmRingBnd_ListRingBnds;
	private HashMap> hmAromaticRingBnd_ListRingBnds;
	private RingCollection ringCollection;
    boolean excluded[] = null;

    public MCS()
    {
        this(PAR_CLEAVE_RINGS,null);
	}
	
    public MCS(int ringStatus)
    {
        this(ringStatus,null);
    }
		
    public MCS(int ringStatus,SSSearcher searcher)
    {
		considerRings=false;
		considerAromaticRings=false;
		switch (ringStatus) {
		case PAR_CLEAVE_RINGS:
			break;
		case PAR_KEEP_RINGS:
			considerRings=true;
			break;
		case PAR_KEEP_AROMATIC_RINGS:
			considerAromaticRings=true;
			break;
		default:
			break;
		}
		hsIndexFragCandidates = new HashSet(CAPACITY);
		hsIndexFragGarbage = new HashSet(CAPACITY);
		hsIndexFragSolution = new HashSet(CAPACITY);
		liMCSSolutions = new ArrayList(CAPACITY);
        if (searcher == null)
		sss = new SSSearcher();
        else
            sss = searcher;
		comparatorBitsSet = new ComparatorBitsSet();
		hmRingBnd_ListRingBnds = new HashMap>();
		hmAromaticRingBnd_ListRingBnds = new HashMap>();
	}
	
    public void setSSSearcher(SSSearcher sss)
    {
        this.sss  = sss;
    }

	/**
	 * mol should contain equal or more bonds than frag.
	 * @param mol
	 * @param frag
	 */
    public void set(StereoMolecule mol, StereoMolecule frag)
    {
        set(mol,frag,null);
	}

	/**
	 * mol should contain equal or more bonds than frag.
	 * If frag contains more than one molecule only the biggest one is considered.
	 * @param mol
	 * @param frag
	 * @param excluded
	 */
    public void set(StereoMolecule mol, StereoMolecule frag, boolean excluded[])
    {

		StereoMolecule fragBiggestSub = new StereoMolecule(frag);

		fragBiggestSub.ensureHelperArrays(Molecule.cHelperRings);

		fragBiggestSub.stripSmallFragments();

		fragBiggestSub.ensureHelperArrays(Molecule.cHelperRings);

		this.mol = mol;

        this.frag = fragBiggestSub;

		this.excluded = excluded;

        init();
    }

    private void init()
    {
		hsIndexFragCandidates.clear();
		hsIndexFragGarbage.clear();
		hsIndexFragSolution.clear();
		liMCSSolutions.clear();
		hmRingBnd_ListRingBnds.clear();
		hmAromaticRingBnd_ListRingBnds.clear();
		initCandidates();
	}
	
    private void initCandidates()
    {
		ringCollection = frag.getRingSet();
		int rings = ringCollection.getSize();
		for (int i = 0; i < rings; i++) {
			int [] arrIndexBnd = ringCollection.getRingBonds(i);
			for (int j = 0; j < arrIndexBnd.length; j++) {
				if(!hmRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])){
					hmRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
				}
				List li = hmRingBnd_ListRingBnds.get(arrIndexBnd[j]);
				li.add(arrIndexBnd);
			}
			if(ringCollection.isAromatic(i)){
				for (int j = 0; j < arrIndexBnd.length; j++) {
					if(!hmAromaticRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])){
						hmAromaticRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
					}
					List li = hmAromaticRingBnd_ListRingBnds.get(arrIndexBnd[j]);
					li.add(arrIndexBnd);
				}
			}
		}
		// Array length for bit list.
		int nInts = (int)(((double)frag.getBonds() / Integer.SIZE) + ((double)(Integer.SIZE-1)/Integer.SIZE));
		// The vector is used as bit vector.
		// For each bond one bit.
		for (int i = 0; i < frag.getBonds(); i++) {
			IntVec iv = new IntVec(nInts);
			setBitAndAddRelatedRingBonds(i, iv);
			hsIndexFragCandidates.add(iv);
		}
	}
	
    private void setBitAndAddRelatedRingBonds(int bit, IntVec iv)
    {
		if(!considerAromaticRings && !considerRings) {
			iv.setBit(bit);
		} else if(considerRings) {
			iv.setBit(bit);
			if(hmRingBnd_ListRingBnds.containsKey(bit)){
				List li = hmRingBnd_ListRingBnds.get(bit);
				for (int i = 0; i < li.size(); i++) {
					int [] a = li.get(i);
					for (int j = 0; j < a.length; j++) {
						iv.setBit(a[j]);
					}
				}
			}
		} else if(considerAromaticRings) {
			iv.setBit(bit);
			if(hmAromaticRingBnd_ListRingBnds.containsKey(bit)){
				List li = hmAromaticRingBnd_ListRingBnds.get(bit);
				for (int i = 0; i < li.size(); i++) {
					int [] a = li.get(i);
					for (int j = 0; j < a.length; j++) {
						iv.setBit(a[j]);
					}
				}
			}
		}
		iv.calculateHashCode();
	}
	
	/**
	 * Checks first fragment for being sub structure of molecule. If true, frag is returned.
	 * The returned IntVec contains the bond information of frag. The IntVec is used bit wise. Each bit corresponds to
	 * one bond in the fragment. The information for the matching substructure in the molecule is not contained in the
	 * IntVec.
     *
	 * @return
	 */
    private List getAllSolutionsForCommonSubstructures()
    {
		sss.setMolecule(mol);
		frag.setFragment(true);
		sss.setFragment(frag);
		//
		// The MCS is the complete fragment.
		//
		try {
            if (sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cMatchDBondToDelocalized, excluded) > 0) {
				molMCS = frag;
				List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
                if (liIndexFragCandidates != null && !liIndexFragCandidates.isEmpty()) {
					IntVec iv = liIndexFragCandidates.get(0);
					iv.setBits(0, iv.sizeBits());
					iv.calculateHashCode();
					List li = new ArrayList();
					li.add(iv);
					return li;
				}
            }
		} catch (Exception e) {
			e.printStackTrace();
		}
		int maxSizeCandidates = 0;
		while(!hsIndexFragCandidates.isEmpty()){
			List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
			Collections.sort(liIndexFragCandidates, comparatorBitsSet);
			// Get largest mcs.
			IntVec iv = liIndexFragCandidates.get(liIndexFragCandidates.size()-1);
			hsIndexFragCandidates.remove(iv);
			if(DEBUG) {
				System.out.println("Bits set " + iv.getBitsSet());
				if(iv.getBitsSet() == frag.getBonds()){
					System.out.println("Full structure in iv.");
				}
			}
			StereoMolecule fragSub = getSubFrag(frag, iv);
			sss.setFragment(fragSub);
			if(sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode,excluded)>0){
				hsIndexFragSolution.add(iv);
				removeAllSubSolutions(iv);
				if(iv.getBitsSet() != frag.getBonds()) {
					List liIV = getAllPlusOneAtomCombinations(iv, frag);
					for (IntVec ivPlus : liIV) {
						if(DEBUG) {
							if(ivPlus.getBitsSet() == frag.getBonds()){
								System.out.println("Full structure in ivPlus.");
							}
						}
						if((!hsIndexFragGarbage.contains(ivPlus)) && (!hsIndexFragSolution.contains(ivPlus))){
							hsIndexFragCandidates.add(ivPlus);	
						}
					}
                    if (DEBUG) {
						System.out.println("tsIndexFragCandidates " + hsIndexFragCandidates.size());
				}
                }
                if (maxSizeCandidates < hsIndexFragCandidates.size()) {
					maxSizeCandidates = hsIndexFragCandidates.size();
                }
			} else {
				hsIndexFragGarbage.add(iv);
			}
		}
		if(hsIndexFragSolution.size()==0){
			return null;
		}
		return getFinalSolutionSet(hsIndexFragSolution);
	}
	
	/**
	 * All molecules which are sub structures of an other molecule in the list are removed.
     *
	 * @return
	 */
    public LinkedList getAllCommonSubstructures()
    {
		List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
		if(liIndexFragSolution==null){
			return null;
		}
		Collections.sort(liIndexFragSolution, comparatorBitsSet);
		IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size()-1);
		molMCS = getSubFrag(frag, ivMCS);
		List li = new ArrayList();
		for (IntVec iv : liIndexFragSolution) {
			li.add(getSubFrag(frag, iv));
		}
		return ExtendedMoleculeFunctions.removeSubStructures(li);
	}

	/**
	 * @return maximum common substructure at top of list or null of none common MCS was found.
	 */
    public StereoMolecule getMCS()
    {
		List liIndexFragSolution = getAllSolutionsForCommonSubstructures ();
		if(liIndexFragSolution==null){
			return null;
		}
		Collections.sort(liIndexFragSolution, comparatorBitsSet);
		IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size()-1);
		molMCS = getSubFrag(frag, ivMCS);
		return molMCS;
	}
	
	/**
	 * Calculates the bond arrays for molecule and fragment for their maximum common substructure.
     *
	 * @param arrBondMCSMol result array. Null or initialized with the length number of bonds in molecule. 
	 * @param arrBondFrag result array. Null or initialized with the length number of bonds in fragment.
	 * @return two arrays, in the first array the bits are set 'true' which corresponds to the bonds for maximum common substructure in the molecule. 
	 * In the second are the bits set 'true' which corresponds to the bonds for maximum common substructure in the fragment.
	 */
    public boolean[][] getMCSBondArray(boolean[] arrBondMCSMol, boolean[] arrBondFrag)
    {
		boolean [][] arrBondMol_Result = new boolean [2][];
		List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
		if(liIndexFragSolution==null){
			return null;
		}
		Collections.sort(liIndexFragSolution, comparatorBitsSet);
		// Get largest mcs.
		IntVec ivMCSLargest = liIndexFragSolution.get(liIndexFragSolution.size()-1);
		int bonds = frag.getBonds();
		if(arrBondFrag == null){
			arrBondFrag = new boolean [bonds];
		} else {
			Arrays.fill(arrBondFrag, false);
		}
		for (int i = 0; i < bonds; i++) {
			if(ivMCSLargest.isBitSet(i)){
				arrBondFrag[i]=true;
			}
		}		
		StereoMolecule fragSub = getSubFrag(frag, ivMCSLargest);
		sss.setFragment(fragSub);
		//
		// The substructure has to be searched in the molecule because the mapping indices are not contained in the
		// IntVec that is the solution from the MCS search.
		if(sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode,excluded)>0){
			ArrayList liMatchSubFrag2Mol = sss.getMatchList();
			int [] arrMatchListFrag2Mol = getMappedMatchListFrag2Mol(fragSub, liMatchSubFrag2Mol.get(0));
			int bondsMol = mol.getBonds();
			if(arrBondMCSMol==null) {
				arrBondMCSMol = new boolean [bondsMol];
			} else {
				Arrays.fill(arrBondMCSMol, false);
			}
			getBondArrayMolecule(arrMatchListFrag2Mol, arrBondFrag, arrBondMCSMol);	
		}
		arrBondMol_Result[0]=arrBondMCSMol;
		arrBondMol_Result[1]=arrBondFrag;
		return arrBondMol_Result;
	}

    private int[] getMappedMatchListFrag2Mol(StereoMolecule fragSub, int[] arrMatchListSubFrag2Mol)
    {
		int [] arrMatchListFrag2Mol = new int [frag.getAtoms()];
//        SSSearcher sss = new SSSearcher();
		sss.setMol(fragSub, frag);
		sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode, null);
		ArrayList liMatchSubFrag2Mol = sss.getMatchList();
		int [] arrMatchListSubFrag2Frag = liMatchSubFrag2Mol.get(0);
		HashMap hmIndexSubFrag_IndexFrag = new HashMap();
		for (int i = 0; i < arrMatchListSubFrag2Frag.length; i++) {
			hmIndexSubFrag_IndexFrag.put(i, arrMatchListSubFrag2Frag[i]);
		}
		for (int i = 0; i < arrMatchListSubFrag2Mol.length; i++) {
			int indexAtSubFragment = i;
			int indexAtFragment = hmIndexSubFrag_IndexFrag.get(indexAtSubFragment);
			arrMatchListFrag2Mol[indexAtFragment]=arrMatchListSubFrag2Mol[indexAtSubFragment];
		}
		return arrMatchListFrag2Mol;
	}
	
    private boolean[] getBondArrayMolecule(int[] arrMatchFragment2Mol, boolean[] arrBondMCSFrag, boolean[] arrBondMCSMol)
    {
		for (int i = 0; i < arrMatchFragment2Mol.length; i++) {
			int indexAtFrag = i;
			int indexAtMol = arrMatchFragment2Mol[i];
			int nConn2Frag = frag.getConnAtoms(indexAtFrag);
			for (int j = 0; j < nConn2Frag; j++) {
				int indexAtFragConn = frag.getConnAtom(indexAtFrag, j);
				int indexBondFrag = frag.getBond(indexAtFrag, indexAtFragConn);
				if(arrBondMCSFrag[indexBondFrag]){
					int indexAtMolConn = arrMatchFragment2Mol[indexAtFragConn];
					int indexBondMol = mol.getBond(indexAtMol, indexAtMolConn);
                    if (indexBondMol > -1) {
						arrBondMCSMol[indexBondMol]=true;
                    }
				}
			}
		}
		return arrBondMCSMol;
	}

// The original procedure missed atom charge, mass, etc, which is particularly annoying,
// when providing a custom SSSearcher(), which matches these properties. TLS 11Feb2021
	private static StereoMolecule getSubFrag(StereoMolecule frag, IntVec iv) {
		boolean[] isFragmentAtom = new boolean[frag.getAtoms()];
		int atoms = 0;
		int bonds = frag.getBonds();
		for (int bond=0; bond hsAtomIndex = new HashSet();
		for (int i = 0; i < bonds; i++) {
			if(iv.isBitSet(i)){
				int indexAtom1 = frag.getBondAtom(0, i);
				int indexAtom2 = frag.getBondAtom(1, i);
				hsAtomIndex.add(indexAtom1);
				hsAtomIndex.add(indexAtom2);
			}
		}		
		StereoMolecule fragSubBonds = new StereoMolecule(hsAtomIndex.size(), bonds);
		fragSubBonds.setFragment(true);
		int [] arrMapAtom = new int [frag.getAtoms()];
		ArrayUtilsCalc.set(arrMapAtom, -1);
		for (int indexAtom : hsAtomIndex) {
			int indexAtomNew = fragSubBonds.addAtom(frag.getAtomicNo(indexAtom));
			fragSubBonds.setAtomX(indexAtomNew, frag.getAtomX(indexAtom));
			fragSubBonds.setAtomY(indexAtomNew, frag.getAtomY(indexAtom));
			fragSubBonds.setAtomZ(indexAtomNew, frag.getAtomZ(indexAtom));
			arrMapAtom[indexAtom]=indexAtomNew;
		}
		for (int i = 0; i < bonds; i++) {
			if(iv.isBitSet(i)){
				int indexAtom1 = frag.getBondAtom(0, i);
				int indexAtom2 = frag.getBondAtom(1, i);
				int indexAtomNew1 = arrMapAtom[indexAtom1];
				int indexAtomNew2 = arrMapAtom[indexAtom2];
				int type = frag.getBondType(i);
				if(frag.isDelocalizedBond(i)){
					type = Molecule.cBondTypeDelocalized;
					// fragSubBonds.setBondQueryFeature(bondIndexNew, Molecule.cBondQFDelocalized, true);
				}
				// int bondIndexNew = fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
				fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
			}
		}
		fragSubBonds.ensureHelperArrays(Molecule.cHelperRings);
		return fragSubBonds;
	}*/
	
    private List getAllPlusOneAtomCombinations(IntVec iv, StereoMolecule frag)
    {
		int bonds = frag.getBonds();
		List liIntVec = new ArrayList();
		HashSet hsAtomIndex = new HashSet();
		for (int i = 0; i < bonds; i++) {
			if(iv.isBitSet(i)){
				int indexAt1 = frag.getBondAtom(0, i);
				int indexAt2 = frag.getBondAtom(1, i);
				hsAtomIndex.add(indexAt1);
				hsAtomIndex.add(indexAt2);
			}
		}
		for (int i = 0; i < bonds; i++) {
			if(!iv.isBitSet(i)){
				int indexAt1 = frag.getBondAtom(0, i);
				int indexAt2 = frag.getBondAtom(1, i);
				if(hsAtomIndex.contains(indexAt1) || hsAtomIndex.contains(indexAt2)) {
					IntVec ivPlus = new IntVec(iv.get());
					setBitAndAddRelatedRingBonds(i, ivPlus);
					liIntVec.add(ivPlus);
				}
			}
		}
		return liIntVec;
	}

	/**
	 * Removes all sub-solutions from hsIndexFragCandidates.
     *
	 * @param ivSolution
	 */
    private void removeAllSubSolutions(IntVec ivSolution)
    {
		List liIndexFragSolution = new ArrayList(hsIndexFragCandidates);
		for (IntVec ivCandidate : liIndexFragSolution) {
			if(isCandidateInSolution(ivSolution, ivCandidate)) {
				hsIndexFragCandidates.remove(ivCandidate);
			}
		}
	}
	
	/**
	 * All sub solutions are removed.
     *
	 * @param hsIndexFragSolution
	 * @return
	 */
    private static List getFinalSolutionSet(HashSet hsIndexFragSolution)
    {
		List liIndexFragSolution = new ArrayList(hsIndexFragSolution);
		for (int i = liIndexFragSolution.size()-1; i >= 0; i--) {
			IntVec ivCandidate = liIndexFragSolution.get(i);
			for (int j = 0; j < liIndexFragSolution.size(); j++) {
				IntVec ivSolution = liIndexFragSolution.get(j);
				if(i!=j){
					if(isCandidateInSolution(ivSolution, ivCandidate)) {
						liIndexFragSolution.remove(i);
						break;
					}
				}
			}
		}
		return liIndexFragSolution;
	}
	
    private static final boolean isCandidateInSolution(IntVec ivSolution, IntVec ivCandidate)
    {
		IntVec iv = IntVec.OR(ivSolution, ivCandidate);
		if(iv.equals(ivSolution)){
			return true;
		}
		return false;
	}
	
    private static class ComparatorBitsSet implements Comparator
    {
        public int compare(IntVec iv1, IntVec iv2)
        {
			int bits1 = iv1.getBitsSet();
			int bits2 = iv2.getBitsSet();
			if(bits1>bits2){
				return 1;
			} else if(bits1




© 2015 - 2025 Weber Informatics LLC | Privacy Policy