All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.mcs.MCSFast Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
package com.actelion.research.chem.mcs;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;

import com.actelion.research.calc.ArrayUtilsCalc;
import com.actelion.research.chem.ExtendedMoleculeFunctions;
import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.RingCollection;
import com.actelion.research.chem.SSSearcher;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.util.datamodel.IntVec;

/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
public class MCSFast {
	
	public static final int PAR_CLEAVE_RINGS=0; 
	
	public static final int PAR_KEEP_RINGS=1;
	
	public static final int PAR_KEEP_AROMATIC_RINGS=2; 
	
	private static final boolean DEBUG = false;
	
	private static final int CAPACITY = (60+60)*10; 
	
	private StereoMolecule mol;
	
	private StereoMolecule frag;
	
	private HashSet hsIndexFragCandidates;
	
	private HashSet hsIndexFragGarbage;
	
	private HashSet hsIndexFragSolution;
	
	private SSSearcher sss;
	
	private ComparatorBitsSet comparatorBitsSet;
	
	private StereoMolecule molMCS;
	
	private boolean considerAromaticRings;
	
	private boolean considerRings;
	
	// The largest valid solutions.
	private List liMCSSolutions;
	
	private HashMap> hmRingBnd_ListRingBnds;
	
	private HashMap> hmAromaticRingBnd_ListRingBnds;
	
	private RingCollection ringCollection;
	
	public MCSFast() {
		this(PAR_CLEAVE_RINGS);
	}
	
	public MCSFast(int ringStatus) {
		
		considerRings=false;
		
		considerAromaticRings=false;
		
		switch (ringStatus) {
		case PAR_CLEAVE_RINGS:
			break;
		case PAR_KEEP_RINGS:
			considerRings=true;
			break;
		case PAR_KEEP_AROMATIC_RINGS:
			considerAromaticRings=true;
			break;

		default:
			break;
		}
		
		hsIndexFragCandidates = new HashSet(CAPACITY);
		
		hsIndexFragGarbage = new HashSet(CAPACITY);
		
		hsIndexFragSolution = new HashSet(CAPACITY);
		
		liMCSSolutions = new ArrayList(CAPACITY);
		
		sss = new SSSearcher();
		
		comparatorBitsSet = new ComparatorBitsSet();
		
		hmRingBnd_ListRingBnds = new HashMap>();
		
		hmAromaticRingBnd_ListRingBnds = new HashMap>();
	}
	
	public void set(StereoMolecule mol, StereoMolecule frag) {
		
		this.mol = mol;
		
		this.frag = frag;
		
		init();
	}
	
	
	private void init(){
		
		hsIndexFragCandidates.clear();
		
		hsIndexFragGarbage.clear();
		
		hsIndexFragSolution.clear();
		
		liMCSSolutions.clear();
		
		hmRingBnd_ListRingBnds.clear();
		
		hmAromaticRingBnd_ListRingBnds.clear();
		
		initCandidates();
		
	}
	
	private void initCandidates(){
		
		ringCollection = frag.getRingSet();
		
		int rings = ringCollection.getSize();
						
		for (int i = 0; i < rings; i++) {
			
			int [] arrIndexBnd = ringCollection.getRingBonds(i);
			
			for (int j = 0; j < arrIndexBnd.length; j++) {
				
				if(!hmRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])){
					
					hmRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
					
				}
				
				List li = hmRingBnd_ListRingBnds.get(arrIndexBnd[j]);
				
				li.add(arrIndexBnd);
				
			}
			
			if(ringCollection.isAromatic(i)){
				
				for (int j = 0; j < arrIndexBnd.length; j++) {
					
					if(!hmAromaticRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])){
						
						hmAromaticRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
						
					}
					
					List li = hmAromaticRingBnd_ListRingBnds.get(arrIndexBnd[j]);
					
					li.add(arrIndexBnd);
					
				}
			}
			
			
		}
		
		// Array length for bit list.
		int nInts = (int)(((double)frag.getBonds() / Integer.SIZE) + ((double)(Integer.SIZE-1)/Integer.SIZE));
		
		// The vector is used as bit vector.
		// For each bond one bit.
		for (int i = 0; i < frag.getBonds(); i++) {
			
			IntVec iv = new IntVec(nInts);
			
			setBitAndAddRelatedRingBonds(i, iv);
			
			hsIndexFragCandidates.add(iv);
			
		}
	}
	
	
	private void setBitAndAddRelatedRingBonds(int bit, IntVec iv){
		if(!considerAromaticRings && !considerRings) {
			iv.setBit(bit);
			
		} else if(considerRings) {
			
			iv.setBit(bit);
			
			if(hmRingBnd_ListRingBnds.containsKey(bit)){
				List li = hmRingBnd_ListRingBnds.get(bit);
				
				for (int i = 0; i < li.size(); i++) {
					int [] a = li.get(i);
					
					for (int j = 0; j < a.length; j++) {
						iv.setBit(a[j]);
					}
				}
				
			}
			
		} else if(considerAromaticRings) {
			
			iv.setBit(bit);
			if(hmAromaticRingBnd_ListRingBnds.containsKey(bit)){
				
				List li = hmAromaticRingBnd_ListRingBnds.get(bit);
				
				for (int i = 0; i < li.size(); i++) {
					int [] a = li.get(i);
					
					for (int j = 0; j < a.length; j++) {
						iv.setBit(a[j]);
					}
				}
			}
		}
		
		iv.calculateHashCode();
	}
	
	/**
	 * Checks first fragment for being sub structure of molecule. If true, frag is returned.
	 * @return
	 */
	private List getAllSolutionsForCommonSubstructures (){
		
		sss.setMolecule(mol);
		
		frag.setFragment(true);
		
		sss.setFragment(frag);
		
		// The MCS is the complete fragment.
		try {
			if(sss.findFragmentInMolecule()>0){
				molMCS = frag;
				
				List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
				
				IntVec iv = liIndexFragCandidates.get(0);
				
				iv.setBits(0, iv.sizeBits());
				
				iv.calculateHashCode();
				
				List li = new ArrayList();
				
				li.add(iv);
				
				return li;
			}
		} catch (Exception e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
				
		int maxSizeCandidates = 0;
		
		while(!hsIndexFragCandidates.isEmpty()){
			
			List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
			
			Collections.sort(liIndexFragCandidates, comparatorBitsSet);
			
			IntVec iv = liIndexFragCandidates.get(liIndexFragCandidates.size()-1);
			
			hsIndexFragCandidates.remove(iv);
			
			if(DEBUG) {
				System.out.println("Bits set " + iv.getBitsSet());
				
				if(iv.getBitsSet() == frag.getBonds()){
					System.out.println("Full structure in iv.");
				}
			}
			
			StereoMolecule fragSub = getSubFrag(frag, iv);
			
			sss.setFragment(fragSub);
			
			if(sss.findFragmentInMolecule()>0){

				hsIndexFragSolution.add(iv);

				removeAllSubSolutions(iv);
				
				if(iv.getBitsSet() != frag.getBonds()) {
					List liIV = getAllPlusOneAtomCombinations(iv, frag);
					
					for (IntVec ivPlus : liIV) {
						
						if(DEBUG) {
							if(ivPlus.getBitsSet() == frag.getBonds()){
								System.out.println("Full structure in ivPlus.");
							}
						}
						
						if((!hsIndexFragGarbage.contains(ivPlus)) && (!hsIndexFragSolution.contains(ivPlus))){
							
							hsIndexFragCandidates.add(ivPlus);	
						}
						
					}
					
					if(DEBUG)
						System.out.println("tsIndexFragCandidates " + hsIndexFragCandidates.size());
				}
				
				if(maxSizeCandidates < hsIndexFragCandidates.size())
					maxSizeCandidates = hsIndexFragCandidates.size();
				
			} else {
				hsIndexFragGarbage.add(iv);
			}
		}
		
		if(hsIndexFragSolution.size()==0){
			return null;
		}
		
		return getFinalSolutionSet(hsIndexFragSolution);
	}
	
	/**
	 * All molecules which are sub structures of an other molecule in the list are removed.
	 * @return
	 */
	public List getAllCommonSubstructures (){
		
		List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
		
		if(liIndexFragSolution==null){
			return null;
		}
		
		Collections.sort(liIndexFragSolution, comparatorBitsSet);
		
		IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size()-1);
		
		molMCS = getSubFrag(frag, ivMCS);
		
		List li = new ArrayList();
		
		for (IntVec iv : liIndexFragSolution) {
			li.add(getSubFrag(frag, iv));
		}
		
		return ExtendedMoleculeFunctions.removeSubStructures(li);
	}

	/**
	 * 
	 * @return maximum common substructure at top of list or null of none common MCS was found.
	 */
	public StereoMolecule getMCS(){
		
		List liIndexFragSolution = getAllSolutionsForCommonSubstructures ();
		
		if(liIndexFragSolution==null){
			return null;
		}
		
		Collections.sort(liIndexFragSolution, comparatorBitsSet);
		
		IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size()-1);
		
		molMCS = getSubFrag(frag, ivMCS);
		
		return molMCS;
	}


	private static StereoMolecule getSubFrag(StereoMolecule frag, IntVec iv){
		
		int bonds = frag.getBonds();
		
		HashSet hsAtomIndex = new HashSet();
		for (int i = 0; i < bonds; i++) {
			if(iv.isBitSet(i)){
				
				int indexAtom1 = frag.getBondAtom(0, i);
				int indexAtom2 = frag.getBondAtom(1, i);
				
				hsAtomIndex.add(indexAtom1);
				
				hsAtomIndex.add(indexAtom2);
			}
		}		
		
		StereoMolecule fragSubBonds = new StereoMolecule(hsAtomIndex.size(), bonds);
		
		fragSubBonds.setFragment(true);
		
		int [] arrMapAtom = new int [frag.getAtoms()];
		
		ArrayUtilsCalc.set(arrMapAtom, -1);
		
		for (int indexAtom : hsAtomIndex) {
			
			int indexAtomNew = fragSubBonds.addAtom(frag.getAtomicNo(indexAtom));
			
			fragSubBonds.setAtomX(indexAtomNew, frag.getAtomX(indexAtom));
			fragSubBonds.setAtomY(indexAtomNew, frag.getAtomY(indexAtom));
			fragSubBonds.setAtomZ(indexAtomNew, frag.getAtomZ(indexAtom));
			
			arrMapAtom[indexAtom]=indexAtomNew;
		}
		
		for (int i = 0; i < bonds; i++) {
			if(iv.isBitSet(i)){
				
				int indexAtom1 = frag.getBondAtom(0, i);
				int indexAtom2 = frag.getBondAtom(1, i);
				
				int indexAtomNew1 = arrMapAtom[indexAtom1];
				int indexAtomNew2 = arrMapAtom[indexAtom2];
				
				int type = frag.getBondType(i);
				
				if(frag.isDelocalizedBond(i)){
					type = Molecule.cBondTypeDelocalized;
					
					// fragSubBonds.setBondQueryFeature(bondIndexNew, Molecule.cBondQFDelocalized, true);
				}
				
				// int bondIndexNew = fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
				fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
				

			}
		}
		
		fragSubBonds.ensureHelperArrays(Molecule.cHelperRings);

		return fragSubBonds;
	}
	

	private List getAllPlusOneAtomCombinations(IntVec iv, StereoMolecule frag){
		
		int bonds = frag.getBonds();
		
		List liIntVec = new ArrayList();
		
		
		HashSet hsAtomIndex = new HashSet();
		
		for (int i = 0; i < bonds; i++) {
			
			if(iv.isBitSet(i)){
				
				int indexAt1 = frag.getBondAtom(0, i);
				
				int indexAt2 = frag.getBondAtom(1, i);
				
				hsAtomIndex.add(indexAt1);
				hsAtomIndex.add(indexAt2);
			}
		}
		
		for (int i = 0; i < bonds; i++) {
			if(!iv.isBitSet(i)){
				int indexAt1 = frag.getBondAtom(0, i);
				
				int indexAt2 = frag.getBondAtom(1, i);
				
				if(hsAtomIndex.contains(indexAt1) || hsAtomIndex.contains(indexAt2)) {
					IntVec ivPlus = new IntVec(iv.get());
					
					setBitAndAddRelatedRingBonds(i, ivPlus);
					
					liIntVec.add(ivPlus);
				}
			}
		}
		
		return liIntVec;
	}

	/**
	 * Removes all sub-solutions from hsIndexFragCandidates.
	 * @param ivSolution
	 */
	private void removeAllSubSolutions(IntVec ivSolution){
		
		List liIndexFragSolution = new ArrayList(hsIndexFragCandidates);
		
		for (IntVec ivCandidate : liIndexFragSolution) {
			if(isCandidateInSolution(ivSolution, ivCandidate)) {
				
				hsIndexFragCandidates.remove(ivCandidate);
			}
		}
	}
	
	/**
	 * All sub solutions are removed.
	 * @param hsIndexFragSolution
	 * @return
	 */
	private static List getFinalSolutionSet(HashSet hsIndexFragSolution ){
		
		List liIndexFragSolution = new ArrayList(hsIndexFragSolution);
		
		for (int i = liIndexFragSolution.size()-1; i >= 0; i--) {
			IntVec ivCandidate = liIndexFragSolution.get(i);
			
			for (int j = 0; j < liIndexFragSolution.size(); j++) {
				IntVec ivSolution = liIndexFragSolution.get(j);
				
				if(i!=j){
					if(isCandidateInSolution(ivSolution, ivCandidate)) {
						liIndexFragSolution.remove(i);
						break;
					}
				}
			}
		}
		
		return liIndexFragSolution;
	}
	
	private static final boolean isCandidateInSolution(IntVec ivSolution, IntVec ivCandidate){
		
		IntVec iv = IntVec.OR(ivSolution, ivCandidate);
		
		if(iv.equals(ivSolution)){
			return true;
		}
		
		return false;
	}
	
	private static class ComparatorBitsSet implements  Comparator {
		
		public int compare(IntVec iv1, IntVec iv2) {
			
			int bits1 = iv1.getBitsSet();
			
			int bits2 = iv2.getBitsSet();
			
			if(bits1>bits2){
				return 1;
			} else if(bits1




© 2015 - 2024 Weber Informatics LLC | Privacy Policy