com.actelion.research.chem.mcs.MCS Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
package com.actelion.research.chem.mcs;
import com.actelion.research.chem.*;
import com.actelion.research.util.datamodel.IntVec;
import java.util.*;
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
* 3. Neither the name of the the copyright holder nor the
* names of its contributors may be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
public class MCS {
public static final int PAR_CLEAVE_RINGS = 0;
public static final int PAR_KEEP_RINGS = 1;
public static final int PAR_KEEP_AROMATIC_RINGS = 2;
private static final boolean DEBUG = false;
private static final int CAPACITY = (60 + 60) * 10;
private StereoMolecule mol;
private StereoMolecule frag;
private HashSet hsIndexFragCandidates;
private HashSet hsIndexFragGarbage;
private HashSet hsIndexFragSolution;
private SSSearcher sss;
private ComparatorBitsSet comparatorBitsSet;
private StereoMolecule molMCS;
private boolean considerAromaticRings;
private boolean considerRings;
// The largest valid solutions.
private List liMCSSolutions;
private HashMap> hmRingBnd_ListRingBnds;
private HashMap> hmAromaticRingBnd_ListRingBnds;
private RingCollection ringCollection;
boolean excluded[] = null;
private int [] arrMatchListFrag2Mol;
public MCS() {
this(PAR_CLEAVE_RINGS, null);
}
public MCS(int ringStatus) {
this(ringStatus, null);
}
public MCS(int ringStatus, SSSearcher searcher) {
considerRings = false;
considerAromaticRings = false;
switch (ringStatus) {
case PAR_CLEAVE_RINGS:
break;
case PAR_KEEP_RINGS:
considerRings = true;
break;
case PAR_KEEP_AROMATIC_RINGS:
considerAromaticRings = true;
break;
default:
break;
}
hsIndexFragCandidates = new HashSet(CAPACITY);
hsIndexFragGarbage = new HashSet(CAPACITY);
hsIndexFragSolution = new HashSet(CAPACITY);
liMCSSolutions = new ArrayList(CAPACITY);
if (searcher == null)
sss = new SSSearcher();
else
sss = searcher;
comparatorBitsSet = new ComparatorBitsSet();
hmRingBnd_ListRingBnds = new HashMap>();
hmAromaticRingBnd_ListRingBnds = new HashMap>();
}
public void setSSSearcher(SSSearcher sss) {
this.sss = sss;
}
/**
* mol should contain equal or more bonds than frag.
*
* @param mol
* @param frag
*/
public void set(StereoMolecule mol, StereoMolecule frag) {
set(mol, frag, null);
}
/**
* mol should contain equal or more bonds than frag.
* If frag contains more than one molecule only the biggest one is considered.
*
* @param mol
* @param frag
* @param excluded
*/
public void set(StereoMolecule mol, StereoMolecule frag, boolean excluded[]) {
StereoMolecule fragBiggestSub = new StereoMolecule(frag);
fragBiggestSub.ensureHelperArrays(Molecule.cHelperRings);
fragBiggestSub.stripSmallFragments();
fragBiggestSub.ensureHelperArrays(Molecule.cHelperRings);
this.mol = mol;
this.frag = fragBiggestSub;
this.excluded = excluded;
init();
}
private void init() {
hsIndexFragCandidates.clear();
hsIndexFragGarbage.clear();
hsIndexFragSolution.clear();
liMCSSolutions.clear();
hmRingBnd_ListRingBnds.clear();
hmAromaticRingBnd_ListRingBnds.clear();
initCandidates();
}
private void initCandidates() {
ringCollection = frag.getRingSet();
int rings = ringCollection.getSize();
for (int i = 0; i < rings; i++) {
int[] arrIndexBnd = ringCollection.getRingBonds(i);
for (int j = 0; j < arrIndexBnd.length; j++) {
if (!hmRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])) {
hmRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
}
List li = hmRingBnd_ListRingBnds.get(arrIndexBnd[j]);
li.add(arrIndexBnd);
}
if (ringCollection.isAromatic(i)) {
for (int j = 0; j < arrIndexBnd.length; j++) {
if (!hmAromaticRingBnd_ListRingBnds.containsKey(arrIndexBnd[j])) {
hmAromaticRingBnd_ListRingBnds.put(arrIndexBnd[j], new ArrayList());
}
List li = hmAromaticRingBnd_ListRingBnds.get(arrIndexBnd[j]);
li.add(arrIndexBnd);
}
}
}
// Array length for bit list.
int nInts = (int) (((double) frag.getBonds() / Integer.SIZE) + ((double) (Integer.SIZE - 1) / Integer.SIZE));
// The vector is used as bit vector.
// For each bond one bit.
for (int i = 0; i < frag.getBonds(); i++) {
IntVec iv = new IntVec(nInts);
setBitAndAddRelatedRingBonds(i, iv);
hsIndexFragCandidates.add(iv);
}
}
private void setBitAndAddRelatedRingBonds(int bit, IntVec iv) {
if (!considerAromaticRings && !considerRings) {
iv.setBit(bit);
} else if (considerRings) {
iv.setBit(bit);
if (hmRingBnd_ListRingBnds.containsKey(bit)) {
List li = hmRingBnd_ListRingBnds.get(bit);
for (int i = 0; i < li.size(); i++) {
int[] a = li.get(i);
for (int j = 0; j < a.length; j++) {
iv.setBit(a[j]);
}
}
}
} else if (considerAromaticRings) {
iv.setBit(bit);
if (hmAromaticRingBnd_ListRingBnds.containsKey(bit)) {
List li = hmAromaticRingBnd_ListRingBnds.get(bit);
for (int i = 0; i < li.size(); i++) {
int[] a = li.get(i);
for (int j = 0; j < a.length; j++) {
iv.setBit(a[j]);
}
}
}
}
iv.calculateHashCode();
}
/**
* Checks first fragment for being sub structure of molecule. If true, frag is returned.
* The returned IntVec contains the bond information of frag. The IntVec is used bit wise. Each bit corresponds to
* one bond in the fragment. The information for the matching substructure in the molecule is not contained in the
* IntVec.
*
* @return
*/
private List getAllSolutionsForCommonSubstructures() {
sss.setMolecule(mol);
frag.setFragment(true);
sss.setFragment(frag);
//
// The MCS is the complete fragment.
//
try {
if (sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cMatchDBondToDelocalized, excluded) > 0) {
molMCS = frag;
List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
if (liIndexFragCandidates != null && !liIndexFragCandidates.isEmpty()) {
IntVec iv = liIndexFragCandidates.get(0);
iv.setBits(0, iv.sizeBits());
iv.calculateHashCode();
List li = new ArrayList();
li.add(iv);
return li;
}
}
} catch (Exception e) {
e.printStackTrace();
}
int maxSizeCandidates = 0;
while (!hsIndexFragCandidates.isEmpty()) {
List liIndexFragCandidates = new ArrayList(hsIndexFragCandidates);
Collections.sort(liIndexFragCandidates, comparatorBitsSet);
// Get largest mcs.
IntVec iv = liIndexFragCandidates.get(liIndexFragCandidates.size() - 1);
hsIndexFragCandidates.remove(iv);
if (DEBUG) {
System.out.println("Bits set " + iv.getBitsSet());
if (iv.getBitsSet() == frag.getBonds()) {
System.out.println("Full structure in iv.");
}
}
StereoMolecule fragSub = getSubFrag(frag, iv);
sss.setFragment(fragSub);
if (sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode, excluded) > 0) {
hsIndexFragSolution.add(iv);
removeAllSubSolutions(iv);
if (iv.getBitsSet() != frag.getBonds()) {
List liIV = getAllPlusOneAtomCombinations(iv, frag);
for (IntVec ivPlus : liIV) {
if (DEBUG) {
if (ivPlus.getBitsSet() == frag.getBonds()) {
System.out.println("Full structure in ivPlus.");
}
}
if ((!hsIndexFragGarbage.contains(ivPlus)) && (!hsIndexFragSolution.contains(ivPlus))) {
hsIndexFragCandidates.add(ivPlus);
}
}
if (DEBUG) {
System.out.println("tsIndexFragCandidates " + hsIndexFragCandidates.size());
}
}
if (maxSizeCandidates < hsIndexFragCandidates.size()) {
maxSizeCandidates = hsIndexFragCandidates.size();
}
} else {
hsIndexFragGarbage.add(iv);
}
}
if (hsIndexFragSolution.size() == 0) {
return null;
}
return getFinalSolutionSet(hsIndexFragSolution);
}
/**
* All molecules which are sub structures of an other molecule in the list are removed.
*
* @return
*/
public LinkedList getAllCommonSubstructures() {
List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
if (liIndexFragSolution == null) {
return null;
}
Collections.sort(liIndexFragSolution, comparatorBitsSet);
IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size() - 1);
molMCS = getSubFrag(frag, ivMCS);
List li = new ArrayList();
for (IntVec iv : liIndexFragSolution) {
li.add(getSubFrag(frag, iv));
}
return ExtendedMoleculeFunctions.removeSubStructures(li);
}
/**
* @return maximum common substructure at top of list or null of none common MCS was found.
*/
public StereoMolecule getMCS() {
List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
if (liIndexFragSolution == null) {
return null;
}
Collections.sort(liIndexFragSolution, comparatorBitsSet);
IntVec ivMCS = liIndexFragSolution.get(liIndexFragSolution.size() - 1);
molMCS = getSubFrag(frag, ivMCS);
return molMCS;
}
/**
* Calculates the bond arrays for molecule and fragment for their maximum common substructure.
*
* @param arrBondMCSMol result array. Null or initialized with the length number of bonds in molecule.
* @param arrBondFrag result array. Null or initialized with the length number of bonds in fragment.
* @return two arrays, in the first array the bits are set 'true' which corresponds to the bonds for maximum common substructure in the molecule.
* In the second are the bits set 'true' which corresponds to the bonds for maximum common substructure in the fragment.
*/
public boolean[][] getMCSBondArray(boolean[] arrBondMCSMol, boolean[] arrBondFrag) {
boolean [][] arrBondMol_Result = new boolean [2][];
List liIndexFragSolution = getAllSolutionsForCommonSubstructures();
if(liIndexFragSolution==null){
return null;
}
Collections.sort(liIndexFragSolution, comparatorBitsSet);
// Get largest mcs.
IntVec ivMCSLargest = liIndexFragSolution.get(liIndexFragSolution.size()-1);
int bonds = frag.getBonds();
if(arrBondFrag == null){
arrBondFrag = new boolean [bonds];
} else {
Arrays.fill(arrBondFrag, false);
}
for (int i = 0; i < bonds; i++) {
if(ivMCSLargest.isBitSet(i)){
arrBondFrag[i]=true;
}
}
StereoMolecule fragSub = getSubFrag(frag, ivMCSLargest);
sss.setFragment(fragSub);
//
// The substructure has to be searched in the molecule because the mapping indices are not contained in the
// IntVec that is the solution from the MCS search.
arrMatchListFrag2Mol = null;
if(sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode,excluded)>0){
ArrayList liMatchSubFrag2Mol = sss.getMatchList();
arrMatchListFrag2Mol = getMappedMatchListFrag2Mol(fragSub, liMatchSubFrag2Mol.get(0));
int bondsMol = mol.getBonds();
if(arrBondMCSMol==null) {
arrBondMCSMol = new boolean [bondsMol];
} else {
Arrays.fill(arrBondMCSMol, false);
}
getBondArrayMolecule(arrMatchListFrag2Mol, arrBondFrag, arrBondMCSMol);
}
arrBondMol_Result[0]=arrBondMCSMol;
arrBondMol_Result[1]=arrBondFrag;
return arrBondMol_Result;
}
/**
* delivers the match list indices from getMCSBondArray(...)
* index is the Fragment atom index, value is the matched atom index in molecule.
* @return null if no match was found.
*/
public int[] getArrMatchListFrag2Mol() {
return arrMatchListFrag2Mol;
}
private int[] getMappedMatchListFrag2Mol(StereoMolecule fragSub, int[] arrMatchListSubFrag2Mol)
{
int [] arrMatchListFrag2Mol = new int [frag.getAtoms()];
// SSSearcher sss = new SSSearcher();
sss.setMol(fragSub, frag);
sss.findFragmentInMolecule(SSSearcher.cCountModeOverlapping, SSSearcher.cDefaultMatchMode, null);
ArrayList liMatchSubFrag2Mol = sss.getMatchList();
int [] arrMatchListSubFrag2Frag = liMatchSubFrag2Mol.get(0);
HashMap hmIndexSubFrag_IndexFrag = new HashMap();
for (int i = 0; i < arrMatchListSubFrag2Frag.length; i++) {
hmIndexSubFrag_IndexFrag.put(i, arrMatchListSubFrag2Frag[i]);
}
for (int i = 0; i < arrMatchListSubFrag2Mol.length; i++) {
int indexAtSubFragment = i;
int indexAtFragment = hmIndexSubFrag_IndexFrag.get(indexAtSubFragment);
arrMatchListFrag2Mol[indexAtFragment]=arrMatchListSubFrag2Mol[indexAtSubFragment];
}
return arrMatchListFrag2Mol;
}
private boolean[] getBondArrayMolecule(int[] arrMatchFragment2Mol, boolean[] arrBondMCSFrag, boolean[] arrBondMCSMol)
{
for (int i = 0; i < arrMatchFragment2Mol.length; i++) {
int indexAtFrag = i;
int indexAtMol = arrMatchFragment2Mol[i];
int nConn2Frag = frag.getConnAtoms(indexAtFrag);
for (int j = 0; j < nConn2Frag; j++) {
int indexAtFragConn = frag.getConnAtom(indexAtFrag, j);
int indexBondFrag = frag.getBond(indexAtFrag, indexAtFragConn);
if(arrBondMCSFrag[indexBondFrag]){
int indexAtMolConn = arrMatchFragment2Mol[indexAtFragConn];
int indexBondMol = mol.getBond(indexAtMol, indexAtMolConn);
if (indexBondMol > -1) {
arrBondMCSMol[indexBondMol]=true;
}
}
}
}
return arrBondMCSMol;
}
// The original procedure missed atom charge, mass, etc, which is particularly annoying,
// when providing a custom SSSearcher(), which matches these properties. TLS 11Feb2021
private static StereoMolecule getSubFrag(StereoMolecule frag, IntVec iv) {
boolean[] isFragmentAtom = new boolean[frag.getAtoms()];
int atoms = 0;
int bonds = frag.getBonds();
for (int bond=0; bond hsAtomIndex = new HashSet();
for (int i = 0; i < bonds; i++) {
if(iv.isBitSet(i)){
int indexAtom1 = frag.getBondAtom(0, i);
int indexAtom2 = frag.getBondAtom(1, i);
hsAtomIndex.add(indexAtom1);
hsAtomIndex.add(indexAtom2);
}
}
StereoMolecule fragSubBonds = new StereoMolecule(hsAtomIndex.size(), bonds);
fragSubBonds.setFragment(true);
int [] arrMapAtom = new int [frag.getAtoms()];
ArrayUtilsCalc.set(arrMapAtom, -1);
for (int indexAtom : hsAtomIndex) {
int indexAtomNew = fragSubBonds.addAtom(frag.getAtomicNo(indexAtom));
fragSubBonds.setAtomX(indexAtomNew, frag.getAtomX(indexAtom));
fragSubBonds.setAtomY(indexAtomNew, frag.getAtomY(indexAtom));
fragSubBonds.setAtomZ(indexAtomNew, frag.getAtomZ(indexAtom));
arrMapAtom[indexAtom]=indexAtomNew;
}
for (int i = 0; i < bonds; i++) {
if(iv.isBitSet(i)){
int indexAtom1 = frag.getBondAtom(0, i);
int indexAtom2 = frag.getBondAtom(1, i);
int indexAtomNew1 = arrMapAtom[indexAtom1];
int indexAtomNew2 = arrMapAtom[indexAtom2];
int type = frag.getBondType(i);
if(frag.isDelocalizedBond(i)){
type = Molecule.cBondTypeDelocalized;
// fragSubBonds.setBondQueryFeature(bondIndexNew, Molecule.cBondQFDelocalized, true);
}
// int bondIndexNew = fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
fragSubBonds.addBond(indexAtomNew1, indexAtomNew2, type);
}
}
fragSubBonds.ensureHelperArrays(Molecule.cHelperRings);
return fragSubBonds;
}*/
private List getAllPlusOneAtomCombinations(IntVec iv, StereoMolecule frag)
{
int bonds = frag.getBonds();
List liIntVec = new ArrayList();
HashSet hsAtomIndex = new HashSet();
for (int i = 0; i < bonds; i++) {
if(iv.isBitSet(i)){
int indexAt1 = frag.getBondAtom(0, i);
int indexAt2 = frag.getBondAtom(1, i);
hsAtomIndex.add(indexAt1);
hsAtomIndex.add(indexAt2);
}
}
for (int i = 0; i < bonds; i++) {
if(!iv.isBitSet(i)){
int indexAt1 = frag.getBondAtom(0, i);
int indexAt2 = frag.getBondAtom(1, i);
if(hsAtomIndex.contains(indexAt1) || hsAtomIndex.contains(indexAt2)) {
IntVec ivPlus = new IntVec(iv.get());
setBitAndAddRelatedRingBonds(i, ivPlus);
liIntVec.add(ivPlus);
}
}
}
return liIntVec;
}
/**
* Removes all sub-solutions from hsIndexFragCandidates.
*
* @param ivSolution
*/
private void removeAllSubSolutions(IntVec ivSolution)
{
List liIndexFragSolution = new ArrayList(hsIndexFragCandidates);
for (IntVec ivCandidate : liIndexFragSolution) {
if(isCandidateInSolution(ivSolution, ivCandidate)) {
hsIndexFragCandidates.remove(ivCandidate);
}
}
}
/**
* All sub solutions are removed.
*
* @param hsIndexFragSolution
* @return
*/
private static List getFinalSolutionSet(HashSet hsIndexFragSolution)
{
List liIndexFragSolution = new ArrayList(hsIndexFragSolution);
for (int i = liIndexFragSolution.size()-1; i >= 0; i--) {
IntVec ivCandidate = liIndexFragSolution.get(i);
for (int j = 0; j < liIndexFragSolution.size(); j++) {
IntVec ivSolution = liIndexFragSolution.get(j);
if(i!=j){
if(isCandidateInSolution(ivSolution, ivCandidate)) {
liIndexFragSolution.remove(i);
break;
}
}
}
}
return liIndexFragSolution;
}
private static final boolean isCandidateInSolution(IntVec ivSolution, IntVec ivCandidate)
{
IntVec iv = IntVec.OR(ivSolution, ivCandidate);
if(iv.equals(ivSolution)){
return true;
}
return false;
}
private static class ComparatorBitsSet implements Comparator
{
public int compare(IntVec iv1, IntVec iv2)
{
int bits1 = iv1.getBitsSet();
int bits2 = iv2.getBitsSet();
if(bits1>bits2){
return 1;
} else if(bits1