All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.isomorphism.mcss.RGraph Maven / Gradle / Ivy

There is a newer version: 2.9
Show newest version
/* Copyright (C) 2002-2007  Stephane Werner 
 *
 * This code has been kindly provided by Stephane Werner
 * and Thierry Hanser from IXELIS [email protected].
 *
 * IXELIS sarl - Semantic Information Systems
 *               17 rue des Cèdres 67200 Strasbourg, France
 *               Tel/Fax : +33(0)3 88 27 81 39 Email: [email protected]
 *
 * CDK Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.isomorphism.mcss;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;

/**
  * This class implements the Resolution Graph (RGraph).
  * The RGraph is a graph based representation of the search problem.
  * An RGraph is constructed from the two compared graphs (G1 and G2).
  * Each vertex (node) in the RGraph represents a possible association
  * from an edge in G1 with an edge in G2. Thus two compatible bonds
  * in two molecular graphs are represented by a vertex in the RGraph.
  * Each edge in the RGraph corresponds to a common adjacency relationship
  * between the 2 couple of compatible edges associated to the 2 RGraph nodes
  * forming this edge.
  *
  * 

Example: *

  *    G1 : C-C=O  and G2 : C-C-C=0
  *         1 2 3           1 2 3 4
  * 
* *

The resulting RGraph(G1,G2) will contain 3 nodes: *

    *
  • Node A : association between bond C-C : 1-2 in G1 and 1-2 in G2 *
  • Node B : association between bond C-C : 1-2 in G1 and 2-3 in G2 *
  • Node C : association between bond C=0 : 2-3 in G1 and 3-4 in G2 *
* The RGraph will also contain one edge representing the * adjacency between node B and C that is : bonds 1-2 and 2-3 in G1 * and bonds 2-3 and 3-4 in G2. * *

Once the RGraph has been built from the two compared graphs * it becomes a very interesting tool to perform all kinds of * structural search (isomorphism, substructure search, maximal common * substructure,....). * *

The search may be constrained by mandatory elements (e.g. bonds that * have to be present in the mapped common substructures). * *

Performing a query on an RGraph requires simply to set the constrains * (if any) and to invoke the parsing method (parse()) * *

The RGraph has been designed to be a generic tool. It may be constructed * from any kind of source graphs, thus it is not restricted to a chemical * context. * *

The RGraph model is independent from the CDK model and the link between * both model is performed by the RTools class. In this way the RGraph * class may be reused in other graph context (conceptual graphs,....) * *

Important note: This implementation of the algorithm has not been * optimized for speed at this stage. It has been * written with the goal to clearly retrace the * principle of the underlined search method. There is * room for optimization in many ways including the * the algorithm itself. * *

This algorithm derives from the algorithm described in * {@cdk.cite HAN90} and modified in the thesis of T. Hanser {@cdk.cite HAN93}. * * @author Stephane Werner from IXELIS [email protected] * @cdk.created 2002-07-17 * @cdk.require java1.4+ * @cdk.module standard * @cdk.githash */ public class RGraph { // an RGraph is a list of RGraph nodes // each node keeping track of its // neighbors. List graph = null; // maximal number of iterations before // search break int maxIteration = -1; // dimensions of the compared graphs int firstGraphSize = 0; int secondGraphSize = 0; // constrains BitSet c1 = null; BitSet c2 = null; // current solution list List solutionList = null; // flag to define if we want to get all possible 'mappings' boolean findAllMap = false; // flag to define if we want to get all possible 'structures' boolean findAllStructure = true; // working variables boolean stop = false; int nbIteration = 0; BitSet graphBitSet = null; private long timeout = -1; private long start; /** * Constructor for the RGraph object and creates an empty RGraph. */ public RGraph() { graph = new ArrayList(); solutionList = new ArrayList(); graphBitSet = new BitSet(); } /** * Returns the size of the first of the two * compared graphs. * @return The size of the first of the two compared graphs */ public int getFirstGraphSize() { return firstGraphSize; } /** * Returns the size of the second of the two * compared graphs. * @return The size of the second of the two compared graphs */ public int getSecondGraphSize() { return secondGraphSize; } /** * Sets the size of the first of the two * compared graphs. * @param n1 The size of the second of the two compared graphs */ public void setFirstGraphSize(int n1) { firstGraphSize = n1; } /** * Returns the size of the second of the two * compared graphs. * @param n2 The size of the second of the two compared graphs */ public void setSecondGraphSize(int n2) { secondGraphSize = n2; } /** * Reinitialisation of the TGraph. 
*/ public void clear() { graph.clear(); graphBitSet.clear(); } /** * Returns the graph object of this RGraph. * @return The graph object, a list */ public List getGraph() { return this.graph; } /** * Adds a new node to the RGraph. * @param newNode The node to add to the graph */ public void addNode(RNode newNode) { graph.add(newNode); graphBitSet.set(graph.size() - 1); } /** * Parsing of the RGraph. This is the main method * to perform a query. Given the constrains c1 and c2 * defining mandatory elements in G1 and G2 and given * the search options, this method builds an initial set * of starting nodes (B) and parses recursively the * RGraph to find a list of solution according to * these parameters. * * @param c1 constrain on the graph G1 * @param c2 constrain on the graph G2 * @param findAllStructure true if we want all results to be generated * @param findAllMap true is we want all possible 'mappings' */ public void parse(BitSet c1, BitSet c2, boolean findAllStructure, boolean findAllMap) { // initialize the list of solution solutionList.clear(); // builds the set of starting nodes // according to the constrains BitSet b = buildB(c1, c2); // setup options setAllStructure(findAllStructure); setAllMap(findAllMap); // parse recursively the RGraph parseRec(new BitSet(b.size()), b, new BitSet(b.size())); } /** * Parsing of the RGraph. This is the recursive method * to perform a query. The method will recursively * parse the RGraph thru connected nodes and visiting the * RGraph using allowed adjacency relationship. * * @param traversed node already parsed * @param extension possible extension node (allowed neighbors) * @param forbiden node forbidden (set of node incompatible with the current solution) */ private void parseRec(BitSet traversed, BitSet extension, BitSet forbidden) { BitSet newTraversed = null; BitSet newExtension = null; BitSet newForbidden = null; BitSet potentialNode = null; // Test whether the timeout is reached. Stop searching. 
if (this.timeout > -1 && (System.currentTimeMillis() - this.start) > this.timeout) { stop = true; } // if there is no more extension possible we // have reached a potential new solution if (extension.isEmpty()) { solution(traversed); } // carry on with each possible extension else { // calculates the set of nodes that may still // be reached at this stage (not forbidden) potentialNode = ((BitSet) graphBitSet.clone()); potentialNode.andNot(forbidden); potentialNode.or(traversed); // checks if we must continue the search // according to the potential node set if (mustContinue(potentialNode)) { // carry on research and update iteration count nbIteration++; // for each node in the set of possible extension (neighbors of // the current partial solution, include the node to the solution // and parse recursively the RGraph with the new context. for (int x = extension.nextSetBit(0); x >= 0 && !stop; x = extension.nextSetBit(x + 1)) { // evaluates the new set of forbidden nodes // by including the nodes not compatible with the // newly accepted node. newForbidden = (BitSet) forbidden.clone(); newForbidden.or(((RNode) graph.get(x)).forbidden); // if it is the first time we are here then // traversed is empty and we initialize the set of // possible extensions to the extension of the first // accepted node in the solution. 
if (traversed.isEmpty()) { newExtension = (BitSet) (((RNode) graph.get(x)).extension.clone()); } // else we simply update the set of solution by // including the neighbors of the newly accepted node else { newExtension = (BitSet) extension.clone(); newExtension.or(((RNode) graph.get(x)).extension); } // extension my not contain forbidden nodes newExtension.andNot(newForbidden); // create the new set of traversed node // (update current partial solution) // and add x to the set of forbidden node // (a node may only appear once in a solution) newTraversed = (BitSet) traversed.clone(); newTraversed.set(x); forbidden.set(x); // parse recursively the RGraph parseRec(newTraversed, newExtension, newForbidden); } } } } /** * Checks if a potential solution is a real one * (not included in a previous solution) * and add this solution to the solution list * in case of success. * * @param traversed new potential solution */ private void solution(BitSet traversed) { boolean included = false; BitSet projG1 = projectG1(traversed); BitSet projG2 = projectG2(traversed); // the solution must follows the search constrains // (must contain the mandatory elements in G1 an G2) if (isContainedIn(c1, projG1) && isContainedIn(c2, projG2)) { // the solution should not be included in a previous solution // at the RGraph level. So we check against all previous solution // On the other hand if a previous solution is included in the // new one, the previous solution is removed. 
for (Iterator i = solutionList.listIterator(); i.hasNext() && !included;) { BitSet sol = i.next(); if (!sol.equals(traversed)) { // if we asked to save all 'mappings' then keep this mapping if (findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) { // do nothing } // if the new solution is included mark it as included else if (isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) { included = true; } // if the previous solution is contained in the new one, remove the previous solution else if (isContainedIn(projectG1(sol), projG1) || isContainedIn(projectG2(sol), projG2)) { i.remove(); } } else { // solution already exists included = true; } } if (included == false) { // if it is really a new solution add it to the // list of current solution solutionList.add(traversed); } if (!findAllStructure) { // if we need only one solution // stop the search process // (e.g. substructure search) stop = true; } } } /** * Determine if there are potential solution remaining. * @param potentialNode set of remaining potential nodes * @return true if it is worse to continue the search */ private boolean mustContinue(BitSet potentialNode) { boolean result = true; boolean cancel = false; BitSet projG1 = projectG1(potentialNode); BitSet projG2 = projectG2(potentialNode); // if we reached the maximum number of // search iterations than do not continue if (maxIteration != -1 && nbIteration >= maxIteration) { return false; } // if constrains may no more be fulfilled then stop. 
if (!isContainedIn(c1, projG1) || !isContainedIn(c2, projG2)) { return false; } // check if the solution potential is not included in an already // existing solution for (Iterator i = solutionList.iterator(); i.hasNext() && !cancel;) { BitSet sol = i.next(); // if we want every 'mappings' do not stop if (findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) { // do nothing } // if it is not possible to do better than an already existing solution than stop. else if (isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) { result = false; cancel = true; } } return result; } /** * Builds the initial extension set. This is the * set of node that may be used as seed for the * RGraph parsing. This set depends on the constrains * defined by the user. * @param c1 constraint in the graph G1 * @param c2 constraint in the graph G2 * @return the new extension set */ private BitSet buildB(BitSet c1, BitSet c2) { this.c1 = c1; this.c2 = c2; BitSet bs = new BitSet(); // only nodes that fulfill the initial constrains // are allowed in the initial extension set : B for (Iterator i = graph.iterator(); i.hasNext();) { RNode rn = i.next(); if ((c1.get(rn.rMap.id1) || c1.isEmpty()) && (c2.get(rn.rMap.id2) || c2.isEmpty())) { bs.set(graph.indexOf(rn)); } } return bs; } /** * Returns the list of solutions. * * @return The solution list */ public List getSolutions() { return solutionList; } /** * Converts a RGraph bitset (set of RNode) * to a list of RMap that represents the * mapping between to substructures in G1 and G2 * (the projection of the RGraph bitset on G1 * and G2). * * @param set the BitSet * @return the RMap list */ public List bitSetToRMap(BitSet set) { List rMapList = new ArrayList(); for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { RNode xNode = graph.get(x); rMapList.add(xNode.rMap); } return rMapList; } /** * Sets the 'AllStructres' option. If true * all possible solutions will be generated. 
If false * the search will stop as soon as a solution is found. * (e.g. when we just want to know if a G2 is * a substructure of G1 or not). * * @param findAllStructure */ public void setAllStructure(boolean findAllStructure) { this.findAllStructure = findAllStructure; } /** * Sets the 'finAllMap' option. If true * all possible 'mappings' will be generated. If false * the search will keep only one 'mapping' per structure * association. * * @param findAllMap */ public void setAllMap(boolean findAllMap) { this.findAllMap = findAllMap; } /** * Sets the maxIteration for the RGraph parsing. If set to -1, * then no iteration maximum is taken into account. * * @param it The new maxIteration value */ public void setMaxIteration(int it) { this.maxIteration = it; } /** * Returns a string representation of the RGraph. * @return the string representation of the RGraph */ @Override public String toString() { String message = ""; int j = 0; for (Iterator i = graph.iterator(); i.hasNext();) { RNode rn = i.next(); message += "-------------\n" + "RNode " + j + "\n" + rn.toString() + "\n"; j++; } return message; } ///////////////////////////////// // BitSet tools /** * Projects a RGraph bitset on the source graph G1. * @param set RGraph BitSet to project * @return The associate BitSet in G1 */ public BitSet projectG1(BitSet set) { BitSet projection = new BitSet(firstGraphSize); RNode xNode = null; for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { xNode = (RNode) graph.get(x); projection.set(xNode.rMap.id1); } return projection; } /** * Projects a RGraph bitset on the source graph G2. * @param set RGraph BitSet to project * @return The associate BitSet in G2 */ public BitSet projectG2(BitSet set) { BitSet projection = new BitSet(secondGraphSize); RNode xNode = null; for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) { xNode = (RNode) graph.get(x); projection.set(xNode.rMap.id2); } return projection; } /** * Test if set A is contained in set B. 
* @param A a bitSet * @param B a bitSet * @return true if A is contained in B */ private boolean isContainedIn(BitSet A, BitSet B) { boolean result = false; if (A.isEmpty()) { return true; } BitSet setA = (BitSet) A.clone(); setA.and(B); if (setA.equals(A)) { result = true; } return result; } /** * Sets the time in milliseconds until the substructure search will be breaked. * @param timeout * Time in milliseconds. -1 to ignore the timeout. */ public void setTimeout(long timeout) { this.timeout = timeout; } /** * @param start * The start time in milliseconds. */ public void setStart(long start) { this.start = start; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy