// org.openscience.cdk.isomorphism.mcss.RGraph Maven / Gradle / Ivy
/* Copyright (C) 2002-2007 Stephane Werner
*
* This code has been kindly provided by Stephane Werner
* and Thierry Hanser from IXELIS [email protected].
*
* IXELIS sarl - Semantic Information Systems
* 17 rue des Cèdres 67200 Strasbourg, France
* Tel/Fax : +33(0)3 88 27 81 39 Email: [email protected]
*
* CDK Contact: [email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.isomorphism.mcss;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Iterator;
import java.util.List;
/**
* This class implements the Resolution Graph (RGraph).
* The RGraph is a graph based representation of the search problem.
* An RGraph is constructed from the two compared graphs (G1 and G2).
* Each vertex (node) in the RGraph represents a possible association
* from an edge in G1 with an edge in G2. Thus two compatible bonds
* in two molecular graphs are represented by a vertex in the RGraph.
* Each edge in the RGraph corresponds to a common adjacency relationship
* between the 2 couple of compatible edges associated to the 2 RGraph nodes
* forming this edge.
*
* Example:
* <pre>
*    G1 : C-C=O  and G2 : C-C-C=O
*         1 2 3           1 2 3 4
* </pre>
*
* <p>The resulting RGraph(G1,G2) will contain 3 nodes:
* <ul>
* <li>Node A : association between bond C-C : 1-2 in G1 and 1-2 in G2
* <li>Node B : association between bond C-C : 1-2 in G1 and 2-3 in G2
* <li>Node C : association between bond C=O : 2-3 in G1 and 3-4 in G2
* </ul>
*
* <p>The RGraph will also contain one edge representing the
* adjacency between node B and C, that is: bonds 1-2 and 2-3 in G1
* and bonds 2-3 and 3-4 in G2.
*
* Once the RGraph has been built from the two compared graphs
* it becomes a very interesting tool to perform all kinds of
* structural search (isomorphism, substructure search, maximal common
* substructure,....).
*
*
* <p>The search may be constrained by mandatory elements (e.g. bonds that
* have to be present in the mapped common substructures).
*
*
* <p>Performing a query on an RGraph simply requires setting the constraints
* (if any) and invoking the parsing method (parse()).
*
*
* <p>The RGraph has been designed to be a generic tool. It may be constructed
* from any kind of source graphs, thus it is not restricted to a chemical
* context.
*
*
* <p>The RGraph model is independent from the CDK model and the link between
* both models is performed by the RTools class. In this way the RGraph
* class may be reused in other graph contexts (conceptual graphs,....)
*
*
* <p><b>Important note</b>: This implementation of the algorithm has not been
* optimized for speed at this stage. It has been
* written with the goal to clearly retrace the
* principle of the underlying search method. There is
* room for optimization in many ways including
* the algorithm itself.
*
*
* <p>This algorithm derives from the algorithm described in
* {@cdk.cite HAN90} and modified in the thesis of T. Hanser {@cdk.cite HAN93}.
*
* @author Stephane Werner from IXELIS [email protected]
* @cdk.created 2002-07-17
* @cdk.require java1.4+
* @cdk.module standard
* @cdk.githash
*/
public class RGraph {

    // An RGraph is a list of RGraph nodes, each node keeping track of its
    // neighbors. A node is identified by its index in this list.
    List<RNode> graph = null;
    // maximal number of iterations before the search is interrupted
    // (-1 means that no maximum is applied)
    int maxIteration = -1;
    // dimensions of the compared graphs
    int firstGraphSize = 0;
    int secondGraphSize = 0;
    // constraints (mandatory elements) on G1 and G2 for the current search
    BitSet c1 = null;
    BitSet c2 = null;
    // current solution list; each solution is a set of node indices
    List<BitSet> solutionList = null;
    // flag to define if we want to get all possible 'mappings'
    boolean findAllMap = false;
    // flag to define if we want to get all possible 'structures'
    boolean findAllStructure = true;
    // working variables
    boolean stop = false;
    int nbIteration = 0;
    // one bit set per node index currently present in the graph
    BitSet graphBitSet = null;
    // timeout in milliseconds, -1 to disable the timeout check
    private long timeout = -1;
    // reference time in milliseconds against which the timeout is measured
    private long start;

    /**
     * Constructor for the RGraph object and creates an empty RGraph.
     */
    public RGraph() {
        graph = new ArrayList<RNode>();
        solutionList = new ArrayList<BitSet>();
        graphBitSet = new BitSet();
    }

    /**
     * Returns the size of the first of the two
     * compared graphs.
     * @return The size of the first of the two compared graphs
     */
    public int getFirstGraphSize() {
        return firstGraphSize;
    }

    /**
     * Returns the size of the second of the two
     * compared graphs.
     * @return The size of the second of the two compared graphs
     */
    public int getSecondGraphSize() {
        return secondGraphSize;
    }

    /**
     * Sets the size of the first of the two
     * compared graphs.
     * @param n1 The size of the first of the two compared graphs
     */
    public void setFirstGraphSize(int n1) {
        firstGraphSize = n1;
    }

    /**
     * Sets the size of the second of the two
     * compared graphs.
     * @param n2 The size of the second of the two compared graphs
     */
    public void setSecondGraphSize(int n2) {
        secondGraphSize = n2;
    }

    /**
     * Reinitialisation of the RGraph: removes all nodes.
     * The solution list and the search options are left untouched.
     */
    public void clear() {
        graph.clear();
        graphBitSet.clear();
    }

    /**
     * Returns the graph object of this RGraph.
     * @return The graph object, the live list of nodes (not a copy)
     */
    public List<RNode> getGraph() {
        return this.graph;
    }

    /**
     * Adds a new node to the RGraph. The node is appended to the node
     * list and its index is recorded in the set of existing nodes.
     * @param newNode The node to add to the graph
     */
    public void addNode(RNode newNode) {
        graph.add(newNode);
        graphBitSet.set(graph.size() - 1);
    }

    /**
     * Parsing of the RGraph. This is the main method
     * to perform a query. Given the constraints c1 and c2
     * defining mandatory elements in G1 and G2 and given
     * the search options, this method builds an initial set
     * of starting nodes (B) and parses recursively the
     * RGraph to find a list of solutions according to
     * these parameters. The solutions are available from
     * {@link #getSolutions()} afterwards.
     *
     * <p>Every call starts a fresh search: the solution list, the stop
     * flag and the iteration counter are reset. If a timeout is set, the
     * elapsed time is measured from the value given to
     * {@link #setStart(long)}, which is not modified here.
     *
     * @param c1 constraint on the graph G1 (empty or null for none)
     * @param c2 constraint on the graph G2 (empty or null for none)
     * @param findAllStructure true if we want all results to be generated
     * @param findAllMap true if we want all possible 'mappings'
     */
    public void parse(BitSet c1, BitSet c2, boolean findAllStructure, boolean findAllMap) {
        // initialize the list of solutions and the working variables;
        // without resetting 'stop' a second search on the same instance
        // would not explore anything once a previous search was stopped
        solutionList.clear();
        stop = false;
        nbIteration = 0;

        // builds the set of starting nodes
        // according to the constraints
        BitSet b = buildB(c1, c2);

        // setup options
        setAllStructure(findAllStructure);
        setAllMap(findAllMap);

        // parse recursively the RGraph
        parseRec(new BitSet(b.size()), b, new BitSet(b.size()));
    }

    /**
     * Parsing of the RGraph. This is the recursive method
     * to perform a query. The method will recursively
     * parse the RGraph through connected nodes and visiting the
     * RGraph using allowed adjacency relationship.
     *
     * <p>Note that <code>forbidden</code> is modified by this method:
     * every extension node tried at this level is added to it.
     *
     * @param traversed nodes already parsed (the current partial solution)
     * @param extension possible extension nodes (allowed neighbors)
     * @param forbidden nodes forbidden (set of nodes incompatible with the current solution)
     */
    private void parseRec(BitSet traversed, BitSet extension, BitSet forbidden) {
        // Test whether the timeout is reached. The flag only prevents the
        // extension loop below from running; a solution reached in this
        // very call is still handed to solution().
        if (this.timeout > -1 && (System.currentTimeMillis() - this.start) > this.timeout) {
            stop = true;
        }

        // if there is no more extension possible we
        // have reached a potential new solution
        if (extension.isEmpty()) {
            solution(traversed);
            return;
        }

        // calculates the set of nodes that may still
        // be reached at this stage (not forbidden)
        BitSet potentialNode = (BitSet) graphBitSet.clone();
        potentialNode.andNot(forbidden);
        potentialNode.or(traversed);

        // checks if we must continue the search
        // according to the potential node set
        if (!mustContinue(potentialNode)) {
            return;
        }

        // carry on research and update iteration count
        nbIteration++;

        // for each node in the set of possible extensions (neighbors of
        // the current partial solution), include the node in the solution
        // and parse recursively the RGraph with the new context.
        for (int x = extension.nextSetBit(0); x >= 0 && !stop; x = extension.nextSetBit(x + 1)) {
            RNode node = graph.get(x);

            // evaluates the new set of forbidden nodes
            // by including the nodes not compatible with the
            // newly accepted node.
            BitSet newForbidden = (BitSet) forbidden.clone();
            newForbidden.or(node.forbidden);

            BitSet newExtension;
            if (traversed.isEmpty()) {
                // first node of a solution: the possible extensions are
                // exactly the extensions of this first accepted node
                newExtension = (BitSet) node.extension.clone();
            } else {
                // otherwise add the neighbors of the newly accepted node
                // to the extensions already known
                newExtension = (BitSet) extension.clone();
                newExtension.or(node.extension);
            }
            // extension may not contain forbidden nodes
            newExtension.andNot(newForbidden);

            // create the new set of traversed nodes
            // (update current partial solution)
            BitSet newTraversed = (BitSet) traversed.clone();
            newTraversed.set(x);

            // x is marked in this level's forbidden set only after
            // newForbidden was cloned: it therefore affects the clones made
            // for the extension nodes tried later in this loop, which can
            // no longer pick x up again.
            forbidden.set(x);

            // parse recursively the RGraph
            parseRec(newTraversed, newExtension, newForbidden);
        }
    }

    /**
     * Checks if a potential solution is a real one
     * (not included in a previous solution)
     * and adds this solution to the solution list
     * in case of success. Previous solutions that are included in the
     * new one are removed. When only one structure is requested the
     * search is stopped as soon as a candidate fulfilling the
     * constraints has been examined.
     *
     * @param traversed new potential solution
     */
    private void solution(BitSet traversed) {
        BitSet projG1 = projectG1(traversed);
        BitSet projG2 = projectG2(traversed);

        // the solution must follow the search constraints
        // (must contain the mandatory elements in G1 and G2)
        if (!isContainedIn(c1, projG1) || !isContainedIn(c2, projG2)) {
            return;
        }

        // the solution should not be included in a previous solution
        // at the RGraph level. So we check against all previous solutions.
        // On the other hand if a previous solution is included in the
        // new one, the previous solution is removed.
        boolean included = false;
        for (Iterator<BitSet> i = solutionList.listIterator(); i.hasNext() && !included;) {
            BitSet sol = i.next();
            if (sol.equals(traversed)) {
                // solution already exists
                included = true;
            } else if (findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) {
                // we asked to save all 'mappings': keep both mappings
            } else if (isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) {
                // the new solution is included in a previous one
                included = true;
            } else if (isContainedIn(projectG1(sol), projG1) || isContainedIn(projectG2(sol), projG2)) {
                // the previous solution is contained in the new one
                i.remove();
            }
        }

        if (!included) {
            // it is really a new solution: add it to the
            // list of current solutions
            solutionList.add(traversed);
        }

        if (!findAllStructure) {
            // if we need only one solution
            // stop the search process
            // (e.g. substructure search)
            stop = true;
        }
    }

    /**
     * Determines if there are potential solutions remaining.
     * @param potentialNode set of remaining potential nodes
     * @return true if it is worth continuing the search
     */
    private boolean mustContinue(BitSet potentialNode) {
        // if we reached the maximum number of
        // search iterations then do not continue
        if (maxIteration != -1 && nbIteration >= maxIteration) {
            return false;
        }

        BitSet projG1 = projectG1(potentialNode);
        BitSet projG2 = projectG2(potentialNode);

        // if constraints may no more be fulfilled then stop.
        if (!isContainedIn(c1, projG1) || !isContainedIn(c2, projG2)) {
            return false;
        }

        // check if the solution potential is not included in an already
        // existing solution
        for (BitSet sol : solutionList) {
            // if we want every 'mapping' do not stop on an equal projection
            if (findAllMap && (projG1.equals(projectG1(sol)) || projG2.equals(projectG2(sol)))) {
                continue;
            }
            // if it is not possible to do better than an already
            // existing solution then stop.
            if (isContainedIn(projG1, projectG1(sol)) || isContainedIn(projG2, projectG2(sol))) {
                return false;
            }
        }
        return true;
    }

    /**
     * Builds the initial extension set. This is the
     * set of nodes that may be used as seed for the
     * RGraph parsing. This set depends on the constraints
     * defined by the user. The constraints are also stored in this
     * object for the rest of the search; a null constraint is stored
     * as an empty one, which means "no constraint".
     * @param c1 constraint in the graph G1
     * @param c2 constraint in the graph G2
     * @return the new extension set
     */
    private BitSet buildB(BitSet c1, BitSet c2) {
        this.c1 = (c1 == null) ? new BitSet() : c1;
        this.c2 = (c2 == null) ? new BitSet() : c2;

        boolean anyInG1 = this.c1.isEmpty();
        boolean anyInG2 = this.c2.isEmpty();

        BitSet bs = new BitSet();

        // only nodes that fulfill the initial constraints
        // are allowed in the initial extension set : B
        // (index loop: the position in the list is the node identifier,
        // so there is no need to search for it with indexOf)
        for (int index = 0; index < graph.size(); index++) {
            RNode rn = graph.get(index);
            if ((anyInG1 || this.c1.get(rn.rMap.id1)) && (anyInG2 || this.c2.get(rn.rMap.id2))) {
                bs.set(index);
            }
        }
        return bs;
    }

    /**
     * Returns the list of solutions.
     *
     * @return The solution list (the live list, not a copy); each
     *         solution is a set of node indices of this RGraph
     */
    public List<BitSet> getSolutions() {
        return solutionList;
    }

    /**
     * Converts a RGraph bitset (set of RNode)
     * to a list of RMap that represents the
     * mapping between two substructures in G1 and G2
     * (the projection of the RGraph bitset on G1
     * and G2).
     *
     * @param set the BitSet
     * @return the RMap list
     */
    // NOTE(review): the element type is presumably RMap (the type of
    // RNode.rMap); that type is not visible from this file, so the list is
    // left raw here - confirm and parameterize.
    @SuppressWarnings({"rawtypes", "unchecked"})
    public List bitSetToRMap(BitSet set) {
        List rMapList = new ArrayList();
        for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) {
            rMapList.add(graph.get(x).rMap);
        }
        return rMapList;
    }

    /**
     * Sets the 'AllStructures' option. If true
     * all possible solutions will be generated. If false
     * the search will stop as soon as a solution is found.
     * (e.g. when we just want to know if a G2 is
     * a substructure of G1 or not).
     *
     * @param findAllStructure true to generate all solutions
     */
    public void setAllStructure(boolean findAllStructure) {
        this.findAllStructure = findAllStructure;
    }

    /**
     * Sets the 'findAllMap' option. If true
     * all possible 'mappings' will be generated. If false
     * the search will keep only one 'mapping' per structure
     * association.
     *
     * @param findAllMap true to keep all mappings
     */
    public void setAllMap(boolean findAllMap) {
        this.findAllMap = findAllMap;
    }

    /**
     * Sets the maxIteration for the RGraph parsing. If set to -1,
     * then no iteration maximum is taken into account.
     *
     * @param it The new maxIteration value
     */
    public void setMaxIteration(int it) {
        this.maxIteration = it;
    }

    /**
     * Returns a string representation of the RGraph.
     * @return the string representation of the RGraph
     */
    @Override
    public String toString() {
        StringBuilder message = new StringBuilder();
        for (int j = 0; j < graph.size(); j++) {
            message.append("-------------\n")
                   .append("RNode ").append(j).append("\n")
                   .append(graph.get(j).toString()).append("\n");
        }
        return message.toString();
    }

    /////////////////////////////////
    // BitSet tools

    /**
     * Projects a RGraph bitset on the source graph G1.
     * @param set RGraph BitSet to project
     * @return The associate BitSet in G1
     */
    public BitSet projectG1(BitSet set) {
        BitSet projection = new BitSet(firstGraphSize);
        for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) {
            projection.set(graph.get(x).rMap.id1);
        }
        return projection;
    }

    /**
     * Projects a RGraph bitset on the source graph G2.
     * @param set RGraph BitSet to project
     * @return The associate BitSet in G2
     */
    public BitSet projectG2(BitSet set) {
        BitSet projection = new BitSet(secondGraphSize);
        for (int x = set.nextSetBit(0); x >= 0; x = set.nextSetBit(x + 1)) {
            projection.set(graph.get(x).rMap.id2);
        }
        return projection;
    }

    /**
     * Test if set a is contained in set b. An empty set is
     * contained in any set.
     * @param a a bitSet
     * @param b a bitSet
     * @return true if a is contained in b
     */
    private boolean isContainedIn(BitSet a, BitSet b) {
        if (a.isEmpty()) {
            return true;
        }
        // a is contained in b when nothing of a is left after removing b
        BitSet missing = (BitSet) a.clone();
        missing.andNot(b);
        return missing.isEmpty();
    }

    /**
     * Sets the time in milliseconds after which the search will be
     * interrupted. The elapsed time is measured from the value given to
     * {@link #setStart(long)}.
     * @param timeout
     *            Time in milliseconds. -1 to ignore the timeout.
     */
    public void setTimeout(long timeout) {
        this.timeout = timeout;
    }

    /**
     * Sets the reference time from which the timeout is measured; it is
     * compared with <code>System.currentTimeMillis()</code> during the search.
     * @param start
     *            The start time in milliseconds.
     */
    public void setStart(long start) {
        this.start = start;
    }
}