// org.openscience.cdk.hash.MinimumEquivalentCyclicSet Maven / Gradle / Ivy
/*
* Copyright (c) 2013 John May
*
* Contact: [email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.openscience.cdk.hash;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.ringsearch.RingSearch;
import java.util.Collections;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
/**
* Finds the smallest set of equivalent values that are members of a ring. If there
* are multiple smallest sets then the set with the lowest invariant value is
* returned. This class is intended to drive the systematic perturbation of the
* {@link org.openscience.cdk.hash.PerturbedAtomHashGenerator}.
*
* This method will not distinguish all possible molecules but represents a good
* enough approximation to quickly narrow down an identity search. At the time
* of writing (Feb, 2013) there are only 128 molecules (64 false positives) in
* PubChem-Compound (46E6 molecules) which are not separated. In many data sets
* this method will suffice however the exact {@link AllEquivalentCyclicSet} is
* provided.
*
* This method is currently the default used by the {@link
* org.openscience.cdk.hash.HashGeneratorMaker} but can also be explicitly
* specified.
*
* <blockquote><pre>
* MoleculeHashGenerator generator =
*   new HashGeneratorMaker().depth(6)
*                           .elemental()
*                           .perturbed() // use this class by default
*                           .molecular();
*
* // explicitly specify the method
* MoleculeHashGenerator generator =
*   new HashGeneratorMaker().depth(6)
*                           .elemental()
*                           .perturbWith(new MinimumEquivalentCyclicSet())
*                           .molecular();
* </pre></blockquote>
*
*
* @author John May
* @cdk.module hash
* @see org.openscience.cdk.hash.PerturbedAtomHashGenerator
* @see MinimumEquivalentCyclicSetUnion
* @see AllEquivalentCyclicSet
* @see org.openscience.cdk.hash.HashGeneratorMaker
* @cdk.githash
*/
final class MinimumEquivalentCyclicSet extends EquivalentSetFinder {
/**
* @inheritDoc
*/
@Override
Set find(long[] invariants, IAtomContainer container, int[][] graph) {
int n = invariants.length;
// find cyclic vertices using DFS
RingSearch ringSearch = new RingSearch(container, graph);
// ordered map of the set of vertices for each value
Map> equivalent = new TreeMap>();
// divide the invariants into equivalent indexed and ordered sets
for (int i = 0; i < invariants.length; i++) {
Long invariant = invariants[i];
Set set = equivalent.get(invariant);
if (set == null) {
if (ringSearch.cyclic(i)) {
set = new HashSet(n / 2);
set.add(i);
equivalent.put(invariant, set);
}
} else {
set.add(i);
}
}
// find the smallest set of equivalent cyclic vertices
int minSize = Integer.MAX_VALUE;
Set min = Collections.emptySet();
for (Map.Entry> e : equivalent.entrySet()) {
Set vertices = e.getValue();
if (vertices.size() < minSize && vertices.size() > 1) {
min = vertices;
minSize = vertices.size();
}
}
return min;
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy