// org.openscience.cdk.hash.HashGeneratorMaker Maven / Gradle / Ivy
/*
* Copyright (c) 2013 John May
*
* Contact: [email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.openscience.cdk.hash;
import org.openscience.cdk.hash.stereo.DoubleBondElementEncoderFactory;
import org.openscience.cdk.hash.stereo.StereoEncoder;
import org.openscience.cdk.hash.stereo.GeometricCumulativeDoubleBondFactory;
import org.openscience.cdk.hash.stereo.GeometricDoubleBondEncoderFactory;
import org.openscience.cdk.hash.stereo.GeometricTetrahedralEncoderFactory;
import org.openscience.cdk.hash.stereo.StereoEncoderFactory;
import org.openscience.cdk.hash.stereo.TetrahedralElementEncoderFactory;
import org.openscience.cdk.interfaces.IAtomContainer;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.List;
/**
 * Fluent API for creating hash generators. The maker is first configured with
 * one or more attributes. Once fully configured the generator is made by
 * invoking {@link #atomic()}, {@link #molecular()} or {@link #ensemble()}. The
 * order of the built-in configuration methods does not matter however when
 * specifying custom encoders with {@link #encode(AtomEncoder)} the order they
 * are added is the order they will be used. Therefore one can expect different
 * hash codes if there is a change in the order they are specified.
 *
 * <p>Examples</p>
 *
 * <pre>{@code
 * // simple
 * MoleculeHashGenerator generator = new HashGeneratorMaker().depth(16)
 *                                                           .elemental()
 *                                                           .molecular();
 *
 * // fast
 * MoleculeHashGenerator generator = new HashGeneratorMaker().depth(8)
 *                                                           .elemental()
 *                                                           .isotopic()
 *                                                           .charged()
 *                                                           .orbital()
 *                                                           .molecular();
 *
 * // comprehensive
 * MoleculeHashGenerator generator = new HashGeneratorMaker().depth(32)
 *                                                           .elemental()
 *                                                           .isotopic()
 *                                                           .charged()
 *                                                           .chiral()
 *                                                           .perturbed()
 *                                                           .molecular();
 * }</pre>
 *
 * @author John May
 * @cdk.module hash
 * @cdk.githash
 */
public final class HashGeneratorMaker {

    /* no default depth - a negative value means "not configured" */
    private int depth = -1;

    /* ordered list of custom encoders (insertion order is preserved) */
    private final List<AtomEncoder> customEncoders = new ArrayList<AtomEncoder>();

    /* ordered set of basic encoders (EnumSet iterates in enum declaration order) */
    private final EnumSet<BasicAtomEncoder> encoderSet = EnumSet.noneOf(BasicAtomEncoder.class);

    /* list of stereo encoder factories, combined by makeStereoEncoderFactory() */
    private final List<StereoEncoderFactory> stereoEncoders = new ArrayList<StereoEncoderFactory>();

    /* finder used by perturbed hash generators; null means no perturbation */
    private EquivalentSetFinder equivSetFinder = null;

    /* function determines whether any atoms are suppressed */
    private AtomSuppression suppression = AtomSuppression.unsuppressed();

    /**
     * Specify the depth of the hash generator. Larger values discriminate more
     * molecules.
     *
     * @param depth how deep should the generator hash
     * @return reference for fluent API
     * @throws IllegalArgumentException if the depth was less than zero
     */
    public HashGeneratorMaker depth(int depth) {
        if (depth < 0) throw new IllegalArgumentException("depth must not be less than 0");
        this.depth = depth;
        return this;
    }

    /**
     * Discriminate elements.
     *
     * @return fluent API reference (self)
     * @see BasicAtomEncoder#ATOMIC_NUMBER
     */
    public HashGeneratorMaker elemental() {
        encoderSet.add(BasicAtomEncoder.ATOMIC_NUMBER);
        return this;
    }

    /**
     * Discriminate isotopes.
     *
     * @return fluent API reference (self)
     * @see BasicAtomEncoder#MASS_NUMBER
     */
    public HashGeneratorMaker isotopic() {
        encoderSet.add(BasicAtomEncoder.MASS_NUMBER);
        return this;
    }

    /**
     * Discriminate protonation states.
     *
     * @return fluent API reference (self)
     * @see BasicAtomEncoder#FORMAL_CHARGE
     */
    public HashGeneratorMaker charged() {
        encoderSet.add(BasicAtomEncoder.FORMAL_CHARGE);
        return this;
    }

    /**
     * Discriminate atomic orbitals.
     *
     * @return fluent API reference (self)
     * @see BasicAtomEncoder#ORBITAL_HYBRIDIZATION
     */
    public HashGeneratorMaker orbital() {
        encoderSet.add(BasicAtomEncoder.ORBITAL_HYBRIDIZATION);
        return this;
    }

    /**
     * Discriminate free radicals.
     *
     * @return fluent API reference (self)
     * @see BasicAtomEncoder#FREE_RADICALS
     */
    public HashGeneratorMaker radical() {
        encoderSet.add(BasicAtomEncoder.FREE_RADICALS);
        return this;
    }

    /**
     * Generate different hash codes for stereoisomers. The currently supported
     * geometries are:
     *
     * <ul>
     *   <li>Tetrahedral</li>
     *   <li>Double Bond</li>
     *   <li>Cumulative Double Bonds</li>
     * </ul>
     *
     * Each invocation appends the stereo encoder factories to the current
     * configuration, so invoking this method more than once registers the
     * factories more than once.
     *
     * @return fluent API reference (self)
     */
    public HashGeneratorMaker chiral() {
        this.stereoEncoders.add(new GeometricTetrahedralEncoderFactory());
        this.stereoEncoders.add(new GeometricDoubleBondEncoderFactory());
        this.stereoEncoders.add(new GeometricCumulativeDoubleBondFactory());
        this.stereoEncoders.add(new TetrahedralElementEncoderFactory());
        this.stereoEncoders.add(new DoubleBondElementEncoderFactory());
        return this;
    }

    /**
     * Suppress any explicit hydrogens in the encoding of hash values. The
     * generation of hashes acts as though the hydrogens are not present and as
     * such preserves stereo-encoding.
     *
     * @return fluent API reference (self)
     */
    public HashGeneratorMaker suppressHydrogens() {
        this.suppression = AtomSuppression.anyHydrogens();
        return this;
    }

    /**
     * Discriminate atoms experiencing uniform environments. This method uses
     * {@link MinimumEquivalentCyclicSet} to break symmetry but depending on
     * application one may need a more comprehensive method. Please refer to
     * {@link #perturbWith(EquivalentSetFinder)} for further configuration
     * details.
     *
     * @return fluent API reference (self)
     * @see MinimumEquivalentCyclicSet
     * @see #perturbWith(EquivalentSetFinder)
     */
    public HashGeneratorMaker perturbed() {
        return perturbWith(new MinimumEquivalentCyclicSet());
    }

    /**
     * Discriminate atoms experiencing uniform environments using the provided
     * method. Depending on the level of identity required one can choose how
     * the atoms are perturbed in an attempt to break symmetry. As with all
     * hashing there is always a probability of collision but some of these
     * collisions may be due to an insufficiency in the algorithm opposed to a
     * random chance of collision. Currently there are three strategies but one
     * should choose either to use the fast, but good, heuristic {@link
     * MinimumEquivalentCyclicSet} or the exact {@link AllEquivalentCyclicSet}.
     * In practice {@link MinimumEquivalentCyclicSet} is good enough for most
     * applications but it is important to understand the potential trade off.
     * The {@link MinimumEquivalentCyclicSetUnion} is provided for demonstration
     * only, and as such, is deprecated.
     *
     * <ul>
     *   <li>{@link MinimumEquivalentCyclicSet} - fastest, attempt to break
     *       symmetry by changing a single smallest set of the equivalent atoms
     *       which occur in a ring</li>
     *   <li>{@link MinimumEquivalentCyclicSetUnion} (deprecated) -
     *       distinguishes more molecules by changing all smallest sets of the
     *       equivalent atoms which occur in a ring. This method is provided
     *       for example only</li>
     *   <li>{@link AllEquivalentCyclicSet} - slowest, systematically perturb
     *       all equivalent atoms that occur in a ring</li>
     * </ul>
     *
     * At the time of writing (Feb, 2013) the number of known false positives
     * found in PubChem-Compound (aprx. 46,000,000 structures) are as follows:
     *
     * <ul>
     *   <li>MinimumEquivalentCyclicSet - 128 molecules, 64 false positives
     *       (128/2)</li>
     *   <li>MinimumEquivalentCyclicSetUnion - 8 molecules, 4 false positives
     *       (8/2)</li>
     *   <li>AllEquivalentCyclicSet - 0 molecules</li>
     * </ul>
     *
     * @param equivSetFinder equivalent set finder, used to determine which
     *                       atoms will be perturbed to try and break symmetry;
     *                       a {@code null} value disables perturbation
     * @return fluent API reference (self)
     * @see AllEquivalentCyclicSet
     * @see MinimumEquivalentCyclicSet
     * @see MinimumEquivalentCyclicSetUnion
     */
    HashGeneratorMaker perturbWith(EquivalentSetFinder equivSetFinder) {
        this.equivSetFinder = equivSetFinder;
        return this;
    }

    /**
     * Add a custom encoder to the hash generator which will be built. Although
     * not enforced, the encoder should be stateless and should not modify any
     * passed inputs.
     *
     * @param encoder an atom encoder
     * @return fluent API reference (self)
     * @throws NullPointerException no encoder provided
     */
    public HashGeneratorMaker encode(AtomEncoder encoder) {
        if (encoder == null) throw new NullPointerException("no encoder provided");
        customEncoders.add(encoder);
        return this;
    }

    /**
     * Combines the separate stereo encoder factories into a single factory.
     * With no factories configured the shared empty factory is returned, a
     * single factory is returned as is, and two or more are chained pairwise
     * (left to right) in the order they were registered.
     *
     * @return a single stereo encoder factory
     */
    private StereoEncoderFactory makeStereoEncoderFactory() {
        if (stereoEncoders.isEmpty()) {
            return StereoEncoderFactory.EMPTY;
        }
        // fold the list from the left: ((f0 + f1) + f2) + ...
        StereoEncoderFactory factory = stereoEncoders.get(0);
        for (int i = 1; i < stereoEncoders.size(); i++) {
            factory = new ConjugatedEncoderFactory(factory, stereoEncoders.get(i));
        }
        return factory;
    }

    /**
     * Given the current configuration create an {@link EnsembleHashGenerator}.
     * This operation is not yet implemented.
     *
     * @return never returns normally
     * @throws UnsupportedOperationException always, ensemble generators are
     *                                       not yet supported
     */
    public EnsembleHashGenerator ensemble() {
        throw new UnsupportedOperationException("not yet supported");
    }

    /**
     * Given the current configuration create a {@link MoleculeHashGenerator}.
     * The molecule generator wraps the generator produced by {@link #atomic()}.
     *
     * @return instance of the generator
     * @throws IllegalArgumentException no depth was configured
     */
    public MoleculeHashGenerator molecular() {
        return new BasicMoleculeHashGenerator(atomic());
    }

    /**
     * Given the current configuration create an {@link AtomHashGenerator}.
     *
     * @return instance of the generator
     * @throws IllegalArgumentException no depth was configured. Whether any
     *                                  encoders were configured is not checked
     *                                  here (presumably ConjugatedAtomEncoder
     *                                  rejects an empty list - TODO confirm)
     */
    public AtomHashGenerator atomic() {

        if (depth < 0) throw new IllegalArgumentException("no depth specified, use .depth(int)");

        // built-in encoders first (the enum set is ordered), followed by the
        // custom encoders in the order they were added
        List<AtomEncoder> encoders = new ArrayList<AtomEncoder>(encoderSet.size() + customEncoders.size());
        encoders.addAll(encoderSet);
        encoders.addAll(customEncoders);

        // check if suppression of atoms is wanted - if not the 'Basic'
        // generator is used (see below).
        // NOTE(review): identity comparison - assumes
        // AtomSuppression.unsuppressed() always returns the same instance
        boolean suppress = suppression != AtomSuppression.unsuppressed();

        AtomEncoder encoder = new ConjugatedAtomEncoder(encoders);
        SeedGenerator seeds = new SeedGenerator(encoder, suppression);

        // compose the stereo factory once and share it between the simple and
        // the perturbed generator
        StereoEncoderFactory stereo = makeStereoEncoderFactory();

        AbstractAtomHashGenerator simple = suppress
                ? new SuppressedAtomHashGenerator(seeds, new Xorshift(), stereo, suppression, depth)
                : new BasicAtomHashGenerator(seeds, new Xorshift(), stereo, depth);

        // no equivalence set finder - just use the simple hash
        if (equivSetFinder == null) {
            return simple;
        }

        // there is a finder for checking equivalent vertices, the user wants
        // to 'perturb' the hashes
        return new PerturbedAtomHashGenerator(seeds, simple, new Xorshift(), stereo, equivSetFinder, suppression);
    }

    /**
     * Helper class to combine two stereo encoder factories. Declared static as
     * it does not use any state of the enclosing maker.
     */
    private static final class ConjugatedEncoderFactory implements StereoEncoderFactory {

        private final StereoEncoderFactory left, right;

        /**
         * Create a new conjugated encoder factory from the left and right
         * factories.
         *
         * @param left  encoder factory
         * @param right encoder factory
         */
        private ConjugatedEncoderFactory(StereoEncoderFactory left, StereoEncoderFactory right) {
            this.left = left;
            this.right = right;
        }

        /**
         * {@inheritDoc}
         */
        @Override
        public StereoEncoder create(IAtomContainer container, int[][] graph) {
            return new ConjugatedEncoder(left.create(container, graph), right.create(container, graph));
        }
    }

    /**
     * Helper class to combine two stereo encoders. Declared static as it does
     * not use any state of the enclosing maker.
     */
    private static final class ConjugatedEncoder implements StereoEncoder {

        private final StereoEncoder left, right;

        /**
         * Create a new conjugated encoder from a left and right encoder.
         *
         * @param left  encoder
         * @param right encoder
         */
        private ConjugatedEncoder(StereoEncoder left, StereoEncoder right) {
            this.left = left;
            this.right = right;
        }

        /**
         * Encodes using the left and then the right encoder.
         *
         * @param current current invariants
         * @param next    next invariants
         * @return whether either encoder modified any values
         */
        @Override
        public boolean encode(long[] current, long[] next) {
            boolean modified = left.encode(current, next);
            // the right encoder is invoked first in the expression so that it
            // always runs, even when the left encoder already modified values
            return right.encode(current, next) || modified;
        }

        /**
         * reset the left and right encoders
         */
        @Override
        public void reset() {
            left.reset();
            right.reset();
        }
    }
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy