All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.fingerprint.FingerprinterTool Maven / Gradle / Ivy

There is a newer version: 2.9
Show newest version
/* Copyright (C) 2002-2007  Christoph Steinbeck 
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.fingerprint;

import java.util.ArrayList;
import java.util.BitSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;

import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

/**
 * Tool with helper methods for IFingerprint.
 *
 * @author         steinbeck
 * @cdk.created    2002-02-24
 * @cdk.keyword    fingerprint
 * @cdk.module     standard
 * @cdk.githash
 */
public class FingerprinterTool {

    private final static ILoggingTool LOGGER = LoggingToolFactory.createLoggingTool(FingerprinterTool.class);

    /**
     * Checks whether all the positive bits in BitSet bs2 occur in BitSet bs1. If
     * so, the molecular structure from which bs2 was generated is a possible
     * substructure of the structure from which bs1 was generated.
     *
     * <p>Example:
     * <pre>
     *  Molecule mol = MoleculeFactory.makeIndole();
     *  BitSet bs = Fingerprinter.getBitFingerprint(mol);
     *  Molecule frag1 = MoleculeFactory.makePyrrole();
     *  BitSet bs1 = Fingerprinter.getBitFingerprint(frag1);
     *  if (FingerprinterTool.isSubset(bs, bs1)) {
     *      System.out.println("Pyrrole is subset of Indole.");
     *  }
     * </pre>
     *
     * Neither argument is modified; the check is performed on a copy of bs1.
     *
     * @param  bs1 The reference BitSet
     * @param  bs2 The BitSet which is compared with bs1
     * @return     True, if bs2 is a subset of bs1
     * @cdk.keyword substructure search
     */
    public static boolean isSubset(BitSet bs1, BitSet bs2) {
        // bs2 is a subset of bs1 exactly when (bs1 AND bs2) equals bs2.
        BitSet intersection = (BitSet) bs1.clone();
        intersection.and(bs2);
        return intersection.equals(bs2);
    }

    /**
     * Lists all bits set in bs2 and not in bs1 (the other way round is not
     * considered), both in the returned list and to the logger at debug level.
     * See {@link #differences(java.util.BitSet, java.util.BitSet)} for a method
     * that lists all differences, including bits present in bs1 but not in bs2.
     *
     * @param  bs1 First bitset
     * @param  bs2 Second bitset
     * @return     the positions, in ascending order, of the bits set in bs2 but not in bs1
     * @see #differences(java.util.BitSet, java.util.BitSet)
     */
    public static List<Integer> listDifferences(BitSet bs1, BitSet bs2) {
        List<Integer> l = new ArrayList<Integer>();
        LOGGER.debug("Listing bit positions set in bs2 but not in bs1");
        // Only the set bits of bs2 can qualify, so jump between them instead of
        // probing every index up to bs2.size().
        for (int f = bs2.nextSetBit(0); f >= 0; f = bs2.nextSetBit(f + 1)) {
            if (!bs1.get(f)) {
                l.add(f);
                LOGGER.debug("Bit " + f + " not set in bs1");
            }
        }
        return l;
    }

    /**
     * List all differences between the two bit vectors, i.e. every position
     * whose bit is set in exactly one of them. This is unlike {@link
     * #listDifferences(java.util.BitSet, java.util.BitSet)}, which only lists
     * the bits set in its second argument but not in its first.
     *
     * @param s a bit vector
     * @param t another bit vector
     * @return the positions at which s and t differ, in ascending order
     */
    public static Set<Integer> differences(BitSet s, BitSet t) {
        // XOR on a copy leaves exactly the differing positions set; s and t are untouched.
        BitSet u = (BitSet) s.clone();
        u.xor(t);
        Set<Integer> differences = new TreeSet<Integer>();
        for (int i = u.nextSetBit(0); i >= 0; i = u.nextSetBit(i + 1)) {
            differences.add(i);
        }
        return differences;
    }

    /**
     * Convert a mapping of features and their counts to a 1024-bit binary fingerprint. A single
     * bit is set for each pattern.
     *
     * @param features features to include
     * @return the binary fingerprint
     * @see #makeBitFingerprint(java.util.Map, int, int)
     */
    public static IBitFingerprint makeBitFingerprint(final Map<String, Integer> features) {
        return makeBitFingerprint(features, 1024, 1);
    }

    /**
     * Convert a mapping of features and their counts to a binary fingerprint. A single bit is
     * set for each pattern.
     *
     * @param features features to include
     * @param len      fingerprint length
     * @return the binary fingerprint
     * @see #makeBitFingerprint(java.util.Map, int, int)
     */
    public static IBitFingerprint makeBitFingerprint(final Map<String, Integer> features, int len) {
        return makeBitFingerprint(features, len, 1);
    }

    /**
     * Convert a mapping of features and their counts to a binary fingerprint. Each feature
     * can set 1-n hashes, the amount is modified by the {@code bits} operand. Only the feature
     * keys are hashed; the counts stored as map values are not used here.
     *
     * @param features features to include
     * @param len      fingerprint length
     * @param bits     number of bits to set for each pattern
     * @return the binary fingerprint
     */
    public static IBitFingerprint makeBitFingerprint(final Map<String, Integer> features, int len, int bits) {
        final BitSetFingerprint fingerprint = new BitSetFingerprint(len);
        final Random rand = new Random();
        for (String feature : features.keySet()) {
            int hash = feature.hashCode();
            // hash % len may be negative; its magnitude is always below len.
            fingerprint.set(Math.abs(hash % len));
            // Every additional bit is derived deterministically from the previous
            // position: reseed with it, then draw the next index in [0, len).
            for (int i = 1; i < bits; i++) {
                rand.setSeed(hash);
                fingerprint.set(hash = rand.nextInt(len));
            }
        }
        return fingerprint;
    }

    /**
     * Wrap a mapping of features and their counts to a continuous (count based) fingerprint.
     *
     * @param features features to include
     * @return the continuous fingerprint
     */
    public static ICountFingerprint makeCountFingerprint(final Map<String, Integer> features) {
        return new IntArrayCountFingerprint(features);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy