All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.CanonizerUtil Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/


package com.actelion.research.chem;


public class CanonizerUtil {
	/** Selects how far a molecule is simplified before its idcode is generated. */
	public enum IDCODE_TYPE {
		NORMAL, NOSTEREO, BACKBONE, TAUTOMER, NOSTEREO_TAUTOMER
	}

	/**
	 * Generates an idcode for a defined and simplified state of the given molecule.
	 * Supported simplification types:
	 * <ul>
	 * <li>NORMAL: complete stereo information is kept</li>
	 * <li>NOSTEREO: stereo information is stripped</li>
	 * <li>TAUTOMER: generic tautomer; tautomer regions are marked as
	 *     [cBondQFSingle | cBondQFDouble] and atom labels encode the count of
	 *     pi electrons, deuterium and tritium</li>
	 * <li>NOSTEREO_TAUTOMER: combines NOSTEREO and TAUTOMER</li>
	 * <li>BACKBONE: stereo information is stripped and all bonds are set to single bonds</li>
	 * </ul>
	 *
	 * @param mol source molecule to generate the idcode from
	 * @param type type of idcode requested; switching on a null value throws a NullPointerException
	 * @param largestFragmentOnly if true, a copy of the molecule is reduced to its largest
	 *        unconnected fragment (small fragments stripped, then passed through
	 *        MoleculeNeutralizer.neutralizeChargedMolecule) before the idcode is generated
	 * @return the idcode, or null if the type is not handled or if idcode generation
	 *         threw (in which case a warning line is written to System.err)
	 */
	public static String getIDCode(StereoMolecule mol, IDCODE_TYPE type, boolean largestFragmentOnly) {
		switch (type) {
			case NORMAL:
				return getIDCode(mol, largestFragmentOnly);
			case NOSTEREO:
				return getIDCodeNoStereo(mol, largestFragmentOnly);
			case BACKBONE:
				return getIDCodeBackBone(mol, largestFragmentOnly);
			case TAUTOMER:
				return getIDCodeTautomer(mol, largestFragmentOnly);
			case NOSTEREO_TAUTOMER:
				return getIDCodeNoStereoTautomer(mol, largestFragmentOnly);
			default:
				return null;
		}
	}

	/**
	 * Hashes the idcode of the requested type.
	 *
	 * @return the StrongHasher value of the idcode, or 0 if no idcode could be generated
	 */
	public static long getHash(StereoMolecule m, IDCODE_TYPE type, boolean largestFragmentOnly) {
		return hashOrZero(getIDCode(m, type, largestFragmentOnly));
	}

	/**
	 * Hashes the idcode generated after stripping stereo information.
	 *
	 * @return the StrongHasher value of the idcode, or 0 if no idcode could be generated
	 */
	public static long getNoStereoHash(StereoMolecule m, boolean largestFragmentOnly) {
		return hashOrZero(getIDCodeNoStereo(m, largestFragmentOnly));
	}

	/**
	 * Hashes the idcode of the generic tautomer.
	 *
	 * @return the StrongHasher value of the idcode, or 0 if no idcode could be generated
	 */
	public static long getTautomerHash(StereoMolecule m, boolean largestFragmentOnly) {
		return hashOrZero(getIDCodeTautomer(m, largestFragmentOnly));
	}

	/**
	 * Hashes the idcode of the generic tautomer built after stripping stereo information.
	 *
	 * @return the StrongHasher value of the idcode, or 0 if no idcode could be generated
	 */
	public static long getNoStereoTautomerHash(StereoMolecule m, boolean largestFragmentOnly) {
		return hashOrZero(getIDCodeNoStereoTautomer(m, largestFragmentOnly));
	}

	/**
	 * Hashes the backbone idcode (no stereo information, all bonds single).
	 *
	 * @return the StrongHasher value of the idcode, or 0 if no idcode could be generated
	 */
	public static long getBackboneHash(StereoMolecule m, boolean largestFragmentOnly) {
		return hashOrZero(getIDCodeBackBone(m, largestFragmentOnly));
	}

	/** Maps a missing idcode to 0 and any other idcode to its StrongHasher value. */
	private static long hashOrZero(String idcode) {
		if (idcode == null) {
			return 0;
		}
		return StrongHasher.hash(idcode);
	}

	/** Strips the small fragments from the given molecule in place and neutralizes what remains. */
	private static void reduceToLargestFragment(StereoMolecule mol) {
		mol.stripSmallFragments(true);
		MoleculeNeutralizer.neutralizeChargedMolecule(mol);
	}

	/** Returns a compact copy, optionally reduced to the largest fragment, without stereo information. */
	private static StereoMolecule copyWithoutStereo(StereoMolecule mol, boolean largestFragmentOnly) {
		StereoMolecule copy = mol.getCompactCopy();
		if (largestFragmentOnly) {
			reduceToLargestFragment(copy);
		}
		copy.stripStereoInformation();
		return copy;
	}

	/** Builds the generic tautomer of the given molecule and canonizes it with custom atom labels encoded. */
	private static String canonizeGenericTautomer(StereoMolecule mol) {
		StereoMolecule genericTautomer = new TautomerHelper(mol).createGenericTautomer();
		return new Canonizer(genericTautomer, Canonizer.ENCODE_ATOM_CUSTOM_LABELS).getIDCode();
	}

	/** NORMAL idcode; the molecule is only copied when it has to be reduced to its largest fragment. */
	private static String getIDCode(StereoMolecule mol, boolean largestFragmentOnly) {
		try {
			StereoMolecule target = mol;
			if (largestFragmentOnly) {
				target = mol.getCompactCopy();
				reduceToLargestFragment(target);
			}
			return new Canonizer(target).getIDCode();
		} catch (Throwable e) {
			// Best effort: report the failure and signal it to the caller with null.
			System.err.println("WARN: getIDCode() " + e);
			return null;
		}
	}

	/** NOSTEREO idcode; always works on a copy of the molecule. */
	private static String getIDCodeNoStereo(StereoMolecule mol, boolean largestFragmentOnly) {
		try {
			StereoMolecule simplified = copyWithoutStereo(mol, largestFragmentOnly);
			return new Canonizer(simplified).getIDCode();
		} catch (Throwable e) {
			System.err.println("WARN: getIDCodeNoStereo() " + e);
			return null;
		}
	}

	/** TAUTOMER idcode; the molecule is only copied when it has to be reduced to its largest fragment. */
	private static String getIDCodeTautomer(StereoMolecule mol, boolean largestFragmentOnly) {
		try {
			StereoMolecule target = mol;
			if (largestFragmentOnly) {
				target = mol.getCompactCopy();
				reduceToLargestFragment(target);
			}
			return canonizeGenericTautomer(target);
		} catch (Throwable e) {
			System.err.println("WARN: getIDCodeTautomer() " + e);
			return null;
		}
	}

	/** NOSTEREO_TAUTOMER idcode; always works on a copy of the molecule. */
	private static String getIDCodeNoStereoTautomer(StereoMolecule mol, boolean largestFragmentOnly) {
		try {
			StereoMolecule simplified = copyWithoutStereo(mol, largestFragmentOnly);
			return canonizeGenericTautomer(simplified);
		} catch (Throwable e) {
			System.err.println("WARN: getIDCodeNoStereoTautomer() " + e);
			return null;
		}
	}

	/** BACKBONE idcode; works on a stereo-free copy in which every bond is set to a single bond. */
	private static String getIDCodeBackBone(StereoMolecule mol, boolean largestFragmentOnly) {
		try {
			StereoMolecule backbone = copyWithoutStereo(mol, largestFragmentOnly);
			int bondCount = backbone.getAllBonds();
			for (int bond = 0; bond < bondCount; bond++) {
				backbone.setBondType(bond, Molecule.cBondTypeSingle);
			}
			return new Canonizer(backbone).getIDCode();
		} catch (Throwable e) {
			System.err.println("WARN: getIDCodeBackBone() " + e);
			return null;
		}
	}

	/**
	 * 64 bit hash, derived from numerical recipes.
	 * Will move this class later to dd_core.
	 */
	public static class StrongHasher {
		private static final long HSTART = 0xBB40E64DA205B064L;
		private static final long HMULT = 7664345821815920749L;
		private static final long[] byteTable = createByteTable();

		/** Fills the 256-entry lookup table by running 31 xor-shift rounds per entry on a fixed seed. */
		private static long[] createByteTable() {
			long[] table = new long[256];
			long state = 0x544B2FBACAAF1684L;
			for (int entry = 0; entry < table.length; entry++) {
				for (int round = 0; round < 31; round++) {
					state ^= state >>> 7;
					state ^= state << 11;
					state ^= state >>> 10;
				}
				table[entry] = state;
			}
			return table;
		}

		/**
		 * Computes the 64 bit hash of the given string.
		 * Characters are consumed from the last to the first; each one is mixed in
		 * as two bytes, low byte first and high byte second.
		 *
		 * @param cs string to hash
		 * @return 1 if cs is null, the start value for an empty string, otherwise the mixed hash
		 */
		public static long hash(String cs) {
			if (cs == null) {
				return 1L;
			}

			long result = HSTART;
			int remaining = cs.length();
			while (remaining > 0) {
				remaining--;
				char ch = cs.charAt(remaining);
				result = (result * HMULT) ^ byteTable[ch & 0xff];
				result = (result * HMULT) ^ byteTable[(ch >>> 8) & 0xff];
			}
			return result;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy