All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.SimpleCanonizer Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
/*
* Copyright (c) 1997 - 2016
* Actelion Pharmaceuticals Ltd.
* Gewerbestrasse 16
* CH-4123 Allschwil, Switzerland
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
*    list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice,
*    this list of conditions and the following disclaimer in the documentation
*    and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the
*    names of its contributors may be used to endorse or promote products
*    derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/

package com.actelion.research.chem;

public class SimpleCanonizer {
    private static final int cIDCodeVersion3 = 9;
    // productive version since May 2006 based on the molfile version 3
    // being compatible with MDL's "Enhanced Stereo Representation"

    private ExtendedMolecule mMol;
	private int mCanRank[];
	private long mCanBaseValue[];

	private boolean mGraphGenerated;
	private int mGraphRings;
	private int mGraphAtom[];
	private int mGraphBond[];
	private int mGraphFrom[];
	private int mGraphClosure[];

    public static final int MAX_ATOM_BITS = 8;

	private String         	mIDCode,mCoordinates;
    private StringBuffer    mEncodingBuffer;
    private int             mEncodingBitsAvail,mEncodingTempData;
    private boolean			mZCoordinatesAvailable;

	public SimpleCanonizer(ExtendedMolecule mol) {
		mMol = mol;
		mMol.ensureHelperArrays(Molecule.cHelperRings);

        for (int atom=0; atom 1)
					break;
				}
			for (int atom=0; atomj; k--)
					connRank[k] = connRank[k-1];
				connRank[j] = rank;
				}
			
			mCanBaseValue[atom] = 0;
			for (int i=0; i mCanBaseValue[atom])
					lowest = mCanBaseValue[atom];

			if (lowest != 0x7fffffffffffffffL) {
				canRank++;
				for (int atom=0; atom mCanRank[startAtom])
				startAtom = atom;

		boolean atomHandled[] = new boolean[mMol.getAtoms()];
		boolean bondHandled[] = new boolean[mMol.getBonds()];
		int newAtomNo[] = new int[mMol.getAtoms()];
		mGraphAtom = new int[mMol.getAtoms()];
		mGraphFrom = new int[mMol.getAtoms()];
		mGraphBond = new int[mMol.getBonds()];
		mGraphAtom[0] = startAtom;
		atomHandled[startAtom] = true;

		int atomsWithoutParents = 1;	// the startatom has no parent
		int firstUnhandled = 0;
		int firstUnused = 1;
		int graphBonds = 0;
		while (firstUnhandled < mMol.getAtoms()) {
			if (firstUnhandled < firstUnused) {	// attach neighbours in rank order to unhandled
				while (true) {
					int highestRankingConnAtom = 0;
					int highestRankingConnBond = 0;
					int highestRank = -1;
					for (int i=0; i highestRank) {
							highestRankingConnAtom = connAtom;
							highestRankingConnBond = mMol.getConnBond(mGraphAtom[firstUnhandled],i);
							highestRank = mCanRank[connAtom];
							}
						}

					if (highestRank == -1)
						break;

					newAtomNo[highestRankingConnAtom] = firstUnused;
					mGraphFrom[firstUnused] = firstUnhandled;
					mGraphAtom[firstUnused++] = highestRankingConnAtom;
					mGraphBond[graphBonds++] = highestRankingConnBond;
					atomHandled[highestRankingConnAtom] = true;
					bondHandled[highestRankingConnBond] = true;
					}
				firstUnhandled++;
				}
			else {
				int highestRankingAtom = 0;
				int highestRank = -1;
				for (int atom=0; atom highestRank) {
						highestRankingAtom = atom;
						highestRank = mCanRank[atom];
						}
					}
				atomsWithoutParents++;
				newAtomNo[highestRankingAtom] = firstUnused;
				mGraphFrom[firstUnused] = -1;	// no parent atom in graph tree
				mGraphAtom[firstUnused++] = highestRankingAtom;
				atomHandled[highestRankingAtom] = true;
				}
			}

		mGraphClosure = new int[2 * (mMol.getBonds() - graphBonds)];
		mGraphRings = 0;
		while (true) {	// add ring closure bonds (those with lowest new atom numbers first)
			int lowAtomNo1 = mMol.getMaxAtoms();
			int lowAtomNo2 = mMol.getMaxAtoms();
			int lowBond = -1;
			for (int bond=0; bond 0) {
                encodeBits(1, 1);   //  more data to come
                encodeBits(8, 4);   //  8 = datatype 'AtomList'
                encodeBits(count, nbits);
                for (int atom=0; atom> Molecule.cAtomRadicalStateShift, 2);
                    }
                }
            }

		if (mMol.isFragment()) {	// more QueryFeatures and fragment specific properties
            isSecondFeatureBlock |= addAtomQueryFeatures(22, isSecondFeatureBlock, nbits, Molecule.cAtomQFFlatNitrogen, 1, -1);
            isSecondFeatureBlock |= addBondQueryFeatures(23, isSecondFeatureBlock, nbits, Molecule.cBondQFMatchStereo, 1, -1);
            isSecondFeatureBlock |= addBondQueryFeatures(24, isSecondFeatureBlock, nbits,
            											 Molecule.cBondQFAromState,
            											 Molecule.cBondQFAromStateBits,
            											 Molecule.cBondQFAromStateShift);
			}

		boolean[] isAromaticSPBond = getAromaticSPBonds();
		if (isAromaticSPBond != null) {
			count = 0;
			for (int bond=0; bond 15) {
            ensureSecondFeatureBlock(isSecondFeatureBlock);
            codeNo -= 16;
            }
        
        encodeBits(1, 1);           //  more data to come
        encodeBits(codeNo, 4);      //  datatype
        encodeBits(count, nbits);
        for (int atom=0; atom> qfShift, qfBits);
            }
        }
    
    return true;
    }


	/**
	 * Writes one block of bond query features to the encoding stream through
	 * encodeBits(): a 1-bit "more data to come" flag, the 4-bit datatype code
	 * <code>codeNo</code>, and <code>count</code> in <code>nbits</code> bits,
	 * followed by a loop over the bonds.
	 *
	 * NOTE(review): this listing is damaged. Text between a less-than sign and
	 * the following greater-than sign was stripped during extraction, so the
	 * conditions and bodies of both <code>for</code> loops, and any statements
	 * between the first loop and the <code>15) {</code> fragment, are missing.
	 * Restore the method from the upstream source before compiling or changing it.
	 *
	 * @param codeNo datatype code of the block; the surviving fragment suggests
	 *        that values above 15 go through ensureSecondFeatureBlock() and are
	 *        reduced by 16 before being written in 4 bits - TODO confirm
	 * @param isSecondFeatureBlock passed on to ensureSecondFeatureBlock()
	 * @param nbits bit width used when writing <code>count</code>
	 * @param qfMask presumably the mask selecting the query feature bits of
	 *        interest; its use is in the lost text - TODO confirm
	 * @param qfBits bit width passed to an encodeBits() call in the second loop
	 * @param qfShift right-shift distance applied to a value written in the
	 *        second loop (only the tail of that expression survives)
	 * @return <code>true</code> on the only path still visible; other return
	 *         paths may exist in the lost text
	 */
	private boolean addBondQueryFeatures(int codeNo, boolean isSecondFeatureBlock, int nbits, int qfMask, int qfBits, int qfShift) {
	    int count = 0;
	    // NOTE(review): damaged line - the loop condition, the loop body and the
	    // start of the following 'if' were lost; only its tail "15) {" remains.
	    for (int bond=0; bond 15) {
            ensureSecondFeatureBlock(isSecondFeatureBlock);
            codeNo -= 16;
            }
        
	    encodeBits(1, 1);           //  more data to come
	    encodeBits(codeNo, 4);      //  datatype
	    encodeBits(count, nbits);
	    // NOTE(review): damaged line - everything between the loop header and the
	    // final shift expression was lost.
	    for (int bond=0; bond> qfShift, qfBits);
	            }
	        }

	    return true;
	    }


	private boolean[] getAromaticSPBonds() {
		boolean[] isAromaticSPBond = null;
		for (int bond=0; bond© 2015 - 2025 Weber Informatics LLC | Privacy Policy