All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.descriptor.SimpleFragmentGraph Maven / Gradle / Ivy

There is a newer version: 2024.12.1
Show newest version
package com.actelion.research.chem.descriptor;

import com.actelion.research.chem.Molecule;
import com.actelion.research.chem.StereoMolecule;
import com.actelion.research.util.BurtleHasher;

import java.util.Arrays;

/**
 * This class contains a fragment buffer used to describe and canonicalize a molecular fragment
 * defined by certain bond and atom features and their connectivity. Its sole purpose is to
 * calculate a hash value to populate a fragment-specific bit in a fingerprint.
 * By just using a small subset of the atom and bond features and neglecting any stereochemistry
 * of a normal molecule, the creation of a canonical representation is much faster than the
 * alternative of using Molecule.copyMoleculeByBonds() and a Canonizer().
 */
public class SimpleFragmentGraph {
	// NOTE(review): presumably the start value of the hash calculation; its use is not visible in this part of the file
	private static final int HASH_INIT = 13;
	// Length of every per-atom neighbour list in mConnAtom/mConnBond and number of mConnRank buffers
	private static final int MAX_CONN_ATOMS = 8;
	private static final int ATOM_INDEX_BITS = 4;    // bits needed to store either atom index or atom rank
	private static final int BOND_DESC_BITS = 2;     // bits needed to store bond descriptor
	// Bit mask with the lowest four bits set, i.e. as wide as ATOM_INDEX_BITS
	private static final long ATOM_INDEX_MASK = 0xF;
	// MAX_CONN_ATOMS * (ATOM_INDEX_BITS + BOND_DESC_BITS) + ATOM_INDEX_BITS must not exceed 64 bits (Long.SIZE)
	// With the current values this is 8 * (4 + 2) + 4 = 52 bits.

	// Per-fragment-atom arrays are allocated with (maxFragmentBonds + 1) entries;
	// mFragmentAtomFromOrig is allocated with one entry per atom of the source molecule.
	private int[] mConnAtoms,mFragmentAtomFromOrig,mCanRank,mGraphAtom,mGraphIndex;
	// mConnAtom/mConnBond: one row of MAX_CONN_ATOMS entries per fragment atom.
	// mConnRank[i]: a reusable buffer of length i for i = 1..MAX_CONN_ATOMS (index 0 stays null).
	private int[][] mConnAtom,mConnBond,mConnRank;
	// Current number of atoms and bonds in the fragment; both are reset to 0 by set()
	private int mAtoms,mBonds;
	// Descriptor bytes per fragment atom / per fragment bond, and a work buffer of 3 bytes per bond
	private byte[] mAtomDescriptor,mBondDescriptor,mBuffer;

	public SimpleFragmentGraph(int maxFragmentBonds) {
		init(maxFragmentBonds, 256);
	}

	public SimpleFragmentGraph(StereoMolecule mol, int[] bondMember, int bondCount) {
		mol.ensureHelperArrays(Molecule.cHelperNeighbours);
		init(bondCount, mol.getAtoms());
		setMolecule(mol, bondMember, bondCount);
	}

	/**
	 * Allocates all working buffers of this graph.
	 * @param maxFragmentBonds bond capacity; atom related buffers get one entry more
	 * @param atomCount number of entries of the table that is sized by the molecule's atom count
	 */
	private void init(int maxFragmentBonds, int atomCount) {
		final int atomCapacity = 1 + maxFragmentBonds;

		// table sized by the atom count of the source molecule
		mFragmentAtomFromOrig = new int[atomCount];

		// buffers sized by the bond capacity
		mBondDescriptor = new byte[maxFragmentBonds];
		mBuffer = new byte[maxFragmentBonds * 3];

		// buffers with one entry per fragment atom
		mAtomDescriptor = new byte[atomCapacity];
		mConnAtoms = new int[atomCapacity];
		mCanRank = new int[atomCapacity];
		mGraphAtom = new int[atomCapacity];
		mGraphIndex = new int[atomCapacity];

		// fixed width neighbour lists, one row per fragment atom
		mConnAtom = new int[atomCapacity][MAX_CONN_ATOMS];
		mConnBond = new int[atomCapacity][MAX_CONN_ATOMS];

		// one rank buffer for every possible neighbour count 1..MAX_CONN_ATOMS; slot 0 remains null
		mConnRank = new int[MAX_CONN_ATOMS + 1][];
		for (int neighbours = MAX_CONN_ATOMS; neighbours >= 1; neighbours--) {
			mConnRank[neighbours] = new int[neighbours];
		}
	}

	/**
	 * Re-populates this graph from a new set of bonds of the given molecule,
	 * reusing the already allocated buffers. The table that is sized by the
	 * molecule's atom count is enlarged if the molecule has more atoms than
	 * the table currently holds.
	 * @param mol molecule the fragment is taken from
	 * @param bondMember bonds of mol that make up the fragment
	 * @param bondCount number of bonds passed on to the fragment setup
	 */
	public void set(StereoMolecule mol, int[] bondMember, int bondCount) {
		mAtoms = 0;
		mBonds = 0;

		// Request the helper arrays before reading the atom count, exactly as the
		// molecule-taking constructor does, so that the capacity check below does
		// not depend on whether the caller already prepared the molecule.
		mol.ensureHelperArrays(Molecule.cHelperNeighbours);

		int requiredAtoms = mol.getAtoms();
		int capacity = mFragmentAtomFromOrig.length;
		if (capacity < requiredAtoms) {
			// The table has length 0 if this instance was constructed from a molecule
			// without atoms; doubling 0 would never reach requiredAtoms, so start at 1.
			capacity = Math.max(1, capacity);
			do {
				capacity *= 2;
			} while (capacity < requiredAtoms);
			mFragmentAtomFromOrig = new int[capacity];
		}

		setMolecule(mol, bondMember, bondCount);
	}

	private void setMolecule(StereoMolecule mol, int[] bondMember, int bondCount) {
		boolean[] includeAtom = new boolean[mol.getAtoms()];
		Arrays.fill(mConnAtoms, 0);
		for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy