All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.actelion.research.chem.ExtendedMolecule Maven / Gradle / Ivy

There is a newer version: 2024.11.2
Show newest version
/*
 * Copyright (c) 1997 - 2016
 * Actelion Pharmaceuticals Ltd.
 * Gewerbestrasse 16
 * CH-4123 Allschwil, Switzerland
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this
 *    list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 *    this list of conditions and the following disclaimer in the documentation
 *    and/or other materials provided with the distribution.
 * 3. Neither the name of the the copyright holder nor the
 *    names of its contributors may be used to endorse or promote products
 *    derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 * @author Thomas Sander
 */

package com.actelion.research.chem;

import com.actelion.research.util.Angle;

import java.io.*;
import java.util.Arrays;

/**
 * While the Molecule class covers all primary molecule information as atom and bond properties,
 * the atom connectivity and coordinates, its derived class ExtendedMolecule handles secondary,
 * i.e. calculated molecule information. Most important are the directly connected atoms and bonds
 * of every atom, information about rings and whether they are aromatic, and some atom properties
 * that depend on their near neighbors. This calculated information is cached in helper arrays and
 * stays valid as long as the molecule's primary information is not changed. High level methods,
 * e.g. getPath(), that require valid helper arrays take care of updating the cache themselves.
 * Low level methods, e.g. isAromaticAtom(), which typically are called very often, do not check
 * the validity of or update the helper arrays themselves for performance reasons. If you use low
 * level methods, then you need to make sure that the required helper array information is valid by
 * calling ensureHelperArrays().
 * Typically you will never instantiate an ExtendedMolecule, but rather use a StereoMolecule.
 */
public class ExtendedMolecule extends Molecule implements Serializable {
	static final long serialVersionUID = 0x2006CAFE;

	/**
	 * To interpret a stereo center as fisher projection, all non stereo
	 * bonds must be vertical and all stereo bonds must be horizontal.
	 * FISCHER_PROJECTION_LIMIT is the allowed tolerance (currently 5.0 degrees).
	 * In addition in large rings we don't assume Fisher projections,
	 * because coordinates, if just in a circle, may have subsequent almost vertical bonds.
	 */
	public static final float FISCHER_PROJECTION_LIMIT = (float)Math.PI / 36;
	public static final float FISCHER_PROJECTION_RING_LIMIT = 24;

	public static final float STEREO_ANGLE_LIMIT = (float)Math.PI / 36;   // 5 degrees

	public static final int cMaxConnAtoms = 16; // ExtendedMolecule is not restricted anymore
											   // However, this is a suggestion for editors and other classes
	transient private int mAtoms,mBonds;
	transient private RingCollection mRingSet;

	transient private int mPi[];
	transient private int mConnAtoms[];	// non-H neighbour counts
	transient private int mAllConnAtoms[];	// neighbour counts including explicit hydrogen
	transient private int mConnAtom[][];
	transient private int mConnBond[][];
	transient private int mConnBondOrder[][];

	public ExtendedMolecule() {
		}


	public ExtendedMolecule(int maxAtoms, int maxBonds) {
		super(maxAtoms, maxBonds);
		}

	public ExtendedMolecule(Molecule mol) {
		super(mol==null ? 256 : mol.getMaxAtoms(), mol==null ? 256 : mol.getMaxBonds());
		if (mol != null)
			mol.copyMolecule(this);
		}


	/**
	 * Clears destmol and then copies a part of this Molecule into destMol, being defined by a mask of atoms to be included.
	 * If not all atoms are copied, then destMol is set to be a substructure fragment.
	 * @param destMol receives the part of this Molecule
	 * @param includeAtom defines atoms to be copied; its size may be this.getAtoms() or this.getAllAtoms()
	 * @param recognizeDelocalizedBonds defines whether disconnected delocalized bonds will keep their
	 * single/double bond status or whether the query feature 'delocalized bond' will be set
	 * @param atomMap null or int[] not smaller than includeAtom.length; receives atom indices of dest molecule or -1 if not copied
	 */
	public void copyMoleculeByAtoms(ExtendedMolecule destMol, boolean[] includeAtom, boolean recognizeDelocalizedBonds, int[] atomMap) {
		ensureHelperArrays(recognizeDelocalizedBonds ? cHelperRings : cHelperNeighbours);

		destMol.mAtomList = null;
		if (mIsFragment)
			destMol.setFragment(true);

		int atomCount = includeAtom.length;

		if (atomMap == null)
			atomMap = new int[atomCount];

		destMol.mAllAtoms = 0;
		for (int atom=0; atom 1) {
						for (int i=0; i 1) {
							for (int j=0; j 3
			 && isAtomStereoCenter(atom)) {
				int remainingNeighbours = 0;
				int lostStereoBond = -1;
				int lostAtom = -1;
				for (int i=0; i= 3) {
					double angle = getBondAngle(atom, lostAtom);
					double minAngleDif = 10.0;
					int minConnBond = -1;
					for (int i=0; i angleDif) {
								minAngleDif = angleDif;
								minConnBond = mConnBond[atom][i];
								}
							}
						}
					if (minConnBond != -1) {
						int destBond = bondMap[minConnBond];
						destMol.setBondType(destBond, mBondType[minConnBond] == cBondTypeUp ? cBondTypeDown : cBondTypeUp);
						if (mBondAtom[0][minConnBond] != atom) {
							destMol.setBondAtom(1, destBond, atomMap[mBondAtom[0][minConnBond]]);
							destMol.setBondAtom(0, destBond, atomMap[atom]);
							}
						}
					}
				}
			}
		}


	private void rescueImplicitHigherValences(ExtendedMolecule destMol, int sourceAtomCount, int[] atomMap) {
		destMol.ensureHelperArrays(Molecule.cHelperNeighbours);
		for (int atom=0; atom
	 * 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotopes, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metal ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * @param atom * @return count of category 1 & 2 neighbour atoms (excludes neighbours connected with zero bond order) */ public int getAllConnAtoms(int atom) { return mAllConnAtoms[atom]; } /** * @param atom * @return the number of connected plain explicit and implicit hydrogen atoms (does not include 2H,3H, custom labelled H) */ public int getPlainHydrogens(int atom) { return getExplicitHydrogens(atom) + getImplicitHydrogens(atom); } /** * @param atom * @return the number of connected explicit and implicit hydrogen atoms (includes 2H,3H, custom labelled H) */ public int getAllHydrogens(int atom) { return mAllConnAtoms[atom] - getNonHydrogenNeighbourCount(atom) + getImplicitHydrogens(atom); } /** * A validated molecule (after helper array creation) contains a sorted list of all atoms * with the plain (negligible) hydrogen atoms at the end of the list. negligible hydrogen atoms * a those that can be considered implicit, because they have no attached relevant information. * Hydrogen atoms that cannot be neglected are special isotopes (mass != 0), if they carry a * custom label, if they are connected to another atom with bond order different from 1, or * if they are connected to another negligible hydrogen.
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * @return the number of relevant atoms not including negligible hydrogen atoms */ public int getAtoms() { return mAtoms; } /** * @param atom * @return count of neighbour atoms connected by a 0-order metal ligand bond */ public int getMetalBondedConnAtoms(int atom) { return mConnAtom[atom].length - mAllConnAtoms[atom]; } /** * This is different from the Hendrickson pi-value, which considers pi-bonds to carbons only. * @param atom * @return the number pi electrons at atom (the central atom of acetone would have 1) */ public int getAtomPi(int atom) { return mPi[atom]; } /** * Requires helper arrays state cHelperNeighbours. * @param atom * @return number of electronegative neighbour atoms */ public int getAtomElectronegativeNeighbours(int atom) { int e = 0; for (int i=0; i * 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * @param atom * @param i index into sorted neighbour list * @return the i-th neighbor atom of atom */ public int getConnAtom(int atom, int i) { return mConnAtom[atom][i]; } /** * The neighbours (connected atoms) of any atom are sorted by their relevance:
* 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * Note: This method includes neighbours marked as being part of an exclude group! * @param atom * @return count of category 1 neighbour atoms (excludes plain H and bond zero orders) */ public int getNotExcludedConnAtoms(int atom) { return mConnAtoms[atom] - getExcludedNeighbourCount(atom); } /** * The neighbours (connected atoms) of any atom are sorted by their relevance:
* 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * Note: This method includes neighbours marked as being part of an exclude group! * @param atom * @return count of category 1 neighbour atoms (excludes plain H and bond zero orders) */ public int getConnAtoms(int atom) { return mConnAtoms[atom]; } /** * The neighbours (connected atoms) of any atom are sorted by their relevance:
* 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * @param atom * @return count of category 1 & 2 & 3 neighbour atoms */ public int getAllConnAtomsPlusMetalBonds(int atom) { return mConnAtom[atom].length; } /** * The neighbours (connected atoms) of any atom are sorted by their relevance:
* 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * @param atom * @param i index into sorted neighbour list * @return index of bond connecting atom with its i-th neighbor */ public int getConnBond(int atom, int i) { return mConnBond[atom][i]; } /** * The neighbours (connected atoms) of any atom are sorted by their relevance:
* 1. non-hydrogen atoms (bond order 1 and above) and unusual hydrogen atoms (non-natural abundance isotops, custom labelled hydrogen, etc.)
* 2. plain-hydrogen atoms (natural abundance, bond order 1)
* 3. loosely connected atoms (bond order 0, i.e. metall ligand bond)
* Only valid after calling ensureHelperArrays(cHelperNeighbours or higher); * Orders of delocalized bonds, i.e. bonds in an aromatic 6-membered ring, are returned as 1. * @param atom * @param i index into sorted neighbour list * @return order of bond connecting atom with its i-th neighbor */ public int getConnBondOrder(int atom, int i) { return mConnBondOrder[atom][i]; } /** * This method returns the non-hydrogen neighbour count of atom. * It excludes any hydrogen atoms in contrast to getConnAtoms(), which only * excludes plain hydrogen (not deuterium, tritium, custom labelled hydrogen, etc.). * Don't use this method's return value for loops with getConnAtom(), * getConnBond(), or getConnBondOrder(). * @param atom * @return the number of non-hydrogen neighbor atoms */ public int getNonHydrogenNeighbourCount(int atom) { int count = mConnAtoms[atom]; for (int i = 0; i< mConnAtoms[atom]; i++) if (mAtomicNo[mConnAtom[atom][i]] == 1) count--; return count; } /** * This method returns the count of atom neighbours which are marked as being an exclude group. * @param atom * @return the number of non-hydrogen neighbor atoms marked as being part of an exclude group */ public int getExcludedNeighbourCount(int atom) { int count = 0; if (mIsFragment) for (int i=0; i 1) piElectronsFound = true; int bond = mConnBond[atom][i]; if (mBondType[bond] == cBondTypeDelocalized) delocalizedBondFound = true; } } if (delocalizedBondFound && !piElectronsFound) valence++; return valence; } /** * The free valence is the number of potential additional single bonded * neighbours to reach the atom's maximum valence. Atomic numbers that have * multiple possible valences, the highest value is taken. * Atom charges are considered. Implicit hydrogens are not considered. * Thus, the oxygen in a R-O(-) has a free valence of 0, the nitrogen in R3N(+) * has a free valence of 1. Chlorine in Cl(-) has a free valence of 6. If you need * the free valence taking the lowest possible valence into account, use * getLowestFreeValence(), which would return 0 for Cl(-). * @param atom * @return */ public int getFreeValence(int atom) { return getMaxValence(atom) - getOccupiedValence(atom); } /** * The lowest free valence is the number of potential additional single bonded * neighbours to reach the atom's lowest valence above or equal its current * occupied valence. Atom charges are considered. Implicit hydrogens are not considered. * Thus, the phosphor atoms in PF2 and PF4 both have a lowest free valence of 1. * The oxygen in R-O(-) has a lowest free valence of 0, the nitrogen in R3N(+) * has a free valence of 1. If you need the maximum possible free valence, * use getFreeValence(), which would give 6 for Cl(-) and HCl.
* Of course, the lowest free valce depends on the atomic number. If this molecule * is a fragment and if an atom list is associated with this atom, then the lowest free * valence is calculated for all atomic numbers in the list and the highest of them is returned. * @param atom * @return */ public int getLowestFreeValence(int atom) { if (!mIsFragment || mAtomList == null || mAtomList[atom] == null) return getLowestFreeValence(atom, mAtomicNo[atom]); int valence = 0; for (int atomicNo:mAtomList[atom]) valence = Math.max(valence, getLowestFreeValence(atom, atomicNo)); return valence; } protected int getLowestFreeValence(int atom, int atomicNo) { int occupiedValence = getOccupiedValence(atom); int correction = getElectronValenceCorrection(atom, occupiedValence, atomicNo); int valence = getAtomAbnormalValence(atom); if (valence == -1) { byte[] valenceList = getAllowedValences(mAtomicNo[atom]); int i= 0; while ((occupiedValence > valenceList[i] + correction) && (i * standard P valence is 3, used valence is 4, implicit abnormal valence is 5. * The molecule is interpreted as O=PH2-OMe. Requires cHelperNeighbours! * @param atom * @param neglectExplicitHydrogen * @return abnormal valence or -1 if valence doesn't exceed standard valence */ public int getImplicitHigherValence(int atom, boolean neglectExplicitHydrogen) { int occupiedValence = getOccupiedValence(atom); occupiedValence -= getElectronValenceCorrection(atom, occupiedValence); if (neglectExplicitHydrogen) occupiedValence -= mAllConnAtoms[atom] - mConnAtoms[atom]; byte[] valences = getAllowedValences(mAtomicNo[atom]); if (occupiedValence <= valences[0]) return -1; // stepwise try higher allowed valences for (int i=1; i= occupiedValence) return valences[i]; // if we have a compatible higher allowed valence, then use that, otherwise the occupied valence return occupiedValence; } /** * Calculates for every non-H atom the mean value of all shortest routes (bonds in between) * to any other atom of the same fragment. * @return */ public float[] getAverageTopologicalAtomDistance() { ensureHelperArrays(cHelperNeighbours); float[] meanDistance = new float[mAtoms]; int[] graphAtom = new int[mAtoms]; for (int startAtom=0; startAtom 0) { pathAtom[index-1] = parentAtom[pathAtom[index]]; index--; } return graphLevel[parent]; } if (graphLevel[candidate] == 0 && (neglectAtom == null || neglectAtom.length <= candidate || !neglectAtom[candidate])) { graphAtom[++highest] = candidate; graphLevel[candidate] = graphLevel[parent]+1; parentAtom[candidate] = parent; } } } current++; } return -1; } /** * Finds bonds of a path that is defined by an atom sequence. * @param pathAtom pathAtom[0]...[pathLength] -> list of atoms on path * @param pathBond int array not smaller than pathLength * @param pathLength no of path bonds == no of path atoms - 1 */ public void getPathBonds(int[] pathAtom, int[] pathBond, int pathLength) { ensureHelperArrays(cHelperNeighbours); for (int i=0; i= 171; // amino acids } /** * Calculates and return the number of implicit hydrogens at atom. * If atom is itself a hydrogen atom, a metal except Al, or a noble gas, * then 0 is returned. For all other atom kinds the number of * implicit hydrogens is basically the lowest typical valence that is compatible * with the occupied valence, minus the occupied valence corrected by atom charge * and radical state. * @param atom * @return */ public int getImplicitHydrogens(int atom) { if (mIsFragment && (mAtomQueryFeatures[atom] & cAtomQFNoMoreNeighbours) == 0) return 0; // H, metals except Al, noble gases don't have implicit hydrogens if (!supportsImplicitHydrogen(atom)) return 0; ensureHelperArrays(cHelperNeighbours); int occupiedValence = 0; for (int i=0; i> 1; } occupiedValence -= getElectronValenceCorrection(atom, occupiedValence); int maxValence = getAtomAbnormalValence(atom); if (maxValence == -1) { byte[] valenceList = Molecule.getAllowedValences(mAtomicNo[atom]); maxValence = valenceList[0]; for (int i=1; (maxValence= 171 && mAtomicNo[atom] <= 190) { int connAtoms = mAllConnAtoms[atom]; if (connAtoms > 2) molweight -= (connAtoms - 2) * cRoundedMass[1]; } } return molweight; } /** * Simple method to calculate rotatable bonds. This method counts all single * bonds provided that they
* - are not a terminal bond
* - are not part of a ring
* - are not an amide bond
* - are not the second of two equivalent bonds next to the same triple bond
* @return */ public int getRotatableBondCount() { int rCount = 0; ensureHelperArrays(Molecule.cHelperRings); for (int bond=0; bond 1 && !isAromaticAtom(connAtom) && isElectronegative(connAtom)) { isRotatable = false; break; // amid bond } } } } if (isRotatable && !isPseudoRotatableBond(bond)) rCount++; } } return rCount; } /** * In a consecutive sequence of sp-hybridized atoms multiple single bonds * cause redundant torsions. Only that single bond with the smallest bond index * is considered really rotatable; all other single bonds are pseudo rotatable. * If one/both end(s) of the sp-atom sequence doesn't carry atoms * outside of the straight line then no bond is considered rotatable. * A simple terminal single bond * @param bond * @return true, if this bond is not considered rotatable because of a redundancy */ public boolean isPseudoRotatableBond(int bond) { if (getBondOrder(bond) != 1) return false; for (int i=0; i<2; i++) { int atom = mBondAtom[i][bond]; int rearAtom = mBondAtom[1-i][bond]; while (mPi[atom] == 2 && mConnAtoms[atom] == 2 && mAtomicNo[atom] < 10) { for (int j=0; j<2; j++) { int connAtom = mConnAtom[atom][j]; if (connAtom != rearAtom) { if (mConnAtoms[connAtom] == 1) return true; int connBond = mConnBond[atom][j]; if (getBondOrder(connBond) == 1 && connBond < bond) return true; rearAtom = atom; atom = connAtom; break; } } } if (mConnAtoms[atom] == 1) return true; } return false; } public int getAromaticRingCount() { ensureHelperArrays(cHelperRings); int count = 0; for (int i=0; i * This method tells the molecule that current atom/bond parities are valid, even if the * stereo perception not has been performed. In addition to the stereo parities one may * declare CIP parities and/or symmetry ranks also to be valid (helperStereoBits != 0). * setParitiesValid(0) should be called if no coordinates are available but the parities are valid * nevertheless, e.g. after the IDCodeParser has parsed an idcode without coordinates. * (Note: After idcode parsing unknown stereo centers have parities cAtomParityNone * instead of cAtomParityUnknown. Thus, calling isStereoCenter(atom) returns false!!!) * Declaring parities valid prevents the Canonizer to run the stereo recognition again when * ensureHelperArrays(cHelperParities or higher) is called.
* May also be called after filling valences with explicit hydrogen atoms, which have no * coordinates, to tell the molecule that the earlier created stereo flags are still valid. * @param helperStereoBits 0 or combinations of cHelperBitCIP,cHelperBitSymmetry...,cHelperBitIncludeNitrogenParities */ public void setParitiesValid(int helperStereoBits) { mValidHelperArrays |= (cHelperBitsStereo & (cHelperBitParities | helperStereoBits)); } /** * This converts one single bond per parity into a stereo up/down bond to * correctly reflect the given parity. This works for tetrahedral and * allene atom parities as well as for BINAP type of bond parities. * Should only be called with valid TH and EZ parities and valid coordinates, * e.g. after idcode parsing with coordinates or after coordinate generation. */ public void setStereoBondsFromParity() { assert((mValidHelperArrays & cHelperBitParities) != 0); ensureHelperArrays(cHelperRings); // in case we miss ring and neighbour information // Convert all stereo bonds into single bonds for (int bond=0; bond= 15) { for (int i=0; i 4) { setAtomParity(atom, cAtomParityNone, false); return; } int allConnAtoms = mAllConnAtoms[atom]; // We may have a rare case without any single bond (e.g. O=S(=NH)=NMe) with parities assigned from 3D-coords boolean singleBondFound = false; for (int i=0; i sortedConn[i-1]) && lowestConnAtom > mConnAtom[atom][j]) { lowestConnAtom = mConnAtom[atom][j]; lowestConnIndex = j; } } sortedConn[i] = lowestConnAtom; if (mConnBond[atom][lowestConnIndex] == preferredBond) preferredBondIndex = i; } */ for (int i=1; i angle[2]) && (angle[1] - angle[2] > Math.PI))); break; case 1: inverted = (angle[2] - angle[0] > Math.PI); break; case 2: inverted = (angle[1] - angle[0] < Math.PI); break; } bondType = ((getAtomParity(atom) == cAtomParity1) ^ inverted) ? cBondTypeUp : cBondTypeDown; /* // original handling where parity depended solely on the clockwise/anti-clockwise of three angles bondType = ((getAtomParity(atom) == cAtomParity1) ^ (angle[1] > angle[2])) ? cBondTypeDown : cBondTypeUp; */ } else { int order = 0; if (angle[1] <= angle[2] && angle[2] <= angle[3]) order = 0; else if (angle[1] <= angle[3] && angle[3] <= angle[2]) order = 1; else if (angle[2] <= angle[1] && angle[1] <= angle[3]) order = 2; else if (angle[2] <= angle[3] && angle[3] <= angle[1]) order = 3; else if (angle[3] <= angle[1] && angle[1] <= angle[2]) order = 4; else if (angle[3] <= angle[2] && angle[2] <= angle[1]) order = 5; bondType = ((getAtomParity(atom) == cAtomParity1) ^ (up_down[order][preferredBondIndex] == 1)) ? cBondTypeDown : cBondTypeUp; } mBondType[preferredBond] = bondType; } private boolean setFisherProjectionStereoBondsFromParity(int atom, int[] sortedConnMap, double[] angle) { int allConnAtoms = mAllConnAtoms[atom]; int[] direction = new int[allConnAtoms]; int parity = getFisherProjectionParity(atom, sortedConnMap, angle, direction); if (parity == cAtomParityUnknown) return false; int bondType = (getAtomParity(atom) == parity) ? cBondTypeUp : cBondTypeDown; for (int i=0; i FISCHER_PROJECTION_RING_LIMIT) return cAtomParityUnknown; int allConnAtoms = mAllConnAtoms[atom]; if (direction == null) direction = new int[allConnAtoms]; if (!getFisherProjectionBondDirections(atom, sortedConnMap, angle, direction)) return cAtomParityUnknown; int horizontalBondType = -1; for (int i=0; i 4) return false; boolean[] isUsed = new boolean[4]; for (int i=0; i FISCHER_PROJECTION_LIMIT) return false; direction[i] = 3 & (int)(a / (Math.PI/2)); if (isUsed[direction[i]]) return false; isUsed[direction[i]] = true; if ((direction[i] & 1) == 0) { // vertical bond if (mBondType[mConnBond[atom][sortedConnMap[i]]] != cBondTypeSingle) return false; } else { if (!isStereoBond(mConnBond[atom][sortedConnMap[i]], atom)) return false; } } return isUsed[0] && isUsed[2]; } private void setAlleneStereoBondFromParity(int atom) { // find preferred bond to serve as stereobond if (mConnAtoms[atom] != 2 || mConnBondOrder[atom][0] != 2 || mConnBondOrder[atom][1] != 2 || mConnAtoms[mConnAtom[atom][0]] < 2 || mConnAtoms[mConnAtom[atom][1]] < 2 || mPi[mConnAtom[atom][0]] != 1 || mPi[mConnAtom[atom][1]] != 1) { setAtomParity(atom, cAtomParityNone, false); return; } int preferredBond = -1; int preferredAtom = -1; int preferredAlleneAtom = -1; int oppositeAlleneAtom = -1; int bestScore = 0; for (int i=0; i<2; i++) { int alleneAtom = mConnAtom[atom][i]; for (int j=0; j connAtom)) highPriorityAtom = connAtom; } int[] oppositeAtom = new int[2]; int oppositeAtoms = 0; for (int i = 0; i< mConnAtoms[oppositeAlleneAtom]; i++) { int connAtom = mConnAtom[oppositeAlleneAtom][i]; if (connAtom != atom) oppositeAtom[oppositeAtoms++] = connAtom; } double alleneAngle = getBondAngle(atom, oppositeAlleneAtom); double angleDif = 0.0; if (oppositeAtoms == 2) { if (oppositeAtom[0] > oppositeAtom[1]) { int temp = oppositeAtom[0]; oppositeAtom[0] = oppositeAtom[1]; oppositeAtom[1] = temp; } double hpAngleDif = getAngleDif(alleneAngle, getBondAngle(oppositeAlleneAtom, oppositeAtom[0])); double lpAngleDif = getAngleDif(alleneAngle, getBondAngle(oppositeAlleneAtom, oppositeAtom[1])); angleDif = hpAngleDif - lpAngleDif; } else { angleDif = getAngleDif(alleneAngle, getBondAngle(oppositeAlleneAtom, oppositeAtom[0])); } if ((angleDif < 0.0) ^ (getAtomParity(atom) == cAtomParity1) ^ (highPriorityAtom == preferredAtom)) mBondType[preferredBond] = cBondTypeUp; else mBondType[preferredBond] = cBondTypeDown; } /** * In case bond is a BINAP kind of chiral bond with defined parity, * then the preferred neighbour single bond is converted into a * stereo bond to correctly reflect its defined parity. * This method assumes that none of the potential stereo bonds to indicate * the axial configuration is a stereo bond already. * @param bond */ public void setStereoBondFromBondParity(int bond) { // set an optimal bond to up/down to reflect the atom parity if (getBondParity(bond) == Molecule.cBondParityNone || getBondParity(bond) == Molecule.cBondParityUnknown || !isBINAPChiralityBond(bond)) return; int preferredBond = -1; int preferredAtom = -1; int preferredBINAPAtom = -1; int oppositeBINAPAtom = -1; int bestScore = 0; for (int i=0; i<2; i++) { int atom = mBondAtom[i][bond]; for (int j=0; j connAtom)) highPriorityAtom = connAtom; } int[] oppositeAtom = new int[2]; int oppositeAtoms = 0; for (int i = 0; i< mConnAtoms[oppositeBINAPAtom]; i++) if (mConnBond[oppositeBINAPAtom][i] != bond) oppositeAtom[oppositeAtoms++] = mConnAtom[oppositeBINAPAtom][i]; double binapAngle = getBondAngle(preferredBINAPAtom, oppositeBINAPAtom); double angleDif = 0.0; if (oppositeAtoms == 2) { if (oppositeAtom[0] > oppositeAtom[1]) { int temp = oppositeAtom[0]; oppositeAtom[0] = oppositeAtom[1]; oppositeAtom[1] = temp; } double hpAngleDif = getAngleDif(binapAngle, getBondAngle(oppositeBINAPAtom, oppositeAtom[0])); double lpAngleDif = getAngleDif(binapAngle, getBondAngle(oppositeBINAPAtom, oppositeAtom[1])); angleDif = hpAngleDif - lpAngleDif; } else { angleDif = getAngleDif(binapAngle, getBondAngle(oppositeBINAPAtom, oppositeAtom[0])); } if ((angleDif < 0.0) ^ (getBondParity(bond) == cBondParityZor2) ^ (highPriorityAtom == preferredAtom)) mBondType[preferredBond] = cBondTypeUp; else mBondType[preferredBond] = cBondTypeDown; } protected boolean bondsAreParallel(double angle1, double angle2) { double angleDif = Math.abs(getAngleDif(angle1, angle2)); return (angleDif < 0.08 || angleDif > Math.PI - 0.08); } /** * * @param bond * @return */ public int getPreferredDoubleBondSide(int bond) { int[] value = new int[cMaxConnAtoms]; double[] angle = new double[cMaxConnAtoms]; double[] bondAngle = new double[2]; int angles = 0; for (int i=0; i<2; i++) { int atom = mBondAtom[i][bond]; for (int j=0; j bondAngle[0]) && (angle[i] < bondAngle[1])) side -= value[i]; else side += value[i]; } return (changed) ? -side : side; } private int preferredTHStereoBond(int atom, boolean excludeStereoBonds) { // If we have two (anti-)parallel bonds, then we need to select // that one of those that is closest to the other bonds. int allConnAtoms = mAllConnAtoms[atom]; double[] angle = new double[allConnAtoms]; for (int i=0; i dif) closestRightDif = dif; } } isPreferred[i] = (closestRightDif - closestLeftDif < Math.PI); } }*/ int preferredBond = -1; int bestScore = 0; for (int i=0; i= 6) for (int i = 0; i< mConnAtoms[atom]; i++) if (isBINAPChiralityBond(mConnBond[atom][i])) return mConnAtom[atom][i]; return -1; } /** * Checks whether atom is one of the two atoms of an axial chirality bond of BINAP type. * Condition: non-aromatic single bond connecting two aromatic rings with 6 or more members * that together bear at least three ortho substituents. A stereo bond indicating the * chirality is not(!!!) a condition. * @param atom to check, whether it is part of a bond, which has BINAP type of axial chirality * @return axial chirality bond or -1 if axial chirality conditions are not met */ public int findBINAPChiralityBond(int atom) { if (mConnAtoms[atom] == 3 && isAromaticAtom(atom) && getAtomRingSize(atom) >= 5) for (int i = 0; i< mConnAtoms[atom]; i++) if (isBINAPChiralityBond(mConnBond[atom][i])) return mConnBond[atom][i]; return -1; } /** * Evaluates, whether bond is an amide bond, thio-amide, or amidine bond. * @param bond * @return */ public boolean isAmideTypeBond(int bond) { ensureHelperArrays(cHelperNeighbours); for (int i=0; i<2; i++) { int atom1 = mBondAtom[i][bond]; if (mAtomicNo[atom1] == 7) { int atom2 = mBondAtom[1-i][bond]; for (int j = 0; j< mConnAtoms[atom2]; j++) { int connAtom = mConnAtom[atom2][j]; int connBond = mConnBond[atom2][j]; if ((mAtomicNo[connAtom] == 7 || mAtomicNo[connAtom] == 8 || mAtomicNo[connAtom] == 16) && getBondOrder(connBond) >= 2) return true; } } } return false; } /** * @param atom * @return whether this atom is the central atom of an allene */ public boolean isCentralAlleneAtom(int atom) { return mPi[atom] == 2 && mConnAtoms[atom] == 2 && mConnBondOrder[atom][0] == 2 && mConnBondOrder[atom][1] == 2 && mAtomicNo[atom] <= 7; } /** * Checks whether this nitrogen atom is flat, because it has a double bond, * is member of an aromatic ring or is part of amide, an enamine or * in resonance with an aromatic ring. It is also checked that ortho * substituents don't force the amine into a non-resonance torsion. * State of helper arrays must be at least cHelperRings. * @param atom * @return */ public boolean isFlatNitrogen(int atom) { return isFlatNitrogen(atom, true); } private boolean isFlatNitrogen(int atom, boolean checkForPyramidalBridgeHead) { if (mAtomicNo[atom] != 7 || mConnAtoms[atom] == 4) return false; if (isAromaticAtom(atom) || mPi[atom] != 0 || (mAtomQueryFeatures[atom] & cAtomQFFlatNitrogen) != 0) return true; if (mAtomCharge[atom] == 1) return false; for (int i=0; i= 5) { int orthoSubstituentCount = 0; for (int j=0; j= 3) orthoSubstituentCount++; } int nitrogenNeighbourCount = getNonHydrogenNeighbourCount(atom); if ((orthoSubstituentCount == 2 && nitrogenNeighbourCount >= 2) || (orthoSubstituentCount == 1 && nitrogenNeighbourCount == 3)) continue; // the nitrogen is rotated out of PI-plane } return !checkForPyramidalBridgeHead || !isPyramidalBridgeHead(atom); } // vinyloge amides, etc. for (int j = 0; j< mConnAtoms[connAtom]; j++) { if ((mConnBondOrder[connAtom][j] == 2 || isAromaticBond(mConnBond[connAtom][j]))) return !checkForPyramidalBridgeHead || !isPyramidalBridgeHead(atom); } } } } if (heteroCount < 2) { for (int i = 0; i< mConnAtoms[atom]; i++) { int connAtom = mConnAtom[atom][i]; boolean isStabilized = false; boolean hasCompetitor = false; for (int j = 0; j< mConnAtoms[connAtom]; j++) { if (mConnAtom[connAtom][j] != atom) { if (mConnBondOrder[connAtom][j] != 1 && (mAtomicNo[mConnAtom[connAtom][j]] == 7 || mAtomicNo[mConnAtom[connAtom][j]] == 8 || mAtomicNo[mConnAtom[connAtom][j]] == 16)) isStabilized = true; if (mConnBondOrder[connAtom][j] == 1 && mAtomicNo[mConnAtom[connAtom][j]] == 7) hasCompetitor = true; } } if (isStabilized && (!hasCompetitor || heteroCount == 0)) return !checkForPyramidalBridgeHead || !isPyramidalBridgeHead(atom); } } return false; } /** * Use heuristics to determine whether atom has at least three rings bonds and whether that ring system * forces the atom into a pyramidal geometry due to the length of ring bridges.
* For this, after checking for at least 3 ring bonds, we find the smallest ring that atom is member of. * If this ring is not larger than 7 members, then we check for all neighbours (1 or 2) * that do not belong to the smallest ring:
* - We find the bridge size (atom count) to where it touches the smallest ring.
* - We also find the path length from the touch point on the smallest ring back to atom.
* - Using heuristics we decide with this information, whether the ring system prevents a flat geometry of atom. * e.g.: Catalytic Asymmetric Synthesis of Tröger’s Base Analogues with Nitrogen Stereocenter * Chun Ma, Yue Sun, Junfeng Yang, Hao Guo, and Junliang Zhang * ACS Central Science 2023 9 (1), 64-71 * DOI: 10.1021/acscentsci.2c01121 * @param atom * @return true, if the attached ring system prevents a flat geometry of atom */ public boolean isPyramidalBridgeHead(int atom) { if (isAromaticAtom(atom) || mPi[atom] != 0 || (mAtomQueryFeatures[atom] & cAtomQFFlatNitrogen) != 0 || getAtomRingBondCount(atom) < 3) return false; int smallRingSize = getAtomRingSize(atom); if (smallRingSize > 7) return false; int smallRingNo = 0; while (smallRingNo= RingCollection.MAX_SMALL_RING_COUNT && smallRingNo == mRingSet.getSize()) return false; // very rare case, but found with wrongly highly bridged CSD entry JORFAZ for (int i=0; i= 3) { int[] ringAtom = mRingSet.getRingAtoms(ringNo); for (int i=0; i<6; i++) { if (atom == ringAtom[i]) { int potentialOtherBridgeHeadIndex = mRingSet.validateMemberIndex(ringNo, (bridgeHead == ringAtom[mRingSet.validateMemberIndex(ringNo, i+2)]) ? i-2 : i+2); int potentialOtherBridgeHead = ringAtom[potentialOtherBridgeHeadIndex]; if (getAtomRingBondCount(potentialOtherBridgeHead) >= 3 && getPathLength(pathAtom[1], potentialOtherBridgeHead, 2, null) == 2) return true; // adamantane like break; } } } } boolean bridgeHeadIsFlat = (getAtomPi(bridgeHead) == 1 || isAromaticAtom(bridgeHead) || isFlatNitrogen(bridgeHead, false)); boolean bridgeHeadMayInvert = !bridgeHeadIsFlat && getAtomicNo(bridgeHead) == 7 && getAtomCharge(bridgeHead) != 1; if (bondCountToBridgeHead == 1) return !bridgeHeadIsFlat && !bridgeHeadMayInvert && smallestRingSize <= 4 && bridgeAtomCount <= 3; switch (smallestRingSize) { // case 3 is fully handled case 4: // must be bondCountToBridgeHead == 2 if (!bridgeHeadIsFlat && !bridgeHeadMayInvert && bridgeAtomCount <= 4) return true; break; case 5: // must be bondCountToBridgeHead == 2 if (bridgeHeadMayInvert) { if (bridgeAtomCount <= 3) return true; } else if (!bridgeHeadIsFlat) { if (bridgeAtomCount <= 4) return true; } break; case 6: if (bondCountToBridgeHead == 2) { if (bridgeHeadIsFlat) { if (bridgeAtomCount <= 4) return true; } else if (!bridgeHeadMayInvert) { if (bridgeAtomCount <= 3) return true; } } else if (bondCountToBridgeHead == 3) { if (bridgeHeadIsFlat) { if (bridgeAtomCount <= 6) return true; } else { if (bridgeAtomCount <= 4) return true; } } break; case 7: if (bondCountToBridgeHead == 3) { if (bridgeAtomCount <= 3) return true; } break; } return false; } /** * Checks whether bond is an axial chirality bond of the BINAP type. * Condition: non-aromatic, non-small-ring (<= 7 members) single bond * connecting two aromatic rings with 6 or more members each * that together bear at least three ortho substituents. A stereo bond indicating the * chirality is not(!!!) a condition. * @param bond * @return true if axial chirality conditions are met */ public boolean isBINAPChiralityBond(int bond) { if (mBondType[bond] != cBondTypeSingle || isAromaticBond(bond) || (isRingBond(bond) && getBondRingSize(bond) < 7)) return false; int atom1 = mBondAtom[0][bond]; if (!isAromaticAtom(atom1) || getAtomRingSize(atom1) < 5) return false; int atom2 = mBondAtom[1][bond]; if (!isAromaticAtom(atom2) || getAtomRingSize(atom2) < 5) return false; int orthoSubstituentCount1 = getOrthoSubstituentCount(atom1, atom2); int orthoSubstituentCount2 = getOrthoSubstituentCount(atom2, atom1); // with aromatic 6-membered (or larger) rings only, we have a simple logic if (getAtomRingSize(atom1) > 5 && getAtomRingSize(atom2) > 5) return orthoSubstituentCount1 + orthoSubstituentCount2 > 2; int secondOrderOrthoSubstituentCount1 = getSecondOrderOrthoSubstituentCount(atom1, atom2); int secondOrderOrthoSubstituentCount2 = getSecondOrderOrthoSubstituentCount(atom2, atom1); // for 5-membered rings we need stronger constraints (currently we don't distignuid 5-5 and 5-6) if (orthoSubstituentCount1 == 2 && secondOrderOrthoSubstituentCount2 >= 1) return true; if (orthoSubstituentCount2 == 2 && secondOrderOrthoSubstituentCount1 >= 1) return true; if (secondOrderOrthoSubstituentCount1 == 2 && (orthoSubstituentCount2 >= 1 || secondOrderOrthoSubstituentCount2 >= 1)) return true; if (secondOrderOrthoSubstituentCount2 == 2 && (orthoSubstituentCount1 >= 1 || secondOrderOrthoSubstituentCount1 >= 1)) return true; return false; } private int getOrthoSubstituentCount(int atom, int otherBondAtom) { int count = 0; for (int i=0; i 2) count++; } return count; } private int getSecondOrderOrthoSubstituentCount(int atom, int otherBondAtom) { int count = 0; for (int i=0; i 2) innerCount++; } // if we have two next neighbours with 3 neighbours each, then one of them must be a second order ortho if (innerCount == 2) count++; } } return count; } protected boolean validateBondType(int bond, int type) { boolean ok = super.validateBondType(bond, type); if (ok && type == cBondTypeCross) { ensureHelperArrays(Molecule.cHelperRings); ok &= !isSmallRingBond(bond); } return ok; } public void validate() throws Exception { double avbl = getAverageBondLength(); double minDistanceSquare = avbl * avbl / 16.0; for (int atom1=1; atom1 getMaxValence(atom)) throw new Exception("atom valence exceeded"); allCharge += mAtomCharge[atom]; } if (allCharge != 0) throw new Exception("unbalanced atom charge"); } /** * Normalizes different forms of functional groups (e.g. nitro) * to a preferred one. This step should precede any canonicalization. * @return true if the molecule was changed */ public boolean normalizeAmbiguousBonds() { ensureHelperArrays(cHelperNeighbours); // Molecules from Marvin Sketch, if generated from SMILES, may contain delocalized bonds // using the Daylight aromaticity model, e.g. with aromatic carbonyl carbons in case of pyridinone. // Before generating idcodes, we need to convert to cumulated double bonds and use our own // aromaticity model to locate delocalized bonds. normalizeExplicitlyDelocalizedBonds(); boolean found = false; for (int atom=0; atom 0 && mAtomCharge[mBondAtom[1][bond]] < 0) { atom1 = mBondAtom[0][bond]; atom2 = mBondAtom[1][bond]; } else if (mAtomCharge[mBondAtom[0][bond]] < 0 && mAtomCharge[mBondAtom[1][bond]] > 0) { atom1 = mBondAtom[1][bond]; atom2 = mBondAtom[0][bond]; } else continue; if (isMetalAtom(atom1) || isMetalAtom(atom2)) continue; if ((mAtomicNo[atom1] < 9 && getOccupiedValence(atom1) > 3) || (mAtomicNo[atom2] < 9 && getOccupiedValence(atom2) > 3)) continue; boolean hasImplicitHydrogen = (getImplicitHydrogens(atom1) != 0); mAtomCharge[atom1] -= 1; mAtomCharge[atom2] += 1; // If the formerly positive atom had an implicit hydrogen, then removing // both charges mean shifting the implicit hydrogen to atom2. // If it had no implicit hydrogen, but an open valence, then we shift // the electron pair from the negative atom into the bond and increase the // bond order. if (!hasImplicitHydrogen) { int oldBondType = mBondType[bond]; if (bondOrder == 1) mBondType[bond] = cBondTypeDouble; else mBondType[bond] = cBondTypeTriple; if (oldBondType == cBondTypeDown || oldBondType == cBondTypeUp) { // retain stereo information int stereoCenter = mBondAtom[0][bond]; int newStereoBond = preferredTHStereoBond(stereoCenter, false); if (mBondAtom[0][newStereoBond] != stereoCenter) { mBondAtom[1][newStereoBond] = mBondAtom[0][newStereoBond]; mBondAtom[1][newStereoBond] = stereoCenter; } } } mValidHelperArrays = cHelperNone; } } int overallCharge = 0; int negativeAtomCount = 0; int negativeAdjustableCharge = 0; for (int atom=0; atom 0) { if (!hasNegativeNeighbour(atom) && isElectronegative(atom)) { int chargeReduction = Math.min(getImplicitHydrogens(atom), mAtomCharge[atom]); if (chargeReduction != 0 && positiveChargeForRemoval >= chargeReduction) { overallCharge -= chargeReduction; overallChargeChange -= chargeReduction; positiveChargeForRemoval -= chargeReduction; mAtomCharge[atom] -= chargeReduction; mValidHelperArrays &= cHelperNeighbours; } } } } int negativeChargeForRemoval = doNeutralize ? overallCharge : overallChargeChange; if (negativeChargeForRemoval < 0) { int[] negativeAtom = new int[negativeAtomCount]; negativeAtomCount = 0; for (int atom=0; atom=negativeAtom.length-negativeAtomCount); i--) { int atom = negativeAtom[i] & 0x003FFFFF; if (isElectronegative(atom)) { int chargeReduction = Math.min(-negativeChargeForRemoval, -mAtomCharge[atom]); overallCharge += chargeReduction; negativeChargeForRemoval += chargeReduction; mAtomCharge[atom] += chargeReduction; mValidHelperArrays &= cHelperNeighbours; } } } return overallCharge; } /** * @param atom * @return true if atom has a neighbour with a negative charge */ private boolean hasNegativeNeighbour(int atom) { for (int i = 0; i< mConnAtoms[atom]; i++) if (mAtomCharge[mConnAtom[atom][i]] < 0) return true; return false; } /** * @param atom * @return true if atom has a neighbour with a positive charge */ private boolean hasPositiveNeighbour(int atom) { for (int i = 0; i< mConnAtoms[atom]; i++) if (mAtomCharge[mConnAtom[atom][i]] > 0) return true; return false; } /** * Provided that the bond parity of a double bond is available, * this method determines, whether connAtom has a counterpart with * Z- (cis) configuration at the other end of the double bond. * If there is no Z-counterpart, then -1 is returned. * Requires cHelperParities. * @param connAtom directly connected to one of the double bond atoms * @param bond double bond with available bond parity * @return -1 or counterpart to connAtom in Z-configuration */ public int getZNeighbour(int connAtom, int bond) { if (getBondOrder(bond) != 2 && !isAromaticBond(bond)) return -1; int parity = getBondParity(bond); if (parity != cBondParityEor1 && parity != cBondCIPParityZorM) return -1; for (int i=0; i<2; i++) { int atom1 = mBondAtom[i][bond]; int atom2 = mBondAtom[1-i][bond]; int other1 = -1; boolean found = false; for (int j = 0; j< mConnAtoms[atom1]; j++) { int conn = mConnAtom[atom1][j]; if (conn != atom2) { if (conn == connAtom) found = true; else other1 = conn; } } if (found) { int lowConn = -1; int highConn = -1; for (int j = 0; j< mConnAtoms[atom2]; j++) { int conn = mConnAtom[atom2][j]; if (conn != atom1) { if (lowConn == -1) lowConn = conn; else if (conn > lowConn) highConn = conn; else { highConn = lowConn; lowConn = conn; } } } if (mConnAtoms[atom1] == 2) { if (mConnAtoms[atom2] == 2) return parity == cBondCIPParityZorM ? lowConn : -1; return (parity == cBondCIPParityZorM) ? lowConn : highConn; } else { if (mConnAtoms[atom2] == 2) return (parity == cBondCIPParityZorM) ^ (connAtom < other1) ? -1 : lowConn; return (parity == cBondCIPParityZorM) ^ (connAtom < other1) ? highConn : lowConn; } } } return -1; } public int getHelperArrayStatus() { return mValidHelperArrays; } /** * While the Molecule class covers all primary molecule information, its derived class * ExtendedMolecule handles secondary, i.e. calculated molecule information, which is cached * in helper arrays and stays valid as long as the molecule's primary information is not changed. * Most methods of ExtendedMolecule require some of the helper array's information. High level * methods, e.g. getPath(), take care of updating an outdated cache themselves. Low level methods, * e.g. isAromaticAtom(), which typically are called very often, do not check for validity * nor update the helper arrays themselves. If you use low level methods, then you need to make * sure that the needed helper array information is valid by this method.
* For performance reasons there are distinct levels of helper information. (A higher * level always includes all properties of the previous level):
* cHelperNeighbours: explicit hydrogen atoms are moved to the end of the atom table and * bonds leading to them are moved to the end of the bond table. This way algorithms can skip * hydrogen atoms easily. For every atom directly connected atoms and bonds (with and without * hydrogens) are determined. The number of pi electrons is counted.
* cHelperRings: Aromatic and non-aromatic rings are detected. Atom and bond ring * properties are set and a ring collection provides a total set of small rings (7 or less atoms). * Atoms being in allylic/benzylic or stabilized (neighbor of a carbonyl or similar group) position * are flagged as such.
* cHelperParities: Atom (tetrahedral or axial) and bond (E/Z or atrop) parities are calculated * from the stereo configurations.
* cHelperCIP: Cahn-Ingold-Prelog stereo information for atoms and bonds.
*
cHelperParities and cHelperCIP require a StereoMolecule!!!
* @param required one of cHelperNeighbours,cHelperRings,cHelperParities,cHelperCIP * @return true if the molecule was changed */ public void ensureHelperArrays(int required) { // cHelperNeighbours: mConnAtoms,mConnBonds,mPi for all atoms // cHelperRings: rings,aromaticity/allylic/stabilized for non-H-atoms only // cHelperParities: stereo parities for non-H-atoms/bonds only // cHelperCIP: mCanonizer, stereo parities for non-H-atoms/bonds only if ((required & ~mValidHelperArrays) == 0) return; if ((mValidHelperArrays & cHelperBitNeighbours) == 0) { handleHydrogens(); calculateNeighbours(); mValidHelperArrays |= cHelperBitNeighbours; if (convertHydrogenToQueryFeatures()) { handleHydrogens(); calculateNeighbours(); mValidHelperArrays |= cHelperBitNeighbours; } } if ((required & ~mValidHelperArrays) == 0) return; if ((mValidHelperArrays & ~(cHelperBitRingsSimple | cHelperBitRings)) != 0) { for (int atom=0; atom 1) { if (mAtomicNo[mConnAtom[connAtom][j]] == 6) mAtomFlags[atom] |= cAtomFlagAllylic; else { if (!isAromaticBond(mConnBond[connAtom][j]) && isElectronegative(mConnAtom[connAtom][j])) mAtomFlags[atom] |= cAtomFlagStabilized; } } } } } // propagate stabilized flags to vinylic positions while (true) { boolean found = false; for (int atom=0; atom 0 && (mAtomFlags[atom] & cAtomFlagStabilized) != 0 && !mRingSet.isAromaticAtom(atom)) { for (int i = 0; i< mConnAtoms[atom]; i++) { if (mConnBondOrder[atom][i] > 1) { int connAtom = mConnAtom[atom][i]; int connBond = mConnBond[atom][i]; for (int j = 0; j< mConnAtoms[connAtom]; j++) { if (mConnBond[connAtom][j] != connBond) { int candidate = mConnAtom[connAtom][j]; if ((mAtomFlags[candidate] & cAtomFlagStabilized) == 0) { mAtomFlags[candidate] |= cAtomFlagStabilized; found = true; } } } } } } } if (!found) break; } } mValidHelperArrays |= (cHelperBitRingsSimple | cHelperBitRings); } /** * Usually explicit hydrogen atoms can be removed without changing a molecule, * because the removal just converts explicit into implicit hydrogen atoms. * Exceptions are hydrogen with isotop information, hydrogens not connected to a non-H atom, * hydrogens carrying a custom label, hydrogens whose existence implicitly defines a neighbour * atom to have an abnormal valence, hydrogens with a different bonding environment than exactly * one single bond, and hydrogen atoms connected to metal atoms.
* This method moves all simple hydrogen atoms and associated bonds to the end of the atom/bond tables. * It sets mAtoms to exclude simple hydrogen atoms and mBonds to exclude bonds leading to them. * Simple hydrogens are not deleted, though. They are always displayed and the stereo perception * still considers up/down bonds leading to hydrogen atoms. Most other functions, however, can * happily neglect them.
* If non-simple hydrogen atoms (D,T and custom labelled hydrogen) exist, then these are positioned * after the non-hydrogen atoms and just before the simple hydrogen atoms. mAtoms and mBonds include * these atoms.
* mConnAtoms/mConnBonds/mConnBondOrder are neither used nor updated.
* Note: This method changes the order among the non-hydrogen atoms. To translate to the original * order use getHandleHydrogenMap() before calling ensureHelperArrays() if the original atom order is relevant. */ private void handleHydrogens() { // find all hydrogens that are connected to a non-H atom and therefore can be implicit boolean[] isHydrogen = findSimpleHydrogens(); // move all hydrogen atoms to end of atom table int lastNonHAtom = mAllAtoms; do lastNonHAtom--; while ((lastNonHAtom >= 0) && isHydrogen[lastNonHAtom]); for (int atom=0; atom= 0) && isHydrogenBond[lastNonHBond]); for (int bond=0; bond= 0) && isSimpleHydrogen[lastNonHAtom]); for (int i=0; i= 0) && isHydrogenBond[lastNonHBond]); for (int i=0; i * Note: This method returns true for uncharged, natural abundance hydrogens without * custom labels even if they have a non-standard bonding situation (everything being different * from having one single bonded non-simple-hydrogen neighbour, e.g. unbonded hydrogen, H2, * a metal ligand bond to another atom, two single bonds, etc.) * If unusual bonding needs to be considered, check for that independently from this method. * @param atom * @return */ public boolean isSimpleHydrogen(int atom) { return mAtomicNo[atom] == 1 && mAtomMass[atom] == 0 && mAtomCharge[atom] == 0 // && mAtomMapNo[atom] == 0 Since a mapNo is not part of an idcode, a mapped but otherwise simple H must be considered simple; TLS 29Aug2020 && (mAtomFlags[atom] & cAtomFlagsValence) == 0 && (mAtomCustomLabel == null || mAtomCustomLabel[atom] == null); } /** * Removes all plain explicit hydrogens atoms from the molecule, converting them effectively to implicit ones. * Assuming that this molecule has 2D-coordinates, this method perceives stereo configurations from up/down-bonds * to explicit hydrogens before deleting them and turns another bond into a stereo bond to indicate the proper configuration. * If the removal of a hydrogen atom would change an atom's implicit valence, the atom's abnormal valence is set accordingly. */ public void removeExplicitHydrogens() { removeExplicitHydrogens(true, false); } /** * Removes all plain explicit hydrogens atoms from the molecule, converting them effectively to implicit ones. * If the molecules has 2D-coordinates (hasValid2DCoords==true), then this method initially perceives stereo * configurations E/Z/R/S from coordinates and up/down-bonds to explicit hydrogens before deleting them * and turns another bond into a stereo bond to indicate the proper configuration. * If the removal of a hydrogen atom would change an atom's implicit valence, the atom's abnormal valence is set accordingly. * @param hasValid2DCoords pass true, if molecule has 2D-atom-coordinates * @param hasValid3DCoords pass true, if molecule has 3D-atom-coordinates */ public void removeExplicitHydrogens(boolean hasValid2DCoords, boolean hasValid3DCoords) { // calculate EZ and TH stereo parities if 2D-atom coordinates are available ensureHelperArrays(hasValid2DCoords ? cHelperParities : cHelperNeighbours); mAllAtoms = mAtoms; mAllBonds = mBonds; for (int atom=0; atom 1) mPi[atom] += order - 1; else if (mBondType[bnd] == cBondTypeDelocalized) mPi[atom] = 1; } } } for(int bnd=mBonds; bnd 3) mAtomFlags[atom] |= cAtomFlags4RingBonds; } for (int ringNo=0; ringNo 0) { if ((mAtomQueryFeatures[atom] & cAtomQFNoMoreNeighbours) == 0) { // add query feature hydrogen to explicit hydrogens int queryFeatureHydrogens = (mAtomQueryFeatures[atom] & cAtomQFHydrogen) == (cAtomQFNot0Hydrogen | cAtomQFNot1Hydrogen | cAtomQFNot2Hydrogen) ? 3 : (mAtomQueryFeatures[atom] & cAtomQFHydrogen) == (cAtomQFNot0Hydrogen | cAtomQFNot1Hydrogen) ? 2 : (mAtomQueryFeatures[atom] & cAtomQFNot0Hydrogen) == cAtomQFNot0Hydrogen ? 1 : 0; // For atoms with no specific charge definition a potential charge is not considered by getFreeValence(). // Non-carbon atoms with a charge may have an increased valence. We need to consider that. int freeValence = getFreeValence(atom); if (mAtomCharge[atom] == 0 && (mAtomQueryFeatures[atom] & cAtomQFCharge) == 0 && mAtomicNo[atom] != 6) freeValence++; int queryFeatureShift = explicitHydrogens; if (queryFeatureShift > 3 - queryFeatureHydrogens) queryFeatureShift = 3 - queryFeatureHydrogens; if (queryFeatureShift > freeValence + explicitHydrogens - queryFeatureHydrogens) queryFeatureShift = freeValence + explicitHydrogens - queryFeatureHydrogens; if (queryFeatureShift > 0) { long queryFeatures = (queryFeatureHydrogens == 0) ? // purge 'less than' options 0 : (mAtomQueryFeatures[atom] & cAtomQFHydrogen) << queryFeatureShift; queryFeatures |= (queryFeatureShift == 3 ? 7 : explicitHydrogens == 2 ? 3 : 1) << cAtomQFHydrogenShift; mAtomQueryFeatures[atom] &= ~cAtomQFHydrogen; mAtomQueryFeatures[atom] |= (cAtomQFHydrogen & queryFeatures); } } for (int i=mConnAtoms[atom]; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy