All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.aromaticity.Kekulization Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/*
 * Copyright (c) 2014 European Bioinformatics Institute (EMBL-EBI)
 *                    John May 
 *
 * Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version. All we ask is that proper credit is given
 * for our work, which includes - but is not limited to - adding the above
 * copyright notice to the beginning of your source code files, and to any
 * copyright notice that you may distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 U
 */

package org.openscience.cdk.aromaticity;

import org.openscience.cdk.config.Elements;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.GraphUtil;
import org.openscience.cdk.graph.Matching;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;

import java.util.BitSet;

import static org.openscience.cdk.CDKConstants.ISAROMATIC;
import static org.openscience.cdk.graph.GraphUtil.EdgeToBondMap;
import static org.openscience.cdk.interfaces.IBond.Order.DOUBLE;
import static org.openscience.cdk.interfaces.IBond.Order.SINGLE;
import static org.openscience.cdk.interfaces.IBond.Order.UNSET;

/**
 * Assign a Kekulé representation to the aromatic systems of a compound. Input
 * from some file-formats provides some bonds as aromatic / delocalised bond
 * types. This method localises the electrons and assigns single and double
 * bonds. Different atom and bond orderings may produce distinct but valid
 * Kekulé forms. Only bond orders are adjusted and any aromatic flags will
 * remain untouched.
 * 

* * The procedure requires that all atoms have defined implicit hydrogens counts * and formal charges. If this information is not present it should be assigned * first.

* * For some inputs it may not be possible to assign a Kekulé form. In general * theses cases are rare but usually occur for one of two reasons. * 1) Missing / ambiguous implicit hydrogens, this is fundamental to determining the * Kekulé form and if guessed may be wrong. Some formats (e.g. molfile) can not * include the exact number of implicit hydrogens attached to atom whilst others * may omit it or optionally skip encoding. The typical example is found in the * example for 1H-pyrrole, a correct SMILES encoding should include the hydrogen * on the aromatic nitrogen '[nH]1cccc1' (not: 'n1cccc1'). * 2) The aromaticity perception algorithm has allowed atoms with abnormal * valence. This usually happens when a non-convalent bond has be upgraded * to a sigma bond during format conversion.

* * @author John May * @cdk.keyword kekule * @cdk.keyword kekulize * @cdk.keyword dearomatize * @cdk.keyword aromatic * @cdk.keyword fix bond orders * @cdk.keyword deduce bond orders */ public final class Kekulization { /** * Assign a Kekulé representation to the aromatic systems of a compound. * * @param ac structural representation * @throws CDKException a Kekulé form could not be assigned */ public static void kekulize(final IAtomContainer ac) throws CDKException { // storage of pairs of atoms that have pi-bonded final Matching matching = Matching.withCapacity(ac.getAtomCount()); // exract data structures for efficient access final IAtom[] atoms = AtomContainerManipulator.getAtomArray(ac); final EdgeToBondMap bonds = EdgeToBondMap.withSpaceFor(ac); final int[][] graph = GraphUtil.toAdjList(ac, bonds); // determine which atoms are available to have a pi bond placed final BitSet available = available(graph, atoms, bonds); // attempt to find a perfect matching such that a pi bond is placed // next to each available atom. if not found the solution is ambiguous if (!matching.perfect(graph, available)) throw new CDKException("Cannot assign Kekulé structure without randomly creating radicals."); // propegate bond order information from the matching for (final IBond bond : ac.bonds()) { if (bond.getOrder() == UNSET && bond.isAromatic()) bond.setOrder(SINGLE); } for (int v = available.nextSetBit(0); v >= 0; v = available.nextSetBit(v + 1)) { final int w = matching.other(v); final IBond bond = bonds.get(v, w); // sanity check, something wrong if this happens if (bond.getOrder().numeric() > 1) throw new CDKException( "Cannot assign Kekulé structure, non-sigma bond order has already been assigned?"); bond.setOrder(IBond.Order.DOUBLE); available.clear(w); } } /** * Determine the set of atoms that are available to have a double-bond. * * @param graph adjacent list representation * @param atoms array of atoms * @param bonds map of atom indices to bonds * @return atoms that can require a double-bond */ private static BitSet available(int[][] graph, IAtom[] atoms, EdgeToBondMap bonds) { final BitSet available = new BitSet(); // for all atoms, select those that require a double-bond ATOMS: for (int i = 0; i < atoms.length; i++) { final IAtom atom = atoms[i]; // preconditions if (atom.getAtomicNumber() == null) throw new IllegalArgumentException("atom " + (i + 1) + " had unset atomic number"); if (atom.getFormalCharge() == null) throw new IllegalArgumentException("atom " + (i + 1) + " had unset formal charge"); if (atom.getImplicitHydrogenCount() == null) throw new IllegalArgumentException("atom " + (i + 1) + " had unset implicit hydrogen count"); if (!atom.getFlag(ISAROMATIC)) continue; // count preexisting pi-bonds, a higher bond order causes a skip int nPiBonds = 0; for (final int w : graph[i]) { IBond.Order order = bonds.get(i, w).getOrder(); if (order == DOUBLE) { nPiBonds++; } else if (order.numeric() > 2) { continue ATOMS; } } // check if a pi bond can be assigned final int element = atom.getAtomicNumber(); final int charge = atom.getFormalCharge(); final int valence = graph[i].length + atom.getImplicitHydrogenCount() + nPiBonds; if (available(element, charge, valence)) { available.set(i); } } return available; } /** * Determine if the specified element with the provided charge and valance * requires a pi bond? * * @param element atomic number >= 0 * @param charge formal charge * @param valence bonded electrons * @return a double-bond is required */ private static boolean available(final int element, final int charge, final int valence) { // higher atomic number elements aren't likely to be found but // we have them for rare corner cases (tellurium). // Germanium, Silicon, Tin and Antimony are a bit bonkers... switch (Elements.ofNumber(element)) { case Boron: if (charge == 0 && valence <= 2) return true; if (charge == -1 && valence <= 3) return true; break; case Carbon: case Silicon: case Germanium: case Tin: if (charge == 0 && valence <= 3) return true; break; case Nitrogen: case Phosphorus: case Arsenic: case Antimony: if (charge == 0) return valence <= 2 || valence == 4; if (charge == 1) return valence <= 3; break; case Oxygen: case Sulfur: case Selenium: case Tellurium: // valence of three or five are really only for sulphur but // are applied generally to all of group eight for simplicity if (charge == 0) return valence <= 1 || valence == 3 || valence == 5; if (charge == 1) return valence <= 2 || valence == 4; break; } return false; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy