All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.cml.inchi.InChIGenerator Maven / Gradle / Ivy

The newest version!
/**
 *    Copyright 2011 Peter Murray-Rust et. al.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.xmlcml.cml.inchi;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import net.sf.jniinchi.INCHI_BOND_TYPE;
import net.sf.jniinchi.INCHI_PARITY;
import net.sf.jniinchi.INCHI_RADICAL;
import net.sf.jniinchi.INCHI_RET;
import net.sf.jniinchi.JniInchiAtom;
import net.sf.jniinchi.JniInchiBond;
import net.sf.jniinchi.JniInchiException;
import net.sf.jniinchi.JniInchiInput;
import net.sf.jniinchi.JniInchiOutput;
import net.sf.jniinchi.JniInchiStereo0D;
import net.sf.jniinchi.JniInchiWrapper;
import nu.xom.Text;

import org.xmlcml.cml.base.CMLElement;
import org.xmlcml.cml.base.CMLElements;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLAtomParity;
import org.xmlcml.cml.element.CMLBond;
import org.xmlcml.cml.element.CMLBondStereo;
import org.xmlcml.cml.element.CMLIdentifier;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.euclid.EuclidConstants;
import org.xmlcml.molutil.ChemicalElement.AS;

/**
 * 

* This class generates the IUPAC International Chemical Identifier (InChI) for * a CMLMolecule. It places calls to a JNI wrapper for the InChI C++ library. * *

* If the molecule has 3D coordinates for all of its atoms then they will be * used, otherwise 2D coordinates will be used if available. * * Bond stereochemistry and atom parities are supported :-) * If 3D coordinates are available then the bond stereochemistry and atom parities * should be ignored by InChI. *

* *

* *

* Not typesafe. *

* *

Example usage

* * // Generate factory - if native code does not load
* InChIGeneratorFactory factory = new InChIGeneratorFactory();
* // Get InChIGenerator
* InChIGenerator gen = factory.getInChIGenerator(molecule);
* // Optionally
* gen.setProcessingOptions(new ProcessingOptions[]{ProcessingOptions.USE_BONDS}); * gen.generate(); * INCHI_RET ret = gen.getReturnStatus();
* if (ret == INCHI_RET.WARNING) {
* // InChI generated, but with warning message
* System.out.println("InChI warning: " + gen.getMessage());
* } else if (ret != INCHI_RET.OKAY) {
* // InChI generation failed
* throw new RuntimeException("InChI failed: " + ret.toString()
* + " [" + gen.getMessage() + S_RSQUARE);
* }
*
* String inchi = gen.getInchi();
* String auxinfo = gen.getAuxInfo();
* *

* * @author Sam Adams * @author Jim Downing * @author Daniel Lowe (stereochemistry) * * @since 5.3 */ public class InChIGenerator implements EuclidConstants, InChIGeneratorInterface { protected JniInchiInput input; protected JniInchiOutput output; /** * Convention to use when constructing CMLIdentifier to hold InChI. */ protected static final String CML_INCHI_CONVENTION = "iupac:inchi"; // private static final Log LOG = LogFactory.getLog(InChIGenerator.class); private static final ProcessingOptions[] DEFAULT_PROCESSING_OPTIONS = new ProcessingOptions[] { ProcessingOptions.USE_BONDS }; /** * Molecule instance refers to. */ protected CMLMolecule molecule; private Problems preInChiProblem = null; private ProcessingOptions[] processingOptions = DEFAULT_PROCESSING_OPTIONS; private boolean generated; /** *

* Constructor. Generates InChI from CMLMolecule. * *

* Reads atoms, bonds etc from molecule and converts to format InChI library * requires, then calls the library. * * @param molecule * Molecule to generate InChI for. * @throws RuntimeException */ protected InChIGenerator(CMLMolecule molecule) { this.molecule = molecule; try { input = new JniInchiInput(""); } catch (JniInchiException e) { throw new RuntimeException(e); } } /** *

* Constructor. Generates InChI from CMLMolecule. * *

* Reads atoms, bonds etc from molecule and converts to format InChI library * requires, then calls the library. * * @param molecule * Molecule to generate InChI for. * @param options * Space delimited string of options to pass to InChI library. * Each option may optionally be preceded by a command line * switch (/ or -). * @throws RuntimeException */ protected InChIGenerator(CMLMolecule molecule, String options) { try { this.molecule = molecule; input = new JniInchiInput(options); } catch (JniInchiException jie) { throw new RuntimeException(jie); } } /** *

* Constructor. Generates InChI from CMLMolecule. * *

* Reads atoms, bonds etc from molecule and converts to format InChI library * requires, then calls the library. * * @param molecule * Molecule to generate InChI for. * @param options * List of INCHI_OPTION. * @throws RuntimeException */ protected InChIGenerator(CMLMolecule molecule, List options) { try { this.molecule = molecule; input = new JniInchiInput(options); } catch (JniInchiException jie) { throw new RuntimeException(jie); } } /** * Does the work of calling InChI. Can be called only once for each * generator. * * @throws RuntimeException * @throws IllegalStateException * if generation has already been done. * @since 5.4 */ public void generate() { if (generated) { throw new IllegalStateException("Generator cannot be reused"); } generateInchiFromCMLMolecule(molecule); generated = true; } /** * Called from the output getter methods for API back compatibility. This * means that errors that formerly throw a checked exception now throw a * Runtime. */ private void lazyGenerate() { if (!generated) { generate(); } } /** *

* Reads atoms, bonds etc from molecule and converts to format InChI library * requires, then makes call to library, generating InChI. * * @param molecule * @throws RuntimeException */ protected void generateInchiFromCMLMolecule(CMLMolecule molecule) { List atoms = molecule.getAtoms(); List bonds = molecule.getBonds(); // Create map of atom neighbours - required to calculate implicit // hydrogen counts Map> atomNeighbours = new HashMap>(); for (int i = 0; i < atoms.size(); i++) { atomNeighbours.put(atoms.get(i), new ArrayList(4)); } for (int i = 0; i < bonds.size(); i++) { CMLBond bond = (CMLBond) bonds.get(i); CMLAtom at0 = bond.getAtom(0); CMLAtom at1 = bond.getAtom(1); atomNeighbours.get(at0).add(at1); atomNeighbours.get(at1).add(at0); } // Check for 3d coordinates boolean all3d = true; boolean all2d = true; for (int i = 0; i < atoms.size(); i++) { CMLAtom atom = atoms.get(i); if (!atom.hasCoordinates(CMLElement.CoordinateType.CARTESIAN)) { all3d = false; } if (!atom.hasCoordinates(CMLElement.CoordinateType.TWOD)) { all2d = false; } } // Process atoms Map atomMap = new HashMap(); for (int i = 0; i < atoms.size(); i++) { CMLAtom atom = atoms.get(i); double x, y, z; if (all3d) { x = atom.getX3(); y = atom.getY3(); z = atom.getZ3(); } else if (all2d) { x = atom.getX2(); y = atom.getY2(); z = 0; } else { x = 0; y = 0; z = 0; } String el = atom.getElementType(); JniInchiAtom iatom = input.addAtom(new JniInchiAtom(x, y, z, el)); atomMap.put(atom, iatom); int charge = atom .getFormalCharge(CMLElement.FormalChargeControl.DEFAULT); if (charge != 0) { iatom.setCharge(charge); } try { int spinMultiplicity = atom.getSpinMultiplicity(); if (spinMultiplicity == 0) { iatom.setRadical(INCHI_RADICAL.NONE); } else if (spinMultiplicity == 1) { iatom.setRadical(INCHI_RADICAL.SINGLET); } else if (spinMultiplicity == 2) { iatom.setRadical(INCHI_RADICAL.DOUBLET); } else if (spinMultiplicity == 3) { iatom.setRadical(INCHI_RADICAL.TRIPLET); } else { throw new RuntimeException( "Failed to generate InChI: Unsupported spin multiplicity: " + spinMultiplicity); } } catch (RuntimeException cre) { // Spin multiplicity not set } try { int isotopeNumber = atom.getIsotopeNumber(); iatom.setIsotopicMass(isotopeNumber); } catch (RuntimeException cre) { // Isotope number not set } // Calculate implicit hydrogens int hcount; if (atom.getHydrogenCountAttribute() == null) { hcount = -1; } else { hcount = atom.getHydrogenCount(); // getHydrogenCount returns total hydrogens, InChI wants implict // so we must remove number of hydrogen ligands List neighbours = atomNeighbours.get(atom); for (int j = 0; j < neighbours.size(); j++) { CMLAtom neigh = neighbours.get(j); if (AS.H.equals(neigh.getElementType())) { hcount--; } } if (hcount < 0) { throw new RuntimeException( "Negative implicit hydrogen count: " + atom); } } iatom.setImplicitH(hcount); } for (CMLAtom atom : atoms) {//add atomParities CMLElements atomParities = atom.getAtomParityElements();//expect none or 1 for (CMLAtomParity atomParity : atomParities) { CMLAtom[] atomRefs4 = atomParity.getAtomRefs4(molecule); if (atomRefs4 != null){ INCHI_PARITY parity =INCHI_PARITY.UNKNOWN; if (atomParity.getIntegerValue() > 0){ parity =INCHI_PARITY.EVEN; } else if (atomParity.getIntegerValue() < 0){ parity =INCHI_PARITY.ODD; } input.addStereo0D(JniInchiStereo0D.createNewTetrahedralStereo0D(atomMap.get(atom), atomMap.get(atomRefs4[0]), atomMap.get(atomRefs4[1]), atomMap.get(atomRefs4[2]), atomMap.get(atomRefs4[3]), parity)); } } } if (optionsContains(ProcessingOptions.USE_BONDS)) { // Process bonds for (int i = 0; i < bonds.size(); i++) { CMLBond bond = (CMLBond) bonds.get(i); JniInchiAtom at0 = atomMap.get(bond.getAtom(0)); JniInchiAtom at1 = atomMap.get(bond.getAtom(1)); INCHI_BOND_TYPE order; String bo = bond.getOrder(); if (CMLBond.isSingle(bo) || bo == null) { order = INCHI_BOND_TYPE.SINGLE; } else if (CMLBond.isDouble(bo)) { order = INCHI_BOND_TYPE.DOUBLE; } else if (CMLBond.isTriple(bo)) { order = INCHI_BOND_TYPE.TRIPLE; } else if (CMLBond.AROMATIC.equals(bo)) { order = INCHI_BOND_TYPE.ALTERN; } else { System.out.println("Unsupported bond order: " + bo); preInChiProblem = Problems.BOND_ORDER; return; } input.addBond(new JniInchiBond(at0, at1, order)); } } for (CMLBond bond : bonds) {//add bondStereos CMLElements bondStereos = bond.getBondStereoElements();//expect none or 1 for (CMLBondStereo bondStereo : bondStereos) { String[] atomRefs4Ids = bondStereo.getAtomRefs4(); if(atomRefs4Ids==null){ continue; } List jniAtoms = new ArrayList(); for (String atomRefId : atomRefs4Ids) { jniAtoms.add(atomMap.get(molecule.getAtomById(atomRefId))); } if (jniAtoms.size()==4){ if (CMLBond.CIS.equals(bondStereo.getXMLContent())){ input.addStereo0D(JniInchiStereo0D.createNewDoublebondStereo0D(jniAtoms.get(0), jniAtoms.get(1), jniAtoms.get(2), jniAtoms.get(3), INCHI_PARITY.ODD)); } else if (CMLBond.TRANS.equals(bondStereo.getXMLContent())){ input.addStereo0D(JniInchiStereo0D.createNewDoublebondStereo0D(jniAtoms.get(0), jniAtoms.get(1), jniAtoms.get(2), jniAtoms.get(3), INCHI_PARITY.EVEN)); } } } } try { output = JniInchiWrapper.getInchi(input); } catch (JniInchiException jie) { throw new RuntimeException("Failed to generate InChI: " + jie.getMessage()); } } private boolean optionsContains(ProcessingOptions option) { return Arrays.asList(processingOptions).contains(option); } /** * Adds CMLIdentifier containing InChI to CMLMolecule. * * @throws RuntimeException */ public void appendToMolecule() { appendToElement(molecule); } /** * Adds CMLIdentifier containing InChI to specified element. * * @param element * @throws RuntimeException */ public void appendToElement(CMLElement element) { if (output.getInchi() == null) { throw new RuntimeException("Failed to generate InChI"); } CMLIdentifier identifier = new CMLIdentifier(); identifier.setConvention(CML_INCHI_CONVENTION); identifier.appendChild(new Text(output.getInchi())); element.appendChild(identifier); } /** * Gets return status from InChI process. OKAY and WARNING indicate InChI * has been generated, in all other cases InChI generation has failed. * * @return INCHI_RET */ public INCHI_RET getReturnStatus() { lazyGenerate(); return (output.getReturnStatus()); } public boolean isOK() { INCHI_RET ret = getReturnStatus(); if (INCHI_RET.OKAY.equals(ret) || INCHI_RET.WARNING.equals(ret)){ return true; } return false; } /** * Gets generated InChI string. * * @return string */ public String getInchi() { lazyGenerate(); return (output.getInchi()); } /** * Gets generated InChI string. * * @return string */ public String getAuxInfo() { lazyGenerate(); return (output.getAuxInfo()); } /** * Gets generated (error/warning) messages. * * @return string */ public String getMessage() { lazyGenerate(); return (output.getMessage()); } /** * Gets generated log. * * @return string */ public String getLog() { lazyGenerate(); return (output.getLog()); } /** * Get the array (for convenience) of processing options used by this * generator. * * @return array * @since 5.4 */ public ProcessingOptions[] getProcessingOptions() { return processingOptions; } /** * Set the processing options for this generator to use. * * @param processingOptions * @since 5.4 */ public void setProcessingOptions(ProcessingOptions[] processingOptions) { this.processingOptions = processingOptions; } /** * Has this generator been used (or is it safe to call generate?). * * @return true if so * @since 5.4 */ public boolean isGenerated() { return generated; } /** * If a problem occurred before we got to InChI it will be here, else this * will return null. * * @return The problem * @since 5.4 */ public Problems getPreInChiProblem() { return preInChiProblem; } }