All downloads are free. The search and download features use the official Maven repository.

com.hfg.chem.format.MDL_SDF Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.chem.format;


import java.io.BufferedReader;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.chem.Atom;
import com.hfg.chem.CovalentBond;
import com.hfg.chem.Element;
import com.hfg.chem.Molecule;
import com.hfg.bio.seq.format.SeqIOException;
import com.hfg.chem.ValenceModel;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;

//------------------------------------------------------------------------------
/**
 Basic implementation of the MDL SDF format.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class MDL_SDF extends ReadableChemFormatBase { private static final Pattern ATTRIBUTE_HEADER_PATTERN = Pattern.compile(">\\s+<(\\S+)>"); // TODO: Add a way to specify the valence model or whether the MDL file uses the pre or post 2017 valence models private ValenceModel mValenceModel = ValenceModel.MDL_2017; private List mMolLines = new ArrayList<>(50); private Integer mAtomCount; private Integer mBondCount; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public MDL_SDF() { super(null); } //--------------------------------------------------------------------------- public MDL_SDF(MoleculeFactory inMoleculeFactory) { super(inMoleculeFactory); } //########################################################################### // PUBLIC METHODS 
//########################################################################### //--------------------------------------------------------------------------- @Override public boolean hasJanusDelimiter() { return false; } //--------------------------------------------------------------------------- @Override public boolean isEndOfRecord(String inLine) { return inLine.trim().equals("$$$$"); } //--------------------------------------------------------------------------- @Override public synchronized T readRecord(BufferedReader inReader) throws ChemIOException { if (null == getMoleculeFactory()) { throw new SeqIOException("No BioSequence factory has been specified!"); } T mol; try { mol = getMoleculeFactory().createMoleculeObj(); mMolLines.clear(); mAtomCount = null; mBondCount = null; StringBuilderPlus currentAttributeValue = new StringBuilderPlus().setDelimiter("\n"); String currentAttributeName = null; String line; boolean structureComplete = false; int lineCount = 1; while ((line = inReader.readLine()) != null) { if (! structureComplete) { // The might not be an molfile section Matcher m = ATTRIBUTE_HEADER_PATTERN.matcher(line); if (m.matches()) { structureComplete = true; if (mMolLines.size() > 0) { setStructure(mol, mMolLines); } } else { mMolLines.add(line); if (line.trim().matches("M\\s+END")) { structureComplete = true; setStructure(mol, mMolLines); continue; } } } if (structureComplete) { line = line.trim(); // The molecule record can optionally be followed by attributes if (!StringUtil.isSet(line)) // A blank line is used to separate attributes { if (StringUtil.isSet(currentAttributeName)) { mol.setAttribute(currentAttributeName, (currentAttributeValue.length() > 0 ? 
currentAttributeValue.toString() : null)); currentAttributeName = null; } } else { Matcher m = ATTRIBUTE_HEADER_PATTERN.matcher(line); if (m.matches()) { if (currentAttributeName != null) { // The blank line between attributes may have been missing mol.setAttribute(currentAttributeName, (currentAttributeValue.length() > 0 ? currentAttributeValue.toString() : null)); } currentAttributeName = m.group(1); currentAttributeValue.setLength(0); } else if (currentAttributeName != null) { currentAttributeValue.delimitedAppend(line); } } } } } catch (Exception e) { if (e instanceof ChemIOException) { throw (ChemIOException) e; } else { throw new ChemIOException(e); } } return mol; } //--------------------------------------------------------------------------- private void setStructure(T inMolecule, List inMolLines) { // inMolecule.setAttribute("molfile", inMolLines); parseMolHeader(inMolecule, inMolLines); // Examine the Counts line // Ex: ' 9 8 0 0 0 0 0 0 0999 V2000' parseCountsLine(inMolecule, inMolLines); parseAtomsBlock(inMolecule, inMolLines); parseBondsBlock(inMolecule, inMolLines); parseProperties(inMolecule, inMolLines); if (inMolecule.getAtoms() != null) { // Set implicit hydrogen counts for (Atom molAtom : new ArrayList<>(inMolecule.getAtoms())) { int implicitHCount = mValenceModel.calculateImplicitHCount(molAtom); if (implicitHCount > 0) { for (int i = 0; i < implicitHCount; i++) { Atom hAtom = new Atom(Element.HYDROGEN); inMolecule.addAtom(hAtom); CovalentBond bond = new CovalentBond(molAtom, hAtom); molAtom.addBond(bond); hAtom.addBond(bond); } } } } } //--------------------------------------------------------------------------- private void parseMolHeader(T inMolecule, List inMolLines) { if (inMolLines.size() >= 3) { // The first 3 line constitute the record header // The first line may contain the name of the molecule String structureStringName = inMolLines.get(0).trim(); if (StringUtil.isSet(structureStringName) && ! 
structureStringName.equals("NO STRUCTURE")) { inMolecule.setName(structureStringName); } // Line 2 optionally contains the details of the software used to generate the record // Line 3 contains an optional comment } } //--------------------------------------------------------------------------- private void parseCountsLine(T inMolecule, List inMolLines) { // The 4th line may contain the Counts line. // The Counts line is composed of 12 fixed-length fields - the first eleven // are 3 characters long, and the last 6 characters long. // The first two fields are the number of atoms and bonds respectively. // Ex: ' 9 8 0 0 0 0 0 0 0999 V2000' if (inMolLines.size() >= 4) { String countsLine = inMolLines.get(3); if (StringUtil.isSet(countsLine)) { if (countsLine.length() != 39) { throw new ChemIOException("Unexpected Counts line length for " + StringUtil.singleQuote(countsLine) + "!"); } // The first field is the number of atoms mAtomCount = Integer.parseInt(countsLine.substring(0, 3).trim()); // The first field is the number of bonds mBondCount = Integer.parseInt(countsLine.substring(3, 6).trim()); // TODO: Chirality should be the 5th field (but 4th could be empty?) 
} } } //--------------------------------------------------------------------------- private void parseAtomsBlock(T inMolecule, List inMolLines) { // The 5th line may be the first atom line // Ex: ' 1.9050 -0.7932 0.0000 C 0 0 0 0 0 0 0 0 0 0 0 0' if (mAtomCount != null && inMolLines.size() >= 4 + mAtomCount) { for (int i = 0; i < mAtomCount; i++) { String atomLine = inMolLines.get(4 + i); String[] fields = atomLine.trim().split("\\s+"); Element element = Element.valueOf(fields[3]); Atom atom = new Atom(element) .setXCoordinate(Float.valueOf(fields[0])) .setYCoordinate(Float.valueOf(fields[1])) .setZCoordinate(Float.valueOf(fields[2])); int chargeValue = Integer.parseInt(fields[5]); if (chargeValue != 0) { int charge = 0; switch (chargeValue) { case 7: charge = -3; break; case 6: charge = -2; break; case 5: charge = -1; break; case 3: charge = 1; break; case 2: charge = 2; break; case 1: charge = 3; break; // TODO: 4 ==> Doublet radical } atom.setCharge(charge); } inMolecule.addAtom(atom); } } } //--------------------------------------------------------------------------- private void parseBondsBlock(T inMolecule, List inMolLines) { // Bond lines may follow the atom lines // Ex: ' 2 1 1 0 0 0 0' if (mBondCount != null && inMolLines.size() >= 4 + mAtomCount + mBondCount) { List atoms = inMolecule.getAtoms(); for (int i = 0; i < mBondCount; i++) { String bondLine = inMolLines.get(4 + mAtomCount + i); String[] fields = bondLine.trim().split("\\s+"); int atom1Num = Integer.parseInt(fields[0]); int atom2Num = Integer.parseInt(fields[1]); Atom atom1 = atoms.get(atom1Num - 1); Atom atom2 = atoms.get(atom2Num - 1); CovalentBond bond = new CovalentBond(atom1, atom2); int bondOrder = Integer.parseInt(fields[2]); if (bondOrder <= 3) { bond.setBondOrder(bondOrder); } else if (bondOrder == 4) { bond.setBondOrder(1); atom1.setIsAromatic(true); atom2.setIsAromatic(true); bond.setIsAromatic(); } atom1.addBond(bond); atom2.addBond(bond); } } } 
//--------------------------------------------------------------------------- private void parseProperties(T inMolecule, List inMolLines) { // Property lines may follow the Bonds block and will start with an 'M' // Charge Ex: 'M CHG 1 1 2' // Isotope Ex: 'M ISO 1 1 2' if (mAtomCount != null && mBondCount != null && inMolLines.size() >= 4 + mAtomCount + mBondCount) { for (int i = 4 + mAtomCount + mBondCount; i < inMolLines.size(); i++) { String propertyLine = inMolLines.get(i); if (propertyLine.startsWith("M CHG")) { // Charge // The 1st field specifies the number of defined charges (up to 8). // Ea. defined charge consists of the atom # (1-based) and a charge String[] fields = propertyLine.split("\\s+"); for (int index = 3; index < fields.length - 1; index+=2) { int atomNum = Integer.parseInt(fields[index]); int charge = Integer.parseInt(fields[index + 1]); Atom atom = inMolecule.getAtoms().get(atomNum - 1); atom.setCharge(charge); } } else if (propertyLine.startsWith("M ISO")) { // Isotope // TODO } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy