All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.chem.format.MDL_SDF Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.chem.format;


import java.io.BufferedReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import com.hfg.chem.Molecule;
import com.hfg.bio.seq.format.SeqIOException;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;

//------------------------------------------------------------------------------
/**
 Basic implementation of the MDL SDF format.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class MDL_SDF extends ReadableChemFormatBase { private static final Pattern ATTRIBUTE_HEADER_PATTERN = Pattern.compile(">\\s+<(\\S+)>"); //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public MDL_SDF() { super(null); } //--------------------------------------------------------------------------- public MDL_SDF(MoleculeFactory inMoleculeFactory) { super(inMoleculeFactory); } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- @Override public boolean hasJanusDelimiter() { return false; } //--------------------------------------------------------------------------- 
@Override public boolean isEndOfRecord(String inLine) { return inLine.trim().equals("$$$$"); } //--------------------------------------------------------------------------- @Override public T readRecord(BufferedReader inReader) throws ChemIOException { if (null == getMoleculeFactory()) { throw new SeqIOException("No BioSequence factory has been specified!"); } T mol; try { mol = getMoleculeFactory().createMoleculeObj(); StringBuilderPlus structure = new StringBuilderPlus(); StringBuilderPlus currentAttributeValue = new StringBuilderPlus().setDelimiter("\n"); String currentAttributeName = null; String line; boolean structureComplete = false; int lineCount = 1; while ((line = inReader.readLine()) != null) { if (! structureComplete) { // The might not be an molfile section Matcher m = ATTRIBUTE_HEADER_PATTERN.matcher(line); if (m.matches()) { structureComplete = true; if (structure.length() > 0) { setStructure(mol, structure.toString()); } } else { structure.appendln(line); if (line.trim().matches("M\\s+END")) { structureComplete = true; setStructure(mol, structure.toString()); continue; } } } if (structureComplete) { line = line.trim(); if (!StringUtil.isSet(line)) // A blank line is used to separate attributes { if (StringUtil.isSet(currentAttributeName)) { mol.setAttribute(currentAttributeName, (currentAttributeValue.length() > 0 ? currentAttributeValue.toString() : null)); currentAttributeName = null; } } else { Matcher m = ATTRIBUTE_HEADER_PATTERN.matcher(line); if (m.matches()) { if (currentAttributeName != null) { // The blank line between attributes may have been missing mol.setAttribute(currentAttributeName, (currentAttributeValue.length() > 0 ? 
currentAttributeValue.toString() : null)); } currentAttributeName = m.group(1); currentAttributeValue.setLength(0); } else if (currentAttributeName != null) { currentAttributeValue.delimitedAppend(line); } } } } } catch (Exception e) { if (e instanceof ChemIOException) { throw (ChemIOException) e; } else { throw new ChemIOException(e); } } return mol; } //--------------------------------------------------------------------------- private void setStructure(T inMolecule, String inStructure) { inMolecule.setAttribute("molfile", inStructure); String structureStringName = inStructure.trim().split("\n")[0].trim(); if (! structureStringName.equals("NO STRUCTURE")) { inMolecule.setName(structureStringName); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy