// com.hfg.chem.format.MDL_SDF Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of com_hfg Show documentation
// Show all versions of com_hfg Show documentation
// com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.chem.format;
import java.io.BufferedReader;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import com.hfg.chem.Molecule;
import com.hfg.bio.seq.format.SeqIOException;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
//------------------------------------------------------------------------------
/**
Basic implementation of the MDL SDF format.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class MDL_SDF extends ReadableChemFormatBase
{
   // NOTE(review): the type variable T used throughout this class is not declared
   // in the visible class header. The generic parameters were presumably lost when
   // this source was extracted - confirm the header against the original file.

   /** Matches a data header line of the form {@code >  <NAME>}; group 1 is the attribute name. */
   private static final Pattern ATTRIBUTE_HEADER_PATTERN = Pattern.compile(">\\s+<(\\S+)>");

   /** Matches the (trimmed) line that terminates the molfile section. Compiled once instead of per line. */
   private static final Pattern MOLFILE_END_PATTERN = Pattern.compile("M\\s+END");

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates a format object without a molecule factory.
    {@link #readRecord(BufferedReader)} throws if it is called while no factory is available.
    */
   public MDL_SDF()
   {
      super(null);
   }

   //---------------------------------------------------------------------------
   /**
    Creates a format object that builds molecules via the specified factory.
    @param inMoleculeFactory the factory passed through to the base class
    */
   public MDL_SDF(MoleculeFactory inMoleculeFactory)
   {
      super(inMoleculeFactory);
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Reports whether this format uses a Janus delimiter.
    @return always false
    */
   @Override
   public boolean hasJanusDelimiter()
   {
      return false;
   }

   //---------------------------------------------------------------------------
   /**
    Tests whether the specified line is the record terminator.
    @param inLine the line to test; must not be null
    @return true if the trimmed line is exactly {@code $$$$}
    */
   @Override
   public boolean isEndOfRecord(String inLine)
   {
      return inLine.trim().equals("$$$$");
   }

   //---------------------------------------------------------------------------
   /**
    Reads a single record from the specified reader.
    <p>
    Lines up to and including the {@code M  END} line are collected as the structure
    and handed to setStructure(). If an attribute header is encountered first, whatever
    structure text has been collected so far (if any) is used instead. The remaining
    lines are parsed as attributes: a header line names the attribute, the following
    non-blank lines (trimmed, joined with newlines) form its value, and a blank line,
    the next header, the record terminator, or the end of the input closes it.
    An attribute with no value lines is stored with a null value.
    </p>
    @param inReader the source of the record's lines
    @return the molecule created by the molecule factory and populated from the record
    @throws ChemIOException if reading or parsing fails; other exceptions raised inside
            the parsing loop are wrapped in a ChemIOException
    */
   @Override
   public T readRecord(BufferedReader inReader) throws ChemIOException
   {
      if (null == getMoleculeFactory())
      {
         // NOTE(review): the SeqIOException type is retained so existing callers are unaffected;
         // ChemIOException looks like the more natural type here - confirm before changing.
         throw new SeqIOException("No Molecule factory has been specified!");
      }

      T mol;
      try
      {
         mol = getMoleculeFactory().createMoleculeObj();

         StringBuilderPlus structure = new StringBuilderPlus();
         StringBuilderPlus currentAttributeValue = new StringBuilderPlus().setDelimiter("\n");
         String currentAttributeName = null;
         boolean structureComplete = false;

         String line;
         while ((line = inReader.readLine()) != null)
         {
            // Never let the record terminator leak into the structure or an attribute value.
            if (isEndOfRecord(line))
            {
               break;
            }

            String trimmedLine = line.trim();

            if (! structureComplete)
            {
               // There might not be a molfile section
               if (ATTRIBUTE_HEADER_PATTERN.matcher(trimmedLine).matches())
               {
                  structureComplete = true;
                  if (structure.length() > 0)
                  {
                     setStructure(mol, structure.toString());
                  }
                  // Fall through so that this header line is processed as an attribute below.
               }
               else
               {
                  // The untrimmed line is kept so the structure text is stored as it was read.
                  structure.appendln(line);
                  if (MOLFILE_END_PATTERN.matcher(trimmedLine).matches())
                  {
                     structureComplete = true;
                     setStructure(mol, structure.toString());
                  }

                  continue;
               }
            }

            if (! StringUtil.isSet(trimmedLine)) // A blank line is used to separate attributes
            {
               if (StringUtil.isSet(currentAttributeName))
               {
                  storeAttribute(mol, currentAttributeName, currentAttributeValue);
                  currentAttributeName = null;
               }
            }
            else
            {
               Matcher m = ATTRIBUTE_HEADER_PATTERN.matcher(trimmedLine);
               if (m.matches())
               {
                  if (currentAttributeName != null)
                  {
                     // The blank line between attributes may have been missing
                     storeAttribute(mol, currentAttributeName, currentAttributeValue);
                  }

                  currentAttributeName = m.group(1);
                  currentAttributeValue.setLength(0);
               }
               else if (currentAttributeName != null)
               {
                  currentAttributeValue.delimitedAppend(trimmedLine);
               }
            }
         }

         // The final attribute may not have been followed by a blank line before the
         // record terminator or the end of the input; without this it would be lost.
         if (currentAttributeName != null)
         {
            storeAttribute(mol, currentAttributeName, currentAttributeValue);
         }
      }
      catch (ChemIOException e)
      {
         throw e;
      }
      catch (Exception e)
      {
         throw new ChemIOException(e);
      }

      return mol;
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Stores the collected attribute on the molecule, using null when no value lines were collected.
    */
   private void storeAttribute(T inMolecule, String inName, StringBuilderPlus inValue)
   {
      inMolecule.setAttribute(inName, (inValue.length() > 0 ? inValue.toString() : null));
   }

   //---------------------------------------------------------------------------
   /**
    Stores the structure text on the molecule under the "molfile" attribute and uses
    the first line of the trimmed structure text as the molecule's name, unless that
    line is "NO STRUCTURE".
    */
   private void setStructure(T inMolecule, String inStructure)
   {
      inMolecule.setAttribute("molfile", inStructure);

      // NOTE(review): trimming the whole structure first means that a blank first line
      // is skipped and the next non-blank line becomes the name - confirm this is intended.
      String structureStringName = inStructure.trim().split("\n")[0].trim();
      if (! structureStringName.equals("NO STRUCTURE"))
      {
         inMolecule.setName(structureStringName);
      }
   }
}