org.openscience.cdk.io.MDLV2000Reader Maven / Gradle / Ivy
/* Copyright (C) 1997-2007 Christoph Steinbeck
* 2010 Egon Willighagen
* 2014 Mark B Vine (orcid:0000-0002-7794-0426)
*
* Contact: [email protected]
*
* This program is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation; either version 2.1
* of the License, or (at your option) any later version.
* All we ask is that proper credit is given for our work, which includes
* - but is not limited to - adding the above copyright notice to the beginning
* of your source code files, and to any copyright notice that you may distribute
* with programs based on this work.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.openscience.cdk.io;
import com.google.common.collect.ImmutableSet;
import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IAtomContainerSet;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IChemSequence;
import org.openscience.cdk.interfaces.IIsotope;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.ISingleElectron;
import org.openscience.cdk.interfaces.IStereoElement;
import org.openscience.cdk.interfaces.ITetrahedralChirality;
import org.openscience.cdk.interfaces.ITetrahedralChirality.Stereo;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV2000Format;
import org.openscience.cdk.io.setting.BooleanIOSetting;
import org.openscience.cdk.io.setting.IOSetting;
import org.openscience.cdk.isomorphism.matchers.CTFileQueryBond;
import org.openscience.cdk.isomorphism.matchers.QueryAtomContainer;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupBracket;
import org.openscience.cdk.sgroup.SgroupKey;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.stereo.StereoElementFactory;
import org.openscience.cdk.stereo.TetrahedralChirality;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.periodictable.PeriodicTable;
import javax.vecmath.Point2d;
import javax.vecmath.Point3d;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import static org.openscience.cdk.io.MDLV2000Writer.SPIN_MULTIPLICITY;
/**
* Reads content from MDL molfiles and SD files. It can read a {@link
* IAtomContainer} or {@link IChemModel} from an MDL molfile, and a {@link
* IChemFile} from a SD file, with a {@link IChemSequence} of {@link
* IChemModel}'s, where each IChemModel will contain one {@link IAtomContainer}.
*
* From the Atom block it reads atomic coordinates, element types and formal
* charges. From the Bond block it reads the bonds and the orders. Additionally,
* it reads 'M CHG', 'G ', 'M RAD' and 'M ISO' lines from the property
* block.
*
* <p>If all z coordinates are 0.0, then the xy coordinates are taken as 2D,
* otherwise the coordinates are read as 3D.
*
* <p>The title of the MOL file is read and can be retrieved with:
* <pre>
* molecule.getProperty(CDKConstants.TITLE);
* </pre>
*
* <p>RGroups which are saved in the MDL molfile as R#, are renamed according to
* their appearance, e.g. the first R# is named R1. With PseudoAtom.getLabel()
* "R1" is returned (instead of R#). This is introduced due to the SAR table
* generation procedure of Scitegic's PipelinePilot.
*
* @author steinbeck
* @author Egon Willighagen
* @cdk.module io
* @cdk.githash
* @cdk.iooptions
* @cdk.created 2000-10-02
* @cdk.keyword file format, MDL molfile
* @cdk.keyword file format, SDF
* @cdk.bug 1587283
*/
public class MDLV2000Reader extends DefaultChemObjectReader {
/** Buffered source the molfile / SD file records are read from, one line at a time. */
BufferedReader input = null;
/** Logging tool shared by all instances of this reader. */
private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLV2000Reader.class);
/** IO setting "ForceReadAs3DCoordinates": should coordinates always be read as 3D? */
private BooleanIOSetting forceReadAs3DCoords;
/** IO setting "InterpretHydrogenIsotopes": should D and T be interpreted as hydrogen isotopes? */
private BooleanIOSetting interpretHydrogenIsotopes;
/** IO setting "AddStereoElements": assign stereo configurations utilising 2D/3D coordinates. */
private BooleanIOSetting addStereoElements;
// Pattern to remove trailing space (String.trim() will remove leading space, which we don't want)
private static final Pattern TRAILING_SPACE = Pattern.compile("\\s+$");
/** Delimits Structure-Data (SD) Files. */
private static final String RECORD_DELIMITER = "$$$$";
/**
 * @deprecated Incorrect spelling, use {@link #PSEUDO_LABELS}
 */
@Deprecated
private static final Set<String> PSUEDO_LABELS = ImmutableSet.<String> builder().add("*").add("A").add("Q")
        .add("L").add("LP").add("R") // XXX: not in spec
        .add("R#").build();
/** Valid pseudo labels. */
private static final Set<String> PSEUDO_LABELS = ImmutableSet.<String> builder().add("*").add("A").add("Q")
        .add("L").add("LP").add("R") // XXX: not in spec
        .add("R#").build();
/**
 * Constructs a reader over an empty input. A real source can be supplied
 * later through one of the {@code setReader} methods.
 */
public MDLV2000Reader() {
    this(new StringReader(""));
}
/**
 * Constructs a reader that takes its content from the given stream. The
 * stream is wrapped in an {@link InputStreamReader} and the reading mode
 * defaults to {@code Mode.RELAXED}.
 *
 * @param in The InputStream to read from
 */
public MDLV2000Reader(InputStream in) {
    this(new InputStreamReader(in));
}
/**
 * Constructs a reader that takes its content from the given stream, using
 * the given reading mode.
 *
 * @param in   The InputStream to read from
 * @param mode the reading mode to use
 */
public MDLV2000Reader(InputStream in, Mode mode) {
    this(new InputStreamReader(in), mode);
}
/**
 * Constructs a reader that takes its content from the given character
 * source; the reading mode defaults to {@code Mode.RELAXED}.
 *
 * @param in The Reader to read from
 */
public MDLV2000Reader(Reader in) {
    this(in, Mode.RELAXED);
}
/**
 * Constructs a reader that takes its content from the given character
 * source, using the given reading mode. The source is wrapped in a
 * {@link BufferedReader}, the IO settings are registered and the mode is
 * stored on the superclass.
 *
 * @param in   The Reader to read from
 * @param mode the reading mode to use
 */
public MDLV2000Reader(Reader in, Mode mode) {
    this.input = new BufferedReader(in);
    initIOSettings();
    super.mode = mode;
}
/**
 * Reports the resource format handled by this reader.
 *
 * @return the shared {@link MDLV2000Format} instance
 */
@Override
public IResourceFormat getFormat() {
    final IResourceFormat format = MDLV2000Format.getInstance();
    return format;
}
/**
 * Replaces the source this reader takes its content from. A source that is
 * already a {@link BufferedReader} is used directly, anything else is
 * wrapped in a new {@link BufferedReader}.
 *
 * @param input the new character source
 * @throws CDKException declared by the overridden method
 */
@Override
public void setReader(Reader input) throws CDKException {
    this.input = (input instanceof BufferedReader) ? (BufferedReader) input : new BufferedReader(input);
}
/**
 * Replaces the source this reader takes its content from with the given
 * stream, wrapped in an {@link InputStreamReader}.
 *
 * @param input the new byte source
 * @throws CDKException declared by the overridden method
 */
@Override
public void setReader(InputStream input) throws CDKException {
    final Reader wrapped = new InputStreamReader(input);
    setReader(wrapped);
}
@SuppressWarnings("unchecked")
@Override
public boolean accepts(Class extends IChemObject> classObject) {
Class>[] interfaces = classObject.getInterfaces();
for (Class> anInterface : interfaces) {
if (IChemFile.class.equals(anInterface)) return true;
if (IChemModel.class.equals(anInterface)) return true;
if (IAtomContainer.class.equals(anInterface)) return true;
}
if (IAtomContainer.class.equals(classObject)) return true;
if (IChemFile.class.equals(classObject)) return true;
if (IChemModel.class.equals(classObject)) return true;
Class superClass = classObject.getSuperclass();
return superClass != null && this.accepts(superClass);
}
/**
 * Takes an object which subclasses IChemObject, e.g. Molecule, and will
 * read this (from file, database, internet etc). If the specific
 * implementation does not support a specific IChemObject it will throw an
 * Exception.
 *
 * The object is dispatched on its runtime type: an {@link IAtomContainer}
 * receives a single structure, an {@link IChemFile} receives every record
 * of the input and an {@link IChemModel} receives a single structure
 * wrapped in a molecule set.
 *
 * @param object The object that subclasses IChemObject
 * @param <T>    the type of object being read
 * @return The IChemObject read
 * @throws CDKException if the object type is not supported or the input
 *                      could not be parsed
 */
@SuppressWarnings("unchecked")
@Override
public <T extends IChemObject> T read(T object) throws CDKException {
    if (object instanceof IAtomContainer) {
        return (T) readAtomContainer((IAtomContainer) object);
    } else if (object instanceof IChemFile) {
        return (T) readChemFile((IChemFile) object);
    } else if (object instanceof IChemModel) {
        return (T) readChemModel((IChemModel) object);
    } else {
        throw new CDKException("Only supported are ChemFile and Molecule.");
    }
}
/**
 * Reads a single structure from the input and stores it in the molecule
 * set of the given chem model. A molecule set is created when the model
 * does not have one yet; nothing is added when no structure could be read.
 *
 * @param chemModel the model to fill
 * @return the same model instance
 * @throws CDKException if the structure could not be parsed
 */
private IChemModel readChemModel(IChemModel chemModel) throws CDKException {
    IAtomContainerSet containers = chemModel.getMoleculeSet();
    if (containers == null) {
        containers = chemModel.getBuilder().newInstance(IAtomContainerSet.class);
    }
    final IAtomContainer blank = chemModel.getBuilder().newInstance(IAtomContainer.class);
    final IAtomContainer parsed = readAtomContainer(blank);
    if (parsed != null) {
        containers.addAtomContainer(parsed);
    }
    chemModel.setMoleculeSet(containers);
    return chemModel;
}
/**
 * Read a ChemFile from a file in MDL SDF format. Structures are read until
 * the input is exhausted, each one is wrapped in its own chem model and
 * appended to a single sequence. The input is closed once all structures
 * have been read.
 *
 * @param chemFile the chem file to add the sequence to
 * @return The ChemFile that was read from the MDL file.
 * @throws CDKException if a structure could not be parsed or the input
 *                      could not be closed
 */
private IChemFile readChemFile(IChemFile chemFile) throws CDKException {
    final IChemObjectBuilder bldr = chemFile.getBuilder();
    final IChemSequence models = bldr.newInstance(IChemSequence.class);
    try {
        IAtomContainer container = readAtomContainer(bldr.newInstance(IAtomContainer.class));
        while (container != null) {
            models.addChemModel(newModel(container));
            container = readAtomContainer(bldr.newInstance(IAtomContainer.class));
        }
    } catch (IllegalArgumentException exception) {
        // CDKExceptions propagate untouched, only illegal arguments are wrapped
        final String error = "Error while parsing SDF";
        logger.error(error);
        logger.debug(exception);
        throw new CDKException(error, exception);
    }
    try {
        input.close();
    } catch (Exception exc) {
        final String error = "Error while closing file: " + exc.getMessage();
        logger.error(error);
        throw new CDKException(error, exc);
    }
    chemFile.addChemSequence(models);
    return chemFile;
}
/**
 * Wraps a single {@link IAtomContainer} in a new chem model. The model and
 * its molecule set are created with the builder of the container.
 *
 * @param container the container to create the model for
 * @return a new {@link IChemModel} whose molecule set holds the container
 * @throws NullPointerException if the container is null
 */
private static IChemModel newModel(final IAtomContainer container) {
    if (container == null) {
        throw new NullPointerException("cannot create chem model for a null container");
    }
    final IChemObjectBuilder bldr = container.getBuilder();
    final IChemModel wrapped = bldr.newInstance(IChemModel.class);
    final IAtomContainerSet single = bldr.newInstance(IAtomContainerSet.class);
    single.addAtomContainer(container);
    wrapped.setMoleculeSet(single);
    return wrapped;
}
/**
 * Read an IAtomContainer from a file in MDL sd format.
 *
 * The three header lines, the counts line, the atom block, the bond block,
 * the property block and any trailing SD data are consumed from the input.
 * When a query bond is encountered the result is a new {@link
 * QueryAtomContainer} rather than the supplied container.
 *
 * @param molecule the container to read into
 * @return The Molecule that was read from the MDL file; {@code null} when
 *         the input is exhausted (or ends inside the header); the supplied
 *         container, untouched, when the record consists of a record
 *         delimiter only.
 * @throws CDKException if the record is malformed or is a V3000 record
 */
private IAtomContainer readAtomContainer(IAtomContainer molecule) throws CDKException {
    IAtomContainer outputContainer = null;
    Map<IAtom, Integer> parities = new HashMap<>();
    int linecount = 0;
    String title = null;
    String remark = null;
    String line = "";
    try {
        // header line 1: title
        line = input.readLine();
        linecount++;
        if (line == null) {
            return null;
        }
        if (line.startsWith(RECORD_DELIMITER)) {
            return molecule;
        }
        if (line.length() > 0) {
            title = line;
        }
        // header line 2: content is not used
        line = input.readLine();
        linecount++;
        if (line == null) {
            // input ended inside the header (e.g. a trailing blank line),
            // report it and treat it as the end of the input
            handleError("Unexpected end of input", linecount, 0, 0);
            return null;
        }
        // header line 3: remark
        line = input.readLine();
        linecount++;
        if (line == null) {
            handleError("Unexpected end of input", linecount, 0, 0);
            return null;
        }
        if (line.length() > 0) {
            remark = line;
        }
        // counts line
        line = input.readLine();
        linecount++;
        if (line == null) {
            handleError("Unexpected end of input", linecount, 0, 0);
            return null;
        }
        // if the line is empty we have a problem - either a malformed
        // molecule entry or just extra new lines at the end of the file
        if (line.length() == 0) {
            handleError("Unexpected empty line", linecount, 0, 0);
            // read till the next $$$$ or EOF
            while (true) {
                line = input.readLine();
                linecount++;
                if (line == null) {
                    return null;
                }
                if (line.startsWith(RECORD_DELIMITER)) {
                    return molecule; // an empty molecule
                }
            }
        }
        final CTabVersion version = CTabVersion.ofHeader(line);
        // check the CT block version
        if (version == CTabVersion.V3000) {
            handleError("This file must be read with the MDLV3000Reader.");
            // even if relaxed we can't read V3000 using the V2000 parser
            throw new CDKException("This file must be read with the MDLV3000Reader.");
        } else if (version == CTabVersion.UNSPECIFIED) {
            handleError("This file must be read with the MDLReader.");
            // okay to read in relaxed mode
        }
        int nAtoms = readMolfileInt(line, 0);
        int nBonds = readMolfileInt(line, 3);
        final IAtom[] atoms = new IAtom[nAtoms];
        final IBond[] bonds = new IBond[nBonds];
        // used for applying the MDL valence model
        int[] explicitValence = new int[nAtoms];
        boolean hasX = false, hasY = false, hasZ = false;
        for (int i = 0; i < nAtoms; i++) {
            line = input.readLine();
            linecount++;
            if (line == null)
                throw new CDKException("Unexpected end of input in atom block, expected " + nAtoms + " atoms");
            final IAtom atom = readAtomFast(line, molecule.getBuilder(), parities, linecount);
            atoms[i] = atom;
            Point3d p = atom.getPoint3d();
            hasX = hasX || p.x != 0d;
            hasY = hasY || p.y != 0d;
            hasZ = hasZ || p.z != 0d;
        }
        // no coordinates at all: drop them (a lone atom is placed at the 2D origin)
        if (!hasX && !hasY && !hasZ) {
            if (nAtoms == 1) {
                atoms[0].setPoint2d(new Point2d(0, 0));
            } else {
                for (IAtom atomToUpdate : atoms) {
                    atomToUpdate.setPoint3d(null);
                }
            }
        } else if (!hasZ) {
            // convert to 2D, if all z coordinates are zero
            if (!forceReadAs3DCoords.isSet()) {
                for (IAtom atomToUpdate : atoms) {
                    Point3d p3d = atomToUpdate.getPoint3d();
                    if (p3d != null) {
                        atomToUpdate.setPoint2d(new Point2d(p3d.x, p3d.y));
                        atomToUpdate.setPoint3d(null);
                    }
                }
            }
        }
        boolean hasQueryBonds = false;
        for (int i = 0; i < nBonds; i++) {
            line = input.readLine();
            linecount++;
            if (line == null)
                throw new CDKException("Unexpected end of input in bond block, expected " + nBonds + " bonds");
            bonds[i] = readBondFast(line, molecule.getBuilder(), atoms, explicitValence, linecount);
            hasQueryBonds = hasQueryBonds
                    || (bonds[i].getOrder() == IBond.Order.UNSET && !bonds[i].getFlag(CDKConstants.ISAROMATIC));
        }
        if (!hasQueryBonds)
            outputContainer = molecule;
        else
            outputContainer = new QueryAtomContainer(molecule.getBuilder());
        outputContainer.setProperty(CDKConstants.TITLE, title);
        outputContainer.setProperty(CDKConstants.REMARK, remark);
        // if the container is empty we can simply set the atoms/bonds
        // otherwise we add them to the end
        if (outputContainer.isEmpty()) {
            outputContainer.setAtoms(atoms);
            outputContainer.setBonds(bonds);
        } else {
            for (IAtom atom : atoms)
                outputContainer.addAtom(atom);
            for (IBond bond : bonds)
                outputContainer.addBond(bond);
        }
        // create 0D stereochemistry, this must use the output container as
        // it is the one holding the atoms and bonds that were just read
        Parities:
        for (Map.Entry<IAtom, Integer> e : parities.entrySet()) {
            int parity = e.getValue();
            if (parity != 1 && parity != 2)
                continue; // 3=unspec
            int idx = 0;
            IAtom focus = e.getKey();
            IAtom[] carriers = new IAtom[4];
            int hidx = -1;
            for (IAtom nbr : outputContainer.getConnectedAtomsList(focus)) {
                if (idx == 4)
                    continue Parities; // too many neighbors
                // the atomic number may be unset for pseudo atoms, which are not hydrogens
                Integer nbrElement = nbr.getAtomicNumber();
                if (nbrElement != null && nbrElement == 1) {
                    if (hidx >= 0)
                        continue Parities;
                    hidx = idx;
                }
                carriers[idx++] = nbr;
            }
            // too few neighbors, or already have a hydrogen defined
            if (idx < 3 || idx < 4 && hidx >= 0)
                continue;
            if (idx == 3)
                carriers[idx++] = focus;
            if (idx == 4) {
                Stereo winding = parity == 1 ? Stereo.CLOCKWISE : Stereo.ANTI_CLOCKWISE;
                // H is always at back, even if explicit! At least this seems to be the case.
                // we adjust the winding as needed
                if (hidx == 0 || hidx == 2)
                    winding = winding.invert();
                outputContainer.addStereoElement(new TetrahedralChirality(focus, carriers, winding));
            }
        }
        // read PROPERTY block
        readPropertiesFast(input, outputContainer, nAtoms);
        // read potential SD file data between M END and $$$$
        readNonStructuralData(input, outputContainer);
        if (interpretHydrogenIsotopes.isSet()) {
            fixHydrogenIsotopes(outputContainer, Isotopes.getInstance());
        }
        // note: apply the valence model last so that all fixes (i.e. hydrogen
        // isotopes) are in place we need to use a offset as this atoms
        // could be added to a molecule which already had atoms present
        int offset = outputContainer.getAtomCount() - nAtoms;
        for (int i = offset; i < outputContainer.getAtomCount(); i++) {
            int valence = explicitValence[i - offset];
            if (valence < 0) {
                hasQueryBonds = true; // also counts aromatic bond as query
            } else {
                int unpaired = outputContainer.getConnectedSingleElectronsCount(outputContainer.getAtom(i));
                applyMDLValenceModel(outputContainer.getAtom(i), valence + unpaired);
            }
        }
        // sanity check that we have a decent molecule, query bonds mean we
        // don't have a hydrogen count for atoms and stereo perception isn't
        // currently possible
        if (!hasQueryBonds && addStereoElements.isSet() && hasX && hasY) {
            if (hasZ) { // has 3D coordinates
                outputContainer.setStereoElements(StereoElementFactory.using3DCoordinates(outputContainer)
                        .createAll());
            } else if (!forceReadAs3DCoords.isSet()) { // has 2D coordinates (set as 2D coordinates)
                outputContainer.setStereoElements(StereoElementFactory.using2DCoordinates(outputContainer)
                        .createAll());
            }
        }
    } catch (CDKException exception) {
        String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
        logger.error(error);
        throw exception;
    } catch (IOException exception) {
        String error = "Error while parsing line " + linecount + ": " + line + " -> " + exception.getMessage();
        logger.error(error);
        logger.debug(exception);
        handleError("Error while parsing line: " + line, linecount, 0, 0, exception);
    }
    return outputContainer;
}
/**
 * Sets the implicit hydrogen count of an atom from its explicit valence
 * (bond order sum).
 *
 * If the atom already carries a valency, the hydrogen count is the
 * difference between that valency and the explicit valence, never less than
 * zero. Otherwise the valency is obtained from {@code MDLValence} using the
 * atomic number and formal charge (each taken as 0 when unset), raised to
 * the explicit valence if it is lower, stored on the atom, and the hydrogen
 * count is the difference to the explicit valence.
 *
 * @param atom            the atom to apply the model to
 * @param explicitValence the explicit valence (bond order sum)
 */
private void applyMDLValenceModel(IAtom atom, int explicitValence) {
    final Integer presetValency = atom.getValency();
    if (presetValency == null) {
        final Integer atomicNumber = atom.getAtomicNumber();
        final int element = atomicNumber == null ? 0 : atomicNumber;
        final Integer formalCharge = atom.getFormalCharge();
        final int charge = formalCharge == null ? 0 : formalCharge;
        final int modelValence = MDLValence.implicitValence(element, charge, explicitValence);
        // never report a valency lower than what is already bonded
        final int valency = Math.max(modelValence, explicitValence);
        atom.setValency(valency);
        atom.setImplicitHydrogenCount(valency - explicitValence);
        return;
    }
    final int hydrogens = presetValency >= explicitValence ? presetValency - explicitValence : 0;
    atom.setImplicitHydrogenCount(hydrogens);
}
/**
 * Replaces pseudo atoms labelled "D" or "T" with hydrogen atoms configured
 * as the isotope of mass number 2 or 3 respectively. All other atoms are
 * left untouched.
 *
 * @param molecule       the structure whose atoms are inspected and replaced
 * @param isotopeFactory source of the hydrogen isotopes
 */
private void fixHydrogenIsotopes(IAtomContainer molecule, IsotopeFactory isotopeFactory) {
    for (IAtom candidate : AtomContainerManipulator.getAtomArray(molecule)) {
        if (!(candidate instanceof IPseudoAtom)) {
            continue;
        }
        final String label = ((IPseudoAtom) candidate).getLabel();
        final int massNumber;
        if ("D".equals(label)) {
            massNumber = 2;
        } else if ("T".equals(label)) {
            massNumber = 3;
        } else {
            continue;
        }
        final IAtom hydrogen = molecule.getBuilder().newInstance(IAtom.class, candidate);
        hydrogen.setSymbol("H");
        hydrogen.setAtomicNumber(1);
        isotopeFactory.configure(hydrogen, isotopeFactory.getIsotope("H", massNumber));
        AtomContainerManipulator.replaceAtomByAtom(molecule, candidate, hydrogen);
    }
}
/**
 * Closes the underlying buffered input.
 *
 * @throws IOException if the input could not be closed
 */
@Override
public void close() throws IOException {
    this.input.close();
}
/**
 * Registers the three boolean IO settings of this reader, in order:
 * "ForceReadAs3DCoordinates" (default "false"), "InterpretHydrogenIsotopes"
 * (default "true") and "AddStereoElements" (default "true"), and keeps the
 * registered instances in the corresponding fields.
 */
private void initIOSettings() {
    final BooleanIOSetting force3d = new BooleanIOSetting("ForceReadAs3DCoordinates",
            IOSetting.Importance.LOW, "Should coordinates always be read as 3D?", "false");
    forceReadAs3DCoords = addSetting(force3d);

    final BooleanIOSetting hydrogenIsotopes = new BooleanIOSetting("InterpretHydrogenIsotopes",
            IOSetting.Importance.LOW, "Should D and T be interpreted as hydrogen isotopes?", "true");
    interpretHydrogenIsotopes = addSetting(hydrogenIsotopes);

    final BooleanIOSetting stereo = new BooleanIOSetting("AddStereoElements", IOSetting.Importance.LOW,
            "Assign stereo configurations to stereocenters utilising 2D/3D coordinates.", "true");
    addStereoElements = addSetting(stereo);
}
/**
 * Fires an IO setting question for every setting registered with this
 * reader.
 */
public void customizeJob() {
    for (final IOSetting registered : getSettings()) {
        fireIOSettingQuestion(registered);
    }
}
/**
 * Strips every character that is not a digit (as defined by {@link
 * Character#isDigit(char)}) from the input, keeping the digits in their
 * original order.
 *
 * @param input the text to filter
 * @return the digits of the input, possibly an empty string
 */
private String removeNonDigits(String input) {
    final StringBuilder digits = new StringBuilder();
    for (final char candidate : input.toCharArray()) {
        if (Character.isDigit(candidate)) {
            digits.append(candidate);
        }
    }
    return digits.toString();
}
/**
 * Parse an atom line from the atom block, discarding any stereo parity
 * bookkeeping. The parity itself is still stored on the returned atom.
 *
 * @param line    input line
 * @param builder chem object builder to create the atom
 * @param lineNum the line number - for printing error messages
 * @return a new atom instance
 * @throws CDKException if the line is malformed
 * @throws IOException  declared by the delegate
 */
IAtom readAtomFast(String line, IChemObjectBuilder builder, int lineNum) throws CDKException, IOException {
    // the delegate stores non-zero parities in the map, so it must be
    // mutable - an immutable empty map would fail on the first such atom
    return readAtomFast(line, builder, new HashMap<IAtom, Integer>(), lineNum);
}
/**
 * Parse an atom line from the atom block using the format: {@code
 * xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee}
 * where:
 * <ul>
 * <li>x: x coordinate</li>
 * <li>y: y coordinate</li>
 * <li>z: z coordinate</li>
 * <li>a: atom symbol</li>
 * <li>d: mass difference</li>
 * <li>c: charge</li>
 * <li>s: stereo parity</li>
 * <li>h: hydrogen count + 1 (not read - query)</li>
 * <li>b: stereo care (not read - query)</li>
 * <li>v: valence</li>
 * <li>H: H0 designator (not read - query)</li>
 * <li>r: not used</li>
 * <li>i: not used</li>
 * <li>m: atom reaction mapping</li>
 * <li>n: inversion/retention flag (not read)</li>
 * <li>e: exact change flag (not read)</li>
 * </ul>
 *
 * The parsing is strict and does not allow extra columns (i.e. NMR shifts)
 * or malformed input.
 *
 * @param line     input line
 * @param builder  chem object builder to create the atom
 * @param parities map of atom parities for creation 0D stereochemistry,
 *                 must be modifiable as non-zero parities are added to it
 * @param lineNum  the line number - for printing error messages
 * @return a new atom instance
 * @throws CDKException if the line length does not end on a field boundary
 * @throws IOException  if the isotope data could not be loaded
 */
IAtom readAtomFast(String line, IChemObjectBuilder builder, Map<IAtom, Integer> parities, int lineNum) throws CDKException, IOException {
    // The line may be truncated and it's checked in reverse at the specified
    // lengths:
    //          1         2         3         4         5         6
    // 123456789012345678901234567890123456789012345678901234567890123456789
    //                                  | |  |  |  |  |  |  |  |  |  |  |  |
    // xxxxx.xxxxyyyyy.yyyyzzzzz.zzzz aaaddcccssshhhbbbvvvHHHrrriiimmmnnneee
    String symbol;
    double x, y, z;
    int massDiff = 0, charge = 0, parity = 0, valence = 0, mapping = 0;
    int length = length(line);
    if (length > 69) // excess data we should check all fields
        length = 69;
    // given the length we jump to the position and parse all fields
    // that could be present (note - fall through switch)
    switch (length) {
        case 69: // eee: exact charge flag [reaction, query]
        case 66: // nnn: inversion / retention [reaction]
        case 63: // mmm: atom-atom mapping [reaction]
            mapping = readMolfileInt(line, 60);
        case 60: // iii: not used
        case 57: // rrr: not used
        case 54: // HHH: H0 designation [redundant]
        case 51: // vvv: valence
            valence = readMolfileInt(line, 48);
        case 48: // bbb: stereo care [query]
        case 45: // hhh: hydrogen count + 1 [query]
        case 42: // sss: stereo parity
            parity = toInt(line.charAt(41));
        case 39: // ccc: charge
            charge = toCharge(line.charAt(38));
        case 36: // dd: mass difference
            massDiff = sign(line.charAt(34)) * toInt(line.charAt(35));
        case 34: // x y z and aaa: atom coordinates and symbol
        case 33: // symbol is left aligned
        case 32:
            x = readMDLCoordinate(line, 0);
            y = readMDLCoordinate(line, 10);
            z = readMDLCoordinate(line, 20);
            // a one or two character symbol may end the line before column 34
            symbol = line.substring(31, Math.min(34, line.length())).trim().intern();
            break;
        default:
            handleError("invalid line length", lineNum, 0, 0);
            throw new CDKException("invalid line length, " + length + ": " + line);
    }
    IAtom atom = createAtom(symbol, builder, lineNum);
    atom.setPoint3d(new Point3d(x, y, z));
    atom.setFormalCharge(charge);
    atom.setStereoParity(parity);
    if (parity != 0)
        parities.put(atom, parity);
    // if there was a mass difference, set the mass number
    if (massDiff != 0) {
        // the atomic number is a boxed value and may be unset
        final Integer atomicNumber = atom.getAtomicNumber();
        if (atomicNumber != null && atomicNumber > 0) {
            final IIsotope majorIsotope = Isotopes.getInstance().getMajorIsotope(atomicNumber);
            if (majorIsotope != null && majorIsotope.getMassNumber() != null) {
                atom.setMassNumber(majorIsotope.getMassNumber() + massDiff);
            } else {
                logger.error("No major isotope for atomic number " + atomicNumber
                        + ", mass difference ignored on line " + lineNum);
            }
        }
    }
    // a value of 15 denotes zero valence, values outside 1..15 are ignored
    if (valence > 0 && valence < 16) atom.setValency(valence == 15 ? 0 : valence);
    if (mapping != 0) atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, mapping);
    return atom;
}
/**
 * Read a bond from a line in the MDL bond block. The bond block is
 * formatted as follows, {@code 111222tttsssxxxrrrccc}, where:
 * <ul>
 * <li>111: first atom number</li>
 * <li>222: second atom number</li>
 * <li>ttt: bond type</li>
 * <li>sss: bond stereo</li>
 * <li>xxx: not used</li>
 * <li>rrr: bond topology (not read)</li>
 * <li>ccc: reaction center (not read)</li>
 * </ul>
 *
 * Bond types 1-3 add their order to the explicit valence of both atoms, any
 * other type marks the explicit valence of both atoms as undefined
 * (negative).
 *
 * @param line            the input line
 * @param builder         builder to create objects with
 * @param atoms           atoms read from the atom block
 * @param explicitValence array to fill with explicit valence
 * @param lineNum         the input line number
 * @return a new bond
 * @throws CDKException thrown if the input was malformed or didn't make
 *                      sense
 */
IBond readBondFast(String line, IChemObjectBuilder builder, IAtom[] atoms, int[] explicitValence, int lineNum)
        throws CDKException {
    // The line may be truncated and it's checked in reverse at the specified
    // lengths. Absolutely required is atom indices, bond type and stereo.
    //          1         2
    // 123456789012345678901
    //            |  |  |  |
    // 111222tttsssxxxrrrccc
    int length = length(line);
    if (length > 21) length = 21;
    int u, v, type, stereo = 0;
    switch (length) {
        case 21: // ccc: reaction centre status
        case 18: // rrr: bond topology
        case 15: // xxx: not used
        case 12: // sss: stereo
            stereo = readUInt(line, 9, 3);
        case 9: // 111222ttt: atoms, type and stereo
            u = readMolfileInt(line, 0) - 1;
            v = readMolfileInt(line, 3) - 1;
            type = readMolfileInt(line, 6);
            break;
        default:
            throw new CDKException("invalid line length: " + length + " " + line);
    }
    // atom numbers come from the file and may not refer to an atom of the
    // atom block, report that as malformed input rather than an index error
    if (u < 0 || u >= atoms.length || v < 0 || v >= atoms.length)
        throw new CDKException("invalid atom number in bond on line " + lineNum + ": " + line);
    IBond bond = builder.newInstance(IBond.class, atoms[u], atoms[v]);
    switch (type) {
        case 1: // single
            bond.setOrder(IBond.Order.SINGLE);
            bond.setStereo(toStereo(stereo, type));
            break;
        case 2: // double
            bond.setOrder(IBond.Order.DOUBLE);
            bond.setStereo(toStereo(stereo, type));
            break;
        case 3: // triple
            bond.setOrder(IBond.Order.TRIPLE);
            break;
        case 4: // aromatic
            bond.setOrder(IBond.Order.UNSET);
            bond.setFlag(CDKConstants.ISAROMATIC, true);
            bond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true);
            atoms[u].setFlag(CDKConstants.ISAROMATIC, true);
            atoms[v].setFlag(CDKConstants.ISAROMATIC, true);
            break;
        case 5: // single or double
        case 6: // single or aromatic
        case 7: // double or aromatic
        case 8: // any
            bond = CTFileQueryBond.ofType(bond, type);
            break;
        default:
            throw new CDKException("unrecognised bond type: " + type + ", " + line);
    }
    if (type < 4) {
        explicitValence[u] += type;
        explicitValence[v] += type;
    } else {
        // aromatic and query bonds have no defined order, flag the valence as unknown
        explicitValence[u] = explicitValence[v] = Integer.MIN_VALUE;
    }
    return bond;
}
/**
* Reads the property block from the {@code input} setting the values in the
* container.
*
* @param input input resource
* @param container the structure with atoms / bonds present
* @param nAtoms the number of atoms in the atoms block
* @throws IOException low-level IO error
*/
void readPropertiesFast(final BufferedReader input, final IAtomContainer container, final int nAtoms)
throws IOException, CDKException {
String line;
// first atom index in this Molfile, the container may have
// already had atoms present before reading the file
int offset = container.getAtomCount() - nAtoms;
Map sgroups = new LinkedHashMap<>();
LINES:
while ((line = input.readLine()) != null) {
int index, count, lnOffset;
Sgroup sgroup;
int length = line.length();
final PropertyKey key = PropertyKey.of(line);
switch (key) {
// A aaa
// x...
//
// atom alias is stored as label on a pseudo atom
case ATOM_ALIAS:
index = readMolfileInt(line, 3) - 1;
final String label = input.readLine();
if (label == null) return;
label(container, offset + index, label);
break;
// V aaa v...
//
// an atom value is stored as comment on an atom
case ATOM_VALUE:
index = readMolfileInt(line, 3) - 1;
final String comment = line.substring(7);
container.getAtom(offset + index).setProperty(CDKConstants.COMMENT, comment);
break;
// G aaappp
// x...
//
// Abbreviation is required for compatibility with previous versions of MDL ISIS/Desktop which
// allowed abbreviations with only one attachment. The attachment is denoted by two atom
// numbers, aaa and ppp. All of the atoms on the aaa side of the bond formed by aaa-ppp are
// abbreviated. The coordinates of the abbreviation are the coordinates of aaa. The text of the
// abbreviation is on the following line (x...). In current versions of ISIS, abbreviations can have any
// number of attachments and are written out using the Sgroup appendixes. However, any ISIS
// abbreviations that do have one attachment are also written out in the old style, again for
// compatibility with older ISIS versions, but this behavior might not be supported in future
// versions.
case GROUP_ABBREVIATION:
// not supported, existing parsing doesn't do what is
// mentioned in the specification above
// final int from = readMolfileInt(line, 3) - 1;
// final int to = readMolfileInt(line, 6) - 1;
final String group = input.readLine();
if (group == null) return;
break;
// M CHGnn8 aaa vvv ...
//
// vvv: -15 to +15. Default of 0 = uncharged atom. When present, this property supersedes
// all charge and radical values in the atom block, forcing a 0 charge on all atoms not
// listed in an M CHG or M RAD line.
case M_CHG:
count = readUInt(line, 6, 3);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
index = readMolfileInt(line, st) - 1;
int charge = readMolfileInt(line, st + 4);
container.getAtom(offset + index).setFormalCharge(charge);
}
break;
// M ISOnn8 aaa vvv ...
//
// vvv: Absolute mass of the atom isotope as a positive integer. When present, this property
// supersedes all isotope values in the atom block. Default (no entry) means natural
// abundance. The difference between this absolute mass value and the natural
// abundance value specified in the PTABLE.DAT file must be within the range of -18
// to +12.
case M_ISO:
count = readUInt(line, 6, 3);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
index = readMolfileInt(line, st) - 1;
int mass = readMolfileInt(line, st + 4);
container.getAtom(offset + index).setMassNumber(mass);
}
break;
// M RADnn8 aaa vvv ...
//
// vvv: Default of 0 = no radical, 1 = singlet (:), 2 = doublet ( . or ^), 3 = triplet (^^). When
// present, this property supersedes all charge and radical values in the atom block,
// forcing a 0 (zero) charge and radical on all atoms not listed in an M CHG or
// M RAD line.
case M_RAD:
count = readUInt(line, 6, 3);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
index = readMolfileInt(line, st) - 1;
int value = readMolfileInt(line, st + 4);
SPIN_MULTIPLICITY multiplicity = SPIN_MULTIPLICITY.ofValue(value);
for (int e = 0; e < multiplicity.getSingleElectrons(); e++)
container.addSingleElectron(offset + index);
}
break;
// M RGPnn8 aaa rrr ...
//
// rrr: Rgroup number, value from 1 to 32 *, labels position of Rgroup on root.
//
// see also, RGroupQueryReader
case M_RGP:
count = readUInt(line, 6, 3);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
index = readMolfileInt(line, st) - 1;
int number = readMolfileInt(line, st + 4);
label(container, offset + index, "R" + number);
}
break;
// M ZZC aaa c...
//
// c: first character of the label, extends to EOL.
//
// Proprietary atom labels created by ACD/Labs ChemSketch using the Manual Numbering Tool.
// This atom property appears to be undocumented, but experimentation leads to the following
// specification (tested with ACD/ChemSketch version 12.00 Build 29305, 25 Nov 2008)
//
// It's not necessary to label any/all atoms but if a label is present, the following applies:
//
// The atom label(s) consist of an optional prefix, a required numeric label, and optional suffix.
//
// The numeric label is an integer in the range 0 - 999 inclusive.
//
// If present, the prefix and suffix can each contain 1 - 50 characters, from the set of printable
// ASCII characters shown here
//
// !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~"
//
// In addition, both the prefix and suffix may contain leading and/or trailing and/or embedded
// whitespace, included within the limit of 50 characters. These should be preserved when read.
//
// Long labels in the mol/sdfile are not truncated or wrapped onto multiple lines. As a result, the
// line could be 114 characters in length (excluding the newline).
//
// By stopping and restarting the Manual Numbering Tool, it's possible to create non-sequential
// or even duplicate numbers or labels. This is reasonable for the intended purpose of the tool -
// labelling the structure as you wish. If unique labels are required, downstream processing will be
// necessary to enforce this.
//
case M_ZZC:
if (mode == Mode.STRICT) {
throw new CDKException("Atom property ZZC is illegal in STRICT mode");
}
index = readMolfileInt(line, 7) - 1;
String atomLabel = line.substring(11); // DO NOT TRIM
container.getAtom(offset + index).setProperty(CDKConstants.ACDLABS_LABEL, atomLabel);
break;
// M STYnn8 sss ttt ...
// sss: Sgroup number
// ttt: Sgroup type: SUP = abbreviation Sgroup (formerly called superatom), MUL = multiple group,
// SRU = SRU type, MON = monomer, MER = Mer type, COP = copolymer, CRO = crosslink,
// MOD = modification, GRA = graft, COM = component, MIX = mixture,
// FOR = formulation, DAT = data Sgroup, ANY = any polymer, GEN = generic.
//
// Note: For a given Sgroup, an STY line giving its type must appear before any other line that
// supplies information about it. For a data Sgroup, an SDT line must describe the data
// field before the SCD and SED lines that contain the data (see Data Sgroup Data below).
// When a data Sgroup is linked to another Sgroup, the Sgroup must already have been defined.
//
// Sgroups can be in any order on the Sgroup Type line. Brackets are drawn around Sgroups with the
// M SDI lines defining the coordinates.
case M_STY:
count = readMolfileInt(line, 6);
for (int i = 0; i < count; i++) {
lnOffset = 10 + (i * 8);
index = readMolfileInt(line, lnOffset);
if (mode == Mode.STRICT && sgroups.containsKey(index))
handleError("STY line must appear before any other line that supplies Sgroup information");
sgroup = new Sgroup();
sgroups.put(index, sgroup);
SgroupType type = SgroupType.parseCtabKey(line.substring(lnOffset + 4, lnOffset + 7));
if (type != null)
sgroup.setType(type);
}
break;
// Sgroup Subtype [Sgroup]
// M SSTnn8 sss ttt ...
// ttt: Polymer Sgroup subtypes: ALT = alternating, RAN = random, BLO = block
case M_SST:
count = readMolfileInt(line, 6);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
sgroup = ensureSgroup(sgroups,
readMolfileInt(line, st));
if (mode == Mode.STRICT && sgroup.getType() != SgroupType.CtabCopolymer)
handleError("SST (Sgroup Subtype) specified for a non co-polymer group");
String sst = line.substring(st+4, st+7);
if (mode == Mode.STRICT && !("ALT".equals(sst) || "RAN".equals(sst) || "BLO".equals(sst)))
handleError("Invalid sgroup subtype: " + sst + " expected (ALT, RAN, or BLO)");
sgroup.putValue(SgroupKey.CtabSubType, sst);
}
break;
// Sgroup Atom List [Sgroup]
// M SAL sssn15 aaa ...
// aaa: Atoms in Sgroup sss
case M_SAL:
sgroup = ensureSgroup(sgroups, readMolfileInt(line, 7));
count = readMolfileInt(line, 10);
for (int i = 0, st = 14; i < count && st + 3 <= length; i++, st += 4) {
index = readMolfileInt(line, st) - 1;
sgroup.addAtom(container.getAtom(offset + index));
}
break;
// Sgroup Bond List [Sgroup]
// M SBL sssn15 bbb ...
// bbb: Bonds in Sgroup sss.
// (For data Sgroups, bbb’s are the containment bonds, for all other
// Sgroup types, bbb’s are crossing bonds.)
case M_SBL:
sgroup = ensureSgroup(sgroups, readMolfileInt(line, 7));
count = readMolfileInt(line, 10);
for (int i = 0, st = 14; i < count && st + 3 <= length; i++, st += 4) {
index = readMolfileInt(line, st) - 1;
sgroup.addBond(container.getBond(offset + index));
}
break;
// Sgroup Hierarchy Information [Sgroup]
// M SPLnn8 ccc ppp ...
// ccc: Sgroup index of the child Sgroup
// ppp: Sgroup index of the parent Sgroup (ccc and ppp must already be defined via an
// STY line prior to encountering this line)
case M_SPL:
count = readMolfileInt(line, 6);
for (int i = 0, st = 10; i < count && st + 6 <= length; i++, st += 8) {
sgroup = ensureSgroup(sgroups, readMolfileInt(line, st));
sgroup.addParent(ensureSgroup(sgroups, readMolfileInt(line, st+4)));
}
break;
// Sgroup Connectivity [Sgroup]
// M SCNnn8 sss ttt ...
// ttt: HH = head-to-head, HT = head-to-tail, EU = either unknown.
// Left justified.
case M_SCN:
count = readMolfileInt(line, 6);
for (int i = 0, st = 10; i < count && st + 6 <= length; i++, st += 8) {
sgroup = ensureSgroup(sgroups,
readMolfileInt(line, st));
String con = line.substring(st + 4, Math.min(length, st + 7)).trim();
if (mode == Mode.STRICT && !("HH".equals(con) || "HT".equals(con) || "EU".equals(con)))
handleError("Unknown SCN type (expected: HH, HT, or EU) was " + con);
sgroup.putValue(SgroupKey.CtabConnectivity,
con);
}
break;
// Sgroup Display Information
// M SDI sssnn4 x1 y1 x2 y2
// x1,y1, Coordinates of bracket endpoints
// x2,y2:
case M_SDI:
sgroup = ensureSgroup(sgroups, readMolfileInt(line, 7));
count = readMolfileInt(line, 10);
assert count == 4; // fixed?
sgroup.addBracket(new SgroupBracket(readMDLCoordinate(line, 13),
readMDLCoordinate(line, 23),
readMDLCoordinate(line, 33),
readMDLCoordinate(line, 43)));
break;
// Sgroup subscript
// M SMT sss m...
// m...: Text of subscript Sgroup sss.
// (For multiple groups, m... is the text representation of the multiple group multiplier.
// For abbreviation Sgroups, m... is the text of the abbreviation Sgroup label.)
case M_SMT:
sgroup = ensureSgroup(sgroups, readMolfileInt(line, 7));
sgroup.putValue(SgroupKey.CtabSubScript,
line.substring(11).trim());
break;
// Sgroup Bracket Style
// The format for the Sgroup bracket style is as follows:
// M SBTnn8 sss ttt ...
// where:
// sss: Index of Sgroup
// ttt: Bracket display style: 0 = default, 1 = curved (parenthetic) brackets
// This appendix supports altering the display style of the Sgroup brackets.
case M_SBT:
count = readMolfileInt(line, 6);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
sgroup = ensureSgroup(sgroups,
readMolfileInt(line, st));
sgroup.putValue(SgroupKey.CtabBracketStyle,
readMolfileInt(line, st+4));
}
break;
// Sgroup Expansion
// M SDS EXPn15 sss ...
// sss: Sgroup index of expanded abbreviation Sgroups
case M_SDS:
if ("EXP".equals(line.substring(7, 10))) {
count = readMolfileInt(line, 10);
for (int i = 0, st = 14; i < count && st + 3 <= length; i++, st += 4) {
sgroup = ensureSgroup(sgroups, readMolfileInt(line, st));
sgroup.putValue(SgroupKey.CtabExpansion, true);
}
} else if (mode == Mode.STRICT) {
handleError("Expected EXP to follow SDS tag");
}
break;
// Multiple Group Parent Atom List [Sgroup]
// M SPA sssn15 aaa ...
// aaa: Atoms in paradigmatic repeating unit of multiple group sss
// Note: To ensure that all current molfile readers consistently
// interpret chemical structures, multiple groups are written
// in their fully expanded state to the molfile. The M SPA atom
// list is a subset of the full atom list that is defined by the
// Sgroup Atom List M SAL entry.
case M_SPA:
sgroup = ensureSgroup(sgroups, readMolfileInt(line, 7));
count = readMolfileInt(line, 10);
Set parentAtomList = sgroup.getValue(SgroupKey.CtabParentAtomList);
if (parentAtomList == null) {
sgroup.putValue(SgroupKey.CtabParentAtomList, parentAtomList = new HashSet());
}
for (int i = 0, st = 14; i < count && st + 3 <= length; i++, st += 4) {
index = readMolfileInt(line, st) - 1;
parentAtomList.add(container.getAtom(offset + index));
}
break;
// Sgroup Component Numbers [Sgroup]
// M SNCnn8 sss ooo ...
// sss: Index of component Sgroup
// ooo: Integer component order (1...256). This limit applies only to MACCS-II
case M_SNC:
count = readMolfileInt(line, 6);
for (int i = 0, st = 10; i < count && st + 7 <= length; i++, st += 8) {
sgroup = ensureSgroup(sgroups,
readMolfileInt(line, st));
sgroup.putValue(SgroupKey.CtabComponentNumber,
readMolfileInt(line, st+4));
}
break;
// M END
//
// This entry goes at the end of the properties block and is required for molfiles which contain a
// version stamp in the counts line.
case M_END:
break LINES;
}
}
if (!sgroups.isEmpty()) {
// load Sgroups into molecule, first we downcast
List sgroupOrgList = new ArrayList<>(sgroups.values());
List sgroupCpyList = new ArrayList<>(sgroupOrgList.size());
for (int i = 0; i < sgroupOrgList.size(); i++) {
Sgroup cpy = sgroupOrgList.get(i).downcast();
sgroupCpyList.add(cpy);
}
// update replaced parents
for (int i = 0; i < sgroupOrgList.size(); i++) {
Sgroup newSgroup = sgroupCpyList.get(i);
Set oldParents = new HashSet<>(newSgroup.getParents());
newSgroup.removeParents(oldParents);
for (Sgroup parent : oldParents) {
newSgroup.addParent(sgroupCpyList.get(sgroupOrgList.indexOf(parent)));
}
}
container.setProperty(CDKConstants.CTAB_SGROUPS, sgroupCpyList);
}
}
/**
 * Look up the Sgroup registered under the given index, creating and
 * registering an empty one when none exists yet. In strict mode a missing
 * entry is first reported through {@code handleError}.
 *
 * @param map Sgroup index to Sgroup lookup, updated in place when a new
 *            Sgroup has to be created
 * @param idx the Sgroup index as read from the property line
 * @return the existing or newly created Sgroup, never null
 * @throws CDKException propagated from {@code handleError} (strict mode)
 */
private Sgroup ensureSgroup(Map<Integer, Sgroup> map, int idx) throws CDKException {
    Sgroup sgroup = map.get(idx);
    if (sgroup == null) {
        if (mode == Mode.STRICT)
            handleError("Sgroups must first be defined by a STY property");
        sgroup = new Sgroup();
        map.put(idx, sgroup);
    }
    return sgroup;
}
/**
 * Translate an MDL V2000 bond stereo flag into the CDK {@link IBond.Stereo}
 * value. Intended for single/double bonds only. In strict mode combinations
 * that make no sense (for example an 'up' double bond) and unknown flags are
 * rejected, otherwise they are mapped as leniently as possible.
 *
 * @param stereo stereo flag from the bond line
 * @param type   bond type from the bond line (1 = single, 2 = double)
 * @return the corresponding bond stereo
 * @throws CDKException the stereo flag was invalid (strict mode only)
 */
private IBond.Stereo toStereo(final int stereo, final int type) throws CDKException {
    final boolean strict = mode == Mode.STRICT;
    final boolean doubleBond = type == 2;

    if (stereo == 0) {
        // no flag: double bond geometry is taken from the coordinates
        return doubleBond ? IBond.Stereo.E_Z_BY_COORDINATES : IBond.Stereo.NONE;
    }
    if (stereo == 1) {
        if (strict && doubleBond)
            throw new CDKException("stereo flag was 'up' but bond order was 2");
        return IBond.Stereo.UP;
    }
    if (stereo == 3) {
        if (strict && type == 1)
            throw new CDKException("stereo flag was 'cis/trans' but bond order was 1");
        return IBond.Stereo.E_OR_Z;
    }
    if (stereo == 4) {
        if (strict && doubleBond)
            throw new CDKException("stereo flag was 'up/down' but bond order was 2");
        return IBond.Stereo.UP_OR_DOWN;
    }
    if (stereo == 6) {
        if (strict && doubleBond)
            throw new CDKException("stereo flag was 'down' but bond order was 2");
        return IBond.Stereo.DOWN;
    }

    // anything else is not a recognised flag
    if (strict) throw new CDKException("unknown bond stereo type: " + stereo);
    return IBond.Stereo.NONE;
}
/**
 * Determine the length of a line once trailing space characters (' ') are
 * ignored. Only the plain space character is considered, other whitespace
 * such as tabs counts towards the length.
 *
 * @param str a string
 * @return the length when trailing spaces are removed
 */
static int length(final String str) {
    int end = str.length();
    // walk backwards over the trailing run of spaces
    while (end > 0 && str.charAt(end - 1) == ' ') {
        end--;
    }
    return end;
}
/**
 * Create an atom for the provided symbol. A periodic element yields a new
 * 'Atom'; any other symbol yields a new 'PseudoAtom'. Symbols that are not
 * recognised pseudo labels are reported through {@code handleError} and, in
 * strict mode, rejected with an exception.
 *
 * @param symbol  input symbol
 * @param builder chem object builder
 * @param lineNum line number passed on to the error report
 * @return a new atom
 * @throws CDKException the symbol is not allowed
 */
private IAtom createAtom(String symbol, IChemObjectBuilder builder, int lineNum) throws CDKException {
    if (isPeriodicElement(symbol)) {
        return builder.newInstance(IAtom.class, symbol);
    }

    final boolean knownPseudo = isPseudoElement(symbol);
    if (!knownPseudo) {
        handleError("invalid symbol: " + symbol, lineNum, 31, 34);
        // when strict only accept labels from the specification
        if (mode == Mode.STRICT) {
            throw new CDKException("invalid symbol: " + symbol);
        }
    }

    // An 'R#' may be renumbered later by an RGP entry (R1, R2, ...). If it is
    // not, 'R' is a better label than 'R#' when no RGP is specified.
    final String pseudoSymbol = symbol.equals("R#") ? "R" : symbol;

    final IAtom pseudo = builder.newInstance(IPseudoAtom.class, pseudoSymbol);
    pseudo.setSymbol(pseudoSymbol);
    pseudo.setAtomicNumber(0); // avoid NPE downstream
    return pseudo;
}
/**
 * Is the symbol a periodic element, i.e. does the periodic table lookup
 * yield a positive atomic number for it.
 *
 * @param symbol a symbol from the input
 * @return the symbol is a periodic element
 */
private static boolean isPeriodicElement(final String symbol) {
    // XXX: PeriodicTable is slow - switch without file IO would be optimal
    final Integer atomicNumber = PeriodicTable.getAtomicNumber(symbol);
    if (atomicNumber == null) {
        return false;
    }
    return atomicNumber > 0;
}
/**
 * Is the atom symbol one of the accepted non-periodic (pseudo) labels. The
 * accepted labels are those held in {@code PSEUDO_LABELS}; the original
 * documentation lists 'R#', 'A', 'Q', '*', 'L' and 'LP' plus 'R', which is
 * accepted although it is not listed in the specification.
 *
 * @param symbol a symbol from the input
 * @return the symbol is a valid pseudo element
 */
static boolean isPseudoElement(final String symbol) {
    final boolean accepted = PSEUDO_LABELS.contains(symbol);
    return accepted;
}
/**
 * Read a coordinate from an MDL input. An MDL V2000 coordinate occupies 10
 * characters laid out as 'xxxxx.xxxx': left padded with spaces, the decimal
 * point at the sixth character and four decimal digits. Knowing the layout
 * allows an optimised parser that does not consider exponents etc.
 *
 * @param line   input line
 * @param offset first character of the coordinate
 * @return the specified value
 * @throws CDKException the coordinate specification was not valid (no
 *                      decimal point at the expected position)
 */
static double readMDLCoordinate(final String line, int offset) throws CDKException {
    // to be valid the decimal point must sit five characters after the start
    final int point = offset + 5;
    if (line.charAt(point) != '.') {
        throw new CDKException("invalid coordinate specification");
    }

    // skip the left padding, then an optional minus sign
    int cursor = offset;
    while (line.charAt(cursor) == ' ') {
        cursor++;
    }
    final int signum = sign(line.charAt(cursor));
    if (signum < 0) {
        cursor++;
    }

    final int whole = readUInt(line, cursor, point - cursor);
    final int decimals = readUInt(line, point + 1, 4);

    // combine as a scaled long first so the value is exact before dividing
    final long scaled = whole * 10000L + decimals;
    return signum * scaled / 10000d;
}
/**
 * Convert a charge code character (from an MDL V2000 atom line) to a formal
 * charge: '1' = +3, '2' = +2, '3' = +1, '4' = doublet radical (charge 0),
 * '5' = -1, '6' = -2, '7' = -3. Any other character gives 0.
 *
 * @param c a character
 * @return formal charge
 */
private static int toCharge(final char c) {
    // The codes are symmetric around '4': 1..3 count down from +3 and 5..7
    // count down from -1. '4' (doublet radical - superseded by M RAD) maps
    // to 0 by the same arithmetic.
    if (c >= '1' && c <= '7') {
        return '4' - c;
    }
    return 0;
}
/**
 * Obtain the sign denoted by a character: -1 for '-', +1 for anything else.
 *
 * @param c a character
 * @return the sign
 */
private static int sign(final char c) {
    if (c == '-') {
        return -1;
    }
    return +1;
}
/**
 * Convert an ASCII digit character to its integer value. Any character that
 * is not '0'..'9' (for example a space) yields 0.
 *
 * @param c a character
 * @return the numerical value
 */
private static int toInt(final char c) {
    // Character.getNumericValue accepts all of unicode which we neither want
    // nor need - imagine an MDL file with roman numerals!
    final boolean asciiDigit = !(c < '0' || c > '9');
    if (asciiDigit) {
        return c - '0';
    }
    return 0;
}
/**
 * Read an unsigned int value starting at the given index and spanning the
 * given number of characters. Characters that are not digits contribute a
 * zero digit (see {@code toInt}).
 *
 * @param line   input line
 * @param index  start index
 * @param digits number of characters to consume
 * @return an unsigned int
 */
private static int readUInt(final String line, int index, int digits) {
    int value = 0;
    final int end = index + digits;
    for (int pos = index; pos < end; pos++) {
        value = (value * 10) + toInt(line.charAt(pos));
    }
    return value;
}
/**
 * Optimised method for reading an integer from 3 characters in a string at
 * a specified index. MDL V2000 Molfiles make heavy use of 3 character ints
 * in the atom/bond and property blocks. The integer may be signed and
 * pre/post padded with white space.
 *
 * Reading stops early when a space or '-' follows a non-zero value, or when
 * any other non-digit character is encountered; whatever was accumulated up
 * to that point is returned.
 *
 * @param line  input
 * @param index start index
 * @return the value specified in the string
 */
private static int readMolfileInt(final String line, final int index) {
    int sign = 1;
    int result = 0;
    for (int i = 0; i < 3; i++) {
        final char c = line.charAt(index + i);
        if (c >= '0' && c <= '9') {
            result = (result * 10) + (c - '0');
        } else if (c == ' ' || c == '-') {
            // padding or a sign after a non-zero value terminates the number
            if (result > 0) break;
            if (c == '-') sign = -1;
        } else {
            // unexpected character, keep what has been read so far
            break;
        }
    }
    return sign * result;
}
/**
 * Labels the atom at the specified index with the provided label. An atom
 * that is already a pseudo atom is relabelled in place; any other atom is
 * replaced in the container by a new pseudo atom that carries over its
 * coordinates, mass number, formal charge and valency.
 *
 * @param container structure
 * @param index     atom index to replace
 * @param label     the label for the atom
 * @see IPseudoAtom#setLabel(String)
 */
static void label(final IAtomContainer container, final int index, final String label) {
    final IAtom original = container.getAtom(index);

    if (original instanceof IPseudoAtom) {
        ((IPseudoAtom) original).setLabel(label);
        return;
    }

    final IPseudoAtom replacement = container.getBuilder().newInstance(IPseudoAtom.class);
    replacement.setSymbol(label);
    replacement.setAtomicNumber(0);
    replacement.setPoint2d(original.getPoint2d());
    replacement.setPoint3d(original.getPoint3d());
    replacement.setMassNumber(original.getMassNumber());
    replacement.setFormalCharge(original.getFormalCharge());
    replacement.setValency(original.getValency());
    replacement.setLabel(label);
    // XXX: would be faster to track all replacements and do it all in one
    AtomContainerManipulator.replaceAtomByAtom(container, original, replacement);
}
/**
 * Reads an atom from the input allowing for non-standard formatting (i.e.
 * truncated lines) and chemical shifts appended after the regular fields.
 *
 * Fields are read by fixed column position: coordinates (0-30), symbol
 * (31-34), mass difference (34-36), charge code (36-39), stereo parity (41),
 * valence (48-51) and atom-atom mapping (60-63). Missing trailing fields are
 * reported through {@code handleError} or logged rather than failing
 * outright.
 *
 * @param line      input line
 * @param builder   chem object builder
 * @param linecount the current line count
 * @return an atom to add to a container
 * @throws CDKException a CDK error occurred
 * @throws IOException  the isotopes file could not be read
 */
private IAtom readAtomSlow(String line, IChemObjectBuilder builder, int linecount) throws CDKException, IOException {
    IAtom atom;
    Matcher trailingSpaceMatcher = TRAILING_SPACE.matcher(line);
    if (trailingSpaceMatcher.find()) {
        handleError("Trailing space found", linecount, trailingSpaceMatcher.start(), trailingSpaceMatcher.end());
        line = trailingSpaceMatcher.replaceAll("");
    }
    double x = Double.parseDouble(line.substring(0, 10).trim());
    double y = Double.parseDouble(line.substring(10, 20).trim());
    double z = Double.parseDouble(line.substring(20, 30).trim());

    // the symbol may be cut short on a truncated line, take what is there
    String element = line.substring(31, Math.min(line.length(), 34)).trim();
    if (line.length() < 34) {
        handleError("Element atom type does not follow V2000 format type should of length three"
                + " and padded with space if required", linecount, 31, 34);
    }

    logger.debug("Atom type: ", element);
    IsotopeFactory isotopeFactory = Isotopes.getInstance();
    if (isotopeFactory.isElement(element)) {
        atom = isotopeFactory.configure(builder.newInstance(IAtom.class, element));
    } else if ("A".equals(element) || "Q".equals(element) || "*".equals(element) || "LP".equals(element)
            || "L".equals(element)) {
        // query / pseudo atom labels from the specification
        atom = builder.newInstance(IPseudoAtom.class, element);
    } else if (element.startsWith("R")) {
        logger.debug("Atom ", element, " is not an regular element. Creating a PseudoAtom.");
        // check whether the 'R' is followed by a number (R1, R2, ...)
        String[] rGroup = element.split("^R");
        if (rGroup.length > 1) {
            try {
                element = "R" + Integer.valueOf(rGroup[(rGroup.length - 1)]);
                atom = builder.newInstance(IPseudoAtom.class, element);
            } catch (Exception ex) {
                // This happens for atoms labeled "R#".
                // The Rnumber may be set later on, using RGP line
                atom = builder.newInstance(IPseudoAtom.class, "R");
            }
        } else {
            atom = builder.newInstance(IPseudoAtom.class, element);
        }
    } else {
        handleError("Invalid element type. Must be an existing " + "element, or one in: A, Q, L, LP, *.",
                linecount, 32, 35);
        atom = builder.newInstance(IPseudoAtom.class, element);
        atom.setSymbol(element);
    }

    // store as 3D for now, convert to 2D (if totalZ == 0.0) later
    atom.setPoint3d(new Point3d(x, y, z));

    // parse further fields
    if (line.length() >= 36) {
        String massDiffString = line.substring(34, 36).trim();
        logger.debug("Mass difference: ", massDiffString);
        if (!(atom instanceof IPseudoAtom)) {
            try {
                int massDiff = Integer.parseInt(massDiffString);
                if (massDiff != 0) {
                    IIsotope major = Isotopes.getInstance().getMajorIsotope(element);
                    atom.setMassNumber(major.getMassNumber() + massDiff);
                }
            } catch (NumberFormatException | IOException exception) {
                handleError("Could not parse mass difference field.", linecount, 35, 37, exception);
            }
        } else {
            logger.error("Cannot set mass difference for a non-element!");
        }
    } else {
        handleError("Mass difference is missing", linecount, 34, 36);
    }

    // set the stereo parity
    Integer parity = line.length() > 41 ? Character.digit(line.charAt(41), 10) : 0;
    atom.setStereoParity(parity);

    if (line.length() >= 51) {
        String valenceString = removeNonDigits(line.substring(48, 51));
        logger.debug("Valence: ", valenceString);
        if (!(atom instanceof IPseudoAtom)) {
            try {
                int valence = Integer.parseInt(valenceString);
                if (valence != 0) {
                    //15 is defined as 0 in mol files
                    if (valence == 15)
                        atom.setValency(0);
                    else
                        atom.setValency(valence);
                }
            } catch (Exception exception) {
                handleError("Could not parse valence information field", linecount, 49, 52, exception);
            }
        } else {
            logger.error("Cannot set valence information for a non-element!");
        }
    }

    if (line.length() >= 39) {
        String chargeCodeString = line.substring(36, 39).trim();
        logger.debug("Atom charge code: ", chargeCodeString);
        int chargeCode = Integer.parseInt(chargeCodeString);
        if (chargeCode == 0) {
            // uncharged species
        } else if (chargeCode == 1) {
            atom.setFormalCharge(+3);
        } else if (chargeCode == 2) {
            atom.setFormalCharge(+2);
        } else if (chargeCode == 3) {
            atom.setFormalCharge(+1);
        } else if (chargeCode == 4) {
            // code 4 sets no charge here
        } else if (chargeCode == 5) {
            atom.setFormalCharge(-1);
        } else if (chargeCode == 6) {
            atom.setFormalCharge(-2);
        } else if (chargeCode == 7) {
            atom.setFormalCharge(-3);
        }
    } else {
        handleError("Atom charge is missing", linecount, 36, 39);
    }

    try {
        // read the mmm field as position 61-63
        String reactionAtomIDString = line.substring(60, 63).trim();
        logger.debug("Parsing mapping id: ", reactionAtomIDString);
        try {
            int reactionAtomID = Integer.parseInt(reactionAtomIDString);
            if (reactionAtomID != 0) {
                atom.setProperty(CDKConstants.ATOM_ATOM_MAPPING, reactionAtomID);
            }
        } catch (Exception exception) {
            logger.error("Mapping number ", reactionAtomIDString, " is not an integer.");
            logger.debug(exception);
        }
    } catch (Exception exception) {
        // older mol files don't have all these fields...
        logger.warn("A few fields are missing. Older MDL MOL file?");
    }

    //shk3: This reads shifts from after the molecule. I don't think this is an official format, but I saw it frequently 80=>78 for alk
    if (line.length() >= 78) {
        // The guard admits lines of 78 and 79 characters, so the end index is
        // clamped to the line length; an unconditional substring(69, 80)
        // would throw StringIndexOutOfBoundsException for those lines.
        int firstShiftEnd = Math.min(line.length(), 80);
        double shift = Double.parseDouble(line.substring(69, firstShiftEnd).trim());
        atom.setProperty("first shift", shift);
    }
    if (line.length() >= 87) {
        double shift = Double.parseDouble(line.substring(79, 87).trim());
        atom.setProperty("second shift", shift);
    }

    return atom;
}
/**
 * Read a bond line from an MDL V2000 molfile bond block (slow). Besides
 * creating the bond, the explicit valence (bond order sum) of both end atoms
 * is updated: orders 1-3 are added, aromatic and query bonds mark the
 * valence as undefined with {@code Integer.MIN_VALUE}.
 *
 * @param line            the input from the bond block
 * @param builder         chem object builder
 * @param atoms           array of atoms
 * @param explicitValence stores the explicit valence of each atom (bond order sum)
 * @param linecount       the current line count
 * @return a new bond
 * @throws CDKException the bond line could not be parsed
 */
private IBond readBondSlow(String line, IChemObjectBuilder builder, IAtom[] atoms, int[] explicitValence,
        int linecount) throws CDKException {
    final int beginNum = Integer.parseInt(line.substring(0, 3).trim());
    final int endNum = Integer.parseInt(line.substring(3, 6).trim());
    final int order = Integer.parseInt(line.substring(6, 9).trim());

    IBond.Stereo stereo = null;
    if (line.length() < 12) {
        handleError("Missing expected stereo field at line: ", linecount, 10, 12);
    } else {
        // the stereo field always spans columns 9-12 once the line is long enough
        final int mdlStereo = Integer.parseInt(line.substring(9, 12).trim());
        switch (mdlStereo) {
            case 0:
                // no flag: a double bond's stereo is defined by coordinates
                stereo = order == 2 ? IBond.Stereo.E_Z_BY_COORDINATES : IBond.Stereo.NONE;
                break;
            case 1:
                // MDL up bond
                stereo = IBond.Stereo.UP;
                break;
            case 3:
                // unknown E/Z stereochemistry, only meaningful on a double bond
                if (order == 2) stereo = IBond.Stereo.E_OR_Z;
                break;
            case 4:
                // MDL bond undefined
                stereo = IBond.Stereo.UP_OR_DOWN;
                break;
            case 6:
                // MDL down bond
                stereo = IBond.Stereo.DOWN;
                break;
            default:
                break;
        }
    }

    if (logger.isDebugEnabled()) {
        logger.debug("Bond: " + beginNum + " - " + endNum + "; order " + order);
    }

    // atom numbers in the file are 1-based
    final int beginIdx = beginNum - 1;
    final int endIdx = endNum - 1;
    final IAtom a1 = atoms[beginIdx];
    final IAtom a2 = atoms[endIdx];

    // interpret CTfile's special bond orders
    IBond newBond;
    if (order == 4) {
        // aromatic bond
        newBond = stereo != null
                ? builder.newInstance(IBond.class, a1, a2, IBond.Order.UNSET, stereo)
                : builder.newInstance(IBond.class, a1, a2, IBond.Order.UNSET);
        // mark both atoms and the bond as aromatic and raise the SINGLE_OR_DOUBLE-flag
        newBond.setFlag(CDKConstants.SINGLE_OR_DOUBLE, true);
        newBond.setFlag(CDKConstants.ISAROMATIC, true);
        a1.setFlag(CDKConstants.ISAROMATIC, true);
        a2.setFlag(CDKConstants.ISAROMATIC, true);
        explicitValence[endIdx] = Integer.MIN_VALUE;
        explicitValence[beginIdx] = Integer.MIN_VALUE;
    } else if (order >= 1 && order <= 3) {
        final IBond.Order cdkOrder;
        if (order == 3) {
            cdkOrder = IBond.Order.TRIPLE;
        } else if (order == 2) {
            cdkOrder = IBond.Order.DOUBLE;
        } else {
            cdkOrder = IBond.Order.SINGLE;
        }
        newBond = stereo != null
                ? builder.newInstance(IBond.class, a1, a2, cdkOrder, stereo)
                : builder.newInstance(IBond.class, a1, a2, cdkOrder);
        explicitValence[beginIdx] += cdkOrder.numeric();
        explicitValence[endIdx] += cdkOrder.numeric();
    } else {
        // query bond types 5-8; any other order leaves the query type null
        final CTFileQueryBond queryBond = new CTFileQueryBond(builder);
        queryBond.setAtoms(new IAtom[]{a1, a2});
        queryBond.setOrder(IBond.Order.UNSET);
        CTFileQueryBond.Type queryBondType = null;
        if (order == 5) {
            queryBondType = CTFileQueryBond.Type.SINGLE_OR_DOUBLE;
        } else if (order == 6) {
            queryBondType = CTFileQueryBond.Type.SINGLE_OR_AROMATIC;
        } else if (order == 7) {
            queryBondType = CTFileQueryBond.Type.DOUBLE_OR_AROMATIC;
        } else if (order == 8) {
            queryBondType = CTFileQueryBond.Type.ANY;
        }
        queryBond.setType(queryBondType);
        queryBond.setStereo(stereo);
        newBond = queryBond;
        explicitValence[endIdx] = Integer.MIN_VALUE;
        explicitValence[beginIdx] = Integer.MIN_VALUE;
    }
    return newBond;
}
/**
 * Read the properties from the V2000 block (slow). Lines are consumed until
 * the end-of-properties line is found. The handled entries are charges
 * (CHG), isotopes (ISO), radicals (RAD), R-group numbers (RGP), atom aliases
 * ('A' lines), group abbreviations ('G' lines) and atom comments ('V'
 * lines); any other line is logged and skipped.
 *
 * Property lines are matched by fixed-width prefix: the letter 'M' followed
 * by TWO spaces and the three letter tag (6 characters in total), and
 * 'G' / 'V' followed by TWO spaces. The column offsets used below (e.g.
 * {@code substring(6, 9)} for a count, {@code substring(3, 6)} for an atom
 * number) depend on that width.
 *
 * @param input     input source
 * @param container the container with the atoms / bonds loaded
 * @param nAtoms    the number of atoms in the atom block
 * @param linecount the line count
 * @throws IOException  internal low-level error
 * @throws CDKException the properties block could not be parsed
 */
private void readPropertiesSlow(BufferedReader input, IAtomContainer container, int nAtoms, int linecount)
        throws IOException, CDKException {
    logger.info("Reading property block");
    String line;
    while (true) {
        line = input.readLine();
        linecount++;
        if (line == null) {
            handleError("The expected property block is missing!", linecount, 0, 0);
            // handleError does not always throw (other callers continue after
            // it); with the input exhausted stop here instead of dereferencing null
            break;
        }
        if (line.startsWith("M  END")) break;

        // set by every branch that recognises the line, see the warning at the end
        boolean lineRead = false;
        if (line.startsWith("M  CHG")) {
            // FIXME: if this is encountered for the first time, all
            // atom charges should be set to zero first!
            int infoCount = Integer.parseInt(line.substring(6, 9).trim());
            StringTokenizer st = new StringTokenizer(line.substring(9));
            for (int i = 1; i <= infoCount; i++) {
                String token = st.nextToken();
                int atomNumber = Integer.parseInt(token.trim());
                token = st.nextToken();
                int charge = Integer.parseInt(token.trim());
                container.getAtom(atomNumber - 1).setFormalCharge(charge);
            }
            lineRead = true;
        } else if (line.matches("A\\s{1,4}\\d+")) {
            // Reads the pseudo atom property from the mol file
            // The atom number of the to replaced atom
            int aliasAtomNumber = Integer.parseInt(line.replaceFirst("A\\s{1,4}", ""));
            // the alias text is on the following line
            String alias = input.readLine();
            linecount++;
            IAtom aliasAtom = container.getAtom(aliasAtomNumber - 1);
            // skip if already a pseudoatom
            if (aliasAtom instanceof IPseudoAtom) {
                ((IPseudoAtom) aliasAtom).setLabel(alias);
                continue;
            }
            IAtom newPseudoAtom = container.getBuilder().newInstance(IPseudoAtom.class, alias);
            if (aliasAtom.getPoint2d() != null) newPseudoAtom.setPoint2d(aliasAtom.getPoint2d());
            if (aliasAtom.getPoint3d() != null) newPseudoAtom.setPoint3d(aliasAtom.getPoint3d());
            AtomContainerManipulator.replaceAtomByAtom(container, aliasAtom, newPseudoAtom);
            lineRead = true;
        } else if (line.startsWith("M  ISO")) {
            try {
                String countString = line.substring(6, 10).trim();
                int infoCount = Integer.parseInt(countString);
                StringTokenizer st = new StringTokenizer(line.substring(10));
                for (int i = 1; i <= infoCount; i++) {
                    int atomNumber = Integer.parseInt(st.nextToken().trim());
                    int absMass = Integer.parseInt(st.nextToken().trim());
                    if (absMass != 0) {
                        IAtom isotope = container.getAtom(atomNumber - 1);
                        isotope.setMassNumber(absMass);
                    }
                }
            } catch (NumberFormatException exception) {
                String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": "
                        + line + " in property block.";
                logger.error(error);
                handleError("NumberFormatException in isotope information.", linecount, 7, 11, exception);
            }
            lineRead = true;
        } else if (line.startsWith("M  RAD")) {
            try {
                String countString = line.substring(6, 9).trim();
                int infoCount = Integer.parseInt(countString);
                StringTokenizer st = new StringTokenizer(line.substring(9));
                for (int i = 1; i <= infoCount; i++) {
                    int atomNumber = Integer.parseInt(st.nextToken().trim());
                    int spinMultiplicity = Integer.parseInt(st.nextToken().trim());
                    MDLV2000Writer.SPIN_MULTIPLICITY spin = MDLV2000Writer.SPIN_MULTIPLICITY.NONE;
                    if (spinMultiplicity > 0) {
                        IAtom radical = container.getAtom(atomNumber - 1);
                        switch (spinMultiplicity) {
                            case 1:
                                spin = MDLV2000Writer.SPIN_MULTIPLICITY.DOUBLET;
                                break;
                            case 2:
                                spin = MDLV2000Writer.SPIN_MULTIPLICITY.SINGLET;
                                break;
                            case 3:
                                spin = MDLV2000Writer.SPIN_MULTIPLICITY.TRIPLET;
                                break;
                            default:
                                logger.debug("Invalid spin multiplicity found: " + spinMultiplicity);
                                break;
                        }
                        // one single electron per unpaired electron of the multiplicity
                        for (int j = 0; j < spin.getSingleElectrons(); j++) {
                            container.addSingleElectron(container.getBuilder().newInstance(ISingleElectron.class,
                                    radical));
                        }
                    }
                }
            } catch (NumberFormatException exception) {
                String error = "Error (" + exception.getMessage() + ") while parsing line " + linecount + ": "
                        + line + " in property block.";
                logger.error(error);
                handleError("NumberFormatException in radical information", linecount, 7, 10, exception);
            }
            lineRead = true;
        } else if (line.startsWith("G  ")) {
            try {
                String atomNumberString = line.substring(3, 6).trim();
                int atomNumber = Integer.parseInt(atomNumberString);
                // the group name is on the following line; keep the line
                // count in step so later error positions stay accurate
                String atomName = input.readLine();
                linecount++;

                // convert Atom into a PseudoAtom
                IAtom prevAtom = container.getAtom(atomNumber - 1);
                IPseudoAtom pseudoAtom = container.getBuilder().newInstance(IPseudoAtom.class, atomName);
                if (prevAtom.getPoint2d() != null) {
                    pseudoAtom.setPoint2d(prevAtom.getPoint2d());
                }
                if (prevAtom.getPoint3d() != null) {
                    pseudoAtom.setPoint3d(prevAtom.getPoint3d());
                }
                AtomContainerManipulator.replaceAtomByAtom(container, prevAtom, pseudoAtom);
            } catch (NumberFormatException exception) {
                String error = "Error (" + exception.toString() + ") while parsing line " + linecount + ": " + line
                        + " in property block.";
                logger.error(error);
                handleError("NumberFormatException in group information", linecount, 4, 7, exception);
            }
            lineRead = true;
        } else if (line.startsWith("M  RGP")) {
            StringTokenizer st = new StringTokenizer(line);
            //Ignore first 3 tokens (overhead).
            st.nextToken();
            st.nextToken();
            st.nextToken();
            //Process the R group numbers as defined in RGP line.
            while (st.hasMoreTokens()) {
                int position = Integer.parseInt(st.nextToken());
                int rNumber = Integer.parseInt(st.nextToken());
                // the container may have already had atoms before the new atoms were read
                int index = container.getAtomCount() - nAtoms + position - 1;
                IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(index);
                if (pseudoAtom != null) {
                    pseudoAtom.setLabel("R" + rNumber);
                }
            }
            lineRead = true;
        }
        if (line.startsWith("V  ")) {
            Integer atomNumber = Integer.valueOf(line.substring(3, 6).trim());
            IAtom atomWithComment = container.getAtom(atomNumber - 1);
            atomWithComment.setProperty(CDKConstants.COMMENT, line.substring(7));
            lineRead = true;
        }

        if (!lineRead) {
            logger.warn("Skipping line in property block: ", line);
        }
    }
}
/**
 * Reads the non-structural data items of an SD file record and stores each
 * one as a property on the supplied container, keyed by its field name. The
 * data items follow the molfile and run up to the record delimiter or the
 * end of the input. Each item is a data header line followed by one or more
 * data lines, for example:
 *
 * <pre>{@code
 * > 29 <DENSITY>
 * 0.9132 - 20.0
 *
 * > 29 <BOILING.POINT>
 * 63.0 (737 MM)
 * 79.0 (42 MM)
 *
 * > 29 <ALTERNATE.NAMES>
 * SYLVAN
 *
 * > 29 <DATE>
 * 09-23-1980
 *
 * > 29 <CRC.NUMBER>
 * F-0213
 *
 * }</pre>
 *
 * Data lines are trimmed and blank lines are dropped, except that a line
 * consisting of exactly one space is kept when it is the first data line of
 * a field. Consecutive data lines are joined with a new line character,
 * unless the preceding (trimmed) line was exactly 80 characters long, in
 * which case the next line is appended directly as a continuation. Data
 * lines that appear before the first header are discarded.
 *
 * @param input     input source
 * @param container the container that receives the properties
 * @throws IOException an error occurred whilst reading the input
 */
static void readNonStructuralData(final BufferedReader input, final IAtomContainer container) throws IOException {
    String currentField = null;
    boolean continuation = false;
    final StringBuilder value = new StringBuilder(80);

    for (String raw = input.readLine(); !endOfRecord(raw); raw = input.readLine()) {
        final String field = dataHeader(raw);

        if (field != null) {
            // a new header closes the field collected so far
            if (currentField != null) {
                container.setProperty(currentField, value.toString());
            }
            currentField = field;
            continuation = false;
            value.setLength(0);
            continue;
        }

        // a lone space is preserved only as the very first data line
        final boolean keepVerbatim = value.length() == 0 && raw.equals(" ");
        final String text = keepVerbatim ? raw : raw.trim();
        if (text.isEmpty()) {
            continue;
        }

        // separate lines unless the previous one filled all 80 columns
        if (value.length() > 0 && !continuation) {
            value.append('\n');
        }
        value.append(text);
        continuation = text.length() == 80;
    }

    // flush the last field of the record
    if (currentField != null) {
        container.setProperty(currentField, value.toString());
    }
}
/**
 * Obtain the field name from a potential SD data header. A data header
 * starts with {@code '>'} and carries the field name between angle
 * brackets, e.g. {@code "> 29 <DENSITY>"} yields {@code "DENSITY"}. The
 * opening bracket is searched for from the third character onwards. If the
 * line is not a header, or the header does not contain a field name, then
 * null is returned. The method does not currently return field numbers
 * (e.g. {@code DT<n>}).
 *
 * @param line an input line (non-null)
 * @return the field name, or null if the line has none
 */
static String dataHeader(final String line) {
    // Only lines that begin with '>' are headers. Anything else is data, even
    // when it contains text in angle brackets (e.g. "1 <2> 3"), and must not
    // start a new field.
    if (line.isEmpty() || line.charAt(0) != '>') return null;
    final int open = line.indexOf('<', 2);
    if (open < 0) return null;
    final int close = line.indexOf('>', open);
    if (close < 0) return null;
    return line.substring(open + 1, close);
}
/**
 * Tests whether a line terminates the current record. That is the case when
 * the line is 'null' (nothing more could be read) or when it equals the SDF
 * record delimiter.
 *
 * @param line a line from the input, may be null
 * @return true if the end of the record was reached
 */
private static boolean endOfRecord(final String line) {
    if (line == null) {
        return true;
    }
    return line.equals(RECORD_DELIMITER);
}
/**
* Enumeration of property keys that can be specified in the V2000 property
* block.
*/
enum PropertyKey {
/** Atom Alias. */
ATOM_ALIAS,
/** Atom Value. */
ATOM_VALUE,
/** Group Abbreviation. */
GROUP_ABBREVIATION,
/** Skip lines. */
SKIP,
/** Charge [Generic]. */
M_CHG,
/** Radical [Generic]. */
M_RAD,
/** Isotope [Generic]. */
M_ISO,
/** Ring Bond Count [Query]. */
M_RBC,
/** Substitution Count [Query]. */
M_SUB,
/** Unsaturated Atom [Query]. */
M_UNS,
/** Link Atom [Query]. */
M_LIN,
/** Atom List [Query]. */
M_ALS,
/** Attachment Point [Rgroup]. */
M_APO,
/** Atom Attachment Order [Rgroup]. */
M_AAL,
/** Rgroup Label Location [Rgroup]. */
M_RGP,
/** Rgroup Logic, Unsatisfied Sites, Range of Occurrence [Rgroup]. */
M_LOG,
/** Sgroup Type [Sgroup]. */
M_STY,
/** Sgroup Subtype [Sgroup]. */
M_SST,
/** Sgroup Labels [Sgroup]. */
M_SLB,
/** Sgroup Connectivity [Sgroup]. */
M_SCN,
/** Sgroup Expansion [Sgroup]. */
M_SDS,
/** Sgroup Atom List [Sgroup]. */
M_SAL,
/** Sgroup Bond List [Sgroup]. */
M_SBL,
/** Multiple Group Parent Atom List [Sgroup]. */
M_SPA,
/** Sgroup Subscript [Sgroup]. */
M_SMT,
/** Sgroup Correspondence [Sgroup]. */
M_CRS,
/** Sgroup Display Information [Sgroup]. */
M_SDI,
/** Superatom Bond and Vector Information [Sgroup]. */
M_SBV,
/** Data Sgroup Field Description [Sgroup]. */
M_SDT,
/** Data Sgroup Display Information [Sgroup]. */
M_SDD,
/** Data Sgroup Data. */
M_SCD,
/** Data Sgroup Data. */
M_SED,
/** Sgroup Hierarchy Information. */
M_SPL,
/** Sgroup Component Numbers. */
M_SNC,
/** Sgroup Bracket Style. */
M_SBT,
/** 3D Feature Properties. */
M_$3D,
/** ACDLabs Atom Label */
M_ZZC,
/** End of Block. */
M_END,
/** Non-property header. */
UNKNOWN;
/** Index of 'M XXX' properties for quick lookup. */
private static final Map mSuffix = new HashMap(60);
static {
for (PropertyKey p : values()) {
if (p.name().charAt(0) == 'M') mSuffix.put(p.name().substring(2, 5), p);
}
}
/**
* Determine the property key of the provided line.
*
* @param line an property line
* @return the key (defaults to {@link #UNKNOWN})
*/
static PropertyKey of(final String line) {
if (line.length() < 5) return UNKNOWN;
switch (line.charAt(0)) {
case 'A':
if (line.charAt(1) == ' ' && line.charAt(2) == ' ') return ATOM_ALIAS;
return UNKNOWN;
case 'G':
if (line.charAt(1) == ' ' && line.charAt(2) == ' ') return GROUP_ABBREVIATION;
return UNKNOWN;
case 'S':
if (line.charAt(1) == ' ' && line.charAt(2) == ' ') return SKIP;
return UNKNOWN;
case 'V':
if (line.charAt(1) == ' ' && line.charAt(2) == ' ') return ATOM_VALUE;
return UNKNOWN;
case 'M':
if (line.charAt(1) != ' ' || line.charAt(2) != ' ') return UNKNOWN;
PropertyKey property = mSuffix.get(line.substring(3, 6));
if (property != null) return property;
return UNKNOWN;
}
return UNKNOWN;
}
}
/**
* Defines the version of the CTab.
*/
enum CTabVersion {
V2000, V3000, UNSPECIFIED;
/**
* Given a CTab header, what version was specified. The version
* is identifier in the by the presence of 'V[2|3]000'. If not
* version tag is present the version is unspecified.
*
* 5 5 0 0 0 0 999 V2000
* 0 0 0 0 0 0 999 V3000
*
* @param header input line (non-null)
* @return the CTab version
*/
static CTabVersion ofHeader(String header) {
if (header.length() < 39) return UNSPECIFIED;
char c = header.charAt(34);
if (c != 'v' && c != 'V') return UNSPECIFIED;
if (header.charAt(35) == '2') // could check for '000'
return V2000;
if (header.charAt(35) == '3') // could check for '000'
return V3000;
return UNSPECIFIED;
}
}
}