All downloads are free. The search and download functionality uses the official Maven repository.

org.openscience.cdk.io.MDLV3000Reader Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/* Copyright (C) 2006-2008  Egon Willighagen 
 *
 * Contact: cdk-devel@lists.sourceforge.net
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.io;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Hashtable;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.StringTokenizer;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.vecmath.Point2d;
import javax.vecmath.Point3d;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.config.IsotopeFactory;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLV3000Format;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.BondManipulator;

/**
 * Class that implements the MDL mol V3000 format. This reader reads the
 * element symbol and 2D or 3D coordinates from the ATOM block.
 *
 * @cdk.module io
 * @cdk.githash
 * @cdk.iooptions
 *
 * @author      Egon Willighagen 
 * @cdk.created 2006
 *
 * @cdk.keyword MDL molfile V3000
 * @cdk.require java1.4+
 */
public class MDLV3000Reader extends DefaultChemObjectReader {

    BufferedReader              input  = null;
    private static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLV3000Reader.class);

    private Pattern             keyValueTuple;
    private Pattern             keyValueTuple2;

    private int                 lineNumber;

    public MDLV3000Reader(Reader in) {
        this(in, Mode.RELAXED);
    }

    public MDLV3000Reader(Reader in, Mode mode) {
        input = new BufferedReader(in);
        initIOSettings();
        super.mode = mode;
        /* compile patterns */
        keyValueTuple = Pattern.compile("\\s*(\\w+)=([^\\s]*)(.*)"); // e.g. CHG=-1
        keyValueTuple2 = Pattern.compile("\\s*(\\w+)=\\(([^\\)]*)\\)(.*)"); // e.g. ATOMS=(1 31)
        lineNumber = 0;
    }

    public MDLV3000Reader(InputStream input) {
        this(input, Mode.RELAXED);
    }

    public MDLV3000Reader(InputStream input, Mode mode) {
        this(new InputStreamReader(input), mode);
    }

    public MDLV3000Reader() {
        this(new StringReader(""));
    }

    @Override
    public IResourceFormat getFormat() {
        return MDLV3000Format.getInstance();
    }

    @Override
    public void setReader(Reader input) throws CDKException {
        if (input instanceof BufferedReader) {
            this.input = (BufferedReader) input;
        } else {
            this.input = new BufferedReader(input);
        }
        lineNumber = 0;
    }

    @Override
    public void setReader(InputStream input) throws CDKException {
        setReader(new InputStreamReader(input));
    }

    @Override
    public boolean accepts(Class classObject) {
        Class[] interfaces = classObject.getInterfaces();
        for (int i = 0; i < interfaces.length; i++) {
            if (IAtomContainer.class.equals(interfaces[i])) return true;
        }
        if (IAtomContainer.class.equals(classObject)) return true;
        Class superClass = classObject.getSuperclass();
        if (superClass != null) return this.accepts(superClass);
        return false;
    }

    @Override
    public  T read(T object) throws CDKException {
        if (object instanceof IAtomContainer) {
            return (T) readMolecule(object.getBuilder());
        }
        return null;
    }

    public IAtomContainer readMolecule(IChemObjectBuilder builder) throws CDKException {
        return readConnectionTable(builder);
    }

    public IAtomContainer readConnectionTable(IChemObjectBuilder builder) throws CDKException {
        logger.info("Reading CTAB block");
        IAtomContainer readData = builder.newInstance(IAtomContainer.class);
        boolean foundEND = false;
        String lastLine = readHeader(readData);
        while (isReady() && !foundEND) {
            String command = readCommand(lastLine);
            logger.debug("command found: " + command);
            if ("END CTAB".equals(command)) {
                foundEND = true;
            } else if ("BEGIN CTAB".equals(command)) {
                // that's fine
            } else if ("COUNTS".equals(command)) {
                // don't think I need to parse this
            } else if ("BEGIN ATOM".equals(command)) {
                readAtomBlock(readData);
            } else if ("BEGIN BOND".equals(command)) {
                readBondBlock(readData);
            } else if ("BEGIN SGROUP".equals(command)) {
                readSGroup(readData);
            } else {
                logger.warn("Unrecognized command: " + command);
            }
            lastLine = readLine();
        }
        return readData;
    }

    /**
     * @throws CDKException when no file content is detected
     * @return Last line read
     */
    public String readHeader(IAtomContainer readData) throws CDKException {
        // read four lines
        String line1 = readLine();
        if (line1 == null) {
            throw new CDKException("Expected a header line, but found nothing.");
        }
        if (line1.length() > 0) {
            if (line1.startsWith("M  V30")) {
                // no header
                return line1;
            }
            readData.setProperty(CDKConstants.TITLE, line1);
        }
        readLine();
        String line3 = readLine();
        if (line3.length() > 0) readData.setProperty(CDKConstants.COMMENT, line3);
        String line4 = readLine();
        if (!line4.contains("3000")) {
            throw new CDKException("This file is not a MDL V3000 molfile.");
        }
        return readLine();
    }

    /**
     * Reads the atoms, coordinates and charges.
     *
     * 

IMPORTANT: it does not support the atom list and its negation! */ public void readAtomBlock(IAtomContainer readData) throws CDKException { logger.info("Reading ATOM block"); IsotopeFactory isotopeFactory; try { isotopeFactory = Isotopes.getInstance(); } catch (IOException exception) { throw new CDKException("Could not initiate the IsotopeFactory.", exception); } int RGroupCounter = 1; int Rnumber = 0; String[] rGroup = null; boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END ATOM".equals(command)) { // FIXME: should check whether 3D is really 2D foundEND = true; } else { logger.debug("Parsing atom from: " + command); IAtom atom = readData.getBuilder().newInstance(IAtom.class); StringTokenizer tokenizer = new StringTokenizer(command); // parse the index try { atom.setID(tokenizer.nextToken()); } catch (Exception exception) { String error = "Error while parsing atom index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse the element String element = tokenizer.nextToken(); if (isotopeFactory.isElement(element)) { atom = isotopeFactory.configure(readData.getBuilder().newInstance(IAtom.class, element)); } else if ("A".equals(element)) { atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("Q".equals(element)) { atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("*".equals(element)) { atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("LP".equals(element)) { atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else if ("L".equals(element)) { atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else if (element.length() > 0 && element.charAt(0) == 'R') { logger.debug("Atom ", element, " is not an regular element. 
Creating a PseudoAtom."); //check if the element is R rGroup = element.split("^R"); if (rGroup.length > 1) { try { Rnumber = Integer.valueOf(rGroup[(rGroup.length - 1)]).intValue(); RGroupCounter = Rnumber; } catch (Exception ex) { Rnumber = RGroupCounter; RGroupCounter++; } element = "R" + Rnumber; } atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); } else { if (mode == ISimpleChemObjectReader.Mode.STRICT) { throw new CDKException( "Invalid element type. Must be an existing element, or one in: A, Q, L, LP, *."); } atom = readData.getBuilder().newInstance(IPseudoAtom.class, element); atom.setSymbol(element); } // parse atom coordinates (in Angstrom) try { String xString = tokenizer.nextToken(); String yString = tokenizer.nextToken(); String zString = tokenizer.nextToken(); double x = Double.parseDouble(xString); double y = Double.parseDouble(yString); double z = Double.parseDouble(zString); atom.setPoint3d(new Point3d(x, y, z)); atom.setPoint2d(new Point2d(x, y)); // FIXME: dirty! 
} catch (Exception exception) { String error = "Error while parsing atom coordinates"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // atom-atom mapping String mapping = tokenizer.nextToken(); if (!mapping.equals("0")) { logger.warn("Skipping atom-atom mapping: " + mapping); } // else: default 0 is no mapping defined // the rest are key value things if (command.indexOf('=') != -1) { Map options = parseOptions(exhaustStringTokenizer(tokenizer)); Iterator keys = options.keySet().iterator(); while (keys.hasNext()) { String key = keys.next(); String value = options.get(key); try { if (key.equals("CHG")) { int charge = Integer.parseInt(value); if (charge != 0) { // zero is no charge specified atom.setFormalCharge(charge); } } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } } // store atom readData.addAtom(atom); logger.debug("Added atom: " + atom); } } } /** * Reads the bond atoms, order and stereo configuration. 
*/ public void readBondBlock(IAtomContainer readData) throws CDKException { logger.info("Reading BOND block"); boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END BOND".equals(command)) { foundEND = true; } else { logger.debug("Parsing bond from: " + command); StringTokenizer tokenizer = new StringTokenizer(command); IBond bond = readData.getBuilder().newInstance(IBond.class); // parse the index try { String indexString = tokenizer.nextToken(); bond.setID(indexString); } catch (Exception exception) { String error = "Error while parsing bond index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse the order try { String orderString = tokenizer.nextToken(); int order = Integer.parseInt(orderString); if (order >= 4) { logger.warn("Query order types are not supported (yet). File a bug if you need it"); } else { bond.setOrder(BondManipulator.createBondOrder((double) order)); } } catch (Exception exception) { String error = "Error while parsing bond index"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse index atom 1 try { String indexAtom1String = tokenizer.nextToken(); int indexAtom1 = Integer.parseInt(indexAtom1String); IAtom atom1 = readData.getAtom(indexAtom1 - 1); bond.setAtom(atom1, 0); } catch (Exception exception) { String error = "Error while parsing index atom 1 in bond"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } // parse index atom 2 try { String indexAtom2String = tokenizer.nextToken(); int indexAtom2 = Integer.parseInt(indexAtom2String); IAtom atom2 = readData.getAtom(indexAtom2 - 1); bond.setAtom(atom2, 1); } catch (Exception exception) { String error = "Error while parsing index atom 2 in bond"; logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } List endpts = new ArrayList<>(); String attach = null; // the rest 
are key=value fields if (command.indexOf('=') != -1) { Map options = parseOptions(exhaustStringTokenizer(tokenizer)); for (String key : options.keySet()) { String value = options.get(key); try { if (key.equals("CFG")) { int configuration = Integer.parseInt(value); if (configuration == 0) { bond.setStereo(IBond.Stereo.NONE); } else if (configuration == 1) { bond.setStereo(IBond.Stereo.UP); } else if (configuration == 2) { bond.setStereo((IBond.Stereo) CDKConstants.UNSET); } else if (configuration == 3) { bond.setStereo(IBond.Stereo.DOWN); } } else if (key.equals("ENDPTS")) { String[] endptStr = value.split(" "); // skip first value that is count for (int i = 1; i < endptStr.length; i++) { endpts.add(readData.getAtom(Integer.parseInt(endptStr[i]) - 1)); } } else if (key.equals("ATTACH")) { attach = value; } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } } // storing bond readData.addBond(bond); // storing positional variation if ("ANY".equals(attach)) { Sgroup sgroup = new Sgroup(); sgroup.setType(SgroupType.ExtMulticenter); sgroup.addAtom(bond.getAtom(0)); // could be other end? sgroup.addBond(bond); for (IAtom endpt : endpts) sgroup.addAtom(endpt); List sgroups = readData.getProperty(CDKConstants.CTAB_SGROUPS); if (sgroups == null) readData.setProperty(CDKConstants.CTAB_SGROUPS, sgroups = new ArrayList<>(4)); sgroups.add(sgroup); } logger.debug("Added bond: " + bond); } } } /** * Reads labels. 
*/ public void readSGroup(IAtomContainer readData) throws CDKException { boolean foundEND = false; while (isReady() && !foundEND) { String command = readCommand(readLine()); if ("END SGROUP".equals(command)) { foundEND = true; } else { logger.debug("Parsing Sgroup line: " + command); StringTokenizer tokenizer = new StringTokenizer(command); // parse the index String indexString = tokenizer.nextToken(); logger.warn("Skipping external index: " + indexString); // parse command type String type = tokenizer.nextToken(); // parse the external index String externalIndexString = tokenizer.nextToken(); logger.warn("Skipping external index: " + externalIndexString); // the rest are key=value fields Map options = new Hashtable(); if (command.indexOf('=') != -1) { options = parseOptions(exhaustStringTokenizer(tokenizer)); } // now interpret line if (type.startsWith("SUP")) { Iterator keys = options.keySet().iterator(); int atomID = -1; String label = ""; while (keys.hasNext()) { String key = keys.next(); String value = options.get(key); try { if (key.equals("ATOMS")) { StringTokenizer atomsTokenizer = new StringTokenizer(value); Integer.parseInt(atomsTokenizer.nextToken()); // should be 1, int atomCount = atomID = Integer.parseInt(atomsTokenizer.nextToken()); } else if (key.equals("LABEL")) { label = value; } else { logger.warn("Not parsing key: " + key); } } catch (Exception exception) { String error = "Error while parsing key/value " + key + "=" + value + ": " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } if (atomID != -1 && label.length() > 0) { IAtom original = readData.getAtom(atomID - 1); IAtom replacement = original; if (!(original instanceof IPseudoAtom)) { replacement = readData.getBuilder().newInstance(IPseudoAtom.class, original); } ((IPseudoAtom) replacement).setLabel(label); if (replacement != original) AtomContainerManipulator.replaceAtomByAtom(readData, original, replacement); } } } else { 
logger.warn("Skipping unrecognized SGROUP type: " + type); } } } } /** * Reads the command on this line. If the line is continued on the next, that * part is added. * * @return Returns the command on this line. */ private String readCommand(String line) throws CDKException { if (line.startsWith("M V30 ")) { String command = line.substring(7); if (command.endsWith("-")) { command = command.substring(0, command.length() - 1); command += readCommand(readLine()); } return command; } else { throw new CDKException("Could not read MDL file: unexpected line: " + line); } } private Map parseOptions(String string) throws CDKException { Map keyValueTuples = new Hashtable(); while (string.length() >= 3) { logger.debug("Matching remaining option string: " + string); Matcher tuple1Matcher = keyValueTuple2.matcher(string); if (tuple1Matcher.matches()) { String key = tuple1Matcher.group(1); String value = tuple1Matcher.group(2); string = tuple1Matcher.group(3); logger.debug("Found key: " + key); logger.debug("Found value: " + value); keyValueTuples.put(key, value); } else { Matcher tuple2Matcher = keyValueTuple.matcher(string); if (tuple2Matcher.matches()) { String key = tuple2Matcher.group(1); String value = tuple2Matcher.group(2); string = tuple2Matcher.group(3); logger.debug("Found key: " + key); logger.debug("Found value: " + value); keyValueTuples.put(key, value); } else { logger.warn("Quiting; could not parse: " + string + "."); string = ""; } } } return keyValueTuples; } public String exhaustStringTokenizer(StringTokenizer tokenizer) { StringBuffer buffer = new StringBuffer(); buffer.append(' '); while (tokenizer.hasMoreTokens()) { buffer.append(tokenizer.nextToken()); buffer.append(' '); } return buffer.toString(); } public String readLine() throws CDKException { String line = null; try { line = input.readLine(); lineNumber++; logger.debug("read line " + lineNumber + ":", line); } catch (Exception exception) { String error = "Unexpected error while reading file: " + 
exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } return line; } public boolean isReady() throws CDKException { try { return input.ready(); } catch (Exception exception) { String error = "Unexpected error while reading file: " + exception.getMessage(); logger.error(error); logger.debug(exception); throw new CDKException(error, exception); } } @Override public void close() throws IOException { input.close(); } private void initIOSettings() {} }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy