All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.io.MDLV2000Writer Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/* Copyright (C) 1997-2007  The Chemistry Development Kit (CDK) project
 *                    2009  Egon Willighagen 
 *                    2010  Mark Rijnbeek 
 *
 * Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may distribute
 * with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */
package org.openscience.cdk.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.text.NumberFormat;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.openscience.cdk.CDKConstants;
import org.openscience.cdk.config.Isotopes;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IBond.Order;
import org.openscience.cdk.interfaces.IChemFile;
import org.openscience.cdk.interfaces.IChemModel;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.interfaces.IChemSequence;
import org.openscience.cdk.interfaces.IPseudoAtom;
import org.openscience.cdk.interfaces.IStereoElement;
import org.openscience.cdk.interfaces.ITetrahedralChirality;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.MDLFormat;
import org.openscience.cdk.io.setting.BooleanIOSetting;
import org.openscience.cdk.io.setting.IOSetting;
import org.openscience.cdk.sgroup.Sgroup;
import org.openscience.cdk.sgroup.SgroupBracket;
import org.openscience.cdk.sgroup.SgroupKey;
import org.openscience.cdk.sgroup.SgroupType;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;
import org.openscience.cdk.tools.manipulator.AtomContainerManipulator;
import org.openscience.cdk.tools.manipulator.ChemFileManipulator;

/**
 * Writes MDL molfiles, which contains a single molecule (see {@cdk.cite DAL92}).
 * For writing a MDL molfile you can this code:
 * 
 * MDLV2000Writer writer = new MDLV2000Writer(
 *   new FileWriter(new File("output.mol"))
 * );
 * writer.write((IAtomContainer)molecule);
 * writer.close();
 * 
*

*

The writer has two IO settings: one for writing 2D coordinates, even if * 3D coordinates are given for the written data; the second writes aromatic * bonds as bond type 4, which is, strictly speaking, a query bond type, but * my many tools used to reflect aromaticity. The full IO setting API is * explained in CDK News {@cdk.cite WILLIGHAGEN2004}. One programmatic option * to set the option for writing 2D coordinates looks like: *

 * Properties customSettings = new Properties();
 * customSettings.setProperty(
 *  "ForceWriteAs2DCoordinates", "true"
 * );
 * PropertiesListener listener =
 *   new PropertiesListener(customSettings);
 * writer.addChemObjectIOListener(listener);
 * 
* * @cdk.module io * @cdk.githash * @cdk.iooptions * @cdk.keyword file format, MDL molfile */ public class MDLV2000Writer extends DefaultChemObjectWriter { private final static ILoggingTool logger = LoggingToolFactory.createLoggingTool(MDLV2000Writer.class); // regular expression to capture R groups with attached numbers private Pattern NUMERED_R_GROUP = Pattern.compile("R(\\d+)"); /** * Enumeration of all valid radical values. */ public enum SPIN_MULTIPLICITY { NONE(0, 0), SINGLET(2, 1), DOUBLET(1, 2), TRIPLET(3, 2); // the radical SDF value private final int value; // the corresponding number of single electrons private final int singleElectrons; private SPIN_MULTIPLICITY(int value, int singleElectrons) { this.value = value; this.singleElectrons = singleElectrons; } /** * Radical value for the spin multiplicity in the properties block. * * @return the radical value */ public int getValue() { return value; } /** * The number of single electrons that correspond to the spin multiplicity. * * @return the number of single electrons */ public int getSingleElectrons() { return singleElectrons; } /** * Create a SPIN_MULTIPLICITY instance for the specified value. * * @param value input value (in the property block) * @return instance * @throws CDKException unknown spin multiplicity value */ public static SPIN_MULTIPLICITY ofValue(int value) throws CDKException { switch (value) { case 0: return NONE; case 1: return DOUBLET; case 2: return SINGLET; case 3: return TRIPLET; default: throw new CDKException("unknown spin multiplicity: " + value); } } } // number of entries on line; value = 1 to 8 private static final int NN8 = 8; // spacing between entries on line private static final int WIDTH = 3; private BooleanIOSetting forceWriteAs2DCoords; // The next two options are MDL Query format options, not really // belonging to the MDLV2000 format, and will be removed when // a MDLV2000QueryWriter is written. /* * Should aromatic bonds be written as bond type 4? If true, this makes the * output a query file. */ private BooleanIOSetting writeAromaticBondTypes; /* Should atomic valencies be written in the Query format. */ @Deprecated private BooleanIOSetting writeQueryFormatValencies; private BufferedWriter writer; /** * Constructs a new MDLWriter that can write an {@link IAtomContainer} * to the MDL molfile format. * * @param out The Writer to write to */ public MDLV2000Writer(Writer out) { if (out instanceof BufferedWriter) { writer = (BufferedWriter) out; } else { writer = new BufferedWriter(out); } initIOSettings(); } /** * Constructs a new MDLWriter that can write an {@link IAtomContainer} * to a given OutputStream. * * @param output The OutputStream to write to */ public MDLV2000Writer(OutputStream output) { this(new OutputStreamWriter(output, StandardCharsets.UTF_8)); } public MDLV2000Writer() { this(new StringWriter()); } @Override public IResourceFormat getFormat() { return MDLFormat.getInstance(); } @Override public void setWriter(Writer out) throws CDKException { if (out instanceof BufferedWriter) { writer = (BufferedWriter) out; } else { writer = new BufferedWriter(out); } } @Override public void setWriter(OutputStream output) throws CDKException { setWriter(new OutputStreamWriter(output)); } /** * Flushes the output and closes this object. */ @Override public void close() throws IOException { writer.close(); } @Override public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (int i = 0; i < interfaces.length; i++) { if (IAtomContainer.class.equals(interfaces[i])) return true; if (IChemFile.class.equals(interfaces[i])) return true; if (IChemModel.class.equals(interfaces[i])) return true; } if (IAtomContainer.class.equals(classObject)) return true; if (IChemFile.class.equals(classObject)) return true; if (IChemModel.class.equals(classObject)) return true; Class superClass = classObject.getSuperclass(); if (superClass != null) return this.accepts(superClass); return false; } /** * Writes a {@link IChemObject} to the MDL molfile formated output. * It can only output ChemObjects of type {@link IChemFile}, * {@link IChemObject} and {@link IAtomContainer}. * * @param object {@link IChemObject} to write * @see #accepts(Class) */ @Override public void write(IChemObject object) throws CDKException { customizeJob(); try { if (object instanceof IChemFile) { writeChemFile((IChemFile) object); return; } else if (object instanceof IChemModel) { IChemFile file = object.getBuilder().newInstance(IChemFile.class); IChemSequence sequence = object.getBuilder().newInstance(IChemSequence.class); sequence.addChemModel((IChemModel) object); file.addChemSequence(sequence); writeChemFile((IChemFile) file); return; } else if (object instanceof IAtomContainer) { writeMolecule((IAtomContainer) object); return; } } catch (Exception ex) { logger.error(ex.getMessage()); logger.debug(ex); throw new CDKException("Exception while writing MDL file: " + ex.getMessage(), ex); } throw new CDKException("Only supported is writing of IChemFile, " + "IChemModel, and IAtomContainer objects."); } private void writeChemFile(IChemFile file) throws Exception { IAtomContainer bigPile = file.getBuilder().newInstance(IAtomContainer.class); for (IAtomContainer container : ChemFileManipulator.getAllAtomContainers(file)) { bigPile.add(container); if (container.getProperty(CDKConstants.TITLE) != null) { if (bigPile.getProperty(CDKConstants.TITLE) != null) bigPile.setProperty(CDKConstants.TITLE, bigPile.getProperty(CDKConstants.TITLE) + "; " + container.getProperty(CDKConstants.TITLE)); else bigPile.setProperty(CDKConstants.TITLE, container.getProperty(CDKConstants.TITLE)); } if (container.getProperty(CDKConstants.REMARK) != null) { if (bigPile.getProperty(CDKConstants.REMARK) != null) bigPile.setProperty(CDKConstants.REMARK, bigPile.getProperty(CDKConstants.REMARK) + "; " + container.getProperty(CDKConstants.REMARK)); else bigPile.setProperty(CDKConstants.REMARK, container.getProperty(CDKConstants.REMARK)); } } writeMolecule(bigPile); } /** * Writes a Molecule to an OutputStream in MDL sdf format. * * @param container Molecule that is written to an OutputStream */ public void writeMolecule(IAtomContainer container) throws Exception { String line = ""; Map rgroups = null; Map aliases = null; // write header block // lines get shortened to 80 chars, that's in the spec String title = (String) container.getProperty(CDKConstants.TITLE); if (title == null) title = ""; if (title.length() > 80) title = title.substring(0, 80); writer.write(title); writer.newLine(); /* * From CTX spec This line has the format: * IIPPPPPPPPMMDDYYHHmmddSSssssssssssEEEEEEEEEEEERRRRRR (FORTRAN: * A2<--A8--><---A10-->A2I2<--F10.5-><---F12.5--><-I6-> ) User's first * and last initials (l), program name (P), date/time (M/D/Y,H:m), * dimensional codes (d), scaling factors (S, s), energy (E) if modeling * program input, internal registry number (R) if input through MDL * form. A blank line can be substituted for line 2. */ writer.write(" CDK "); writer.write(new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis())); writer.newLine(); String comment = (String) container.getProperty(CDKConstants.REMARK); if (comment == null) comment = ""; if (comment.length() > 80) comment = comment.substring(0, 80); writer.write(comment); writer.newLine(); // write Counts line line += formatMDLInt(container.getAtomCount(), 3); line += formatMDLInt(container.getBondCount(), 3); line += " 0 0 0 0 0 0 0 0999 V2000"; writer.write(line); writer.newLine(); // index stereo elements for setting atom parity values Map atomstereo = new HashMap<>(); Map atomindex = new HashMap<>(); for (IStereoElement element : container.stereoElements()) if (element instanceof ITetrahedralChirality) atomstereo.put(((ITetrahedralChirality) element).getChiralAtom(), (ITetrahedralChirality) element); for (IAtom atom : container.atoms()) atomindex.put(atom, atomindex.size()); // write Atom block for (int f = 0; f < container.getAtomCount(); f++) { IAtom atom = container.getAtom(f); line = ""; if (atom.getPoint3d() != null && !forceWriteAs2DCoords.isSet()) { line += formatMDLFloat((float) atom.getPoint3d().x); line += formatMDLFloat((float) atom.getPoint3d().y); line += formatMDLFloat((float) atom.getPoint3d().z) + " "; } else if (atom.getPoint2d() != null) { line += formatMDLFloat((float) atom.getPoint2d().x); line += formatMDLFloat((float) atom.getPoint2d().y); line += " 0.0000 "; } else { // if no coordinates available, then output a number // of zeros line += formatMDLFloat((float) 0.0); line += formatMDLFloat((float) 0.0); line += formatMDLFloat((float) 0.0) + " "; } if (container.getAtom(f) instanceof IPseudoAtom) { //according to http://www.google.co.uk/url?sa=t&ct=res&cd=2&url=http%3A%2F%2Fwww.mdl.com%2Fdownloads%2Fpublic%2Fctfile%2Fctfile.pdf&ei=MsJjSMbjAoyq1gbmj7zCDQ&usg=AFQjCNGaJSvH4wYy4FTXIaQ5f7hjoTdBAw&sig2=eSfruNOSsdMFdlrn7nhdAw an R group is written as R# IPseudoAtom pseudoAtom = (IPseudoAtom) container.getAtom(f); String label = pseudoAtom.getLabel(); if (label == null) // set to empty string if null label = ""; // firstly check if it's a numbered R group Matcher matcher = NUMERED_R_GROUP.matcher(label); if (pseudoAtom.getSymbol().equals("R") && !label.isEmpty() && matcher.matches()) { line += "R# "; if (rgroups == null) { // we use a tree map to ensure the output order is always the same rgroups = new TreeMap(); } rgroups.put(f + 1, Integer.parseInt(matcher.group(1))); } // not a numbered R group - note the symbol may still be R else { // note: no distinction made between alias and pseudo atoms - normally // aliases maintain their original symbol while pseudo atoms are // written with a 'A' in the atom block // if the label is longer then 3 characters we need // to use an alias. if (label.length() > 3) { if (aliases == null) aliases = new TreeMap(); aliases.put(f + 1, label); // atom index to alias line += formatMDLString(atom.getSymbol(), 3); } else { // label is short enough to fit in the atom block // make sure it's not empty if (!label.isEmpty()) line += formatMDLString(label, 3); else line += formatMDLString(atom.getSymbol(), 3); } } } else { line += formatMDLString(container.getAtom(f).getSymbol(), 3); } final ITetrahedralChirality tc = atomstereo.get(atom); if (tc == null) { line += " 0 0 0 0 0"; } else { int parity = tc.getStereo() == ITetrahedralChirality.Stereo.CLOCKWISE ? 1 : 2; IAtom focus = tc.getChiralAtom(); IAtom[] carriers = tc.getLigands(); int hidx = -1; for (int i = 0; i < 4; i++) { // hydrogen position if (carriers[i] == focus || carriers[i].getAtomicNumber() == 1) { if (hidx >= 0) parity = 0; hidx = i; } } if (parity != 0) { for (int i = 0; i < 4; i++) { for (int j = i + 1; j < 4; j++) { int a = atomindex.get(carriers[i]); int b = atomindex.get(carriers[j]); if (i == hidx) a = container.getAtomCount(); if (j == hidx) b = container.getAtomCount(); if (a > b) parity ^= 0x3; } } } line += String.format(" 0 0 %d 0 0", parity); } // write valence - this is a bit of pain as the CDK has both // valence and implied hydrogen counts making life a lot more // difficult than it needs to be - we also have formal // neighbor count but to avoid more verbosity that check has been // omitted { try { // slow but neat int explicitValence = (int) AtomContainerManipulator.getBondOrderSum(container, atom); int charge = atom.getFormalCharge() == null ? 0 : atom.getFormalCharge(); Integer element = atom.getAtomicNumber(); if (element == null) { line += formatMDLInt(0, 3); } else { int implied = MDLValence.implicitValence(element, charge, explicitValence); if (atom.getValency() != null && atom.getImplicitHydrogenCount() != null) { int valence = atom.getValency(); int actual = explicitValence + atom.getImplicitHydrogenCount(); // valence from h count differs from field? we still // set to default - which one has more merit? if (valence != actual || implied == atom.getValency()) line += formatMDLInt(0, 3); else if (valence == 0) line += formatMDLInt(15, 3); else if (valence > 0 && valence < 15) line += formatMDLInt(valence, 3); else line += formatMDLInt(0, 3); } else if (atom.getImplicitHydrogenCount() != null) { int actual = explicitValence + atom.getImplicitHydrogenCount(); if (implied == actual) { line += formatMDLInt(0, 3); } else { if (actual == 0) line += formatMDLInt(15, 3); else if (actual > 0 && actual < 15) line += formatMDLInt(actual, 3); else line += formatMDLInt(0, 3); } } else { int valence = atom.getValency(); // valence from h count differs from field? we still // set to default - which one has more merit? if (implied == valence) line += formatMDLInt(0, 3); else if (valence == 0) line += formatMDLInt(15, 3); else if (valence > 0 && valence < 15) line += formatMDLInt(valence, 3); else line += formatMDLInt(0, 3); } } } catch (RuntimeException e) { // null bond order, query bond order - who knows.. but line += formatMDLInt(0, 3); } } line += " 0 0 0"; if (container.getAtom(f).getProperty(CDKConstants.ATOM_ATOM_MAPPING) != null) { Object atomAtomMapping = container.getAtom(f).getProperty(CDKConstants.ATOM_ATOM_MAPPING); if (atomAtomMapping instanceof String) { try { int value = Integer.parseInt((String) atomAtomMapping); line += formatMDLInt(value, 3); } catch (NumberFormatException exception) { line += formatMDLInt(0, 3); logger.warn("Skipping atom-atom mapping, invalid value: " + atomAtomMapping); } } else if (atomAtomMapping instanceof Integer) { int value = (Integer) atomAtomMapping; line += formatMDLInt(value, 3); } else { line += formatMDLInt(0, 3); } } else { line += formatMDLInt(0, 3); } line += " 0 0"; writer.write(line); writer.newLine(); } // write Bond block Iterator bonds = container.bonds().iterator(); while (bonds.hasNext()) { IBond bond = bonds.next(); if (bond.getAtomCount() != 2) { logger.warn("Skipping bond with more/less than two atoms: " + bond); } else { if (bond.getStereo() == IBond.Stereo.UP_INVERTED || bond.getStereo() == IBond.Stereo.DOWN_INVERTED || bond.getStereo() == IBond.Stereo.UP_OR_DOWN_INVERTED) { // turn around atom coding to correct for inv stereo line = formatMDLInt(atomindex.get(bond.getAtom(1)) + 1, 3); line += formatMDLInt(atomindex.get(bond.getAtom(0)) + 1, 3); } else { line = formatMDLInt(atomindex.get(bond.getAtom(0)) + 1, 3); line += formatMDLInt(atomindex.get(bond.getAtom(1)) + 1, 3); } int bondType; if (writeAromaticBondTypes.isSet() && bond.getFlag(CDKConstants.ISAROMATIC)) bondType = 4; else if (Order.QUADRUPLE == bond.getOrder()) throw new CDKException("MDL molfiles do not support quadruple bonds."); else bondType = bond.getOrder().numeric(); line += formatMDLInt(bondType, 3); line += " "; switch (bond.getStereo()) { case UP: line += "1"; break; case UP_INVERTED: line += "1"; break; case DOWN: line += "6"; break; case DOWN_INVERTED: line += "6"; break; case UP_OR_DOWN: line += "4"; break; case UP_OR_DOWN_INVERTED: line += "4"; break; case E_OR_Z: line += "3"; break; default: line += "0"; } line += " 0 0 0 "; writer.write(line); writer.newLine(); } } // Write Atom Value for (int i = 0; i < container.getAtomCount(); i++) { IAtom atom = container.getAtom(i); if (atom.getProperty(CDKConstants.COMMENT) != null && atom.getProperty(CDKConstants.COMMENT) instanceof String && !((String) atom.getProperty(CDKConstants.COMMENT)).trim().equals("")) { writer.write("V "); writer.write(formatMDLInt(i + 1, 3)); writer.write(" "); writer.write((String) atom.getProperty(CDKConstants.COMMENT)); writer.newLine(); } } // write formal atomic charges for (int i = 0; i < container.getAtomCount(); i++) { IAtom atom = container.getAtom(i); Integer charge = atom.getFormalCharge(); if (charge != null && charge != 0) { writer.write("M CHG 1 "); writer.write(formatMDLInt(i + 1, 3)); writer.write(" "); writer.write(formatMDLInt(charge, 3)); writer.newLine(); } } // write radical information if (container.getSingleElectronCount() > 0) { Map atomIndexSpinMap = new LinkedHashMap(); for (int i = 0; i < container.getAtomCount(); i++) { int eCount = container.getConnectedSingleElectronsCount(container.getAtom(i)); switch (eCount) { case 0: continue; case 1: atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.SINGLET); break; case 2: atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.DOUBLET); break; case 3: atomIndexSpinMap.put(i, SPIN_MULTIPLICITY.TRIPLET); break; default: logger.debug("Invalid number of radicals found: " + eCount); break; } } Iterator> iterator = atomIndexSpinMap.entrySet().iterator(); for (int i = 0; i < atomIndexSpinMap.size(); i += NN8) { if (atomIndexSpinMap.size() - i <= NN8) { writer.write("M RAD" + formatMDLInt(atomIndexSpinMap.size() - i, WIDTH)); writeRadicalPattern(iterator, i); } else { writer.write("M RAD" + formatMDLInt(NN8, WIDTH)); writeRadicalPattern(iterator, i); } writer.newLine(); } } // write formal isotope information for (int i = 0; i < container.getAtomCount(); i++) { IAtom atom = container.getAtom(i); if (!(atom instanceof IPseudoAtom)) { Integer atomicMass = atom.getMassNumber(); if (atomicMass != null) { int majorMass = Isotopes.getInstance().getMajorIsotope(atom.getSymbol()).getMassNumber(); if (atomicMass != majorMass) { writer.write("M ISO 1 "); writer.write(formatMDLInt(i + 1, 3)); writer.write(" "); writer.write(formatMDLInt(atomicMass, 3)); writer.newLine(); } } } } //write RGP line (max occurrence is 16 data points per line) if (rgroups != null) { StringBuilder rgpLine = new StringBuilder(); int cnt = 0; // the order isn't guarantied but as we index with the atom // number this isn't an issue for (Map.Entry e : rgroups.entrySet()) { rgpLine.append(formatMDLInt(e.getKey(), 4)); rgpLine.append(formatMDLInt(e.getValue(), 4)); cnt++; if (cnt == 8) { rgpLine.insert(0, "M RGP" + formatMDLInt(cnt, 3)); writer.write(rgpLine.toString()); writer.newLine(); rgpLine = new StringBuilder(); cnt = 0; } } if (cnt != 0) { rgpLine.insert(0, "M RGP" + formatMDLInt(cnt, 3)); writer.write(rgpLine.toString()); writer.newLine(); } } // write atom aliases if (aliases != null) { for (Map.Entry e : aliases.entrySet()) { writer.write("A" + formatMDLInt(e.getKey(), 5)); writer.newLine(); String label = e.getValue(); // fixed width file - doubtful someone would have a label > 70 but trim if they do if (label.length() > 70) label = label.substring(0, 70); writer.write(label); writer.newLine(); } } writeSgroups(container, writer, atomindex); // close molecule writer.write("M END"); writer.newLine(); writer.flush(); } private void writeSgroups(IAtomContainer container, BufferedWriter writer, Map atomidxs) throws IOException { List sgroups = container.getProperty(CDKConstants.CTAB_SGROUPS); if (sgroups == null) return; // going to modify sgroups = new ArrayList<>(sgroups); // remove non-ctab Sgroups Iterator iter = sgroups.iterator(); while (iter.hasNext()) { if (iter.next().getType() == SgroupType.ExtMulticenter) iter.remove(); } for (List wrapSgroups : wrap(sgroups, 8)) { // Declare the SGroup type writer.write("M STY"); writer.write(formatMDLInt(wrapSgroups.size(), 3)); for (Sgroup sgroup : wrapSgroups) { writer.write(' '); writer.write(formatMDLInt(1 + sgroups.indexOf(sgroup), 3)); writer.write(' '); writer.write(sgroup.getType().getKey()); } writer.newLine(); } // Sgroup output is non-compact for now - but valid for (int id = 1; id <= sgroups.size(); id++) { Sgroup sgroup = sgroups.get(id - 1); // Sgroup Atom List for (List atoms : wrap(sgroup.getAtoms(), 15)) { writer.write("M SAL "); writer.write(formatMDLInt(id, 3)); writer.write(formatMDLInt(atoms.size(), 3)); for (IAtom atom : atoms) { writer.write(' '); writer.write(formatMDLInt(1+atomidxs.get(atom), 3)); } writer.newLine(); } // Sgroup Bond List for (List bonds : wrap(sgroup.getBonds(), 15)) { writer.write("M SBL "); writer.write(formatMDLInt(id, 3)); writer.write(formatMDLInt(bonds.size(), 3)); for (IBond bond : bonds) { writer.write(' '); writer.write(formatMDLInt(1+container.getBondNumber(bond), 3)); } writer.newLine(); } // Sgroup Parent List for (List parents : wrap(sgroup.getParents(), 8)) { writer.write("M SPL"); writer.write(formatMDLInt(parents.size(), 3)); for (Sgroup parent : parents) { writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write(formatMDLInt(1 + sgroups.indexOf(parent), 3)); } writer.newLine(); } Set attributeKeys = sgroup.getAttributeKeys(); // TODO order and aggregate attribute keys for (SgroupKey key : attributeKeys) { switch (key) { case CtabSubScript: writer.write("M SMT "); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write((String) sgroup.getValue(key)); writer.newLine(); break; case CtabExpansion: final boolean expanded = sgroup.getValue(key); if (expanded) { writer.write("M SDS EXP"); writer.write(formatMDLInt(1, 3)); writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.newLine(); } break; case CtabBracket: final List brackets = sgroup.getValue(key); for (SgroupBracket bracket : brackets) { writer.write("M SDI "); writer.write(formatMDLInt(id, 3)); writer.write(formatMDLInt(4, 3)); writer.write(formatMDLFloat((float) bracket.getFirstPoint().x)); writer.write(formatMDLFloat((float) bracket.getFirstPoint().y)); writer.write(formatMDLFloat((float) bracket.getSecondPoint().x)); writer.write(formatMDLFloat((float) bracket.getSecondPoint().y)); writer.newLine(); } break; case CtabBracketStyle: writer.write("M SBT"); writer.write(formatMDLInt(1, 3)); writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write(formatMDLInt((int)sgroup.getValue(key), 3)); writer.newLine(); break; case CtabConnectivity: writer.write("M SCN"); writer.write(formatMDLInt(1, 3)); writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write((String) sgroup.getValue(key)); writer.newLine(); break; case CtabSubType: writer.write("M SST"); writer.write(formatMDLInt(1, 3)); writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write((String) sgroup.getValue(key)); writer.newLine(); break; case CtabParentAtomList: Set parentAtomList = sgroup.getValue(key); for (List atoms : wrap(parentAtomList, 15)) { writer.write("M SPA "); writer.write(formatMDLInt(id, 3)); writer.write(formatMDLInt(atoms.size(), 3)); for (IAtom atom : atoms) { writer.write(' '); writer.write(formatMDLInt(1+atomidxs.get(atom), 3)); } writer.newLine(); } break; case CtabComponentNumber: Integer compNumber = sgroup.getValue(key); writer.write("M SNC"); writer.write(formatMDLInt(1, 3)); writer.write(' '); writer.write(formatMDLInt(id, 3)); writer.write(' '); writer.write(formatMDLInt(compNumber, 3)); writer.newLine(); break; } } } } private List> wrap(Collection set, int lim) { List> wrapped = new ArrayList<>(); List list = new ArrayList(set); if (set.size() <= lim) { if (!list.isEmpty()) wrapped.add(list); } else { int i = 0; for (; (i + lim) < set.size(); i += lim) { wrapped.add(list.subList(i, i + lim)); } wrapped.add(list.subList(i, list.size())); } return wrapped; } private void writeRadicalPattern(Iterator> iterator, int i) throws IOException { Map.Entry entry = iterator.next(); writer.write(" "); writer.write(formatMDLInt(entry.getKey() + 1, WIDTH)); writer.write(" "); writer.write(formatMDLInt(entry.getValue().getValue(), WIDTH)); i = i + 1; if (i < NN8 && iterator.hasNext()) writeRadicalPattern(iterator, i); } /** * Formats an integer to fit into the connection table and changes it * to a String. * * @param i The int to be formated * @param l Length of the String * @return The String to be written into the connectiontable */ protected static String formatMDLInt(int i, int l) { String s = "", fs = ""; NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setParseIntegerOnly(true); nf.setMinimumIntegerDigits(1); nf.setMaximumIntegerDigits(l); nf.setGroupingUsed(false); s = nf.format(i); l = l - s.length(); for (int f = 0; f < l; f++) fs += " "; fs += s; return fs; } /** * Formats a float to fit into the connectiontable and changes it * to a String. * * @param fl The float to be formated * @return The String to be written into the connectiontable */ protected static String formatMDLFloat(float fl) { String s = "", fs = ""; int l; NumberFormat nf = NumberFormat.getNumberInstance(Locale.ENGLISH); nf.setMinimumIntegerDigits(1); nf.setMaximumIntegerDigits(4); nf.setMinimumFractionDigits(4); nf.setMaximumFractionDigits(4); nf.setGroupingUsed(false); s = nf.format(fl); l = 10 - s.length(); for (int f = 0; f < l; f++) fs += " "; fs += s; return fs; } /** * Formats a String to fit into the connectiontable. * * @param s The String to be formated * @param le The length of the String * @return The String to be written in the connectiontable */ protected static String formatMDLString(String s, int le) { s = s.trim(); if (s.length() > le) return s.substring(0, le); int l; l = le - s.length(); for (int f = 0; f < l; f++) s += " "; return s; } /** * Initializes IO settings.
* Please note with regards to "writeAromaticBondTypes": bond type values 4 through 8 are for SSS queries only, * so a 'query file' is created if the container has aromatic bonds and this settings is true. */ private void initIOSettings() { forceWriteAs2DCoords = addSetting(new BooleanIOSetting("ForceWriteAs2DCoordinates", IOSetting.Importance.LOW, "Should coordinates always be written as 2D?", "false")); writeAromaticBondTypes = addSetting(new BooleanIOSetting("WriteAromaticBondTypes", IOSetting.Importance.LOW, "Should aromatic bonds be written as bond type 4?", "false")); writeQueryFormatValencies = addSetting(new BooleanIOSetting("WriteQueryFormatValencies", IOSetting.Importance.LOW, "Should valencies be written in the MDL Query format? (deprecated)", "false")); } public void customizeJob() { for (IOSetting setting : getSettings()) { fireIOSettingQuestion(setting); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy