All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.io.RGroupQueryWriter Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/*
 * Copyright (C) 2010  Mark Rijnbeek 
 *
 * Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 * All we ask is that proper credit is given for our work, which includes
 * - but is not limited to - adding the above copyright notice to the beginning
 * of your source code files, and to any copyright notice that you may
 * distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 */
package org.openscience.cdk.io;

import java.io.BufferedWriter;
import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.io.StringWriter;
import java.io.Writer;
import java.text.SimpleDateFormat;
import java.util.List;
import java.util.Map;

import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObject;
import org.openscience.cdk.io.formats.IResourceFormat;
import org.openscience.cdk.io.formats.RGroupQueryFormat;
import org.openscience.cdk.isomorphism.matchers.IRGroupQuery;
import org.openscience.cdk.isomorphism.matchers.RGroup;
import org.openscience.cdk.isomorphism.matchers.RGroupList;

/**
 * A writer for Symyx' Rgroup files (RGFiles).
* An RGfile describes a single molecular query with Rgroups. * Each RGfile is a combination of Ctabs defining the root molecule and each * member of each Rgroup in the query. *
* This class relies on the {@link org.openscience.cdk.io.MDLV2000Writer} to * create CTAB data blocks. * * @cdk.module io * @cdk.githash * @cdk.iooptions * @cdk.keyword Rgroup * @cdk.keyword R group * @cdk.keyword R-group * @author Mark Rijnbeek */ public class RGroupQueryWriter extends DefaultChemObjectWriter { private BufferedWriter writer; private static String LSEP = System.getProperty("line.separator"); /** * Constructs a new writer that can write an {@link IRGroupQuery} * to the Symx RGFile format. * * @param out The Writer to write to */ public RGroupQueryWriter(Writer out) { if (out instanceof BufferedWriter) { writer = (BufferedWriter) out; } else { writer = new BufferedWriter(out); } } /** * Zero argument constructor. */ public RGroupQueryWriter() { this(new StringWriter()); } /** * Returns true for accepted input types. */ @SuppressWarnings("unchecked") @Override public boolean accepts(Class classObject) { Class[] interfaces = classObject.getInterfaces(); for (Class anInterface : interfaces) { if (IRGroupQuery.class.equals(anInterface)) return true; } Class superClass = classObject.getSuperclass(); if (superClass != null) return this.accepts(superClass); return false; } /** * Flushes the output and closes this object. */ @Override public void close() throws IOException { writer.close(); } /** * Produces a CTAB block for an atomContainer, without the header lines. * @param atomContainer * @return CTAB block * @throws CDKException */ private String getCTAB(IAtomContainer atomContainer) throws CDKException { StringWriter strWriter = new StringWriter(); MDLV2000Writer mdlWriter = new MDLV2000Writer(strWriter); mdlWriter.write(atomContainer); try { mdlWriter.close(); } catch (IOException exception) { // FIXME } String ctab = strWriter.toString(); //strip of the individual header, as we have one super header instead. for (int line = 1; line <= 3; line++) { ctab = ctab.substring(ctab.indexOf(LSEP) + (LSEP.length())); } return ctab; } /** * Returns output format. */ @Override public IResourceFormat getFormat() { return RGroupQueryFormat.getInstance(); } /** * Sets the writer to given output stream. */ @Override public void setWriter(OutputStream output) throws CDKException { setWriter(new OutputStreamWriter(output)); } /** * Sets the writer. */ @Override public void setWriter(Writer out) throws CDKException { if (out instanceof BufferedWriter) { writer = (BufferedWriter) out; } else { writer = new BufferedWriter(out); } } /** * The actual writing of the output. * @throws CDKException * @throws IOException */ @Override public void write(IChemObject object) throws CDKException { if (!(object instanceof IRGroupQuery)) { throw new CDKException("Only IRGroupQuery input is accepted."); } try { IRGroupQuery rGroupQuery = (IRGroupQuery) object; String now = new SimpleDateFormat("MMddyyHHmm").format(System.currentTimeMillis()); IAtomContainer rootAtc = rGroupQuery.getRootStructure(); //Construct header StringBuffer rootBlock = new StringBuffer(); String header = "$MDL REV 1 " + now + LSEP + "$MOL" + LSEP + "$HDR" + LSEP + " Rgroup query file (RGFile)" + LSEP + " CDK " + now + "2D" + LSEP + LSEP + "$END HDR" + LSEP + "$CTAB"; rootBlock.append(header).append(LSEP); //Construct the root structure, the scaffold String rootCTAB = getCTAB(rootAtc); rootCTAB = rootCTAB.replaceAll(LSEP + "M END" + LSEP, ""); rootBlock.append(rootCTAB).append(LSEP); //Write the root's LOG lines for (Integer rgrpNum : rGroupQuery.getRGroupDefinitions().keySet()) { RGroupList rgList = rGroupQuery.getRGroupDefinitions().get(rgrpNum); int restH = rgList.isRestH() ? 1 : 0; String logLine = "M LOG" + MDLV2000Writer.formatMDLInt(1, 3) + MDLV2000Writer.formatMDLInt(rgrpNum, 4) + MDLV2000Writer.formatMDLInt(rgList.getRequiredRGroupNumber(), 4) + MDLV2000Writer.formatMDLInt(restH, 4) + " " + rgList.getOccurrence(); rootBlock.append(logLine).append(LSEP); } //AAL lines are optional, they are needed for R-atoms with multiple bonds to the root //for which the order of the attachment points can not be implicitly derived //from the order in the atom block. See CT spec for more on that. for (IAtom rgroupAtom : rGroupQuery.getRootAttachmentPoints().keySet()) { Map rApo = rGroupQuery.getRootAttachmentPoints().get(rgroupAtom); if (rApo.size() > 1) { int prevPos = -1; int apoIdx = 1; boolean implicitlyOrdered = true; while (rApo.get(apoIdx) != null && implicitlyOrdered) { IAtom partner = rApo.get(apoIdx).getConnectedAtom(rgroupAtom); for (int atIdx = 0; atIdx < rootAtc.getAtomCount(); atIdx++) { if (rootAtc.getAtom(atIdx).equals(partner)) { if (atIdx < prevPos) implicitlyOrdered = false; prevPos = atIdx; break; } } apoIdx++; } if (!implicitlyOrdered) { StringBuffer aalLine = new StringBuffer("M AAL"); for (int atIdx = 0; atIdx < rootAtc.getAtomCount(); atIdx++) { if (rootAtc.getAtom(atIdx).equals(rgroupAtom)) { aalLine.append(MDLV2000Writer.formatMDLInt((atIdx + 1), 4)); aalLine.append(MDLV2000Writer.formatMDLInt(rApo.size(), 3)); apoIdx = 1; while (rApo.get(apoIdx) != null) { IAtom partner = rApo.get(apoIdx).getConnectedAtom(rgroupAtom); for (int a = 0; a < rootAtc.getAtomCount(); a++) { if (rootAtc.getAtom(a).equals(partner)) { aalLine.append(MDLV2000Writer.formatMDLInt(a + 1, 4)); aalLine.append(MDLV2000Writer.formatMDLInt(apoIdx, 4)); } } apoIdx++; } } } rootBlock.append(aalLine.toString()).append(LSEP); } } } rootBlock.append("M END").append(LSEP).append("$END CTAB").append(LSEP); //Construct each R-group block StringBuffer rgpBlock = new StringBuffer(); for (Integer rgrpNum : rGroupQuery.getRGroupDefinitions().keySet()) { List rgrpList = rGroupQuery.getRGroupDefinitions().get(rgrpNum).getRGroups(); if (rgrpList != null && rgrpList.size() != 0) { rgpBlock.append("$RGP").append(LSEP);; rgpBlock.append(MDLV2000Writer.formatMDLInt(rgrpNum, 4)).append(LSEP); for (RGroup rgroup : rgrpList) { //CTAB block rgpBlock.append("$CTAB").append(LSEP); String ctab = getCTAB(rgroup.getGroup()); ctab = ctab.replaceAll(LSEP + "M END" + LSEP, ""); rgpBlock.append(ctab).append(LSEP); //The APO line IAtom firstAttachmentPoint = rgroup.getFirstAttachmentPoint(); IAtom secondAttachmentPoint = rgroup.getSecondAttachmentPoint(); int apoCount = 0; if (firstAttachmentPoint != null) { StringBuffer apoLine = new StringBuffer(); for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) { if (rgroup.getGroup().getAtom(atIdx).equals(firstAttachmentPoint)) { apoLine.append(MDLV2000Writer.formatMDLInt((atIdx + 1), 4)); apoCount++; if (secondAttachmentPoint != null && secondAttachmentPoint.equals(firstAttachmentPoint)) { apoLine.append(MDLV2000Writer.formatMDLInt(3, 4)); } else { apoLine.append(MDLV2000Writer.formatMDLInt(1, 4)); } } } if (secondAttachmentPoint != null && !secondAttachmentPoint.equals(firstAttachmentPoint)) { for (int atIdx = 0; atIdx < rgroup.getGroup().getAtomCount(); atIdx++) { if (rgroup.getGroup().getAtom(atIdx).equals(secondAttachmentPoint)) { apoCount++; apoLine.append(MDLV2000Writer.formatMDLInt((atIdx + 1), 4)); apoLine.append(MDLV2000Writer.formatMDLInt(2, 4)); } } } if (apoCount > 0) { apoLine.insert(0, "M APO" + MDLV2000Writer.formatMDLInt(apoCount, 3)); rgpBlock.append(apoLine).append(LSEP); } } rgpBlock.append("M END").append(LSEP); rgpBlock.append("$END CTAB").append(LSEP); } rgpBlock.append("$END RGP").append(LSEP); } } rgpBlock.append("$END MOL").append(LSEP); writer.write(rootBlock.toString()); writer.write(rgpBlock.toString()); writer.flush(); } catch (IOException e) { e.printStackTrace(); throw new CDKException("Unexpected exception when writing RGFile" + LSEP + e.getMessage()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy