All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openscience.cdk.layout.IdentityTemplateLibrary Maven / Gradle / Ivy

There is a newer version: 2.10
Show newest version
/*
 * Copyright (c) 2014 European Bioinformatics Institute (EMBL-EBI)
 *                    John May 
 *
 * Contact: [email protected]
 *
 * This program is free software; you can redistribute it and/or modify it
 * under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation; either version 2.1 of the License, or (at
 * your option) any later version. All we ask is that proper credit is given
 * for our work, which includes - but is not limited to - adding the above
 * copyright notice to the beginning of your source code files, and to any
 * copyright notice that you may distribute with programs based on this work.
 *
 * This program is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
 * License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 */

package org.openscience.cdk.layout;

import com.google.common.collect.LinkedListMultimap;
import com.google.common.collect.Multimap;
import org.openscience.cdk.exception.CDKException;
import org.openscience.cdk.graph.rebond.Point;
import org.openscience.cdk.interfaces.IAtom;
import org.openscience.cdk.interfaces.IAtomContainer;
import org.openscience.cdk.interfaces.IBond;
import org.openscience.cdk.interfaces.IChemObjectBuilder;
import org.openscience.cdk.smiles.SmilesGenerator;
import org.openscience.cdk.smiles.SmilesParser;
import org.openscience.cdk.tools.ILoggingTool;
import org.openscience.cdk.tools.LoggingToolFactory;

import javax.vecmath.Point2d;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.nio.charset.StandardCharsets;
import java.text.DecimalFormat;
import java.text.DecimalFormatSymbols;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;

import static java.util.AbstractMap.SimpleEntry;
import static java.util.Map.Entry;

/**
 * A library for 2D layout templates that are retrieved based on identity. Such a library is useful
 * for ensure ring systems are laid out in their de facto orientation. Importantly, identity
 * templates means the library size can be very large but still searched in constant time.
 *
 * 
{@code
 *
 * // load from a resource file on the classpath
 * IdentityTemplateLibrary lib = IdentityTemplateLibrary.loadFromResource("/data/ring-templates.smi");
 *
 * IAtomContainer container, container2;
 *
 * // add to the library
 * lib.add(container);
 *
 * // assign a layout
 * boolean modified = lib.assignLayout(container2);
 *
 * // store
 * OutputStream out = new FileOutputStream("/tmp/lib.smi");
 * lib.store(out);
 * out.close();
 * }
* * @author John May */ final class IdentityTemplateLibrary { private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat(".##"); private final Multimap templateMap = LinkedListMultimap.create(); private final SmilesGenerator smigen = SmilesGenerator.unique(); private final ILoggingTool logger = LoggingToolFactory.createLoggingTool(getClass()); private IdentityTemplateLibrary() { } /** * Add one template library to another. * * @param library another template library * @return this library with the other one added in (allows chaining) */ public IdentityTemplateLibrary add(IdentityTemplateLibrary library) { this.templateMap.putAll(library.templateMap); return this; } /** * Internal - create a canonical SMILES string temporarily adjusting to default * hydrogen count. This method may be moved to the SMILESGenerator in future. * * @param mol molecule * @param ordering ordering output * @return SMILES * @throws CDKException SMILES could be generate */ private String cansmi(IAtomContainer mol, int[] ordering) throws CDKException { // backup parts we will strip off Integer[] hcntBackup = new Integer[mol.getAtomCount()]; Map idxs = new HashMap<>(); for (int i = 0; i < mol.getAtomCount(); i++) { hcntBackup[i] = mol.getAtom(i).getImplicitHydrogenCount(); idxs.put(mol.getAtom(i), i); } int[] bondedValence = new int[mol.getAtomCount()]; for (int i = 0; i < mol.getBondCount(); i++) { IBond bond = mol.getBond(i); bondedValence[idxs.get(bond.getAtom(0))] += bond.getOrder().numeric(); bondedValence[idxs.get(bond.getAtom(1))] += bond.getOrder().numeric(); } // http://www.opensmiles.org/opensmiles.html#orgsbst for (int i = 0; i < mol.getAtomCount(); i++) { IAtom atom = mol.getAtom(i); atom.setImplicitHydrogenCount(0); switch (atom.getAtomicNumber()) { case 5: // B if (bondedValence[i] <= 3) atom.setImplicitHydrogenCount(3 - bondedValence[i]); break; case 6: // C if (bondedValence[i] <= 4) atom.setImplicitHydrogenCount(4 - bondedValence[i]); break; case 7: // N case 15: // 
P if (bondedValence[i] <= 3) atom.setImplicitHydrogenCount(3 - bondedValence[i]); else if (bondedValence[i] <= 5) atom.setImplicitHydrogenCount(5 - bondedValence[i]); break; case 8: // O if (bondedValence[i] <= 2) atom.setImplicitHydrogenCount(2 - bondedValence[i]); break; case 16: // S if (bondedValence[i] <= 2) atom.setImplicitHydrogenCount(2 - bondedValence[i]); else if (bondedValence[i] <= 4) atom.setImplicitHydrogenCount(4 - bondedValence[i]); else if (bondedValence[i] <= 6) atom.setImplicitHydrogenCount(6 - bondedValence[i]); break; case 9: // F case 17: // Cl case 35: // Br case 53: // I if (bondedValence[i] <= 1) atom.setImplicitHydrogenCount(1 - bondedValence[i]); break; default: atom.setImplicitHydrogenCount(0); break; } } String smi = null; try { smi = smigen.create(mol, ordering); } finally { // restore for (int i = 0; i < mol.getAtomCount(); i++) mol.getAtom(i).setImplicitHydrogenCount(hcntBackup[i]); } return smi; } /** * Create a library entry from an atom container. Note the entry is not added to the library. * * @param container structure representation * @return a new library entry (not stored). * @see #add(java.util.Map.Entry) */ Entry createEntry(final IAtomContainer container) { try { final int n = container.getAtomCount(); final int[] ordering = new int[n]; final String smiles = cansmi(container, ordering); // build point array that is in the canonical output order final Point2d[] points = new Point2d[n]; for (int i = 0; i < n; i++) { Point2d point = container.getAtom(i).getPoint2d(); if (point == null) { logger.warn("Atom at index ", i, " did not have coordinates."); return null; } points[ordering[i]] = point; } return new SimpleEntry(smiles, points); } catch (CDKException e) { logger.warn("Could not encode container as SMILES: ", e); } return null; } /** * Create a library entry from a SMILES string with the coordinates suffixed in binary. 
The * entry should be created with {@link #encodeEntry(java.util.Map.Entry)} and not created * manually. Note, the entry is not added to the library. * * @param str input string * @return library entry */ static Entry decodeEntry(String str) { final int i = str.indexOf(' '); if (i < 0) throw new IllegalArgumentException(); return new SimpleEntry(str.substring(0, i), decodeCoordinates(str.substring(i + 1))); } /** * Decode coordinates that have been placed in a byte buffer. * * @param str the string to decode * @return array of coordinates */ static Point2d[] decodeCoordinates(String str) { if (str.startsWith("|(")) { int end = str.indexOf(')', 2); if (end < 0) return new Point2d[0]; String[] strs = str.substring(2, end).split(";"); Point2d[] points = new Point2d[strs.length]; for (int i = 0; i < strs.length; i++) { String coord = strs[i]; int first = coord.indexOf(','); int second = coord.indexOf(',', first+1); points[i] = new Point2d(Double.parseDouble(coord.substring(0, first)), Double.parseDouble(coord.substring(first + 1, second))); } return points; } else { String[] strs = str.split(", "); Point2d[] points = new Point2d[strs.length / 2]; for (int i = 0; i < strs.length; i += 2) { points[i / 2] = new Point2d(Double.parseDouble(strs[i]), Double.parseDouble(strs[i + 1])); } return points; } } /** * Encodes an entry in a compact string representation. The encoded entry is a SMILES string * with the coordinates suffixed in binary. * * @param entry the entry to encode * @return encoded entry */ static String encodeEntry(Entry entry) { StringBuilder sb = new StringBuilder(); sb.append(entry.getKey()); sb.append(' '); sb.append(encodeCoordinates(entry.getValue())); return sb.toString(); } /** * Encode coordinates in a string. 
* * @param points points * @return extended SMILES format coordinates */ static String encodeCoordinates(Point2d[] points) { StringBuilder sb = new StringBuilder(); sb.append("|("); for (Point2d point : points) { if (sb.length() > 2) sb.append(";"); sb.append(DECIMAL_FORMAT.format(point.x)); sb.append(','); sb.append(DECIMAL_FORMAT.format(point.y)); sb.append(','); } sb.append(")|"); return sb.toString(); } /** * Add a created entry to the library. * * @param entry entry */ void add(Entry entry) { if (entry != null) templateMap.put(entry.getKey(), entry.getValue()); } /** * Create an entry for the provided container and add it to the library. * * @param container structure representation */ void add(IAtomContainer container) { add(createEntry(container)); } /** * Assign a 2D layout to the atom container using the contents of the library. If multiple * coordinates are available the first is choosen. * * @param container structure representation * @return a layout was assigned */ boolean assignLayout(IAtomContainer container) { try { // create the library key to lookup an entry, we also store // the canonical out ordering int n = container.getAtomCount(); int[] ordering = new int[n]; String smiles = cansmi(container, ordering); // find the points in the library for (Point2d[] points : templateMap.get(smiles)) { // set the points for (int i = 0; i < n; i++) { container.getAtom(i).setPoint2d(new Point2d(points[ordering[i]])); } return true; } } catch (CDKException e) { // ignored } return false; } /** * Get all templated coordinates for the provided molecule. The return collection has * coordinates ordered based on the input. 
* * @param mol molecule (or fragment) to lookup * @return the coordinates */ Collection getCoordinates(IAtomContainer mol) { try { // create the library key to lookup an entry, we also store // the canonical out ordering int n = mol.getAtomCount(); int[] ordering = new int[n]; String smiles = cansmi(mol, ordering); final Collection coordSet = templateMap.get(smiles); final List orderedCoordSet = new ArrayList<>(coordSet.size()); for (Point2d[] coords : coordSet) { Point2d[] orderedCoords = new Point2d[coords.length]; for (int i = 0; i < n; i++) { orderedCoords[i] = new Point2d(coords[ordering[i]]); } orderedCoordSet.add(orderedCoords); } return Collections.unmodifiableList(orderedCoordSet); } catch (CDKException e) { return Collections.emptyList(); } } /** * Create an empty template library. * * @return an empty template library */ static IdentityTemplateLibrary empty() { return new IdentityTemplateLibrary(); } /** * Load a template library from a resource on the class path. * * @return loaded template library * @throws java.lang.IllegalArgumentException resource not found or could not be loaded */ static IdentityTemplateLibrary loadFromResource(String resource) { InputStream in = IdentityTemplateLibrary.class.getResourceAsStream(resource); try { return load(in); } catch (IOException e) { throw new IllegalArgumentException("Could not load template library from resource " + resource, e); } finally { try { if (in != null) in.close(); } catch (IOException e) { // ignored } } } /** * Load a template library from an input stream. 
* * @return loaded template library * @throws java.io.IOException low level IO error */ static IdentityTemplateLibrary load(InputStream in) throws IOException { BufferedReader br = new BufferedReader(new InputStreamReader(in)); String line = null; IdentityTemplateLibrary library = new IdentityTemplateLibrary(); while ((line = br.readLine()) != null) { // skip comments if (line.charAt(0) == '#') continue; library.add(decodeEntry(line)); } return library; } /** * Reorder coordinates. * * @param coords coordinates * @param order permutation * @return reordered coordinates */ static Point2d[] reorderCoords(Point2d[] coords, int[] order) { Point2d[] neworder = new Point2d[coords.length]; for (int i = 0; i < order.length; i++) neworder[order[i]] = coords[i]; return neworder; } /** * Update the template library - can be called for safety after * each load. * * @param bldr builder */ void update(IChemObjectBuilder bldr) { final SmilesParser smipar = new SmilesParser(bldr); Multimap updated = LinkedListMultimap.create(); for (Map.Entry> e : templateMap.asMap().entrySet()) { try { IAtomContainer mol = smipar.parseSmiles(e.getKey()); int[] order = new int[mol.getAtomCount()]; String key = cansmi(mol, order); for (Point2d[] coords : e.getValue()) { updated.put(key, reorderCoords(coords, order)); } } catch (CDKException ex) { System.err.println(e.getKey() + " could not be updated: " + ex.getMessage()); } } templateMap.clear(); templateMap.putAll(updated); } /** * Store a template library to the provided output stream. * * @param out output stream * @throws IOException low level IO error */ void store(OutputStream out) throws IOException { BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(out)); for (Entry e : templateMap.entries()) { bw.write(encodeEntry(e)); bw.write('\n'); } bw.close(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy