All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.bitbucket.kienerj.indigoutils.SubstructureReplacer Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2013 Joos Kiener 
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see .
 */
package org.bitbucket.kienerj.indigoutils;

import com.ggasoftware.indigo.Indigo;
import com.ggasoftware.indigo.IndigoException;
import com.ggasoftware.indigo.IndigoObject;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import org.bitbucket.kienerj.indigoutils.exception.ReplaceAllDepthExceedException;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;

/**
 * 

Replaces a given substructure (can be a SMARTS pattern) with a * replacement. The replacement must be a fully defined molecule. The * replacement is correctly inserted and connected the target molecule. Only * valid molecules are returned.

* *

All possible replacements are returned once. There is no specific mapping * of the pattern to the replacement. This means that when replacing CO with CN, * O may map to either C or N of the replacement.
If the pattern matches * multiple times, for each map a separate replacement is returned. This means a * returned molecule can still contain the pattern.
The returned molecules * are ordered naturally by their canonical smiles.

*

 * Examples:
 *
 * The following examples substitute SMILES/SMARTS strings for molecules. You'd
 * have to actually use (query) molecules (IndigoObject). It's recommended to
 * look at the transformations in a real structure viewer (paste below SMILES)
 * or on paper to understand how the replacement works. Else replaceResults might be
 * unexpected.
 *
 * Simple Replacement:
 * replaceSubstructure("CCOC", "CO", "CN")
 * -> ("CCCN", "CCNC")
 *
 * Replacement that has more atoms than the pattern
 * replaceSubstructure("CC(=C)OC", "CO", "CCN")
 * -> ("CC(CCN)=C", "CC(NCC)=C", "CC(=C)CNC")
 * Note that the 4th possible solution, "CN(=C)CCC", is generated but not returned
 * due to invalid valence of the N atoms.
 *
 * Replacement in a "double-ring":
 * replaceSubstructure("CCC1CCC2=CC=CCN2C1", "CN", "CNC")
 * -> ("CCC1CC2CC=CC=C(CC1)N2", "CCC1CC2C(=CC=CCN2)CC1", "CCC1CNC2CC=CC=C2CC1")
 * here one or both of the rins get bigger by 1 atom.
 *
 *
 * 

* * @author Joos Kiener */ public class SubstructureReplacer { private static final int REPLACE_ALL_DEPTH_THRESHOLD = 5; private static final String THRESHOLD_EXCEEDED_MESSAGE = "Invalid replace-all pattern." + "Could not replace all occurences after %s iterations."; private static final XLogger logger = XLoggerFactory.getXLogger("SubstructureReplacer"); private final Indigo indigo; public SubstructureReplacer(Indigo indigo) { this.indigo = indigo; } /** *

Replaces all occurrences of pattern in given molecule.

* *

Returns a full list of all possible complete replacements. See * {@link #replaceSubstructure(IndigoObject,IndigoObject,IndigoObject) replaceSubstructure(mol, pattern, replacement)} * for general rules how the replacing is done.

* *

To avoid an endless loop (which would happen if you replaced "C" with * "CC"), the replacement is stopped if there are more than 5 replacements * to be made in a single molecule and a * ReplaceAllDepthExceedException is thrown.

* *

*

     *
     * Example:
     *
     * replaceAll("COCO", "CO", "CN")
     * -> ("CCNN", "CCNO", "C(NC)N", "CNNC", "C(CN)N")
     *
     *
     * 

* * @param mol the target molecule * @param pattern the substructure to replace * @param replacement the replacement substructure * @return a List of molecules with all occurrences of pattern replaced with * replacement * @throws ReplaceAllDepthExceedException if no result is found after 5 * replacements */ public List replaceAll(IndigoObject mol, IndigoObject pattern, IndigoObject replacement) { logger.entry(mol, pattern, replacement); TreeMap molecules = perfromReplaceAll(mol, pattern, replacement, REPLACE_ALL_DEPTH_THRESHOLD, 0); ArrayList results = new ArrayList<>(molecules.values()); logger.exit(results); return results; } /** * *

Replaces all occurrences of pattern in given molecule.

* *

See * {@link #replaceAll(IndigoObject,IndigoObject,IndigoObject) replaceAll(mol, pattern, replacement)}.

* *

* maxDepth> lets you control the number of allowed replacements until * an Exception is thrown and the replacement terminated. Default value is * 5.

* * @param mol the target molecule * @param pattern the substructure to replace * @param replacement the replacement substructure * @param maxDepth maximum number of replacements to be made * @return a List of molecules with all occurrences of pattern replaced with * replacement * @throws ReplaceAllDepthExceedException if no result is found after 5 * replacements */ public List replaceAll(IndigoObject mol, IndigoObject pattern, IndigoObject replacement, int maxDepth) { logger.entry(mol, pattern, replacement, maxDepth); TreeMap molecules = perfromReplaceAll(mol, pattern, replacement, maxDepth, 0); ArrayList results = new ArrayList<>(molecules.values()); logger.exit(results); return results; } /** *

Replaces occurrences of pattern once in the given molecule. All * possible replacements are returned as a List of molecules.

* *

There is no specific mapping of the pattern to the replacement. *

     *
     * Example:
     *
     * replaceSubstructure("CCOC", "CO", "CN")
     * -> ("CCCN", "CCNC")
     *
     * 

* *

A result molecule can still contain the pattern: *

     *
     * Example:
     *
     * replaceSubstructure("COCO", "CO", "CN")
     * -> ("CNCO", "NCCO", ...)
     *
     * 

* *

See * {@link org.bitbucket.kienerj.indigoutils.SubstructureReplacer main JavaDoc for this class} * for further information.

* * * @param mol the target molecule * @param pattern the substructure to replace * @param replacement the replacement substructure * @return a List of molecules with exactly once occurrence of pattern * replaced */ public List replaceSubstructure(IndigoObject mol, IndigoObject pattern, IndigoObject replacement) { logger.entry(mol, pattern, replacement); TreeMap molecules = perfromReplacement(mol, pattern, replacement); ArrayList results = new ArrayList<>(molecules.values()); logger.exit(results); return results; } /** * Replaces all occurrences of pattern in mol with replacement. If there are * too many replacements (recursion depth too big) an exception is thrown. * The depth can be adjusted by the caller. * */ private TreeMap perfromReplaceAll(IndigoObject mol, IndigoObject pattern, IndigoObject replacement, int depthThreshold, int depth) { logger.entry(mol, pattern, replacement, depthThreshold, depth); /* * Recursion: * * Replaces pattern in mol with perfromReplacement()-emthod and then * calls this method (eg. recursion) for all returned molecules. * * The recursion is terminated and an exception is thrown if the * depth of the recursion is higher than the given maximum depth. */ if (depth > depthThreshold) { throw new ReplaceAllDepthExceedException(String.format( THRESHOLD_EXCEEDED_MESSAGE, depthThreshold)); } TreeMap molecules = new TreeMap<>(); IndigoObject matcher = indigo.substructureMatcher(mol); String canSmiles = mol.canonicalSmiles(); if (matcher.match(pattern) == null) { molecules.put(canSmiles, mol); logger.debug("Added molecule to results: {}", canSmiles); } else { logger.debug("Replace All Target SMILES: {}", canSmiles); TreeMap replaceResults = perfromReplacement( mol, pattern, replacement); for (IndigoObject result : replaceResults.values()) { // recursion here TreeMap replaceAllResults = perfromReplaceAll(result, pattern, replacement, depthThreshold, depth + 1); molecules.putAll(replaceAllResults); } } logger.exit(molecules); return molecules; } /** * Replaces 1 occurrence of pattern in mol * */ private TreeMap perfromReplacement(IndigoObject mol, IndigoObject pattern, IndigoObject replacement) { logger.entry(mol, pattern, replacement); mol = mol.clone(); // resets atom indices -> required!!! TreeMap molecules = new TreeMap<>(); IndigoObject matcher = indigo.substructureMatcher(mol); IndigoObject matches = matcher.iterateMatches(pattern); if (matches != null) { for (IndigoObject match : matches) { logger.debug("Found pattern in {}. Generating all possible " + "replacements...", mol.canonicalSmiles()); HashSet atomIndices = new HashSet<>(); int minIndex = Integer.MAX_VALUE; int maxIndex = 0; for (IndigoObject queryAtom : pattern.iterateAtoms()) { IndigoObject atom = match.mapAtom(queryAtom); if (atom != null) { int atomIndex = atom.index(); atomIndices.add(atomIndex); if (atomIndex < minIndex) { minIndex = atomIndex; } if (atomIndex > maxIndex) { maxIndex = atomIndex; } } } HashMap startNeighbors = new HashMap<>(); IndigoObject startAtom = mol.getAtom(minIndex); for (IndigoObject neighbor : startAtom.iterateNeighbors()) { int atomIndex = neighbor.index(); // neighbors that will be removed anyway don't count here if (!atomIndices.contains(atomIndex)) { startNeighbors.put(atomIndex, neighbor.bond().bondOrder()); } } HashMap endNeighbors = new HashMap<>(); IndigoObject endAtom = mol.getAtom(maxIndex); for (IndigoObject neighbor : endAtom.iterateNeighbors()) { int atomIndex = neighbor.index(); // neighbors that will be removed anyway don't count here if (!atomIndices.contains(atomIndex)) { endNeighbors.put(neighbor.index(), neighbor.bond().bondOrder()); } } logger.debug("\tGenerating molecule for lowest with lowest mapping"); IndigoObject lowestWithLowest = mol.clone(); IndigoObject mapping = lowestWithLowest.merge(replacement); createBonds(lowestWithLowest, replacement, mapping, 0, startNeighbors); /* * If replacement is a single atom, no further bonds need to be * created. * single atom -> explicit hydrogens don't count hence remove * them from total atom count. * * if min and max index are equal and replacementAtomCount is * greater than 1, we are replacing a single atom at the "end" of * a molecule with multiple atoms. Therefore only 1 bond must be * formed and that already happend above. * Example: in CC replace C with CC -> C1CC1 but CCC is what we * want. */ int replacementAtomCount = replacement.countAtoms() - (replacement.countHydrogens() - replacement.countImplicitHydrogens()); if (replacementAtomCount > 1 && minIndex != maxIndex) { createBonds(lowestWithLowest, replacement, mapping, replacement.countAtoms() - 1, endNeighbors); } lowestWithLowest.removeAtoms(atomIndices); // map canonical smiles to molecule object // so that each result is unique // note creating canonical smiles can cause an exception // for invalid molecules (but not always!) try { String canSmiles = lowestWithLowest.canonicalSmiles(); molecules.put(canSmiles, lowestWithLowest); logger.debug("\tLowest with Lowest molecule generated: {}", canSmiles); } catch (IndigoException ex) { if (isValidMolecule(lowestWithLowest)) { throw ex; } // else do nothing as invalid molecules are expected and // we do not want to return them } // If replacement is a single atom. there is only 1 possible // result per substructure match. This result has already // been created above! if (replacementAtomCount > 1) { logger.debug("\tGenerating molecule for lowest with highest mapping"); IndigoObject lowestWithHighest = mol.clone(); IndigoObject mapping2 = lowestWithHighest.merge(replacement); createBonds(lowestWithHighest, replacement, mapping2, replacement.countAtoms() - 1, startNeighbors); /* * if min and max index are equal and replacementAtomCount is * greater than 1, we are replacing a single atom at the "end" of * a molecule with multiple atoms. Therefore only 1 bond must be * formed and that already happend above. * Example: in CC replace C with CC -> C1CC1 but CCC is what we * want. */ if (minIndex != maxIndex) { createBonds(lowestWithHighest, replacement, mapping2, 0, endNeighbors); } lowestWithHighest.removeAtoms(atomIndices); // map canonical smiles to molecule object // so that each result is unique // note creating canonical smiles can cause an exception // for invalid molecules (but not always!) try { String canSmiles = lowestWithHighest.canonicalSmiles(); molecules.put(canSmiles, lowestWithHighest); logger.debug("\tLowest with Highest molecule generated: {}", canSmiles); } catch (IndigoException ex) { if (isValidMolecule(lowestWithHighest)) { throw ex; } // else do nothing as invalid molecules are expected and // we do not want to return them } } } } removeInvalidResults(molecules); logger.exit(molecules); return molecules; } /** * Creates bonds of given order between the given neighbors to the mapped * atom of the replacement */ private void createBonds(IndigoObject mol, IndigoObject replacement, IndigoObject mapping, int replacementAtomIndex, HashMap neighbors) { logger.entry(mol, replacement, mapping, replacementAtomIndex, neighbors); for (Map.Entry neighbor : neighbors.entrySet()) { int atomIndex = neighbor.getKey(); int bondOrder = neighbor.getValue(); IndigoObject replacementAtom = mapping.mapAtom(replacement.getAtom(replacementAtomIndex)); replacementAtom.addBond(mol.getAtom(atomIndex), bondOrder); } logger.exit(); } /** * Removes Molecules with bad valence or ambiguous hydrogens * * @param molecules */ private void removeInvalidResults(Map molecules) { logger.entry(molecules); Iterator> iterator = molecules.entrySet().iterator(); while (iterator.hasNext()) { IndigoObject mol = iterator.next().getValue(); if (!isValidMolecule(mol)) { iterator.remove(); } } logger.exit(); } private boolean isValidMolecule(IndigoObject mol) { logger.entry(mol); String valence = mol.checkBadValence(); String ambiguousH = mol.checkAmbiguousH(); if (valence.isEmpty() && ambiguousH.isEmpty()) { logger.exit(true); return true; } else { logger.exit(false); return false; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy