
// org.bitbucket.kienerj.indigoutils.SubstructureRemover (Maven / Gradle / Ivy)
// Part of the indigo-utils artifact.
/*
* Copyright (C) 2013 Joos Kiener
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.bitbucket.kienerj.indigoutils;
import com.ggasoftware.indigo.Indigo;
import com.ggasoftware.indigo.IndigoObject;
import java.util.HashMap;
import java.util.HashSet;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;
/**
 * Removes all instances of a given query molecule (can be a SMARTS pattern)
 * from a target molecule.
 * <p>
 * The {@code removeFragment()}-methods only remove the pattern if it fully
 * matches a fragment/component within the target molecule.
 *
 * @author Joos Kiener
 */
public class SubstructureRemover {

    private static final XLogger logger = XLoggerFactory.getXLogger("SubstructureRemover");

    /** Indigo session used to create matchers and new molecules. */
    private final Indigo indigo;

    /**
     * Creates a remover that performs all its work through the given Indigo
     * instance.
     *
     * @param indigo the Indigo instance used for matching and molecule creation
     */
    public SubstructureRemover(Indigo indigo) {
        this.indigo = indigo;
    }

    /**
     * Removes all occurrences of {@code queryMol} from {@code mol} using the
     * default substructure matching (no options).
     * <p>
     * The passed in {@code mol} remains unchanged. The method returns a copy of
     * {@code mol} from which all occurrences of {@code queryMol} have been
     * removed.
     * <p>
     * Note that the result can contain disconnected components if the removed
     * substructure is in the middle of the molecule:
     * <p>
     * {@code CCOCCl} remove {@code CO} gives {@code C.Cl}
     *
     * @param mol the molecule to remove the substructure from
     * @param queryMol the substructure to remove
     * @return a copy of {@code mol} with {@code queryMol} removed
     */
    public IndigoObject removeSubstructure(IndigoObject mol, IndigoObject queryMol) {
        return removeSubstructure(mol, queryMol, null);
    }

    /**
     * Removes all occurrences of {@code queryMol} from {@code mol}.
     * <p>
     * The passed in {@code mol} remains unchanged. The method returns a copy of
     * {@code mol} from which all occurrences of {@code queryMol} have been
     * removed. If nothing matches, the returned copy is left untouched.
     * <p>
     * Note that the result can contain disconnected components if the removed
     * substructure is in the middle of the molecule:
     * <p>
     * {@code CCOCCl} remove {@code CO} gives {@code C.Cl}
     * <p>
     * For {@code options} please see the Indigo API Manual. Possible values are
     * "TAU" (Tautomer search) or "RES" (resonance structure search).
     *
     * @param mol the molecule to remove the substructure from
     * @param queryMol the substructure to remove
     * @param options options used for substructure matching, may be {@code null}
     * @return a copy of {@code mol} with {@code queryMol} removed
     */
    public IndigoObject removeSubstructure(IndigoObject mol, IndigoObject queryMol,
            String options) {
        logger.entry(mol, queryMol, options);

        IndigoObject clone = mol.clone();
        IndigoObject matcher = indigo.substructureMatcher(clone, options);
        IndigoObject matches = matcher.iterateMatches(queryMol);

        if (matches != null) {
            // Gather the target atoms of every match first and remove them in a
            // single call afterwards, so the molecule is not modified while the
            // matches are still being iterated. The set de-duplicates atoms
            // that are hit by more than one match.
            HashSet<Integer> atomIndices = new HashSet<>();
            for (IndigoObject match : matches) {
                for (IndigoObject atom : queryMol.iterateAtoms()) {
                    IndigoObject mappedAtom = match.mapAtom(atom);
                    // query atoms without a counterpart in the target are skipped
                    if (mappedAtom != null) {
                        atomIndices.add(mappedAtom.index());
                    }
                }
            }

            // Only touch the clone (and only report a removal) when something
            // actually matched.
            if (!atomIndices.isEmpty()) {
                clone.removeAtoms(atomIndices);
                // canonicalSmiles() is computed only when it will be logged
                if (logger.isDebugEnabled()) {
                    logger.debug("Removed substructure from {}.", mol.canonicalSmiles());
                }
            }
        }

        logger.exit(clone);
        return clone;
    }

    /**
     * See the
     * {@link #removeFragment(IndigoObject, IndigoObject) removeFragment()}-
     * method. This is a convenience method if you are not interested in the
     * fragments that were removed.
     *
     * @param mol the molecule to remove the fragment from
     * @param fragment the fragment to remove
     * @return a molecule with all instances of fragment removed
     */
    public IndigoObject removeFragmentSimple(IndigoObject mol, IndigoObject fragment) {
        return removeFragment(mol, fragment, null).getMol();
    }

    /**
     * Removes all fragments/components from {@code mol} which fully match
     * {@code fragment}, using the default substructure matching (no options).
     * <p>
     * The passed in {@code mol} remains unchanged. The method returns a copy of
     * {@code mol} from which all occurrences of {@code fragment} have been
     * removed and a Map of the removed fragments and how often each was
     * removed.
     * <p>
     * Fragments or components are disconnected from the rest of the molecule
     * like in ionic bonds (salts).
     * <p>
     * Example: {@code C[NH+](C)(C).[Cl-]} where the fragments are separated by
     * a dot ".". Removing fragment {@code [Cl-]} (and {@code [Cl]}) will return
     * {@code C[NH+](C)(C)}. Removing fragment {@code [NH+]} would not change
     * anything and return {@code C[NH+](C)(C).[Cl-]} as {@code [NH+]} is only
     * part of a fragment and not a whole fragment. Note that fragment
     * {@code [Cl]} will match {@code Cl} and {@code [Cl-]}.
     * <p>
     * This mimics the RDKits {@code DeleteSubstructs} where the
     * {@code onlyFrags} flag is set to true. See the RDKit API Documentation.
     *
     * @param mol the molecule to remove the substructure from
     * @param fragment the fragment to remove
     * @return a
     * {@link org.bitbucket.kienerj.indigoutils.FragmentRemovalResult FragmentRemovalResult}
     * containing the new molecule and removed fragments
     */
    public FragmentRemovalResult removeFragment(IndigoObject mol, IndigoObject fragment) {
        return removeFragment(mol, fragment, null);
    }

    /**
     * Removes all fragments/components from {@code mol} which fully match
     * {@code fragment}. Fully means that the pattern matches and that the
     * pattern and fragment have the same number of atoms.
     * <p>
     * The passed in {@code mol} remains unchanged. The method returns a copy of
     * {@code mol} from which all occurrences of {@code fragment} have been
     * removed and a Map of the removed fragments (keyed by their SMILES) and
     * how often each was removed. If {@code fragment} is not found, then the
     * returned result will contain {@code mol} itself.
     * <p>
     * Fragments or components are disconnected from the rest of the molecule
     * like in ionic bonds (salts).
     * <p>
     * Example: {@code C[NH+](C)(C).[Cl-]} where the fragments are separated by
     * a dot ".". Removing fragment {@code [Cl-]} (and {@code [Cl]}) will return
     * {@code C[NH+](C)(C)}. Removing fragment {@code [NH+]} would not change
     * anything and return {@code C[NH+](C)(C).[Cl-]} as {@code [NH+]} is only
     * part of a fragment and not a whole fragment. Note that fragment
     * {@code [Cl]} will match {@code Cl} and {@code [Cl-]}.
     * <p>
     * For {@code options} please see the Indigo API Manual. Possible values are
     * "TAU" (Tautomer search) or "RES" (resonance structure search).
     * <p>
     * This mimics the RDKits {@code DeleteSubstructs} where the
     * {@code onlyFrags} flag is set to true. See the RDKit API Documentation.
     *
     * @param mol the molecule to remove the substructure from
     * @param fragment the fragment to remove
     * @param options options used for substructure matching, may be {@code null}
     * @return a
     * {@link org.bitbucket.kienerj.indigoutils.FragmentRemovalResult FragmentRemovalResult}
     * containing the new molecule and removed fragments
     */
    public FragmentRemovalResult removeFragment(IndigoObject mol, IndigoObject fragment,
            String options) {
        /*
         * Note that this method uses substructure search so that fragments
         * match regardless of charge eg. fragment [Cl] will match Cl and [Cl-].
         *
         * This is not possible with exact matcher. This mimics the RDKits
         * DeleteSubstructs where onlyFrags flag is set to true.
         */
        logger.entry(mol, fragment, options);

        HashMap<String, Integer> removedFragments = new HashMap<>();
        IndigoObject result = indigo.createMolecule();
        // the fragment does not change while iterating, so count its atoms once
        int fragmentAtomCount = fragment.countAtoms();
        boolean modified = false;

        for (IndigoObject component : mol.iterateComponents()) {
            IndigoObject componentClone = component.clone();

            // If the atom count is the same and the substructure search hits,
            // the whole component must match, which is exactly the desired
            // behavior. The matcher is only created for same-sized components.
            boolean matched = componentClone.countAtoms() == fragmentAtomCount
                    && indigo.substructureMatcher(componentClone, options)
                            .match(fragment) != null;

            if (!matched) {
                // no match, hence the component is kept
                result.merge(componentClone);
                continue;
            }

            // Match: the component is "removed" by simply not merging it into
            // the new molecule; its occurrence is counted instead.
            modified = true;
            String componentSmiles = componentClone.smiles();
            Integer count = removedFragments.get(componentSmiles);
            removedFragments.put(componentSmiles, count == null ? 1 : count + 1);

            // canonicalSmiles() is computed only when it will be logged
            if (logger.isDebugEnabled()) {
                logger.debug("Fragment removed from {}.", mol.canonicalSmiles());
            }
        }

        // When nothing was removed the original molecule is handed back instead
        // of the rebuilt one.
        FragmentRemovalResult removalResult =
                new FragmentRemovalResult(modified ? result : mol, removedFragments);

        logger.exit(removalResult);
        return removalResult;
    }
}