
// Source listing of org.bitbucket.kienerj.indigoutils.SaltRemover
// from the indigo-utils artifact (Maven / Gradle / Ivy).
/*
* Copyright (C) 2013 Joos Kiener
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.bitbucket.kienerj.indigoutils;
import com.ggasoftware.indigo.Indigo;
import com.ggasoftware.indigo.IndigoException;
import com.ggasoftware.indigo.IndigoObject;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import org.slf4j.ext.XLogger;
import org.slf4j.ext.XLoggerFactory;
/**
* Class for stripping salts.
*
* The implementation is a straightforward port of RDKit's SaltRemover
* class. Given the same molecules, the result will be the same for this and
* RDKit's class.
*
*
*
* Examples with default salts:
*
* Simple:
* CN(C)C.Cl -> CN(C)C
*
* Removes all Salts:
* CN(C)C.Cl.Cl.Br -> CN(C)C
*
* Salt-like molecules remain unaffected:
* CN(Br)Cl -> CN(Br)Cl
* CN(Br)Cl.Cl -> CN(Br)Cl
*
* Charge-independent:
* C[NH+](C)(C).[Cl-] -> C[NH+](C)(C)
*
* Everything removed:
* CC(=O)O.[Na] ->
*
* To avoid the above issue, {@code keepLastFragment} must be set to
* {@code true}:
* CC(=O)O.[Na] -> CC(=O)O
* Cl.Cl -> Cl.Cl
*
*
*
* You may pass your own list of salts. This will change which fragments are
* removed. If you simply want to add additional salts you can do:
*
*
*
* <pre>{@code
* List<String> salts = SaltRemover.getDefaultSalts(); // static method
* salts.add(mySaltPattern);
* SaltRemover saltRemover = new SaltRemover(indigo, salts);
* }</pre>
*
*
* Note that the order of salts in the list matters if
* {@code keepLastFragment} is {@code true}. The latest fragment in the
* list will be the remaining one.
*
* @author Joos Kiener
*/
public class SaltRemover {

    private static final XLogger logger = XLoggerFactory.getXLogger("SaltRemover");

    /**
     * Default salt query patterns, built once when the class is initialised.
     * The list is never handed out directly ({@link #getDefaultSalts()}
     * returns a copy) and is never modified after construction.
     */
    private static final List<String> DEFAULT_SALT_PATTERNS = createDefaultSaltPatterns();

    private final SubstructureRemover subStructRemover;

    /** Salt patterns loaded as Indigo query molecules, in application order. */
    private final List<IndigoObject> salts;

    /**
     * Creates a {@code SaltRemover} that uses the default salts.
     *
     * @param indigo the main Indigo object used for all molecules submitted
     * to this {@code SaltRemover}
     */
    public SaltRemover(Indigo indigo) {
        this(indigo, DEFAULT_SALT_PATTERNS);
    }

    /**
     * Creates a {@code SaltRemover} that uses your custom salts.
     *
     * @param indigo the main Indigo object used for all molecules submitted
     * to this {@code SaltRemover}
     * @param saltPatterns the salt patterns; each one is loaded with
     * {@code indigo.loadQueryMolecule} and applied in list order
     */
    public SaltRemover(Indigo indigo, List<String> saltPatterns) {
        this.subStructRemover = new SubstructureRemover(indigo);
        this.salts = new ArrayList<>();
        for (String pattern : saltPatterns) {
            IndigoObject salt = indigo.loadQueryMolecule(pattern);
            salts.add(salt);
        }
    }

    /**
     * Builds the list of default salt patterns.
     *
     * @return a new list holding the default patterns in application order
     */
    private static List<String> createDefaultSaltPatterns() {
        // see RDKit/Data/Salts.txt
        // Copyright (c) 2010, Novartis Institutes for BioMedical Research Inc.
        // All rights reserved.
        //
        // Redistribution and use in source and binary forms, with or without
        // modification, are permitted provided that the following conditions are
        // met:
        //
        // * Redistributions of source code must retain the above copyright
        // notice, this list of conditions and the following disclaimer.
        // * Redistributions in binary form must reproduce the above
        // copyright notice, this list of conditions and the following
        // disclaimer in the documentation and/or other materials provided
        // with the distribution.
        // * Neither the name of Novartis Institutes for BioMedical Research Inc.
        // nor the names of its contributors may be used to endorse or promote
        // products derived from this software without specific prior written permission.
        //
        // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
        // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
        // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
        // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
        // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
        // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
        // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
        // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
        // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
        // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
        // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
        logger.debug("Initializing Salt Patterns...");
        List<String> patterns = new ArrayList<>();

        // simple inorganics
        patterns.add("[Cl,Br,I]");
        patterns.add("[Li,Na,K,Ca,Mg]");
        patterns.add("[O,N]");

        // "complex" inorganics
        patterns.add("[N](=O)(O)O");
        patterns.add("[P](=O)(O)(O)O");
        patterns.add("[P](F)(F)(F)(F)(F)F");
        patterns.add("[S](=O)(=O)(O)O");
        patterns.add("[CH3][S](=O)(=O)(O)");
        patterns.add("c1cc([CH3])ccc1[S](=O)(=O)(O)");

        // organics
        patterns.add("[CH3]C(=O)O");
        patterns.add("FC(F)(F)C(=O)O");
        patterns.add("OC(=O)C=CC(=O)O");
        patterns.add("OC(=O)C(=O)O");
        patterns.add("OC(=O)C(O)C(O)C(=O)O");
        patterns.add("C1CCCCC1[NH]C1CCCCC1");

        return patterns;
    }

    /**
     * Returns a copy of the list of default salts used.
     *
     * @return a new, independently modifiable list of the default salt
     * patterns
     */
    public static List<String> getDefaultSalts() {
        return new ArrayList<>(DEFAULT_SALT_PATTERNS);
    }

    /**
     * Strips salts from the given molecule {@code mol}.
     *
     * If {@code keepLastFragment} is true, the last remaining fragment will
     * not be removed. If it is false, it is possible that an empty molecule
     * is returned.
     *
     * The returned object contains the stripped molecule and a mapping of
     * all fragments removed as SMILES and how often each was removed.
     *
     * Example:
     *
     * CN(C)C.Cl.Cl.Br -> CN(C)C
     *
     * removed fragments:
     *
     * Cl -> 2
     * Br -> 1
     *
     * @param mol the molecule to strip salts from
     * @param keepLastFragment whether to keep at least 1 remaining fragment
     * or not
     * @return the stripped molecule and the removed salts
     * @throws IndigoException if, after stripping, {@code checkBadValence()}
     * or {@code checkAmbiguousH()} returns a non-empty report for the
     * remaining molecule
     */
    public FragmentRemovalResult stripMol(IndigoObject mol, boolean keepLastFragment) {
        logger.entry(mol, keepLastFragment);

        HashMap<String, Integer> removedFragments = new HashMap<>();

        // Nothing can be stripped without violating keepLastFragment.
        if (keepLastFragment && mol.countComponents() <= 1) {
            FragmentRemovalResult result = new FragmentRemovalResult(mol, removedFragments);
            logger.exit(result);
            return result;
        }

        boolean modified = false;
        for (IndigoObject salt : salts) {
            IndigoObject tempMol = applyPattern(mol, salt, removedFragments,
                    keepLastFragment);
            // compares for same instance
            // NOTE(review): applyPattern hands back a clone whenever it does
            // not bail out, so this presumably also fires when the pattern
            // removed nothing - confirm against SubstructureRemover.
            if (!tempMol.equals(mol)) {
                mol = tempMol;
                modified = true;
                if (keepLastFragment && mol.countComponents() <= 1) {
                    break;
                }
            }
        }

        if (modified && mol.countAtoms() > 0) {
            String valence = mol.checkBadValence();
            String ambiguousH = mol.checkAmbiguousH();
            if (!valence.isEmpty() || !ambiguousH.isEmpty()) {
                // Join only the non-empty reports so the message never
                // starts with a stray separator.
                String message;
                if (valence.isEmpty()) {
                    message = ambiguousH;
                } else if (ambiguousH.isEmpty()) {
                    message = valence;
                } else {
                    message = valence + " " + ambiguousH;
                }
                throw new IndigoException(mol, message);
            }
        }

        // Assumption: If all counter ions are stripped, all remaining
        // components are identical. Hence ratio is equal to amount of components
        // in the stripped molecule and we just pick the first component as
        // structure.
        // Example: trimagnesiumphosphate where 2 phosphate groups remain.
        // Could be improved to actually look at the remaining components
        int ratio = mol.countComponents();
        IndigoObject mainComponent;
        // in case all fragments are removed and mol is empty
        // in that case mol.component(0).clone() throws exception
        if (ratio < 1) {
            mainComponent = mol;
        } else {
            mainComponent = mol.component(0).clone();
        }

        FragmentRemovalResult result = new FragmentRemovalResult(mainComponent, ratio, removedFragments);
        logger.exit(result);
        return result;
    }

    /**
     * Applies a single salt pattern to a clone of {@code mol}.
     *
     * @param mol the molecule to process; it is cloned, not modified
     * @param salt the salt pattern to remove
     * @param removedFragments receives the fragments reported as removed by
     * the {@code SubstructureRemover}
     * @param keepLastFragment if true and the removal would leave no atoms,
     * the removal is discarded and {@code mol} itself is returned
     * @return {@code mol} itself if it has no atoms or the removal was
     * discarded, otherwise the molecule returned by the
     * {@code SubstructureRemover}
     */
    private IndigoObject applyPattern(IndigoObject mol, IndigoObject salt,
            HashMap<String, Integer> removedFragments,
            boolean keepLastFragment) {
        logger.entry(mol, salt, removedFragments, keepLastFragment);

        int nrOfAtoms = mol.countAtoms();
        if (nrOfAtoms == 0) {
            logger.exit(mol);
            return mol;
        }

        IndigoObject molClone = mol.clone();
        FragmentRemovalResult removalResult = subStructRemover.removeFragment(molClone, salt);
        IndigoObject result = removalResult.getMol();

        if (keepLastFragment && result.countAtoms() == 0) {
            // Removing this salt would leave nothing; keep the input as is.
            logger.exit(mol);
            return mol;
        }

        removedFragments.putAll(removalResult.getRemovedFragments());
        logger.debug("Removed salt from {}.", mol.canonicalSmiles());
        logger.exit(result);
        return result;
    }
}