
// org.jmol.smiles.SmilesMatcher Maven / Gradle / Ivy
/* $RCSfile$
* $Author: hansonr $
* $Date: 2007-04-26 16:57:51 -0500 (Thu, 26 Apr 2007) $
* $Revision: 7502 $
*
* Copyright (C) 2005 The Jmol Development Team
*
* Contact: jmol-developers@lists.sf.net
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
*/
package org.jmol.smiles;
import javajs.util.AU;
import javajs.util.Lst;
import javajs.util.P3;
import javajs.util.PT;
import org.jmol.api.SmilesMatcherInterface;
import org.jmol.java.BS;
import org.jmol.util.BSUtil;
import org.jmol.util.Edge;
import org.jmol.util.Elements;
import org.jmol.util.Logger;
import org.jmol.util.Node;
import org.jmol.util.Point3fi;
import org.jmol.viewer.JC;
/**
* Originating author: Nicholas Vervelle
*
* A class to handle a variety of SMILES/SMARTS-related functions, including: --
* determining if two SMILES strings are equivalent -- determining the molecular
* formula of a SMILES or SMARTS string -- searching for specific runs of atoms
* in a 3D model -- searching for specific runs of atoms in a SMILES description
* -- generating valid (though not canonical) SMILES and bioSMILES strings --
* getting atom-atom correlation maps to be used with biomolecular alignment
* methods
*
*
* The original SMILES description can be found at the SMILES Home Page.
*
* Specification for this implementation can be found in package.html.
*
*
*
*
*
* public methods:
*
* int areEqual -- checks a SMILES string against a reference (-1 for error; 0 for no finds; >0 for number of finds)
*
* BitSet[] find -- finds one or more occurrences of a SMILES or SMARTS string within a SMILES string
*
* int[][] getCorrelationMaps -- returns correlated arrays of atoms
*
* String getLastError -- returns any error that was last encountered.
*
* String getMolecularFormula -- returns the MF of a SMILES or SMARTS string
*
* String getRelationship -- returns isomeric relationship
*
* String getSmiles -- returns a standard SMILES string or a
* Jmol BIOSMILES string with comment header.
*
* BitSet getSubstructureSet -- returns a single BitSet with all found atoms included
*
*
* in Jmol script:
*
* string2.find("SMILES", string1)
* string2.find("SMARTS", string1)
*
* e.g.
*
* print "CCCC".find("SMILES", "C[C]")
*
* select search("smartsString")
*
* All bioSMARTS strings begin with ~ (tilde).
*
*
*
*
* @author Bob Hanson
*
*/
public class SmilesMatcher implements SmilesMatcherInterface {
// Internal result-mode selectors handed to matchPriv/findPriv. They are
// consumed by a switch statement, so they are plain sequential values
// rather than combinable bit masks.
/** search.search() is run with asVector=false and its result returned as is. */
private static final int MODE_BITSET = 1;
/** search.search() is run with asVector=true and its list returned as a BS[]. */
private static final int MODE_ARRAY = 2;
/** search.search() is run with getMaps=true and its list returned as an int[][]. */
private static final int MODE_MAP = 3;
/** the search sets atropicity and search.atropKeys is returned. */
private static final int MODE_ATROP = 4;
/**
 * Reports the error text most recently recorded by InvalidSmilesException.
 *
 * @return the value of InvalidSmilesException.getLastError()
 */
@Override
public String getLastException() {
  final String lastError = InvalidSmilesException.getLastError();
  return lastError;
}
/**
 * Computes the molecular formula of a SMILES or SMARTS string. The pattern
 * is parsed with the /nostereo/ directive prepended, turned into a
 * topological atom set, and the formula is read from that set.
 *
 * Caveat: hydrogen atoms may be undercounted for aromatic amines whose ring
 * bonding to N is not explicit. Each "n" is given a bonding count of two
 * unless written explicitly as -n-; that is, "n" is taken to be the N of
 * pyridine unless otherwise indicated. For example:
 *
 *   print "c1ncccc1C".find("SMILES","MF")    -- H 7 C 5 N 1 (correct)
 *   print "c1nc-n-c1C".find("SMILES","MF")   -- H 6 C 4 N 2 (correct)
 *   print "c1ncnc1C".find("SMILES","MF")     -- H 5 C 4 N 2 (incorrect)
 *
 * @param pattern
 *        SMILES or SMARTS string
 * @param isSmarts
 *        true to parse the pattern as SMARTS
 * @return the molecular formula reported by the search
 * @throws Exception
 */
@Override
public String getMolecularFormula(String pattern, boolean isSmarts)
    throws Exception {
  InvalidSmilesException.clear();
  final String noStereoPattern = "/nostereo/" + pattern;
  SmilesSearch mfSearch = SmilesParser.newSearch(noStereoPattern, isSmarts,
      true);
  mfSearch.createTopoMap(null);
  // the formula is computed over the atoms just generated from the string
  mfSearch.nodes = mfSearch.targetAtoms;
  final boolean notSmarts = !isSmarts;
  return mfSearch.getMolecularFormula(notSmarts, null, false);
}
/**
 * Generates a SMILES string for a set of atoms by delegating to a fresh
 * SmilesGenerator. Internal to Jmol -- called by org.jmol.Viewer.getSmiles.
 *
 * @param atoms
 * @param ac
 *        atom count
 * @param bsSelected
 * @param bioComment
 * @param flags
 * @return the string produced by SmilesGenerator.getSmiles
 * @throws Exception
 */
@Override
public String getSmiles(Node[] atoms, int ac, BS bsSelected,
                        String bioComment, int flags) throws Exception {
  InvalidSmilesException.clear();
  SmilesGenerator generator = new SmilesGenerator();
  return generator.getSmiles(this, atoms, ac, bsSelected, bioComment, flags);
}
/**
 * Matches smiles1 against the structure described by smiles2, stopping at
 * the first match. smiles1 is treated as SMARTS when it contains "*", and
 * as SMILES otherwise.
 *
 * @param smiles1
 *        the pattern
 * @param smiles2
 *        the target, parsed as SMILES
 * @return -1 if the search yields no result array; otherwise the number of
 *         matches found
 * @throws Exception
 */
@Override
public int areEqual(String smiles1, String smiles2) throws Exception {
  InvalidSmilesException.clear();
  SmilesSearch target = SmilesParser.newSearch(smiles2, false, true);
  int patternType;
  if (smiles1.indexOf("*") >= 0) {
    patternType = JC.SMILES_TYPE_SMARTS;
  } else {
    patternType = JC.SMILES_TYPE_SMILES;
  }
  int searchFlags = patternType | JC.SMILES_FIRST_MATCH_ONLY;
  BS[] matches = (BS[]) findPriv(smiles1, target, searchFlags, MODE_ARRAY);
  if (matches == null) {
    return -1;
  }
  return matches.length;
}
/**
 * Variant of the equality check that takes an already-parsed target search;
 * intended mainly for JUnit tests.
 *
 * @param smiles
 *        SMILES pattern to look for
 * @param search
 *        pre-built search holding the target
 * @return true only if exactly one match is returned (first-match-only
 *         search) and no error occurred
 * @throws Exception
 */
public boolean areEqualTest(String smiles, SmilesSearch search)
    throws Exception {
  int searchFlags = JC.SMILES_TYPE_SMILES | JC.SMILES_FIRST_MATCH_ONLY;
  BS[] matches = (BS[]) findPriv(smiles, search, searchFlags, MODE_ARRAY);
  if (matches == null) {
    return false;
  }
  return matches.length == 1;
}
/**
 * Searches for all matches of a pattern within a SMILES string. If SMILES
 * (not SMARTS), requires that all atoms be part of the match.
 *
 * Both strings are passed through SmilesParser.cleanPattern first. The
 * indices in the raw result refer to the target's atom array; each is
 * replaced by that atom's mapIndex before the arrays are returned.
 *
 * @param pattern
 *        SMILES or SMARTS pattern.
 * @param target
 *        SMILES string to search within
 * @param flags
 *        search flags, passed on to the private matcher
 * @return array of correlations of occurrences of pattern within target
 * @throws Exception
 */
@Override
public int[][] find(String pattern, String target, int flags)
    throws Exception {
  InvalidSmilesException.clear();
  String cleanTarget = SmilesParser.cleanPattern(target);
  String cleanPattern = SmilesParser.cleanPattern(pattern);
  // search flags will be set in findPriv; SMILES chirality is fixed here
  SmilesSearch targetSearch = SmilesParser.newSearch(cleanTarget, false, true);
  int[][] maps = (int[][]) findPriv(cleanPattern, targetSearch, flags,
      MODE_MAP);
  for (int[] map : maps) {
    for (int k = 0; k < map.length; k++) {
      SmilesAtom targetAtom = (SmilesAtom) targetSearch.targetAtoms[map[k]];
      map[k] = targetAtom.mapIndex;
    }
  }
  return maps;
}
/**
 * Classifies the isomeric relationship between two SMILES strings.
 *
 * @param smiles1
 * @param smiles2
 * @return "" if either string is null or empty; "none" if the molecular
 *         formulas differ; otherwise one of "identical", "enantiomers",
 *         "diastereomers", "ambiguous stereochemistry!" or
 *         "constitutional isomers"
 * @throws Exception
 */
@Override
public String getRelationship(String smiles1, String smiles2)
    throws Exception {
  if (smiles1 == null || smiles2 == null) {
    return "";
  }
  if (smiles1.length() == 0 || smiles2.length() == 0) {
    return "";
  }
  String formula1 = getMolecularFormula(smiles1, false);
  String formula2 = getMolecularFormula(smiles2, false);
  if (!formula1.equals(formula2)) {
    return "none";
  }
  // count stereo descriptors, treating "@@" and "@" as one each
  int nStereo1 = PT.countChar(PT.rep(smiles1, "@@", "@"), '@');
  int nStereo2 = PT.countChar(PT.rep(smiles2, "@@", "@"), '@');
  boolean sameStereoCount = (nStereo1 == nStereo2);
  // note: find smiles1 IN smiles2 here
  if (sameStereoCount && areEqual(smiles2, smiles1) > 0) {
    return "identical";
  }
  // MF matched, but didn't match SMILES
  String both = smiles1 + smiles2;
  boolean anyStereo = (both.indexOf("/") >= 0 || both.indexOf("\\") >= 0
      || both.indexOf("@") >= 0);
  if (!anyStereo) {
    return "constitutional isomers";
  }
  if (sameStereoCount && nStereo1 > 0 && both.indexOf("@SP") < 0) {
    // reverse chirality centers
    if (areEqual("/invertstereo/" + smiles2, smiles1) > 0) {
      return "enantiomers";
    }
  }
  // remove all stereochemistry from SMILES string
  if (areEqual("/nostereo/" + smiles2, smiles1) > 0) {
    return (sameStereoCount ? "diastereomers" : "ambiguous stereochemistry!");
  }
  // MF matches, but not enantiomers or diastereomers
  return "constitutional isomers";
}
/**
 * Swaps "@" and "@@" throughout a SMILES string by plain text substitution,
 * using "!@" as a temporary placeholder for "@@".
 *
 * Note, this may be incompatible with [$(select(..))]
 *
 * THIS IS NOT DEPENDABLE. USE /invertStereo/ INSTEAD
 *
 * @param smiles
 * @return the string with "@" and "@@" exchanged
 */
@Override
public String reverseChirality(String smiles) {
  // note -- @@SP does not exist, so no special handling of
  // @SP / @OH / @TP descriptors is attempted here
  String parked = PT.rep(smiles, "@@", "!@");
  String doubled = PT.rep(parked, "@", "@@");
  return PT.rep(doubled, "!@@", "@");
}
/**
 * Returns a bitset matching the pattern within a set of Jmol atoms.
 *
 * @param pattern
 *        SMILES or SMARTS pattern.
 * @param atoms
 * @param ac
 *        atom count
 * @param bsSelected
 * @param flags
 *        search flags; OR-ed with the flags SmilesParser derives from the
 *        pattern itself
 * @return BitSet indicating which atoms match the pattern.
 * @throws Exception
 */
@Override
public BS getSubstructureSet(String pattern, Node[] atoms, int ac, BS bsSelected, int flags) throws Exception {
  int allFlags = flags | SmilesParser.getFlags(pattern);
  Object matched = matchPriv(pattern, atoms, ac, bsSelected, null, true,
      allFlags, MODE_BITSET);
  return (BS) matched;
}
/**
 * Runs a list of SMARTS patterns against a set of atoms, appending one
 * entry to ret per pattern processed. Called by ForceFieldMMFF.setAtomTypes
 * only.
 *
 * A single search is configured once (ring data, selections) and reused:
 * each pattern is installed as its only subsearch. Entries of smarts that
 * are null, empty, or start with "#" contribute a null entry to ret. The
 * method returns early once the union of all matches has exactly ac atoms.
 *
 * @param smarts
 *        SMARTS patterns, in the order they are to be tried
 * @param atoms
 * @param ac
 *        atom count
 * @param bsSelected
 * @param ret
 *        receives a copy of the matching bitset (or null) for each pattern
 * @param vRings
 *        passed to SmilesSearch.getRingData
 * @throws Exception
 */
@Override
public void getMMFF94AtomTypes(String[] smarts, Node[] atoms, int ac,
                               BS bsSelected, Lst ret, Lst[] vRings)
    throws Exception {
  InvalidSmilesException.clear();
  // target setting just turns off stereochemistry check
  SmilesParser parser = new SmilesParser(true, true);
  final int mmffFlags = (JC.SMILES_TYPE_SMARTS | JC.SMILES_AROMATIC_MMFF94);
  SmilesSearch search = parser.parse("");
  search.exitFirstMatch = false;
  search.targetAtoms = atoms;
  search.targetAtomCount = Math.abs(ac);
  search.setSelected(bsSelected);
  search.flags = mmffFlags;
  search.getRingData(vRings, true, true);
  search.asVector = false;
  search.subSearches = new SmilesSearch[1];
  search.getSelections();
  BS bsAssigned = new BS();
  for (String smartsPattern : smarts) {
    boolean isSkipped = (smartsPattern == null
        || smartsPattern.length() == 0 || smartsPattern.startsWith("#"));
    if (isSkipped) {
      ret.addLast(null);
      continue;
    }
    search.clear();
    search.subSearches[0] = parser.getSubsearch(search,
        SmilesParser.cleanPattern(smartsPattern), mmffFlags);
    BS bsMatch = BSUtil.copy((BS) search.search());
    ret.addLast(bsMatch);
    bsAssigned.or(bsMatch);
    if (bsAssigned.cardinality() == ac) {
      return;
    }
  }
}
/**
 * Returns an array of bitsets indicating which atoms match the pattern.
 *
 * @param pattern
 *        SMILES or SMARTS pattern.
 * @param atoms
 * @param ac
 *        atom count
 * @param bsSelected
 * @param bsAromatic
 * @param flags
 *        search flags, passed unchanged to the private matcher
 * @return BitSet Array indicating which atoms match the pattern.
 * @throws Exception
 */
@Override
public BS[] getSubstructureSetArray(String pattern, Node[] atoms, int ac,
                                    BS bsSelected, BS bsAromatic, int flags)
    throws Exception {
  Object matched = matchPriv(pattern, atoms, ac, bsSelected, bsAromatic,
      true, flags, MODE_ARRAY);
  return (BS[]) matched;
}
/**
 * Called by SmilesParser to get nn in ^nn- based on a match to the actual
 * structure. The aromatic set supplied is used as given (no aromaticity
 * test is requested from the private matcher).
 *
 * @param pattern
 * @param atoms
 * @param ac
 *        atom count
 * @param bsSelected
 * @param bsAromatic
 * @param flags
 * @return string of nn,nn,nn,nn
 * @throws Exception
 */
public String getAtropisomerKeys(String pattern, Node[] atoms, int ac,
                                 BS bsSelected, BS bsAromatic, int flags)
    throws Exception {
  Object keys = matchPriv(pattern, atoms, ac, bsSelected, bsAromatic, false,
      flags, MODE_ATROP);
  return (String) keys;
}
/**
 * Generate a topological SMILES string from a set of faces.
 *
 * One SmilesAtom is created per vertex. Consecutive vertices around each
 * face (cyclically) are joined by a single covalent bond unless the two
 * atoms are already bonded; vertex indices at or beyond atomCount are
 * skipped. The SMILES is then generated with explicit H, no aromaticity,
 * and stereochemistry ignored, in addition to the flags supplied.
 *
 * @param center
 *        central atom; used as the generator's polyhedron center when points
 *        is non-null, and in the header when the polyhedral flag is set
 * @param faces
 *        each entry lists the vertex indices of one face
 * @param atomCount
 *        number of vertices
 * @param points
 *        optional vertex points; a Node supplies element number, atom name,
 *        atom number and coordinates; a Point3fi supplies its sD value as
 *        the element number; anything else gets element number -2
 * @param flags
 *        SMILES generation flags
 * @param details
 *        optional text inserted between slashes after the @PHn descriptor
 *
 * @return topological SMILES string, prefixed with a center-atom header
 *         when JC.SMILES_GEN_POLYHEDRAL is set in flags
 * @throws Exception
 */
@Override
public String polyhedronToSmiles(Node center, int[][] faces, int atomCount,
                                 P3[] points, int flags, String details)
    throws Exception {
  // build one vertex atom per point
  SmilesAtom[] atoms = new SmilesAtom[atomCount];
  for (int i = 0; i < atomCount; i++) {
    SmilesAtom atom = atoms[i] = new SmilesAtom();
    P3 pt = (points == null ? null : points[i]);
    if (pt instanceof Node) {
      Node node = (Node) pt;
      atom.elementNumber = node.getElementNumber();
      atom.bioAtomName = node.getAtomName();
      atom.atomNumber = node.getAtomNumber();
      atom.setT(pt);
    } else if (pt instanceof Point3fi) {
      atom.elementNumber = ((Point3fi) pt).sD;
    } else {
      atom.elementNumber = -2;
    }
    atom.index = i;
  }
  // bond neighboring vertices of every face; faces and vertices are walked
  // from last to first, which fixes the order in which bonds are numbered
  int nBonds = 0;
  for (int f = faces.length - 1; f >= 0; f--) {
    int[] face = faces[f];
    int nVertices = face.length;
    for (int v = nVertices - 1; v >= 0; v--) {
      int ia = face[v];
      if (ia >= atomCount) {
        continue;
      }
      int ib = face[(v + 1) % nVertices];
      if (ib >= atomCount) {
        continue;
      }
      if (atoms[ia].getBondTo(atoms[ib]) != null) {
        continue;
      }
      SmilesBond bond = new SmilesBond(atoms[ia], atoms[ib],
          Edge.BOND_COVALENT_SINGLE, false);
      bond.index = nBonds++;
    }
  }
  // resize each atom's bond array to its actual bond count
  for (int i = 0; i < atomCount; i++) {
    SmilesAtom atom = atoms[i];
    int nAtomBonds = atom.bondCount;
    boolean isExactSize = (nAtomBonds != 0 && nAtomBonds == atom.bonds.length);
    if (!isExactSize) {
      atom.bonds = (SmilesBond[]) AU.arrayCopyObject(atom.bonds, nAtomBonds);
    }
  }
  SmilesGenerator generator = new SmilesGenerator();
  if (points != null) {
    generator.polySmilesCenter = (P3) center;
  }
  InvalidSmilesException.clear();
  int genFlags = flags | JC.SMILES_GEN_EXPLICIT_H | JC.SMILES_NO_AROMATIC
      | JC.SMILES_IGNORE_STEREOCHEMISTRY;
  String smiles = generator.getSmiles(this, atoms, atomCount,
      BSUtil.newBitSet2(0, atomCount), null, genFlags);
  boolean isPolyhedral = ((flags & JC.SMILES_GEN_POLYHEDRAL) == JC.SMILES_GEN_POLYHEDRAL);
  if (!isPolyhedral) {
    return smiles;
  }
  return "//* " + center + " *//\t["
      + Elements.elementSymbolFromNumber(center.getElementNumber()) + "@PH"
      + atomCount + (details == null ? "" : "/" + details + "/") + "]." + smiles;
}
/**
 * Rather than returning bitsets, this method returns the sets of matching
 * atoms in array form so that a direct atom-atom correlation can be made.
 *
 * @param pattern
 *        SMILES or SMARTS pattern.
 * @param atoms
 * @param atomCount
 * @param bsSelected
 * @param flags
 *        search flags, passed unchanged to the private matcher
 * @return a set of atom correlations
 * @throws Exception
 */
@Override
public int[][] getCorrelationMaps(String pattern, Node[] atoms, int atomCount,
                                  BS bsSelected, int flags) throws Exception {
  Object maps = matchPriv(pattern, atoms, atomCount, bsSelected, null, true,
      flags, MODE_MAP);
  return (int[][]) maps;
}
/////////////// private methods ////////////////
/**
 * Matches a pattern against a target that was itself parsed from a SMILES
 * string. A topological model set is created from the target; stereochemistry
 * is not a concern here -- it will be handled by
 * SmilesSearch.setSmilesCoordinates.
 *
 * The atom count is handed to matchPriv negated, and an aromaticity test is
 * requested only if building the topology left the aromatic set empty.
 *
 * @param pattern
 *        SMILES or SMARTS pattern
 * @param search
 *        parsed target; its flags are OR-ed with those derived from pattern
 * @param flags
 * @param mode
 *        one of the MODE_ constants
 * @return the result of matchPriv for the given mode
 * @throws Exception
 */
private Object findPriv(String pattern, SmilesSearch search, int flags, int mode)
    throws Exception {
  BS bsAromatic = new BS();
  int targetFlags = search.flags | SmilesParser.getFlags(pattern);
  search.setFlags(targetFlags);
  search.createTopoMap(bsAromatic);
  Node[] topoAtoms = search.targetAtoms;
  int negatedCount = -topoAtoms.length;
  boolean needsAromaticTest = bsAromatic.isEmpty();
  return matchPriv(pattern, topoAtoms, negatedCount, null, bsAromatic,
      needsAromaticTest, flags, mode);
}
/**
 * Core matcher behind all public search methods: parses the pattern, points
 * the resulting search at the target atoms, runs it, and packages the result
 * according to mode.
 *
 * @param pattern
 *        SMILES or SMARTS pattern; parsed as SMARTS when
 *        JC.SMILES_TYPE_SMARTS is set in flags
 * @param atoms
 *        target atoms
 * @param ac
 *        target atom count; a negative value marks the search as already
 *        having a topology (haveTopo), and its absolute value is used as the
 *        count
 * @param bsSelected
 *        atoms to consider; if non-null and empty, target selection and
 *        ring-data setup are skipped
 * @param bsAromatic
 *        aromatic atom set; used directly when doTestAromatic is false
 * @param doTestAromatic
 *        when false, bsAromatic is installed on the search as is
 * @param flags
 *        search flags
 * @param mode
 *        MODE_BITSET, MODE_ARRAY, MODE_ATROP or MODE_MAP
 * @return for MODE_BITSET the raw search result; for MODE_ARRAY a BS[]; for
 *         MODE_ATROP the atropisomer key string; for MODE_MAP an int[][];
 *         null for any other mode
 * @throws Exception
 *         an InvalidSmilesException for any failure; its message is the
 *         last recorded SMILES error or, when none was recorded, the string
 *         form of the underlying exception
 */
@SuppressWarnings({ "unchecked" })
private Object matchPriv(String pattern, Node[] atoms, int ac, BS bsSelected,
                         BS bsAromatic, boolean doTestAromatic, int flags,
                         int mode) throws Exception {
  InvalidSmilesException.clear();
  try {
    boolean isSmarts = ((flags & JC.SMILES_TYPE_SMARTS) == JC.SMILES_TYPE_SMARTS);
    // Note that additional flags are set when the pattern is parsed.
    SmilesSearch search = SmilesParser.newSearch(pattern, isSmarts, false);
    if (!isSmarts && !search.patternAromatic) {
      // a SMILES pattern not flagged as aromatic is normalized up front
      if (bsAromatic == null)
        bsAromatic = new BS();
      SmilesSearch.normalizeAromaticity(search.patternAtoms, bsAromatic,
          search.flags);
      search.isNormalized = true;
    }
    search.targetAtoms = atoms;
    search.targetAtomCount = Math.abs(ac);
    if (ac < 0)
      search.haveTopo = true;
    if (ac != 0 && (bsSelected == null || !bsSelected.isEmpty())) {
      // targets that are not SmilesAtoms are treated as a 3D model
      boolean is3D = !(atoms[0] instanceof SmilesAtom);
      search.setSelected(bsSelected);
      search.getSelections();
      if (!doTestAromatic)
        search.bsAromatic = bsAromatic;
      search.setRingData(null, null, is3D || doTestAromatic);
      search.exitFirstMatch = ((flags & JC.SMILES_FIRST_MATCH_ONLY) == JC.SMILES_FIRST_MATCH_ONLY);
    }
    switch (mode) {
    case MODE_BITSET:
      search.asVector = false;
      return search.search();
    case MODE_ARRAY:
      search.asVector = true;
      Lst vb = (Lst) search.search();
      return vb.toArray(new BS[vb.size()]);
    case MODE_ATROP:
      search.exitFirstMatch = true;
      search.setAtropicity = true;
      search.search();
      return search.atropKeys;
    case MODE_MAP:
      search.getMaps = true;
      search.setFlags(flags | search.flags); // important for COMPARE command - no stereochem
      Lst vl = (Lst) search.search();
      return vl.toArray(AU.newInt2(vl.size()));
    }
  } catch (Exception e) {
    if (Logger.debugging)
      e.printStackTrace();
    String message = InvalidSmilesException.getLastError();
    if (message == null) {
      // The failure was not recorded as a SMILES error (for instance a
      // runtime exception raised inside the search). Report the underlying
      // exception instead of rethrowing with a null message.
      InvalidSmilesException.clear();
      message = e.toString();
    }
    throw new InvalidSmilesException(message);
  }
  return null;
}
/**
 * Normalizes a SMILES string by delegating to SmilesParser.cleanPattern.
 *
 * @param smiles
 * @return the result of SmilesParser.cleanPattern(smiles)
 */
@Override
public String cleanSmiles(String smiles) {
  final String cleaned = SmilesParser.cleanPattern(smiles);
  return cleaned;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy