All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.cml.tools.Morgan Maven / Gradle / Ivy

/**
 *    Copyright 2011 Peter Murray-Rust et. al.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package org.xmlcml.cml.tools;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import nu.xom.Attribute;

import org.apache.log4j.Logger;
import org.xmlcml.cml.base.AbstractTool;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLAtomSet;
import org.xmlcml.cml.element.CMLLabel;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.euclid.Util;
import org.xmlcml.molutil.ChemicalElement;

/**
 * class to apply Morgan algorithm.
 * 
 * 
 *  The simple Morgan algorithm recursively adds neighbouring connectivity until the 
 *  number of equivalence classes no longer changes. Thus methyl cyclopropane
 *  a1 hydrogenCount = 3
 *  a2 hydrogenCount = 1
 *  a3 hydrogenCount = 2
 *  a4 hydrogenCount = 2
 *  a1-a2
 *  a2-a3
 *  a2-a4
 *  a3-a4
 *  gives equivalence classes for Carbons:
 *  a1, a2, (a3, a4)
 *  (each described by a unique number)
 *  the result is an ordered list of equivalence classes. Each class has an atomSet of the atoms 
 *  in that class. To map one molecule to another the equivalenceClassList for each is calculated and 
 *  then the atoms in each class can be mapped
 * 
* * the output is to a CMLMap. * * @author pm286 Copyright P.Murray-Rust, 29-May-2005 Artistic license * */ public class Morgan extends AbstractTool { private static Logger LOG = Logger.getLogger(Morgan.class); public static String EQUIVALENCE_ATTRIBUTE = "equivalenceString"; /** decides on type of algorithm. * no choice at present * @author pm286 * */ public enum Algorithm { /** simple morgan algorithm; returns equivalence classes */ SIMPLE, /** tries to split equivalence classes */ SPLIT; } /** annotation for atoms. * * @author pm286 * */ public enum Annotation { /** marked for equivalence class */ MARKED, /** existing morgan numbers */ NUMBER, /** accumulator for next numbers; recursively transferredd to MORGAN0 */ NEXTNUMBER; } final static Logger logger = Logger.getLogger(Morgan.class.getName()); private static int MAX_ITER = 10; private Algorithm algorithm = null; private CMLAtomSet constantAtomSet; private CMLMolecule molecule; private List morganList = null; private Map equivalenceMap = null; private int nClasses = -1; private List markedAtomsList = null; private String equivalenceString; private List atomSetList; private List childMorganList; /** * constructor * * @param molecule */ public Morgan() { init(); } /** * constructor * * @param molecule * @deprecated ("Use setMolecule()") */ public Morgan(CMLMolecule molecule) { this(); setMolecule(molecule); } public void setMolecule(CMLMolecule molecule) { List childMoleculeList = CMLMolecule.getChildMoleculeList(molecule); if (molecule == null) { throw new RuntimeException("Null molecule"); } else if (childMoleculeList.size() > 0) { childMorganList = new ArrayList(); for (CMLMolecule childMolecule : childMoleculeList) { childMorganList.add(new Morgan(childMolecule)); } } else { this.molecule = molecule; this.constantAtomSet = MoleculeTool.getOrCreateTool(molecule).getAtomSet(); } } void init() { this.algorithm = Algorithm.SIMPLE; clean(); equivalenceString = null; molecule = null; } private void clean() { equivalenceMap = new HashMap(); morganList = null; } /** * constructor. * * @param atomSet */ public Morgan(CMLAtomSet atomSet) { this.constantAtomSet = new CMLAtomSet(atomSet); init(); } /** * set algorithm. * @param alg */ public void setAlgorithm(Algorithm alg) { algorithm = alg; } /** get map between Morgan numbers and atomSets (equivalences). * * @return map indexed by number */ public Map getEquivalenceMap() { return equivalenceMap; } public String getEquivalenceString() { ensureMorganList(); return calculateEquivalenceString(); } private String calculateEquivalenceString() { equivalenceString = CMLConstants.S_EMPTY; if (childMorganList != null) { StringBuilder sb = new StringBuilder(); sb.append(CMLConstants.S_LCURLY); for (int i = 0; i < childMorganList.size(); i++) { if (i > 0) { sb.append(CMLConstants.S_SEMICOLON); } sb.append(childMorganList.get(i).getEquivalenceString()); } sb.append(CMLConstants.S_RCURLY); equivalenceString = sb.toString(); } else { List longList = getSortedLongList(); for (Long longx : longList) { CMLAtomSet atomSet = equivalenceMap.get(longx); equivalenceString += longx+atomSet.getAtoms().get(0).getElementType()+atomSet.size()+"/"; } } return equivalenceString; } private List getSortedLongList() { ensureMorganList(); List longList = new ArrayList(); for (Long longx : equivalenceMap.keySet()) { longList.add(longx); } Collections.sort(longList); return longList; } public void addAtomAndMoleculeLabels() { ensureMorganList(); List longList = getSortedLongList(); for (Long longx : longList) { CMLAtomSet atomSet = equivalenceMap.get(longx); for (CMLAtom atom : atomSet.getAtoms()) { CMLLabel label = new CMLLabel(); label.addAttribute(new Attribute("role", "cml:morgan")); label.setCMLValue(""+longx); atom.addLabel(label); } } if (molecule != null) { getEquivalenceString(); CMLLabel label = new CMLLabel(); label.addAttribute(new Attribute("role", "cml:morgan")); label.setCMLValue(equivalenceString); molecule.addLabel(label); } } /** get list of atoms required to break equivalence classes. * should be null unless SPLIT is used. * @return list of marked atoms (or null) */ public List getMarkedAtomList() { return markedAtomsList; } /** * return ordered (by atoms in molecule or set) list of morganNumbers * * @return list of integers */ public List getMorganList() { ensureMorganList(); return morganList; } /** \ * return ordered (by atoms in molecule or set) list of morganNumbers * only returns atoms with element types in list. Will not return empty sets * * @return list of integers */ public List getMorganListIncluding(String[] elementTypes) { morganListExcludeInclude(elementTypes, true); return morganList; } /** * return ordered (by atoms in molecule or set) list of atomSets * only returns atoms with element types in list. Will not return empty sets * * @return list of integers */ public List getAtomSetListIncluding(String[] elementTypes) { morganListExcludeInclude(elementTypes, true); return getAtomSetList(); } private void morganListExcludeInclude(String[] elementTypes, boolean include) { ensureMorganList(); atomSetList = this.getAtomSetList(); morganList = this.getMorganList(); List newAtomSetList = new ArrayList(); List newMorganList = new ArrayList(); for (int i = 0; i < atomSetList.size(); i++) { CMLAtomSet atomSet = atomSetList.get(i); CMLAtomSet newAtomSet = (include == true) ? atomSet.includeElementTypes(elementTypes) : atomSet.excludeElementTypes(elementTypes); if (newAtomSet.size() > 0) { newAtomSetList.add(atomSet); newMorganList.add(morganList.get(i)); } } morganList = newMorganList; atomSetList = newAtomSetList; } /** * return ordered (by atoms in molecule or set) list of morganNumbers * only returns atoms with element types not in list. Will not return empty sets * * @return list of integers */ public List getMorganListExcluding(String[] elementTypes) { morganListExcludeInclude(elementTypes, false); return morganList; } /** * return ordered (by atoms in molecule or set) list of atomSets * only returns atoms with element types not in list. Will not return empty sets * * @return list of atomSets */ public List getAtomSetListExcluding(String[] elementTypes) { morganListExcludeInclude(elementTypes, false); return getAtomSetList(); } private void ensureMorganList() { if (morganList == null) { morganList = new ArrayList(); // iterate until number of equivalence classes is constant iterateTillConstantEquivalenceClassCount(); // if (algorithm == Algorithm.SPLIT) { repeatedlySplitEquivalences(); } } } private void calculateMorganNumbers() { if (constantAtomSet != null) { for (CMLAtom atom : constantAtomSet.getAtoms()) { // start with ligand count for each atom atom.setProperty(Annotation.NUMBER.toString(), new Long(Morgan .getNumber(atom, constantAtomSet))); } } } /** get largest atomSet. * only counts those with more than 1 atom. * @return largest set or null (if all sets are of size 1 */ private CMLAtomSet getLargestAtomSet() { CMLAtomSet atomSet = null; for (Long ii : equivalenceMap.keySet()) { CMLAtomSet atomSet0 = equivalenceMap.get(ii); if (atomSet0.size() <= 1) { // } else if (atomSet == null) { atomSet = atomSet0; } else if (atomSet0.size() > atomSet.size()) { atomSet = atomSet0; } } return atomSet; } private void addMarkedAtom(CMLAtom markedAtom) { if (markedAtomsList == null) { markedAtomsList = new ArrayList(); } markedAtomsList.add(markedAtom); markedAtom.setProperty(Annotation.MARKED.toString(), new Long(markedAtomsList.size())); } private void iterateTillConstantEquivalenceClassCount() { calculateMorganNumbers(); int newClassCount = -1; while (true) { clean(); newClassCount = classifyMorganNumbers(); if (newClassCount <= nClasses) { break; } nClasses = newClassCount; expandMorganNumbers(); } } private void repeatedlySplitEquivalences() { int count = 0; while (true) { CMLAtomSet atomSet = getLargestAtomSet(); if (atomSet == null) { break; } if (count++ > MAX_ITER) { break; } CMLAtom markedAtom = atomSet.getAtom(0); addMarkedAtom(markedAtom); iterateTillConstantEquivalenceClassCount(); } } /** returns ordered list (by morgan numbers) of atomSets. * each element is an atomSet with one or more atoms * * @return the list of ordered atomSets */ public List getAtomSetList() { ensureMorganList(); atomSetList = new ArrayList(); for (int i = 0; i < morganList.size(); i++) { Long morganNumber = morganList.get(i); CMLAtomSet atomSet = equivalenceMap.get(morganNumber); atomSetList.add(atomSet); } return atomSetList; } /** puts morgan numbers into equivalence classes. * * @return number of classes */ private int classifyMorganNumbers() { if (constantAtomSet != null) { for (CMLAtom atom : constantAtomSet.getAtoms()) { Long morganNumber = (Long) atom.getProperty(Annotation.NUMBER.toString()); CMLAtomSet atomSet = equivalenceMap.get(morganNumber); if (atomSet == null) { atomSet = new CMLAtomSet(); addMorganNumber(morganNumber, atomSet); } atomSet.addAtom(atom); } if (morganList == null) { // LOG.error("null morganList"); } else { Collections.sort(morganList); } } return equivalenceMap.size(); } private void addMorganNumber(Long morganNumber, CMLAtomSet atomSet) { if (equivalenceMap == null) { equivalenceMap = new HashMap(); } equivalenceMap.put(morganNumber, atomSet); if (morganList == null) { morganList = new ArrayList(); } morganList.add(morganNumber); } private static List getLigandAtomsInAtomSetList(CMLAtom atom, CMLAtomSet atomSet) { List ligandsInAtomSetList = new ArrayList(); for (CMLAtom ligand : atom.getLigandAtoms()) { if (atomSet.contains(ligand)) { ligandsInAtomSetList.add(ligand); } } return ligandsInAtomSetList; } /** expand shell by adding numbers from ligands * */ private void expandMorganNumbers() { if (constantAtomSet != null) { for (CMLAtom atom : constantAtomSet.getAtoms()) { // iterate through all ligands long newMorgan = ((Long) atom.getProperty(Annotation.NUMBER.toString())).longValue(); List ligandList = Morgan.getLigandAtomsInAtomSetList(atom, constantAtomSet); int ii = 0; for (CMLAtom ligand : ligandList) { Long llong = (Long) ligand.getProperty(Annotation.NUMBER.toString()); if (llong == null) { System.err.println(ligand.getId()+"/"+ii); throw new RuntimeException("null long in morgan"); } ii++; newMorgan += llong.longValue(); } atom.setProperty(Annotation.NEXTNUMBER.toString(), new Long(newMorgan)); } // transfer new morgan values to old for (CMLAtom atom : constantAtomSet.getAtoms()) { atom.setProperty(Annotation.NUMBER.toString(), atom.getProperty(Annotation.NEXTNUMBER.toString())); } } } /** prints debug. * @param message */ public void debug(String message) { if (morganList == null) { LOG.warn("NO atoms in list"); } else { Util.println("==========="+message+"=========="); constantAtomSet.debug("MORGAN"); for (int i = 0; i < morganList.size(); i++) { if (i % 5 == 0) { Util.println(); } Long morganNumber = (Long) morganList.get(i); @SuppressWarnings("unused") CMLAtomSet atomSet = (CMLAtomSet) equivalenceMap.get(morganNumber); Util.println(" I "+morganNumber); } } Util.println(); } /** calculate number for atom. * * @param atom * @return int */ private static long getNumber(CMLAtom atom, CMLAtomSet atomSet) { ChemicalElement chemicalElement = ChemicalElement .getChemicalElement(atom.getElementType()); int atomicNumber = (chemicalElement == null) ? 0 : chemicalElement .getAtomicNumber(); List ligandList = Morgan.getLigandAtomsInAtomSetList(atom, atomSet); int nLigand = ligandList.size(); int np = 10; long morgan = nLigand; // atomic number morgan = atomicNumber + (Util.getPrime(np++) * morgan); // hydrogen count morgan = atom.getHydrogenCount() + (Util.getPrime(np++) * morgan); // formal charge morgan = 4 + atom.getFormalCharge() + (Util.getPrime(np++) * morgan); // property Long property = (Long) atom.getProperty(Annotation.MARKED.toString()); int propertyValue = (property == null) ? 0 : property.hashCode(); morgan = Util.getPrime(propertyValue) + (Util.getPrime(np++) * morgan); // uses labels as disambiguation CMLLabel label = (CMLLabel) atom.getFirstCMLChild(CMLLabel.TAG); String labelS = (label == null) ? CMLConstants.S_EMPTY : label.getValue(); @SuppressWarnings("unused") int labelHash = labelS.hashCode() % 1000; // and properties return morgan; } /** * * @param groupSMILESString of form "[R]CNOSCC" with singly liganded R-group * @return */ public static String createMorganStringFromSMILES(String smiles) { String morganString = null; if (smiles != null) { CMLMolecule smilesMolecule = SMILESTool.createMolecule(smiles); morganString = createMorganStringFromMolecule(smilesMolecule); } return morganString; } public static String createMorganStringFromMolecule(CMLMolecule molecule) { String s = null; if (molecule != null) { Morgan morgan = new Morgan(); morgan.setMolecule(molecule); try { s = morgan.getEquivalenceString(); } catch (Exception e) { throw new RuntimeException("BUG? maybe multiple molecules: ", e); } } return s; } /** * * @param groupSMILESString of form "[R]CNOSCC" with singly liganded R-group * @return */ public static String createMorganStringFromSMILESRGroup(String groupSMILESString) { String morganString = null; if (groupSMILESString != null) { CMLMolecule smilesMolecule = SMILESTool.createMolecule(groupSMILESString); morganString = createMorganStringFromRGroupMolecule(smilesMolecule); } return morganString; } public static String createMorganStringFromRGroupMolecule(CMLMolecule molecule) { String morganString = null; CMLAtomSet rAtomSet = AtomSetTool.getOrCreateTool(new CMLAtomSet(molecule)) .getAtomSetIncludingElementTypes(new String[]{"R"}); if (rAtomSet != null && rAtomSet.size() == 1) { CMLAtom rAtom = rAtomSet.getAtom(0); if (rAtom.getLigandAtoms().size() == 1) { BondTool rBondTool = BondTool.getOrCreateTool(rAtom.getLigandBonds().get(0)); morganString = rBondTool.getDownstreamMorganString(rAtom); } } return morganString; } public CMLAtomSet getUniqueAtomSet() { CMLAtomSet uniqueAtomSet = new CMLAtomSet(); List atomSetList = getAtomSetList(); for (CMLAtomSet atomSet : atomSetList) { if (atomSet.size() == 1) { uniqueAtomSet.addAtom(atomSet.getAtom(0)); } } return uniqueAtomSet; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy