org.xmlcml.cml.tools.AtomTree Maven / Gradle / Ivy
/**
* Copyright 2011 Peter Murray-Rust et. al.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.xmlcml.cml.tools;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import org.xmlcml.cml.base.AbstractTool;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLLabel;
import org.xmlcml.euclid.IntMatrix;
import org.xmlcml.molutil.ChemicalElement;
import org.xmlcml.molutil.ChemicalElement.AS;
/**
* An atom-centered tree.
*
* Each tree is composed of subtrees recursively.
*
* AtomTree starts at the atom and recursively adds new ligands, level by level.
* LigandManager are ordered lexically. At any level the leaves may be equal,
* but recursion to deeper levels may resolve this and so change the ordering of
* the atoms (though not of their lexical representations). bond orders are
* ignored (InChI-like).
The following are supported and can be used in
* any combination:
*
* - formal charges
* - atom labels
* - implicit hydrogens
* - explicit hydrogens
*
*
*
* thus for DMF - HC(O)N(C(H)(H)(H))(C(H)(H)(H))
* the carbon has the lexical representations
* level 0 C
* level 1 C(H)(O)(N)
* level 2 C(H)(O)(N(C)(C))
* level 3 C(H)(O)(N(C(H)(H)(H))(C(H)(H)(H))
*
*
* Typical usage:
*
*
* CMLAtom atom = ...
* AtomTree atomTree = new AtomTree(atom);
* atomTree.setUseCharges(true)
*
*
*
*
*/
public class AtomTree extends AbstractTool implements Comparable {
// the parent of each atomTree. null for top atom. prevents backrecursion
private CMLAtom parent;
private CMLAtom atom;
// child trees
private List atomTreeList = null;
private AtomTree[] atomTree;
private AtomMatchObject atomMatchObject;
/**
* the maximum level to explore atomTree labelling
*
*/
protected int maximumAtomTreeLevel;
/**
* create from given atom and parent.
*
* @param parent
* either ligand or atom or null; if not a ligand, will be set to
* null
* @param atom
* next node of recursive tree; if null no action
*/
public AtomTree(CMLAtom parent, CMLAtom atom) {
super();
atomMatchObject = new AtomMatchObject();
this.parent = parent;
this.atom = atom;
if (atom != null) {
atomTreeList = new ArrayList();
atomMatchObject.setUseCharge(false);
atomMatchObject.setUseImplicitHydrogens(false);
atomMatchObject.setExplicitHydrogens(false);
}
// molecule = atom.getMolecule();
}
/** not directly called. used when AtomTree is
* primarily a DTO
*/
private AtomTree() {
}
/**
* create from atom without parent.
*
* @param atom
* root node
*/
public AtomTree(CMLAtom atom) {
this(null, atom);
}
/** generate atomtree as DTO (i.e. not based on atom)
*
* @return
*/
public static AtomTree createDefaultAtomTree() {
return new AtomTree();
}
/**
* is charge to be included in string.
*
* @param ch
* true if charge included (default false)
*/
public void setUseCharge(final boolean ch) {
atomMatchObject.setUseCharge(ch);
}
/**
* is atom label to be included in string.
*
* @param lab
* true if label included (default false)
*/
public void setUseLabel(final boolean lab) {
atomMatchObject.setUseLabel(lab);
}
public int getAtomTreeLevel() {
return atomMatchObject.getAtomTreeLevel();
}
public void setAtomTreeLevel(int atomTreeLevel) {
atomMatchObject.setAtomTreeLevel(atomTreeLevel);
}
/**
* are implicit hydrogens to be included in string.
*
* @param hyd
* true if implict hydrogens used (default false)
*/
public void setUseImplicitHydrogens(final boolean hyd) {
atomMatchObject.setUseImplicitHydrogens(hyd);
}
/**
* are explicit hydrogens to be included in tree.
*
* @param hyd
* true if explict hydrogens used (default false)
*/
public void setUseExplicitHydrogens(final boolean hyd) {
atomMatchObject.setUseExplicitHydrogens(hyd);
}
/**
* add layers of atomTrees. //FIXME - has hydrogen logic hardcoded
*
* @param level
* number of levels (0 = bare atom)
*/
public void expandTo(final int level) {
atomMatchObject.setAtomTreeLevel(level);
if (atom != null && level > 0) {
List ligandList = atom.getLigandAtoms();
for (CMLAtom ligand : ligandList) {
if (ligand != this.parent
&& (atomMatchObject.isUseExplicitHydrogens() || !AS.H.equals(ligand.getElementType()))) {
AtomTree ligandTree = new AtomTree(this.atom, ligand);
ligandTree.setAtomMatchObject(this.atomMatchObject);
ligandTree.setUseCharge(atomMatchObject.isUseCharge());
// ligandTree.setUseLabel(label);
// ligandTree.setUseImplicitHydrogens(implicitHydrogens);
// ligandTree.setUseExplicitHydrogens(explicitHydrogens);
atomTreeList.add(ligandTree);
ligandTree.expandTo(level - 1);
}
}
atomTree = (AtomTree[]) atomTreeList.toArray(new AtomTree[0]);
Arrays.sort(atomTree);
}
}
private void setAtomMatchObject(AtomMatchObject atomMatchObject) {
this.atomMatchObject = atomMatchObject;
}
/**
* compares atomTrees by lexical representation.
*
* @param o
* the atomTree to compare
* @return this.toString().compareTo(o.toString())
* @throws ClassCastException
* o is not an AtomTree
*/
public int compareTo(AtomTree o) {
return this.toString().compareTo(o.toString());
}
/** finds maximum depth of atomTreeString
*
* @param s
* @return
*/
public static int getLevelOfAtomTreeString(String s) {
int maxDepth = -1;
if (s != null) {
maxDepth = 0;
int depth = 0;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (c == CMLConstants.C_LBRAK) {
depth++;
if (maxDepth < depth) {
maxDepth = depth;
}
} else if (c == CMLConstants.C_RBRAK) {
depth--;
}
}
}
return maxDepth;
}
/** removes leaves and branches until tree is of
* specified depth. No effect is depth is already that
* size or smaller
* @param s
* @param level
* @return
*/
public static String trimToLevel(String s, int level) {
if (s == null) return null;
StringBuilder sb = new StringBuilder();
int depth = 0;
for (int i = 0; i < s.length(); i++) {
char c = s.charAt(i);
if (c == CMLConstants.C_LBRAK) {
if (depth++ <= level-1) {
sb.append(c);
}
} else if (c == CMLConstants.C_RBRAK) {
if (--depth <= level-1) {
sb.append(c);
}
} else if (depth <= level) {
sb.append(c);
}
}
return sb.toString();
}
public static ChemicalElement getRootElement(String atomTreeString) {
String s = AtomTree.trimToLevel(atomTreeString, 0);
return ChemicalElement.getChemicalElement(s);
}
/**
* string representation. sorted recursive levels, enclosed in (...) atoms
* have charges and hydrogens as set by flags
*
* @return string
*/
public String toString() {
StringBuffer s = new StringBuffer(S_EMPTY);
String elType = atom.getElementType();
if (atomMatchObject.isUseExplicitHydrogens() || !AS.H.equals(elType)) {
s.append(elType);
if (atomMatchObject.isUseLabel()) {
CMLLabel childLabel = (CMLLabel) atom.getFirstChildElement(
"label", CMLConstants.CML_NS);
if (childLabel != null) {
s.append(S_LCURLY);
s.append(childLabel.getValue());
s.append(S_RCURLY);
}
}
if (atom.getHydrogenCountAttribute() != null) {
int hCount = atom.getHydrogenCount();
if (atomMatchObject.isUseImplicitHydrogens() && hCount > 0) {
s.append(AS.H.value);
s.append(((hCount == 1) ? CMLConstants.S_EMPTY : CMLConstants.S_EMPTY + hCount));
}
}
if (atomMatchObject.isUseCharge() & atom.getFormalChargeAttribute() != null) {
int ch = atom.getFormalCharge();
int nch = (ch > 0) ? ch : -ch;
if (ch != 0) {
String chS = (ch > 0) ? CMLConstants.S_PLUS : CMLConstants.S_MINUS;
for (int i = 0; i < nch; i++) {
s.append(chS);
}
}
}
if (atomTree != null) {
for (int i = 0; i < atomTree.length; i++) {
s.append(S_LBRAK);
s.append(atomTree[i].toString());
s.append(S_RBRAK);
}
}
}
return s.toString();
}
public static IntMatrix createSimilarityMatrix(
List sortedAtomTreeStringi,
List sortedAtomTreeStringj) {
int maxLen = -1;
int rows = sortedAtomTreeStringi.size();
int cols = sortedAtomTreeStringj.size();
IntMatrix intMatrix = new IntMatrix(rows, cols);
for (int irow = 0; irow < rows; irow++) {
String si = sortedAtomTreeStringi.get(irow);
for (int jcol = 0; jcol < cols; jcol++) {
String sj = sortedAtomTreeStringj.get(jcol);
int compare = maximumCommonLevel(si, sj);
if (compare > maxLen) maxLen = compare;
intMatrix.setElementAt(irow, jcol, compare);
}
}
return intMatrix;
}
public static int maximumCommonLevel(String si, String sj) {
int leni = getLevelOfAtomTreeString(si);
int lenj = getLevelOfAtomTreeString(sj);
int len = Math.min(leni, lenj);
while (len >= 0) {
String sii = trimToLevel(si, len);
String sjj = trimToLevel(sj, len);
if (sii.equals(sjj)) {
return len;
}
len--;
}
return len;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy