org.xmlcml.cml.tools.InlineMolecule Maven / Gradle / Ivy
/**
* Copyright 2011 Peter Murray-Rust et. al.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.xmlcml.cml.tools;
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.base.CMLUtil;
import org.xmlcml.cml.element.CMLAngle;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLBond;
import org.xmlcml.cml.element.CMLLength;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.cml.element.CMLTorsion;
import org.xmlcml.cml.element.CMLZMatrix;
import org.xmlcml.euclid.Util;
import org.xmlcml.molutil.ChemicalElement;
/** create molecule from inline representation of the atoms and bonds.
*
* simple grammar to represent atoms, connecting bonds and qualifiers
* for either. Allows branches but not rings.
* Syntax:
*
* (atomBond) * atom
*
* where (not yet finished):
*
* atom := [A-Z][a-z]? // must be valid PT element
* atomQualifier := '[' id? chirality? ']'// id must be unique
* bond := ['-' | '=' | '#']? //
* bondQualifier := '[' len? tor? ']'// must be unique
* branch := '(' branch* tree? ')'
* atomBond = (atom atomQualifier? branch* bond bondQualifier?)* atom
*
* @author pm286
*
*/
public class InlineMolecule implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineMolecule.class);
/** error messages.*/
public enum Error {
/** bad bond*/
BAD_BOND("Bad bond:"),
/** bad state*/
BAD_STATE("Bad state:"),
/** bad qualifier*/
BAD_QUALIFIER("Bad qualifier:"),
/** empty qualifier*/
EMPTY_QUALIFIER("Empty qualifier:"),
/** bad symbol*/
BAD_SYMBOL("Bad atom symbol:"),
;
String s;
private Error(String s) {
this.s = s;
}
}
/** states of parse.*/
public enum State {
/** start*/
START,
/** finished atom.*/
ATOM,
/** finished bond.*/
BOND,
/** started qualifier.*/
QUALIFIER,
/** started branch.*/
BRANCH
}
CMLMolecule cmlMolecule;
String formula;
State state;
InlineAtom rootAtom;
int serial = 0;
/** create from string.
*
* @param formula
*/
public InlineMolecule(String formula) {
this.formula = formula;
rootAtom = null;
createFromString(formula);
makeMolecule();
}
/** create new molecule.
*
* @param formula
*/
public void createFromString(String formula) {
cmlMolecule = new CMLMolecule();
state = State.START;
int i = 0;
serial = 0;
InlineAtom currentAtom = null;
InlineBond currentBond = null;
while (i < formula.length()) {
if (state == State.START || state == State.BOND) {
InlineAtom inlineAtom = InlineAtom.grab(formula.substring(i), this);
inlineAtom.cmlAtom.setId("a"+(++serial));
cmlMolecule.addAtom(inlineAtom.cmlAtom);
if (inlineAtom == null) {
throw new RuntimeException("NULL atom");
}
if (state == State.START) {
rootAtom = inlineAtom;
}
if (currentBond != null) {
inlineAtom.addBond(currentAtom, currentBond);
currentBond = null;
}
currentAtom = inlineAtom;
i += inlineAtom.getLength();
state = State.ATOM;
// boolean checkDuplicates = true;
} else if (state == State.ATOM) {
if (formula.substring(i).charAt(0) == InlineBranch.START) {
InlineBranch branch = InlineBranch.grab(formula.substring(i), currentAtom, this, serial);
i += branch.getLength(formula.substring(i));
serial = branch.serial;
} else {
currentBond = InlineBond.grab(formula.substring(i));
if (currentBond == null) {
throw new RuntimeException("NULL bond");
}
i += currentBond.getLength();
state = State.BOND;
}
} else {
throw new RuntimeException(InlineMolecule.Error.BAD_STATE+S_COLON+state+S_COLON);
}
}
}
void makeMolecule() {
try {
CMLZMatrix zMatrix = new CMLZMatrix(cmlMolecule);
zMatrix.addCartesiansTo(cmlMolecule);
} catch (RuntimeException e) {
LOG.debug("WARN of ZMAT "+e);
}
}
/** get the created molecule.
*
* @return molecule (null if none)
*/
public CMLMolecule getCmlMolecule() {
return cmlMolecule;
}
/** debug.
*/
public void debug() {
cmlMolecule.debug("INLINE");
rootAtom.debug();
}
}
class InlineAtom implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineAtom.class);
private InlineAtom greatGrandParent = null;
private InlineAtom grandParent = null;
private InlineAtom parent = null;
List childAtoms = null;
List childBonds = null;
CMLAtom cmlAtom;
InlineMolecule molecule;
/** components of qualifier.*/
public enum Qual {
/** chirality */
CHIRALITY("c"),
/** id */
ID("id"),
;
String value;
private Qual(String v) {
value = v;
}
}
ChemicalElement chemicalElement;
Qualifier qual;
double chirality = Double.NaN;
String id;
/** constructor.
* creates new CMLAtom as member.
* @param molecule
*/
public InlineAtom(InlineMolecule molecule) {
cmlAtom = new CMLAtom();
childAtoms = new ArrayList();
childBonds = new ArrayList();
this.molecule = molecule;
}
void addBond(InlineAtom atom, InlineBond bond) {
if (atom != null) {
this.parent = atom;
atom.childAtoms.add(this);
atom.childBonds.add(bond);
CMLBond cmlBond = new CMLBond(parent.cmlAtom, this.cmlAtom);
molecule.cmlMolecule.addBond(cmlBond);
String parentId = parent.cmlAtom.getId();
String atomId = cmlAtom.getId();
CMLLength length = new CMLLength();
length.setAtomRefs2(new String[]{parentId, atomId});
molecule.cmlMolecule.appendChild(length);
length.setXMLContent(bond.length);
grandParent = (parent == null) ? null : parent.parent;
String grandParentId = (grandParent == null) ? null :
grandParent.cmlAtom.getId();
if (grandParent != null) {
CMLAngle angle = new CMLAngle();
angle.setAtomRefs3(new String[]{grandParentId, parentId, atomId});
molecule.cmlMolecule.appendChild(angle);
angle.setXMLContent(bond.angle);
}
greatGrandParent = (grandParent == null) ? null : grandParent.parent;
String greatGrandParentId = (greatGrandParent == null) ? null :
greatGrandParent.cmlAtom.getId();
if (greatGrandParent != null) {
CMLTorsion torsion = new CMLTorsion();
torsion.setAtomRefs4(new String[]{
greatGrandParentId, grandParentId, parentId, atomId});
molecule.cmlMolecule.appendChild(torsion);
torsion.setXMLContent(bond.torsion);
}
}
}
/** process token and return Atom;
*
* @param s string to process
* @return Atom (null if end of string)
*/
static InlineAtom grab(String s, InlineMolecule molecule) {
InlineAtom inlineAtom = new InlineAtom(molecule);
inlineAtom.chemicalElement = ChemicalElement.grabChemicalElement(s);
if (inlineAtom.chemicalElement == null) {
throw new RuntimeException(InlineMolecule.Error.BAD_SYMBOL+CMLUtil.S_COLON+
s+CMLUtil.S_COLON);
}
inlineAtom.cmlAtom.setElementType(inlineAtom.chemicalElement.getSymbol());
int ll = inlineAtom.chemicalElement.getSymbol().length();
inlineAtom.qual = new Qualifier(s.substring(ll));
if (inlineAtom.qual != null) {
inlineAtom.process();
}
// LOG.debug("ATOM "+inlineAtom);
return inlineAtom;
}
void process() {
int i = 0;
String qq = qual.q;
while (i < qq.length()) {
String qqq = qq.substring(i);
if (qqq.startsWith(Qual.CHIRALITY.value+CMLUtil.S_LBRAK)) {
int is = (Qual.CHIRALITY.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for chirality:"+qqq);
}
try {
chirality = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for chirality: "+qqq);
}
i += idx+1;
} else if (qqq.startsWith(Qual.ID.value+CMLUtil.S_LBRAK)) {
int is = (Qual.ID.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for id:"+qqq);
}
id = qqq.substring(is, idx);
i += idx+1;
} else {
throw new RuntimeException("bad qual: "+qqq+S_SLASH+i);
}
}
LOG.debug("AQ:"+this.fullString());
}
@SuppressWarnings("unused")
void processAtoms() {
for (InlineAtom childAtom : childAtoms) {
;//
}
}
int getLength() {
int i = 0;
if (chemicalElement != null) {
i = chemicalElement.getSymbol().length();
}
i += qual.getLength();
return i;
}
/** get full string.
* @return the string with fuller interpretation
*/
public String fullString() {
return chemicalElement.getSymbol()+
"{chirality="+chirality+",id="+id+S_RCURLY;
}
/** get string.
* atom symbol followed by elementNumber
* @return lexical string followed by interpretation
*/
public String toString() {
String ss = chemicalElement.getSymbol();
ss += CMLUtil.S_LBRAK+chemicalElement.getAtomicNumber()+CMLUtil.S_RBRAK;
return ss;
}
void debug() {
LOG.info("ATOM: "+chemicalElement.getSymbol());
for (int i = 0; i < childAtoms.size(); i++) {
if (i > 0) {
LOG.info(S_LBRAK);
}
childBonds.get(i).debug();
childAtoms.get(i).debug();
if (i > 0) {
LOG.info(S_RBRAK);
}
}
}
}
class InlineBond implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineBond.class);
/** components of qualifier.*/
public enum Qual {
/** bond length */
LENGTH("l"),
/** bond angle */
ANGLE("a"),
/** torsion */
TORSION("t"),
;
String value;
private Qual(String v) {
value = v;
}
}
String s;
CMLBond bond;
String order;
Qualifier qual;
double length = Double.NaN;
double angle = Double.NaN;
double torsion = Double.NaN;
/** constructor.
*/
public InlineBond() {
}
static InlineBond grab(String s) {
s = s.trim();
InlineBond bond = null;
if (s.length() > 0) {
bond = new InlineBond();
bond.order = getOrder(s.charAt(0));
if (bond.order == null) {
throw new RuntimeException(InlineMolecule.Error.BAD_BOND+s+CMLUtil.S_COLON);
}
// qualifier
if (s.length() > 1 && Qualifier.START == s.charAt(1)) {
bond.qual = new Qualifier(s.substring(1));
bond.process();
} else {
bond.qual = null;
}
bond.s = s.substring(0,
((bond.qual == null) ? 1 : 1 + bond.qual.getLength()));
}
// LOG.debug("BOND "+bond);
return bond;
}
static String getOrder(char c) {
String order = null;
if (c == CMLUtil.C_MINUS) {
order = CMLBond.SINGLE_S;
} else if (c == CMLUtil.C_EQUALS) {
order = CMLBond.DOUBLE_D;
} else if (c == CMLUtil.C_HASH) {
order = CMLBond.TRIPLE_T;
}
return order;
}
void process() {
int i = 0;
String qq = qual.q;
while (i < qq.length()) {
String qqq = qq.substring(i);
if (qqq.startsWith(Qual.LENGTH.value+CMLUtil.S_LBRAK)) {
int is = (Qual.LENGTH.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for length:"+qqq);
}
try {
length = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for length: "+qqq);
}
i += idx+1;
} else if (qqq.startsWith(Qual.ANGLE.value+CMLUtil.S_LBRAK)) {
int is = (Qual.ANGLE.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for angle:"+qqq);
}
try {
angle = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for angle: "+qqq);
}
i += idx+1;
} else if (qqq.startsWith(Qual.TORSION.value+CMLUtil.S_LBRAK)) {
int is = (Qual.TORSION.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for torsion:"+qqq);
}
try {
torsion = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for torsion: "+qqq);
}
i += idx+1;
} else if (qqq.startsWith(S_COMMA)) {
i += 1;
} else {
throw new RuntimeException("bad qual: "+qqq+S_SLASH+i);
}
}
LOG.debug("BQ"+this.fullString());
}
void createBond(String s) {
if (s.length() != 1) {
throw new RuntimeException("Bond must only be single character: "+s);
}
bond = new CMLBond();
bond.setOrder(InlineBond.getOrder(s.charAt(0)));
}
int getLength() {
return 1 + ((qual == null) ? 0 : qual.getLength());
}
/** get full string.
* @return the string with fuller interpretation
*/
public String fullString() {
return s+" {order="+order+",length="+length+",torsion="+torsion+S_RCURLY;
}
void debug() {
LOG.info("BOND: "+order+S_SLASH+length+S_SLASH+angle+S_SLASH+torsion);
}
/** get string.
* @return exact lexical
*/
public String toString() {
return s;
}
}
class Qualifier {
/** qualifier string without [ and ]
*/
String q;
/** start of qualifier */
public static char START = CMLUtil.C_LSQUARE;
/** end of qualifier */
public static char END = CMLUtil.C_RSQUARE;
/** create qualifier.
* @param s must be of form [...]
*/
public Qualifier(String s) {
q = CMLUtil.S_EMPTY;
if (s != null && s.length() > 0 && s.charAt(0) == START) {
int idx = (s.indexOf(END));
if (idx == -1) {
throw new RuntimeException(InlineMolecule.Error.BAD_QUALIFIER+s+CMLUtil.S_COLON);
}
if (idx == 1) {
throw new RuntimeException(InlineMolecule.Error.EMPTY_QUALIFIER+s+CMLUtil.S_COLON);
}
q = s.substring(1, idx);
}
}
/** gets length including delimiters.
* @return length (-1 if not a qualifier)
*/
int getLength() {
return (CMLUtil.S_EMPTY.equals(q)) ? 0 : q.length()+2;
}
/** get string.
* @return empty or qualifier surrounded by [...]
*/
public String toString() {
return CMLUtil.S_EMPTY.equals(q) ? CMLUtil.S_EMPTY :
new StringBuilder(10).append(START).append(q).append(END).toString();
}
}
/** bracketed branch hanging off an atom in the inline representation. */
class InlineBranch {
    private static final Logger LOG = Logger.getLogger(InlineBranch.class);
    /** not assigned anywhere in this class. */
    String b = null;
    InlineMolecule molecule;
    /** atom serial after the branch has been parsed. */
    int serial;
    static final char START = CMLUtil.C_LBRAK;
    static final char END = CMLUtil.C_RBRAK;

    /** constructor.
     * @param s string to parse (currently not stored)
     * @param molecule
     * @param serial
     */
    public InlineBranch(String s, InlineMolecule molecule, int serial) {
        this.molecule = molecule;
        this.serial = serial;
    }

    /** parse a branch and attach its atoms to the molecule.
     *
     * within the branch the sequence is bond, atom, bond, atom ...; a nested
     * branch may appear wherever a bond is expected. New atoms are added
     * directly to the molecule and bonded to the preceding atom.
     *
     * @param s string starting with the branch start character
     * @param currentAtom atom the branch hangs off
     * @param molecule molecule receiving the atoms and bonds
     * @param serial last atom serial used so far
     * @return branch whose serial field holds the last serial used
     * @throws RuntimeException if the brackets do not balance or a bond or
     *         atom cannot be parsed
     */
    static InlineBranch grab(String s, InlineAtom currentAtom, InlineMolecule molecule, int serial) {
        int idx = Util.indexOfBalancedBracket(START, s);
        // the closing bracket cannot precede index 1; anything else means no balanced match
        if (idx < 1) {
            throw new RuntimeException("Unbalanced branch: " + s);
        }
        InlineBranch branch = new InlineBranch(s.substring(1, idx), molecule, serial);
        InlineMolecule.State branchState = InlineMolecule.State.START;
        InlineBond branchBond = null;
        int j = 1;
        while (j < idx) {
            String rest = s.substring(j);
            if (branchState == InlineMolecule.State.START
                    || branchState == InlineMolecule.State.ATOM) {
                if (s.charAt(j) == InlineBranch.START) {
                    InlineBranch nested = grab(rest, currentAtom, molecule, serial);
                    j += nested.getLength(rest);
                    serial = nested.serial;
                } else {
                    branchBond = InlineBond.grab(rest);
                    if (branchBond == null) {
                        throw new RuntimeException("NULL bond");
                    }
                    j += branchBond.getLength();
                    branchState = InlineMolecule.State.BOND;
                }
            } else if (branchState == InlineMolecule.State.BOND) {
                InlineAtom inlineAtom = InlineAtom.grab(rest, molecule);
                // check before the atom is dereferenced
                if (inlineAtom == null) {
                    throw new RuntimeException("NULL atom");
                }
                inlineAtom.cmlAtom.setId("a" + (++serial));
                molecule.cmlMolecule.addAtom(inlineAtom.cmlAtom);
                if (branchBond != null) {
                    LOG.debug("III..." + inlineAtom);
                    inlineAtom.addBond(currentAtom, branchBond);
                    branchBond = null;
                }
                currentAtom = inlineAtom;
                j += inlineAtom.getLength();
                branchState = InlineMolecule.State.ATOM;
            }
        }
        branch.serial = serial;
        return branch;
    }

    /** get length of string to next balanced )
     *
     * @param s the string
     * @return 1 + index of the balancing bracket, i.e. the length including ()
     *         (presumably 0 if not found, assuming the helper returns -1 then - TODO confirm)
     */
    int getLength(String s) {
        return 1 + Util.indexOfBalancedBracket(START, s);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy