Maven / Gradle / Ivy
/**
 * Copyright 2011 Peter Murray-Rust et. al.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.util.ArrayList;
import java.util.List;
import org.apache.log4j.Logger;
import org.xmlcml.cml.base.CMLConstants;
import org.xmlcml.cml.base.CMLUtil;
import org.xmlcml.cml.element.CMLAngle;
import org.xmlcml.cml.element.CMLAtom;
import org.xmlcml.cml.element.CMLBond;
import org.xmlcml.cml.element.CMLLength;
import org.xmlcml.cml.element.CMLMolecule;
import org.xmlcml.cml.element.CMLTorsion;
import org.xmlcml.cml.element.CMLZMatrix;
import org.xmlcml.euclid.Util;
import org.xmlcml.molutil.ChemicalElement;
/** Creates a molecule from an inline representation of its atoms and bonds.
 * A simple grammar represents atoms, connecting bonds and qualifiers
 * for either. Allows branches but not rings.
 * Syntax:
 * (atomBond) * atom
 * where (not yet finished):
 * atom := [A-Z][a-z]? // must be a valid periodic-table element
 * atomQualifier := '[' id? chirality? ']' // id must be unique
 * bond := ['-' | '=' | '#']? //
 * bondQualifier := '[' len? tor? ']' // must be unique
 * branch := '(' branch* tree? ')'
 * atomBond = (atom atomQualifier? branch* bond bondQualifier?)* atom
 * @author pm286
 */
public class InlineMolecule implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineMolecule.class);
/** Error categories raised while parsing an inline formula, each with a message prefix. */
public enum Error {
    /** bad bond */
    BAD_BOND("Bad bond:"),
    /** bad state */
    BAD_STATE("Bad state:"),
    /** bad qualifier */
    BAD_QUALIFIER("Bad qualifier:"),
    /** empty qualifier */
    EMPTY_QUALIFIER("Empty qualifier:"),
    /** bad symbol */
    BAD_SYMBOL("Bad atom symbol:");

    /** message prefix supplied for this constant; never reassigned after construction. */
    final String s;

    /**
     * @param s message prefix for this error category
     */
    private Error(String s) {
        this.s = s;
    }
}
/** states of parse.*/
public enum State {
/** start*/
/** finished atom.*/
/** finished bond.*/
/** started qualifier.*/
/** started branch.*/
CMLMolecule cmlMolecule;
String formula;
State state;
InlineAtom rootAtom;
int serial = 0;
/** create from string.
* @param formula
public InlineMolecule(String formula) {
this.formula = formula;
rootAtom = null;
/** create new molecule.
* @param formula
public void createFromString(String formula) {
cmlMolecule = new CMLMolecule();
state = State.START;
int i = 0;
serial = 0;
InlineAtom currentAtom = null;
InlineBond currentBond = null;
while (i < formula.length()) {
if (state == State.START || state == State.BOND) {
InlineAtom inlineAtom = InlineAtom.grab(formula.substring(i), this);
if (inlineAtom == null) {
throw new RuntimeException("NULL atom");
if (state == State.START) {
rootAtom = inlineAtom;
if (currentBond != null) {
inlineAtom.addBond(currentAtom, currentBond);
currentBond = null;
currentAtom = inlineAtom;
i += inlineAtom.getLength();
state = State.ATOM;
// boolean checkDuplicates = true;
} else if (state == State.ATOM) {
if (formula.substring(i).charAt(0) == InlineBranch.START) {
InlineBranch branch = InlineBranch.grab(formula.substring(i), currentAtom, this, serial);
i += branch.getLength(formula.substring(i));
serial = branch.serial;
} else {
currentBond = InlineBond.grab(formula.substring(i));
if (currentBond == null) {
throw new RuntimeException("NULL bond");
i += currentBond.getLength();
state = State.BOND;
} else {
throw new RuntimeException(InlineMolecule.Error.BAD_STATE+S_COLON+state+S_COLON);
void makeMolecule() {
try {
CMLZMatrix zMatrix = new CMLZMatrix(cmlMolecule);
} catch (RuntimeException e) {
LOG.debug("WARN of ZMAT "+e);
/** get the created molecule.
* @return molecule (null if none)
public CMLMolecule getCmlMolecule() {
return cmlMolecule;
/** debug.
public void debug() {
class InlineAtom implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineAtom.class);
private InlineAtom greatGrandParent = null;
private InlineAtom grandParent = null;
private InlineAtom parent = null;
List childAtoms = null;
List childBonds = null;
CMLAtom cmlAtom;
InlineMolecule molecule;
/** components of qualifier.*/
public enum Qual {
/** chirality */
/** id */
String value;
private Qual(String v) {
value = v;
ChemicalElement chemicalElement;
Qualifier qual;
double chirality = Double.NaN;
String id;
/** constructor.
* creates new CMLAtom as member.
* @param molecule
public InlineAtom(InlineMolecule molecule) {
cmlAtom = new CMLAtom();
childAtoms = new ArrayList();
childBonds = new ArrayList();
this.molecule = molecule;
void addBond(InlineAtom atom, InlineBond bond) {
if (atom != null) {
this.parent = atom;
CMLBond cmlBond = new CMLBond(parent.cmlAtom, this.cmlAtom);
String parentId = parent.cmlAtom.getId();
String atomId = cmlAtom.getId();
CMLLength length = new CMLLength();
length.setAtomRefs2(new String[]{parentId, atomId});
grandParent = (parent == null) ? null : parent.parent;
String grandParentId = (grandParent == null) ? null :
if (grandParent != null) {
CMLAngle angle = new CMLAngle();
angle.setAtomRefs3(new String[]{grandParentId, parentId, atomId});
greatGrandParent = (grandParent == null) ? null : grandParent.parent;
String greatGrandParentId = (greatGrandParent == null) ? null :
if (greatGrandParent != null) {
CMLTorsion torsion = new CMLTorsion();
torsion.setAtomRefs4(new String[]{
greatGrandParentId, grandParentId, parentId, atomId});
/** process token and return Atom;
* @param s string to process
* @return Atom (null if end of string)
static InlineAtom grab(String s, InlineMolecule molecule) {
InlineAtom inlineAtom = new InlineAtom(molecule);
inlineAtom.chemicalElement = ChemicalElement.grabChemicalElement(s);
if (inlineAtom.chemicalElement == null) {
throw new RuntimeException(InlineMolecule.Error.BAD_SYMBOL+CMLUtil.S_COLON+
int ll = inlineAtom.chemicalElement.getSymbol().length();
inlineAtom.qual = new Qualifier(s.substring(ll));
if (inlineAtom.qual != null) {
// LOG.debug("ATOM "+inlineAtom);
return inlineAtom;
void process() {
int i = 0;
String qq = qual.q;
while (i < qq.length()) {
String qqq = qq.substring(i);
if (qqq.startsWith(Qual.CHIRALITY.value+CMLUtil.S_LBRAK)) {
int is = (Qual.CHIRALITY.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for chirality:"+qqq);
try {
chirality = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for chirality: "+qqq);
i += idx+1;
} else if (qqq.startsWith(Qual.ID.value+CMLUtil.S_LBRAK)) {
int is = (Qual.ID.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for id:"+qqq);
id = qqq.substring(is, idx);
i += idx+1;
} else {
throw new RuntimeException("bad qual: "+qqq+S_SLASH+i);
void processAtoms() {
for (InlineAtom childAtom : childAtoms) {
int getLength() {
int i = 0;
if (chemicalElement != null) {
i = chemicalElement.getSymbol().length();
i += qual.getLength();
return i;
/** get full string.
* @return the string with fuller interpretation
public String fullString() {
return chemicalElement.getSymbol()+
/** get string.
* atom symbol followed by elementNumber
* @return lexical string followed by interpretation
public String toString() {
String ss = chemicalElement.getSymbol();
ss += CMLUtil.S_LBRAK+chemicalElement.getAtomicNumber()+CMLUtil.S_RBRAK;
return ss;
void debug() {"ATOM: "+chemicalElement.getSymbol());
for (int i = 0; i < childAtoms.size(); i++) {
if (i > 0) {;
if (i > 0) {;
class InlineBond implements CMLConstants {
private static Logger LOG = Logger.getLogger(InlineBond.class);
/** components of qualifier.*/
public enum Qual {
/** bond length */
/** bond angle */
/** torsion */
String value;
private Qual(String v) {
value = v;
String s;
CMLBond bond;
String order;
Qualifier qual;
double length = Double.NaN;
double angle = Double.NaN;
double torsion = Double.NaN;
/** constructor.
public InlineBond() {
static InlineBond grab(String s) {
s = s.trim();
InlineBond bond = null;
if (s.length() > 0) {
bond = new InlineBond();
bond.order = getOrder(s.charAt(0));
if (bond.order == null) {
throw new RuntimeException(InlineMolecule.Error.BAD_BOND+s+CMLUtil.S_COLON);
// qualifier
if (s.length() > 1 && Qualifier.START == s.charAt(1)) {
bond.qual = new Qualifier(s.substring(1));
} else {
bond.qual = null;
bond.s = s.substring(0,
((bond.qual == null) ? 1 : 1 + bond.qual.getLength()));
// LOG.debug("BOND "+bond);
return bond;
static String getOrder(char c) {
String order = null;
if (c == CMLUtil.C_MINUS) {
order = CMLBond.SINGLE_S;
} else if (c == CMLUtil.C_EQUALS) {
order = CMLBond.DOUBLE_D;
} else if (c == CMLUtil.C_HASH) {
order = CMLBond.TRIPLE_T;
return order;
void process() {
int i = 0;
String qq = qual.q;
while (i < qq.length()) {
String qqq = qq.substring(i);
if (qqq.startsWith(Qual.LENGTH.value+CMLUtil.S_LBRAK)) {
int is = (Qual.LENGTH.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for length:"+qqq);
try {
length = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for length: "+qqq);
i += idx+1;
} else if (qqq.startsWith(Qual.ANGLE.value+CMLUtil.S_LBRAK)) {
int is = (Qual.ANGLE.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for angle:"+qqq);
try {
angle = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for angle: "+qqq);
i += idx+1;
} else if (qqq.startsWith(Qual.TORSION.value+CMLUtil.S_LBRAK)) {
int is = (Qual.TORSION.value+CMLUtil.S_LBRAK).length();
int idx = qqq.indexOf(CMLUtil.S_RBRAK);
if (idx == -1) {
throw new RuntimeException("Bad arg for torsion:"+qqq);
try {
torsion = new Double(qqq.substring(is, idx)).doubleValue();
} catch (NumberFormatException nfe) {
throw new RuntimeException("Bad value for torsion: "+qqq);
i += idx+1;
} else if (qqq.startsWith(S_COMMA)) {
i += 1;
} else {
throw new RuntimeException("bad qual: "+qqq+S_SLASH+i);
void createBond(String s) {
if (s.length() != 1) {
throw new RuntimeException("Bond must only be single character: "+s);
bond = new CMLBond();
int getLength() {
return 1 + ((qual == null) ? 0 : qual.getLength());
/** get full string.
* @return the string with fuller interpretation
public String fullString() {
return s+" {order="+order+",length="+length+",torsion="+torsion+S_RCURLY;
void debug() {"BOND: "+order+S_SLASH+length+S_SLASH+angle+S_SLASH+torsion);
/** get string.
* @return exact lexical
public String toString() {
return s;
class Qualifier {
/** qualifier string without [ and ]
String q;
/** start of qualifier */
public static char START = CMLUtil.C_LSQUARE;
/** end of qualifier */
public static char END = CMLUtil.C_RSQUARE;
/** create qualifier.
* @param s must be of form [...]
public Qualifier(String s) {
q = CMLUtil.S_EMPTY;
if (s != null && s.length() > 0 && s.charAt(0) == START) {
int idx = (s.indexOf(END));
if (idx == -1) {
throw new RuntimeException(InlineMolecule.Error.BAD_QUALIFIER+s+CMLUtil.S_COLON);
if (idx == 1) {
throw new RuntimeException(InlineMolecule.Error.EMPTY_QUALIFIER+s+CMLUtil.S_COLON);
q = s.substring(1, idx);
/** gets length including delimiters.
* @return length (-1 if not a qualifier)
int getLength() {
return (CMLUtil.S_EMPTY.equals(q)) ? 0 : q.length()+2;
/** get string.
* @return empty or qualifier surrounded by [...]
public String toString() {
return CMLUtil.S_EMPTY.equals(q) ? CMLUtil.S_EMPTY :
new StringBuilder(10).append(START).append(q).append(END).toString();
class InlineBranch {
private static Logger LOG = Logger.getLogger(InlineBranch.class);
String b = null;
InlineMolecule molecule;
int serial;
static char START = CMLUtil.C_LBRAK;
static char END = CMLUtil.C_RBRAK;
/** constructor.
* @param s string to parse
* @param molecule
* @param serial
public InlineBranch(String s, InlineMolecule molecule, int serial) {
this.molecule = molecule;
this.serial = serial;
static InlineBranch grab(String s, InlineAtom currentAtom, InlineMolecule molecule, int serial) {
InlineMolecule.State branchState;
branchState = InlineMolecule.State.START;
InlineBond branchBond = null;
int j = 1;
int idx = Util.indexOfBalancedBracket(START,s); //originally there
InlineBranch branch = new InlineBranch(s.substring(1, idx), molecule, serial);
while ( j < idx ){
if (branchState == InlineMolecule.State.START || branchState == InlineMolecule.State.ATOM){
if (s.substring(j).charAt(0) == InlineBranch.START) {
InlineBranch branchbranch = grab(s.substring(j), currentAtom, molecule, serial);
j += branchbranch.getLength(s.substring(j));
serial = branchbranch.serial;
} else {
branchBond = InlineBond.grab(s.substring(j));
if (branchBond == null){
throw new RuntimeException("NULL bond");
j += branchBond.getLength();
branchState = InlineMolecule.State.BOND;
}else if ( branchState == InlineMolecule.State.BOND ){
InlineAtom inlineAtom = InlineAtom.grab(s.substring(j), molecule);
if (inlineAtom == null) {
throw new RuntimeException("NULL atom");
if (branchBond != null) {
inlineAtom.addBond(currentAtom, branchBond);
branchBond = null;
currentAtom = inlineAtom;
j += inlineAtom.getLength();
branchState = InlineMolecule.State.ATOM;
branch.serial = serial;
return branch;
/** get length of string to next balanced )
* @param s the string
* @return length of string to next bracket including () else 0 if not found
int getLength(String s) {
int i = 1 + Util.indexOfBalancedBracket(START,s);
return i;
© 2015 - 2025 Weber Informatics LLC | Privacy Policy