// com.hfg.chem.MatterImpl Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of com_hfg Show documentation
// Show all versions of com_hfg Show documentation
// com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.chem;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import com.hfg.util.CharUtil;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
//------------------------------------------------------------------------------
/**
Elemental composition and mass tracking object for any organic or inorganic molecule.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class MatterImpl implements Matter, Cloneable, Comparable
{
//##########################################################################
// PRIVATE FIELDS
//##########################################################################
// Atom counts keyed by element (or isotope); null when no composition is set.
private Map<Element, Float> mElementalComposition;

// Cached or user-supplied masses; null until set or calculated.
private Double mMonoisotopicMass;
private Double mAverageMass;

// True when the corresponding mass was supplied through its setter with a
// non-null value. User-set masses are not overwritten by calculated values.
private boolean mMonoisotopicMassIsUserSet;
private boolean mAverageMassIsUserSet;

// Lazily computed hash code; reset by clearCalculatedProperties().
private Integer mHashCode;

// Characters that introduce a molecule of crystallization in a chemical formula.
private static final Set<Character> sSaltIndicatorsInChemicalFormulas = new HashSet<>(4);
static
{
   sSaltIndicatorsInChemicalFormulas.add('.');
   sSaltIndicatorsInChemicalFormulas.add('·');
   sSaltIndicatorsInChemicalFormulas.add('•');
   sSaltIndicatorsInChemicalFormulas.add('*');
}
//##########################################################################
// CONSTRUCTORS
//##########################################################################
//--------------------------------------------------------------------------
/**
Creates an empty MatterImpl with no elemental composition and no masses set.
*/
public MatterImpl()
{
}
//--------------------------------------------------------------------------
/**
Creates a MatterImpl with the specified elemental composition.
@param inMap atom counts keyed by element; the map is copied by setElementalComposition()
*/
public MatterImpl(Map<Element, Float> inMap)
{
   setElementalComposition(inMap);
}
//--------------------------------------------------------------------------
/**
Creates a MatterImpl initialized from another Matter object.
The masses reported by the source are passed through the mass setters, so any
non-null mass is flagged as user-set on the new object.
@param inInitialValue the Matter to copy values from; if null the new object is left empty
*/
public MatterImpl(Matter inInitialValue)
{
   if (null == inInitialValue)
   {
      return;
   }

   setMonoisotopicMass(inInitialValue.getMonoisotopicMass());
   setAverageMass(inInitialValue.getAverageMass());
   setElementalComposition(inInitialValue.getElementalComposition());
}
//##########################################################################
// PUBLIC METHODS
//##########################################################################
//--------------------------------------------------------------------------
// TODO: Add isotope support
/**
Parses a chemical formula string such as 'Ca3(PO4)2' into a Matter object.
Text enclosed in '(' / ')' or '[' / ']' becomes a nested sub-block. An optional
count (ASCII digits or Unicode subscript digits) directly after the closing
bracket is recorded as that sub-block's repeat count. All other characters are
appended to the text of the enclosing block and interpreted by FormulaSubBlock.
@param inChemicalFormula the chemical formula to parse
@return a new Matter holding the parsed elemental composition, or null if the
        formula string is not set
@throws ChemicalFormulaParseException if the brackets in the formula are unbalanced
        or mismatched, or if the block contents cannot be parsed
*/
public static Matter fromChemicalFormula(String inChemicalFormula)
{
   if (! StringUtil.isSet(inChemicalFormula))
   {
      return null;
   }

   MatterImpl matter = new MatterImpl();

   Stack<Character> enclosingCharStack = new Stack<>();

   MatterImpl.FormulaSubBlock topBlock = matter.new FormulaSubBlock();
   MatterImpl.FormulaSubBlock currentSubBlock = topBlock;
   Stack<MatterImpl.FormulaSubBlock> blockStack = new Stack<>();
   blockStack.push(topBlock);

   String chemicalFormulaString = inChemicalFormula.trim();
   for (int i = 0; i < chemicalFormulaString.length(); i++)
   {
      char currentChar = chemicalFormulaString.charAt(i);

      if ('(' == currentChar
          || '[' == currentChar)
      {
         // Open a new nested block
         enclosingCharStack.push(currentChar);
         MatterImpl.FormulaSubBlock subBlock = matter.new FormulaSubBlock().setBracketType(currentChar);
         currentSubBlock.addSubBlock(subBlock);
         blockStack.push(subBlock);
         currentSubBlock = subBlock;
      }
      else if (')' == currentChar
               || ']' == currentChar)
      {
         // Ending enclosure w/o a starting enclosure?
         if (enclosingCharStack.isEmpty())
         {
            throw new ChemicalFormulaParseException("The chemical formula " + StringUtil.singleQuote(chemicalFormulaString) + " has an unbalanced '" + currentChar + "' near position " + (i + 1) + "!");
         }

         char openingChar = enclosingCharStack.pop();

         // Mismatched enclosure characters?
         if (('(' == openingChar && ')' != currentChar)
             || ('[' == openingChar && ']' != currentChar))
         {
            throw new ChemicalFormulaParseException("The chemical formula " + StringUtil.singleQuote(chemicalFormulaString) + " has an unbalanced '" + currentChar + "' near position " + (i + 1) + "!");
         }

         currentSubBlock.close();

         // Collect the optional repeat count that follows the closing bracket.
         // Each character is converted independently so that a subscript digit
         // followed by an ASCII digit is not appended twice.
         StringBuilder countString = new StringBuilder();
         int scanIndex = i + 1;
         while (scanIndex < chemicalFormulaString.length())
         {
            char countChar = chemicalFormulaString.charAt(scanIndex);
            Character convertedSubscriptChar = convertSubscriptChar(countChar);
            if (convertedSubscriptChar != null)
            {
               countString.append(convertedSubscriptChar.charValue());
            }
            else if (Character.isDigit(countChar))
            {
               countString.append(countChar);
            }
            else
            {
               break;
            }

            scanIndex++;
         }

         if (countString.length() > 0)
         {
            currentSubBlock.setCount(Integer.parseInt(countString.toString()));
         }

         // Leave i on the last consumed character; the loop increment moves past it
         i = scanIndex - 1;

         blockStack.pop();
         currentSubBlock = blockStack.peek();
      }
      else
      {
         currentSubBlock.append(currentChar);
      }
   }

   // Starting enclosure w/o an ending enclosure?
   if (! enclosingCharStack.isEmpty())
   {
      throw new ChemicalFormulaParseException("The chemical formula " + StringUtil.singleQuote(chemicalFormulaString) + " has an unclosed '" + enclosingCharStack.peek() + "'!");
   }

   matter.setElementalComposition(topBlock.getChemicalComposition());

   return matter;
}
//--------------------------------------------------------------------------
/**
Returns a hash code derived from the elemental composition. The value is cached
until clearCalculatedProperties() resets it. Atom counts contribute via their
integer part only.
*/
@Override
public int hashCode()
{
   if (mHashCode != null)
   {
      return mHashCode;
   }

   int computedHash = 31;
   if (mElementalComposition != null)
   {
      for (Map.Entry<Element, Float> entry : mElementalComposition.entrySet())
      {
         computedHash += 31 * entry.getKey().hashCode() * entry.getValue().intValue();
      }
   }

   mHashCode = computedHash;

   return mHashCode;
}
//--------------------------------------------------------------------------
/**
Returns true if the specified object is a MatterImpl for which compareTo() returns zero.
NOTE(review): compareTo() in this class compares hash codes, so two objects whose
hash codes collide are reported as equal - confirm that this is intended.
*/
@Override
public boolean equals(Object inObj)
{
   // instanceof is false for null, so a separate null test is not required
   return (inObj instanceof MatterImpl
           && 0 == compareTo((MatterImpl) inObj));
}
//--------------------------------------------------------------------------
/**
Orders MatterImpl objects by their hash codes.
@param inObj the object to compare to
@return -1 if inObj is null; otherwise the result of comparing the two hash codes
*/
public int compareTo(MatterImpl inObj)
{
   if (null == inObj)
   {
      return -1;
   }

   return CompareUtil.compare(hashCode(), inObj.hashCode());
}
//--------------------------------------------------------------------------
/**
Replaces the elemental composition with a copy of the specified map and clears
any calculated properties.
@param inMap atom counts keyed by element; a null or empty map clears the composition
*/
public void setElementalComposition(Map<Element, Float> inMap)
{
   if (CollectionUtil.hasValues(inMap))
   {
      // Defensive copy so later changes to the caller's map don't affect this object
      mElementalComposition = new HashMap<>(inMap);
   }
   else
   {
      mElementalComposition = null;
   }

   clearCalculatedProperties();
}
//--------------------------------------------------------------------------
/**
Discards the elemental composition and clears any calculated properties.
*/
public void clearElementalComposition()
{
mElementalComposition = null;
clearCalculatedProperties();
}
//--------------------------------------------------------------------------
/**
Adds one copy of the specified elemental composition to this object.
@param inMap atom counts keyed by element
@return this MatterImpl object to enable method chaining
*/
public MatterImpl addElementalComposition(Map<Element, Float> inMap)
{
   return addElementalComposition(inMap, 1);
}
//--------------------------------------------------------------------------
/**
Adds the specified elemental composition to this object the specified number of times.
@param inMap atom counts keyed by element; a null or empty map is ignored
@param inNum the multiplier applied to each count in inMap (may be negative)
@return this MatterImpl object to enable method chaining
*/
public MatterImpl addElementalComposition(Map<Element, Float> inMap, int inNum)
{
   if (CollectionUtil.hasValues(inMap))
   {
      for (Map.Entry<Element, Float> entry : inMap.entrySet())
      {
         addAtoms(entry.getKey(), entry.getValue() * inNum);
      }

      clearCalculatedProperties();
   }

   return this;
}
//--------------------------------------------------------------------------
/**
Adds one copy of the specified Matter to this object.
@param inValue the Matter to add
@return this MatterImpl object to enable method chaining
*/
public MatterImpl add(Matter inValue)
{
return add(inValue, 1);
}
//--------------------------------------------------------------------------
/**
Adds the specified Matter to this object the specified number of times.
The elemental composition of inValue is merged in first. A mass is then set
explicitly if this object's corresponding mass is user-set, or if inValue has
no elemental composition but does report that mass.
@param inValue the Matter to add; null is ignored
@param inCount the number of copies to add (may be negative)
@return this MatterImpl object to enable method chaining
*/
public MatterImpl add(Matter inValue, int inCount)
{
   if (null == inValue)
   {
      return this;
   }

   // Merging the composition also clears the calculated properties
   addElementalComposition(inValue.getElementalComposition(), inCount);

   // Now deal with user-set masses...
   if (mMonoisotopicMassIsUserSet
       || (null == inValue.getElementalComposition()
           && inValue.getMonoisotopicMass() != null))
   {
      double startingMass;
      if (mMonoisotopicMassIsUserSet)
      {
         startingMass = mMonoisotopicMass;
      }
      else if (CollectionUtil.hasValues(getElementalComposition()))
      {
         startingMass = getMonoisotopicMass();
      }
      else
      {
         startingMass = 0.0;
      }

      double addedMass = (inValue.getMonoisotopicMass() != null ? inValue.getMonoisotopicMass() : 0) * inCount;

      setMonoisotopicMass(startingMass + addedMass);
   }

   if (mAverageMassIsUserSet
       || (null == inValue.getElementalComposition()
           && inValue.getAverageMass() != null))
   {
      double startingMass;
      if (mAverageMassIsUserSet)
      {
         startingMass = mAverageMass;
      }
      else if (CollectionUtil.hasValues(getElementalComposition()))
      {
         startingMass = getAverageMass();
      }
      else
      {
         startingMass = 0.0;
      }

      double addedMass = (inValue.getAverageMass() != null ? inValue.getAverageMass() : 0) * inCount;

      setAverageMass(startingMass + addedMass);
   }

   return this;
}
//--------------------------------------------------------------------------
/**
Removes one copy of the specified Matter from this object.
@param inValue the Matter to remove
@return this MatterImpl object to enable method chaining
*/
public MatterImpl remove(Matter inValue)
{
return remove(inValue, 1);
}
//--------------------------------------------------------------------------
/**
Removes the specified Matter from this object the specified number of times.
Implemented as an add() with the count negated.
@param inValue the Matter to remove
@param inCount the number of copies to remove
@return this MatterImpl object to enable method chaining
*/
public MatterImpl remove(Matter inValue, int inCount)
{
return add(inValue, - inCount);
}
//--------------------------------------------------------------------------
/**
Adds the specified whole number of atoms of an element to the composition.
@param inElement the element to add
@param inNum the number of atoms to add (may be negative)
@return this MatterImpl object to enable method chaining
*/
public MatterImpl addAtoms(Element inElement, int inNum)
{
   // The cast selects the float overload without the deprecated Float constructor
   return addAtoms(inElement, (float) inNum);
}
//--------------------------------------------------------------------------
/**
Adds the specified (possibly fractional) number of atoms of an element to the
composition, creating the composition map if necessary, and clears any
calculated properties.
@param inElement the element to add
@param inNum the number of atoms to add (may be negative)
@return this MatterImpl object to enable method chaining
*/
public MatterImpl addAtoms(Element inElement, float inNum)
{
   if (null == mElementalComposition)
   {
      mElementalComposition = new HashMap<>();
   }

   Float existingCount = mElementalComposition.get(inElement);
   float newCount = inNum + (existingCount != null ? existingCount : 0);

   mElementalComposition.put(inElement, newCount);

   clearCalculatedProperties();

   return this;
}
//--------------------------------------------------------------------------
/**
If the elemental composition is known, use setElementalComposition() and
the masses will be derived automatically; this method is for use in those
(hopefully) rare times when the mass is known but not the elemental composition.
A non-null value flags the monoisotopic mass as user-set; passing null clears
both the stored mass and the user-set flag.
@param inValue the mass to use as the monoisotopic mass for this object
@return this MatterImpl object to enable method chaining
*/
public MatterImpl setMonoisotopicMass(Double inValue)
{
mMonoisotopicMass = inValue;
mMonoisotopicMassIsUserSet = (inValue != null);
return this;
}
//--------------------------------------------------------------------------
/**
If the elemental composition is known, use setElementalComposition() and
the masses will be derived automatically; this method is for use in those
(hopefully) rare times when the mass is known but not the elemental composition.
A non-null value flags the average mass as user-set; passing null clears
both the stored mass and the user-set flag.
@param inValue the mass to use as the average mass for this object
@return this MatterImpl object to enable method chaining
*/
public MatterImpl setAverageMass(Double inValue)
{
mAverageMass = inValue;
mAverageMassIsUserSet = (inValue != null);
return this;
}
//--------------------------------------------------------------------------
/**
Returns the monoisotopic mass. If no value is currently stored, the masses are
first calculated from the elemental composition.
@return the monoisotopic mass, or null if none is stored and there is no
        elemental composition to calculate it from
*/
public Double getMonoisotopicMass()
{
if (null == mMonoisotopicMass)
{
calculateMassFromElementalComposition();
}
return mMonoisotopicMass;
}
//--------------------------------------------------------------------------
/**
Returns the average mass. If no value is currently stored, the masses are
first calculated from the elemental composition.
@return the average mass, or null if none is stored and there is no
        elemental composition to calculate it from
*/
public Double getAverageMass()
{
if (null == mAverageMass)
{
calculateMassFromElementalComposition();
}
return mAverageMass;
}
//--------------------------------------------------------------------------
/**
Returns the elemental composition as an unmodifiable Map.
@return an unmodifiable view of the atom counts keyed by element, or null if
        no elemental composition has been set
*/
public Map<Element, Float> getElementalComposition()
{
   if (null == mElementalComposition)
   {
      return null;
   }

   return Collections.unmodifiableMap(mElementalComposition);
}
//--------------------------------------------------------------------------
/**
Returns a chemical formula String like 'C5H11NO'. If carbon is present, it
is listed first followed by the other elements in their sorted order
(presumably ascending mass - confirm against Element's natural ordering).
Symbols for isotopes are enclosed in square brackets such as '[2H]2O'
for deuterated water. Elements with a count of zero are omitted and
negative counts are enclosed in parentheses.
@return the chemical formula string (empty if there is no elemental composition)
*/
public String getChemicalFormula()
{
return getChemicalFormula(false);
}
//--------------------------------------------------------------------------
/**
Returns a chemical formula String like 'C₅H₁₁NO'. If carbon is present, it
is listed first followed by the other elements in their sorted order
(presumably ascending mass - confirm against Element's natural ordering).
Symbols for isotopes are enclosed in square brackets such as '[²H]₂O'
for deuterated water, with the mass number rendered as a superscript.
@return the chemical formula string (empty if there is no elemental composition)
*/
public String getChemicalFormulaWithSubscripts()
{
return getChemicalFormula(true);
}
//--------------------------------------------------------------------------
/**
Builds the chemical formula string for the current elemental composition.
Carbon (if present) is written first, followed by the remaining elements in
their sorted order. Elements with a null or zero count are skipped.
@param inUseSubscripts whether counts should be rendered as subscripts and
       isotope mass numbers as superscripts
@return the chemical formula string (empty if there is no elemental composition)
*/
private String getChemicalFormula(boolean inUseSubscripts)
{
   StringBuilder formula = new StringBuilder();

   Map<Element, Float> composition = getElementalComposition();
   if (! CollectionUtil.hasValues(composition))
   {
      return formula.toString();
   }

   List<Element> orderedElements = new ArrayList<>(composition.keySet());
   Collections.sort(orderedElements);

   // Carbon leads the formula
   if (orderedElements.remove(Element.CARBON))
   {
      Float carbonCount = composition.get(Element.CARBON);
      if (carbonCount != null && carbonCount != 0)
      {
         formula.append("C" + formatAtomCount(carbonCount, inUseSubscripts));
      }
   }

   for (Element element : orderedElements)
   {
      Float count = composition.get(element);
      if (null == count || count == 0)
      {
         continue;
      }

      boolean isIsotope = (element instanceof Isotope);

      String symbol = element.getSymbol();
      if (inUseSubscripts && isIsotope)
      {
         // Render the mass number as a superscript in front of the base element symbol
         symbol = StringUtil.toSuperscript(((Isotope) element).getMassNumber()) + ((Isotope) element).getElement().getSymbol();
      }

      // Encode pure isotopes in square brackets like "[²H]" for deuterium
      formula.append((isIsotope ? "[" + symbol + "]" : symbol)
                     + formatAtomCount(count, inUseSubscripts));
   }

   return formula.toString();
}

//--------------------------------------------------------------------------
// Formats the count that follows an element symbol: empty for a count of one,
// parenthesized for negative counts, one decimal place for fractional counts.
private static String formatAtomCount(Float inCount, boolean inUseSubscripts)
{
   String countString = (inCount == inCount.intValue() ? inCount.intValue() + "" : String.format("%3.1f", inCount.floatValue()) + "");
   if (inUseSubscripts)
   {
      countString = StringUtil.toSubscript(countString);
   }

   if (inCount == 1)
   {
      return "";
   }

   return (inCount < 0 ? "(" + countString + ")" : countString);
}
//--------------------------------------------------------------------------
/**
Returns a copy of this object. The copy receives its own elemental composition
map so that later changes to either object do not affect the other.
@return the cloned MatterImpl
*/
@Override
public MatterImpl clone()
{
   MatterImpl copy;
   try
   {
      copy = (MatterImpl) super.clone();
   }
   catch (CloneNotSupportedException e)
   {
      throw new RuntimeException("Coding problem! CloneNotSupportedException should not be possible when cloning a "
                                 + this.getClass().getSimpleName() + " object!", e);
   }

   if (mElementalComposition != null)
   {
      // Assign the fresh map to the copy; this object's own map must not be replaced by cloning
      copy.mElementalComposition = new HashMap<>(mElementalComposition);
   }

   return copy;
}
//--------------------------------------------------------------------------
/**
Resets the cached hash code and any mass that was not set by the user so that
they are recalculated when next requested.
*/
public void clearCalculatedProperties()
{
   mHashCode = null;

   if (! mMonoisotopicMassIsUserSet)
   {
      mMonoisotopicMass = null;
   }

   if (! mAverageMassIsUserSet)
   {
      mAverageMass = null;
   }
}
//##########################################################################
// PROTECTED METHODS
//##########################################################################
//--------------------------------------------------------------------------
/**
Calculates the monoisotopic and average masses by summing the element masses
weighted by their atom counts. Masses that were set by the user are left
untouched. Does nothing if there is no elemental composition.
*/
protected void calculateMassFromElementalComposition()
{
   Map<Element, Float> elementalCompositionMap = getElementalComposition();
   if (null == elementalCompositionMap)
   {
      return;
   }

   double mono = 0.0;
   double avg  = 0.0;

   for (Map.Entry<Element, Float> entry : elementalCompositionMap.entrySet())
   {
      Element element = entry.getKey();
      float count = entry.getValue();

      mono += count * element.getMonoisotopicMass();

      Double elementalAvgMass = element.getAverageMass();
      avg += count * (elementalAvgMass != null ? elementalAvgMass : element.getMonoisotopicMass()); // If we don't have an avg. mass for the element, use monoisotopic
   }

   if (! mMonoisotopicMassIsUserSet)
   {
      mMonoisotopicMass = Double.valueOf(mono);
   }

   if (! mAverageMassIsUserSet)
   {
      mAverageMass = Double.valueOf(avg);
   }
}
//--------------------------------------------------------------------------
/**
Reports whether either mass was supplied by the user.
@return true if the monoisotopic mass or the average mass is flagged as user-set
*/
protected boolean massesAreUserSet()
{
return (mMonoisotopicMassIsUserSet || mAverageMassIsUserSet);
}
//---------------------------------------------------------------------------
// Used when parsing chemical formulas.
// Maps a Unicode subscript digit (U+2080 through U+2089) to the matching
// ASCII digit. Returns null for any other character.
private static Character convertSubscriptChar(char inChar)
{
   if (inChar < '\u2080' || inChar > '\u2089')
   {
      return null;
   }

   // The ten subscript digits are contiguous, so the offset from U+2080 is the digit value
   return (char) ('0' + (inChar - '\u2080'));
}
//---------------------------------------------------------------------------
// Used when parsing isotopes in chemical formulas.
// Maps a Unicode superscript digit or superscript minus sign to the matching
// ASCII character. Returns null for any other character.
private static Character convertSuperscriptChar(char inChar)
{
   // Position n in the first string corresponds to position n in the second.
   // Superscript 1, 2 and 3 live in the Latin-1 block; the rest are in U+2070 - U+207B.
   final String superscriptChars = "\u2070\u00B9\u00B2\u00B3\u2074\u2075\u2076\u2077\u2078\u2079\u207B";
   final String plainChars       = "0123456789-";

   int index = superscriptChars.indexOf(inChar);
   if (index < 0)
   {
      return null;
   }

   return plainChars.charAt(index);
}
//##########################################################################
// PRIVATE CLASS
//##########################################################################
/**
One segment of a chemical formula being parsed: either the top-level formula
or the contents of a single pair of brackets. A block holds its own formula
text, a repeat count (1 unless changed via setCount()), the opening bracket
character it was created for, and any nested sub-blocks.
*/
private class FormulaSubBlock
{
   private StringBuilder mString = new StringBuilder();
   private int mCount = 1;
   private char mBracketType;
   private boolean mClosed = false;
   private List<FormulaSubBlock> mSubBlocks;

   //-----------------------------------------------------------------------
   // Returns the formula text collected for this block (excluding nested sub-blocks).
   @Override
   public String toString()
   {
      return mString.toString();
   }

   //-----------------------------------------------------------------------
   public void append(char inChar)
   {
      mString.append(inChar);
   }

   //-----------------------------------------------------------------------
   // Sets the number of times the contents of this block are repeated.
   public void setCount(int inValue)
   {
      mCount = inValue;
   }

   //-----------------------------------------------------------------------
   // Records the opening bracket character ('(' or '[') that started this block.
   public FormulaSubBlock setBracketType(char inValue)
   {
      mBracketType = inValue;
      return this;
   }

   //-----------------------------------------------------------------------
   public void close()
   {
      mClosed = true;
   }

   //-----------------------------------------------------------------------
   public boolean isClosed()
   {
      return mClosed;
   }

   //-----------------------------------------------------------------------
   public void addSubBlock(MatterImpl.FormulaSubBlock inValue)
   {
      if (null == mSubBlocks)
      {
         mSubBlocks = new ArrayList<>(4);
      }

      mSubBlocks.add(inValue);
   }

   //-----------------------------------------------------------------------
   /**
    Returns the elemental composition of this block: the composition of its own
    text plus that of every nested sub-block, all multiplied by this block's
    repeat count.
    @return atom counts keyed by element
    @throws ChemicalFormulaParseException if the block text cannot be parsed
    */
   public Map<Element, Float> getChemicalComposition()
   {
      // Composition of a single copy of this block
      Map<Element, Float> chemicalCompositionMap = getChemicalComposition(mString.toString());

      if (CollectionUtil.hasValues(mSubBlocks))
      {
         for (MatterImpl.FormulaSubBlock subBlock : mSubBlocks)
         {
            for (Map.Entry<Element, Float> entry : subBlock.getChemicalComposition().entrySet())
            {
               addToComposition(chemicalCompositionMap, entry.getKey(), entry.getValue());
            }
         }
      }

      // The repeat count applies to everything inside the brackets, including
      // the nested sub-blocks (e.g. the CN groups in 'Fe4[Fe(CN)6]3').
      if (mCount != 1)
      {
         for (Map.Entry<Element, Float> entry : chemicalCompositionMap.entrySet())
         {
            entry.setValue(entry.getValue() * mCount);
         }
      }

      return chemicalCompositionMap;
   }

   //-----------------------------------------------------------------------
   /**
    Parses formula text into the composition of a single copy of that text.
    This block's repeat count is NOT applied here; the no-argument
    getChemicalComposition() applies it once to the combined result.
    @param inString the formula text to parse
    @return atom counts keyed by element
    @throws ChemicalFormulaParseException if an element cannot be identified, an
            isotope mass number is missing, or a count is followed by '-'
    */
   private Map<Element, Float> getChemicalComposition(String inString)
   {
      Map<Element, Float> chemicalCompositionMap = new HashMap<>(10);

      int i = 0;
      char prevChar = ' ';
      while (i < inString.length())
      {
         char currentChar = inString.charAt(i);

         if (currentChar == ' ' // Skip whitespace
             || currentChar == ':' // Skip bond notation
             || (currentChar == '-' && ! Character.isDigit(prevChar)) // Skip linear formula single bond notation
             || currentChar == '−' // Skip linear formula single bond notation
             || currentChar == '=' // Skip linear formula double bond notation
             || currentChar == '≡' // Skip linear formula triple bond notation
             || currentChar == '@') // Skip trapped atom notation
         {
            i++;
            continue;
         }

         if (mBracketType == '['
             && 0 == i
             && (Character.isDigit(currentChar)
                 || CharUtil.isSuperscript(currentChar)))
         {
            // Isotope: a mass number followed by the element symbol
            StringBuilder massNumString = new StringBuilder();
            while (i < inString.length())
            {
               char theChar = inString.charAt(i);

               // Convert each character independently of the previous one
               Character convertedSuperscriptChar = convertSuperscriptChar(theChar);
               if (convertedSuperscriptChar != null)
               {
                  massNumString.append(convertedSuperscriptChar.charValue());
               }
               else if (Character.isDigit(theChar))
               {
                  massNumString.append(theChar);
               }
               else
               {
                  break;
               }

               i++;
            }

            if (0 == massNumString.length())
            {
               throw new ChemicalFormulaParseException("Problem parsing elements from " + StringUtil.singleQuote(inString) + " at char " + (i + 1) + "!");
            }

            int symbolLength = elementSymbolLengthAt(inString, i);
            Element element = Element.valueOf(inString.substring(i, i + symbolLength));
            i += symbolLength;

            Isotope isotope = Isotope.valueOf(element, Integer.parseInt(massNumString.toString()));

            addToComposition(chemicalCompositionMap, isotope, 1);
         }
         else if (sSaltIndicatorsInChemicalFormulas.contains(currentChar))
         {
            // molecule of crystallization
            i++;

            StringBuilder countString = new StringBuilder();
            while (i < inString.length())
            {
               char countChar = inString.charAt(i);
               if (countChar != ' ') // Skip whitespace
               {
                  if (! Character.isDigit(countChar))
                  {
                     break;
                  }

                  countString.append(countChar);
               }

               i++;
            }

            float count = (countString.length() > 0 ? Float.parseFloat(countString.toString()) : 1);

            Map<Element, Float> crystallizationChemicalCompositionMap = getChemicalComposition(inString.substring(i));
            for (Map.Entry<Element, Float> entry : crystallizationChemicalCompositionMap.entrySet())
            {
               addToComposition(chemicalCompositionMap, entry.getKey(), entry.getValue() * count);
            }

            // There shouldn't be anything after the crystallization molecule
            break;
         }
         else
         {
            int symbolLength = elementSymbolLengthAt(inString, i);
            Element element = Element.valueOf(inString.substring(i, i + symbolLength));
            i += symbolLength;

            StringBuilder countString = new StringBuilder();
            while (i < inString.length())
            {
               char countChar = inString.charAt(i);

               // Convert each character independently of the previous one
               Character convertedSubscriptChar = convertSubscriptChar(countChar);
               if (convertedSubscriptChar != null)
               {
                  countString.append(convertedSubscriptChar.charValue());
               }
               else if (Character.isDigit(countChar))
               {
                  countString.append(countChar);
               }
               else if ('-' == countChar)
               {
                  throw new ChemicalFormulaParseException("Problem parsing elements from " + StringUtil.singleQuote(inString) + " at char " + (i + 1) + ": Counts can't be ranges!");
               }
               else
               {
                  break;
               }

               i++;
            }

            addToComposition(chemicalCompositionMap, element,
                             (countString.length() > 0 ? Float.parseFloat(countString.toString()) : 1));
         }

         prevChar = currentChar;
      }

      return chemicalCompositionMap;
   }

   //-----------------------------------------------------------------------
   // Returns the length (2 or 1) of the element symbol starting at inIndex.
   // A two-character symbol is preferred over a one-character symbol.
   // Throws ChemicalFormulaParseException if neither resolves to an element.
   private int elementSymbolLengthAt(String inString, int inIndex)
   {
      if (inIndex + 2 <= inString.length()
          && Element.valueOf(inString.substring(inIndex, inIndex + 2)) != null)
      {
         return 2;
      }

      if (inIndex + 1 <= inString.length()
          && Element.valueOf(inString.substring(inIndex, inIndex + 1)) != null)
      {
         return 1;
      }

      throw new ChemicalFormulaParseException("Problem parsing elements from " + StringUtil.singleQuote(inString) + " at char " + (inIndex + 1) + "!");
   }

   //-----------------------------------------------------------------------
   // Adds inAmount to the count stored for inElement, starting from zero if absent.
   private void addToComposition(Map<Element, Float> inMap, Element inElement, float inAmount)
   {
      Float existingCount = inMap.get(inElement);
      float updatedCount = inAmount;
      if (existingCount != null)
      {
         updatedCount += existingCount;
      }

      inMap.put(inElement, updatedCount);
   }
}
}