com.hfg.bio.KaSet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashMap;
import com.hfg.bio.glyco.Monosaccharide;
import com.hfg.bio.proteinproperty.ProteinAnalysisMode;
import com.hfg.chem.Charge;
import com.hfg.chem.IonizableGroup;
import com.hfg.chem.Molecule;
import com.hfg.exception.InvalidValueException;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.BooleanUtil;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedMap;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
//------------------------------------------------------------------------------
/**
* Encapsulation for a group of Ka (dissociation constant) values.
* Most commonly used for protein isoelectric point prediction.
*
* @author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
// Comparable is parameterized with KaSet so that compareTo(KaSet) below is a valid override.
public class KaSet implements Cloneable, Comparable<KaSet>
{
private static Map sUniqueMap = new OrderedMap<>();
/**
Bjellqvist B, Hughes GJ, Pasquali C, et al. The focusing positions of polypeptides
in immobilized pH gradients can be predicted from their amino acid sequences.
Electrophoresis. 1993;14(10):1023-31.
Available at: http://www.ncbi.nlm.nih.gov/pubmed/8125050.
Intended for use with reduced and denatured proteins on 2D gels.
*/
public static final KaSet BJELLQVIST = new KaSet("Bjellqvist");
/**
Method from Expasy's Compute pI/Mw tool
which implements the values from Bjellqvist but mistakenly treats C-terminal Asp and Glu
sidechain values as C-terminal values.
Intended for use with reduced and denatured proteins on 2D gels.
*/
public static final KaSet EXPASY = new KaSet("Expasy");
/**
Method from EMBOSS's iep tool.
Intended for use with reduced and denatured proteins on 2D gels.
*/
public static final KaSet EMBOSS = new KaSet("EMBOSS");
/**
Sillero A, Ribeiro JM. Isoelectric points of proteins:
theoretical determination. Analytical biochemistry. 1989;179(2):319-325.
Available at: http://www.ncbi.nlm.nih.gov/pubmed/2774179.
Intended for use with reduced and denatured proteins on 2D gels.
*/
public static final KaSet SILLERO = new KaSet("Sillero");
/**
Sillero A, Ribeiro JM. Isoelectric points of proteins:
theoretical determination. Analytical biochemistry. 1989;179(2):319-325.
Available at: http://www.ncbi.nlm.nih.gov/pubmed/2774179.
Intended for use with reduced and denatured proteins on 2D gels. An oversimplification for ease of calculation.
*/
public static final KaSet SILLERO_ABRIDGED = new KaSet("Sillero (abridged)");
/**
Patrickios, CS, Yamasaki, EN. Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory.
Analytical Biochemistry. 1995:231(1):82-91. http://doi.org/10.1006/abio.1995.1506.
An oversimplification for ease of calculation.
*/
public static final KaSet PATRICKIOS_SIMPLE = new KaSet("Patrickios (simple)");
/**
Biochemistry by Lubert Stryer, 1995
*/
public static final KaSet STRYER_1995 = new KaSet("Stryer (1995)");
/**
Grimsley GR, Scholtz JM, Pace CN (2009). A summary of the measured pK values of the ionizable groups in folded proteins.
Protein Science 18(1), 247-251. Note that they do not provide a value for Arginine so a default value of 12 is used.
Intended for use with native proteins.
*/
public static final KaSet GRIMSLEY = new KaSet("Grimsley");
/**
Intended for use with native proteins.
*/
public static final KaSet TAYLOR_NATIVE = new KaSet("Taylor");
private String mName;
private ProteinAnalysisMode mDefaultProteinAnalysisMode = ProteinAnalysisMode.REDUCED;
private boolean mLocked;
private Map> mKaMap = new HashMap<>(20);
private Map mNTerminalKaMap = new HashMap<>(10);
private Map mCTerminalKaMap = new HashMap<>(10);
private Map mCTerminalSidechainKaMap = new HashMap<>(10);
// Populates each built-in KaSet constant with its published pKa values and then
// locks it so that later attempts to modify it throw UnmodifyableObjectException.
// Every set defines a default N-terminal and C-terminal value; some additionally
// define values specific to the terminal residue.
static
{
// Bjellqvist: residue-specific N-terminal values plus separate values for
// Asp/Glu sidechains when they are the C-terminal residue.
BJELLQVIST.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
.setCTerminalSidechainpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
.setCTerminalSidechainpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
.lock();
// Expasy: same numbers as Bjellqvist, but the Asp/Glu values are registered as
// C-terminal values rather than C-terminal sidechain values (see note below).
EXPASY.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
// Note: mistakenly treats C-terminal Asp and Glu sidechain values as C-terminal values.
.setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
.setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
.lock();
// EMBOSS: no residue-specific terminal values.
EMBOSS.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.6f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 3.9f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.1f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10.8f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12.5f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.6f, Charge.NEUTRAL)
.lock();
// Sillero: no residue-specific terminal values.
SILLERO.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.2f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.0f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 9.0f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.2f, Charge.NEUTRAL)
.lock();
// Sillero (abridged): groups share a small number of rounded values.
SILLERO_ABRIDGED.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 9.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 9.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
.lock();
// Patrickios (simple): only Asp, Glu, Lys, Arg and the termini are defined;
// no Cys, Tyr or His entries are added.
PATRICKIOS_SIMPLE.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
.addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL)
.lock();
// Stryer (1995): default analysis mode switched from REDUCED to NATIVE.
STRYER_1995.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.0f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.4f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.4f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10.0f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.1f, Charge.NEUTRAL)
.lock();
// Grimsley: NATIVE mode; the Arg value of 12 is the default described on the constant.
GRIMSLEY.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.7f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 3.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 6.8f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10.3f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.3f, Charge.NEUTRAL)
.lock();
// Taylor: NATIVE mode, with the same residue-specific N-terminal values as Bjellqvist.
// NOTE(review): like EXPASY, the Asp/Glu values at the end are stored with
// setCTerminalpKa() rather than setCTerminalSidechainpKa() - confirm this is intended.
TAYLOR_NATIVE.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE)
.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE)
.setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE)
.addpKa(AminoAcid.ASPARTIC_ACID, 4.5f, Charge.NEUTRAL)
.addpKa(AminoAcid.GLUTAMIC_ACID, 4.65f, Charge.NEUTRAL)
.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL)
.addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL)
.addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE)
.addpKa(AminoAcid.LYSINE, 10.78f, Charge.POSITIVE)
.addpKa(AminoAcid.ARGININE, 12.25f, Charge.POSITIVE)
.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL)
.setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL)
.setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL)
.lock();
}
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//--------------------------------------------------------------------------
/**
 * Creates an empty, unlocked KaSet with the given name and records it in the
 * static name registry that backs {@link #values()} and {@link #valueOf(String)}.
 * @param inName the name to assign to the set and to register it under
 */
public KaSet(String inName)
{
   this.mName = inName;

   // The registry key is the name that was just assigned.
   sUniqueMap.put(inName, this);
}
//--------------------------------------------------------------------------
/**
 * Reconstructs a KaSet from XML in the layout produced by {@link #toXMLNode()}.
 * Reads the name attribute, the optional default analysis mode, the per-molecule
 * pKa tags (both the nested ionizable-group form and the older attribute form),
 * the N-terminal, C-terminal and C-terminal-sidechain pKa tags, and finally locks
 * the set if the locked attribute evaluates to true.
 * This constructor does not add the new instance to the static name registry.
 * @param inXML the KaSet XML node; its tag name is verified before anything is read
 */
public KaSet(XMLNode inXML)
{
   inXML.verifyTagName(HfgBioXML.KA_SET_TAG);

   mName = inXML.getAttributeValue(HfgBioXML.NAME_ATT);

   XMLTag defaultAnalysisModeTag = inXML.getOptionalSubtagByName(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
   if (defaultAnalysisModeTag != null)
   {
      XMLTag analysisModeTag = defaultAnalysisModeTag.getRequiredSubtagByName(HfgBioXML.PROTEIN_ANALYSIS_MODE_TAG);
      setDefaultProteinAnalysisMode(ProteinAnalysisMode.instantiate(analysisModeTag));
   }

   List<XMLTag> pKaTags = inXML.getSubtagsByName(HfgBioXML.PKA_TAG);
   if (CollectionUtil.hasValues(pKaTags))
   {
      for (XMLTag pKaTag : pKaTags)
      {
         // The tag names its molecule through either an amino acid or a monosaccharide attribute.
         Molecule molecule = null;
         if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
         {
            molecule = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
         }
         else if (pKaTag.hasAttribute(HfgBioXML.MONOSACCHARIDE_ATT))
         {
            molecule = Monosaccharide.valueOf(pKaTag.getAttributeValue(HfgBioXML.MONOSACCHARIDE_ATT));
         }

         List<XMLTag> ionizableGroupTags = pKaTag.getSubtagsByName(IonizableGroup.IONIZABLE_GROUP_TAG);
         if (CollectionUtil.hasValues(ionizableGroupTags))
         {
            // Iterate only the ionizable-group subtags that were just looked up, so any
            // other kind of subtag is never handed to the IonizableGroup constructor.
            for (XMLTag ionizableGroupTag : ionizableGroupTags)
            {
               addpKa(molecule, new IonizableGroup(ionizableGroupTag));
            }
         }
         else
         {
            // Old style: a single value and protonated form stored as attributes of the pKa tag.
            addpKa((AminoAcid) molecule,
                   Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                   Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
      }
   }

   List<XMLTag> nTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.N_TERM_PKA_TAG);
   if (CollectionUtil.hasValues(nTerm_pKaTags))
   {
      for (XMLTag pKaTag : nTerm_pKaTags)
      {
         // An amino acid attribute marks a residue-specific value; otherwise the tag names an N-terminal group.
         if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
         {
            AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            setNTerminalpKa(aa,
                            Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                            Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
         else
         {
            NTerminalGroup nTerminalGroup = NTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.N_TERM_ATT));
            setNTerminalpKa(nTerminalGroup,
                            Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                            Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
      }
   }

   List<XMLTag> cTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_PKA_TAG);
   if (CollectionUtil.hasValues(cTerm_pKaTags))
   {
      for (XMLTag pKaTag : cTerm_pKaTags)
      {
         // Same convention as the N-terminal tags above.
         if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
         {
            AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            setCTerminalpKa(aa,
                            Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                            Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
         else
         {
            CTerminalGroup cTerminalGroup = CTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.C_TERM_ATT));
            setCTerminalpKa(cTerminalGroup,
                            Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                            Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
      }
   }

   List<XMLTag> cTermAA_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_AA_PKA_TAG);
   if (CollectionUtil.hasValues(cTermAA_pKaTags))
   {
      for (XMLTag pKaTag : cTermAA_pKaTags)
      {
         AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
         setCTerminalSidechainpKa(aa,
                                  Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                                  Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
      }
   }

   // Lock last: every setter used above refuses to run on a locked set.
   if (BooleanUtil.valueOf(inXML.getAttributeValue(HfgBioXML.LOCKED_ATT)))
   {
      lock();
   }
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//--------------------------------------------------------------------------
/**
 * Returns the KaSets currently held in the static name registry.
 * @return an unmodifiable view of the registered KaSets
 */
public static Collection<KaSet> values()
{
   return Collections.unmodifiableCollection(sUniqueMap.values());
}
//--------------------------------------------------------------------------
/**
 * Looks up a KaSet in the static name registry.
 * @param inName the name the KaSet was registered under
 * @return whatever the registry holds for that name
 */
public static KaSet valueOf(String inName)
{
   KaSet registered = sUniqueMap.get(inName);
   return registered;
}
//--------------------------------------------------------------------------
/**
 * @return the current name of this KaSet
 */
public String name()
{
   return this.mName;
}
//--------------------------------------------------------------------------
/**
 * Renames this KaSet. Only the name field is changed; the static name registry
 * is not touched by this method.
 * @param inValue the new name
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 */
public KaSet setName(String inValue)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
   }

   this.mName = inValue;

   return this;
}
//--------------------------------------------------------------------------
/**
 * @return true once {@link #lock()} has been called on this KaSet
 */
public boolean isLocked()
{
   return this.mLocked;
}
//--------------------------------------------------------------------------
/**
 * Marks this KaSet as locked; the mutators of this class then throw
 * UnmodifyableObjectException instead of modifying it.
 * @return this KaSet to enable method chaining
 */
public KaSet lock()
{
   this.mLocked = true;

   return this;
}
//--------------------------------------------------------------------------
/**
 * Sets the protein analysis mode reported as this KaSet's default.
 * @param inValue the new default analysis mode
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 */
public KaSet setDefaultProteinAnalysisMode(ProteinAnalysisMode inValue)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
   }

   this.mDefaultProteinAnalysisMode = inValue;

   return this;
}
//--------------------------------------------------------------------------
/**
 * @return the default protein analysis mode of this KaSet
 *         (ProteinAnalysisMode.REDUCED unless it has been changed)
 */
public ProteinAnalysisMode getDefaultProteinAnalysisMode()
{
   return this.mDefaultProteinAnalysisMode;
}
//--------------------------------------------------------------------------
/**
 * Serializes this KaSet to XML. The name and locked state are written as attributes
 * (only when set / true), followed by the default analysis mode and then one subtag
 * per entry of each pKa map. Keys are sorted before being written so that the output
 * order does not depend on HashMap iteration order.
 * @return the XML node describing this KaSet
 */
public XMLNode toXMLNode()
{
   XMLNode node = new XMLTag(HfgBioXML.KA_SET_TAG);
   if (StringUtil.isSet(name())) node.setAttribute(HfgBioXML.NAME_ATT, name());
   if (isLocked()) node.setAttribute(HfgBioXML.LOCKED_ATT, "true");

   if (getDefaultProteinAnalysisMode() != null)
   {
      XMLTag subtag = new XMLTag(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
      node.addSubtag(subtag);
      subtag.addSubtag(getDefaultProteinAnalysisMode().toXMLTag());
   }

   if (CollectionUtil.hasValues(mKaMap))
   {
      List<Molecule> sortedKeys = new ArrayList<>(mKaMap.keySet());
      Collections.sort(sortedKeys);

      for (Molecule molecule : sortedKeys)
      {
         List<IonizableGroup> ionizableGroups = mKaMap.get(molecule);

         XMLTag pKaTag = new XMLTag(HfgBioXML.PKA_TAG);
         // The molecule is identified by an attribute whose name depends on its type.
         if (molecule instanceof AminoAcid)
         {
            pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) molecule).getThreeLetterCode());
         }
         else if (molecule instanceof Monosaccharide)
         {
            pKaTag.setAttribute(HfgBioXML.MONOSACCHARIDE_ATT, molecule.name());
         }

         for (IonizableGroup ionizableGroup : ionizableGroups)
         {
            pKaTag.addSubtag(ionizableGroup.toXMLNode());
         }

         node.addSubtag(pKaTag);
      }
   }

   if (CollectionUtil.hasValues(mNTerminalKaMap))
   {
      List<Molecule> sortedKeys = new ArrayList<>(mNTerminalKaMap.keySet());
      Collections.sort(sortedKeys);

      for (Molecule nTerminalGroup : sortedKeys)
      {
         IonizableGroup ionizableGroup = mNTerminalKaMap.get(nTerminalGroup);

         XMLTag nTerm_pKaTag = new XMLTag(HfgBioXML.N_TERM_PKA_TAG);
         // Residue-specific entries are keyed by amino acid; the rest by N-terminal group name.
         if (nTerminalGroup instanceof AminoAcid)
         {
            nTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) nTerminalGroup).getThreeLetterCode());
         }
         else
         {
            nTerm_pKaTag.setAttribute(HfgBioXML.N_TERM_ATT, nTerminalGroup.name());
         }
         nTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
         nTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());

         node.addSubtag(nTerm_pKaTag);
      }
   }

   if (CollectionUtil.hasValues(mCTerminalKaMap))
   {
      List<Molecule> sortedKeys = new ArrayList<>(mCTerminalKaMap.keySet());
      Collections.sort(sortedKeys);

      for (Molecule cTerminalGroup : sortedKeys)
      {
         IonizableGroup ionizableGroup = mCTerminalKaMap.get(cTerminalGroup);

         XMLTag cTerm_pKaTag = new XMLTag(HfgBioXML.C_TERM_PKA_TAG);
         if (cTerminalGroup instanceof AminoAcid)
         {
            cTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) cTerminalGroup).getThreeLetterCode());
         }
         else
         {
            cTerm_pKaTag.setAttribute(HfgBioXML.C_TERM_ATT, cTerminalGroup.name());
         }
         cTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
         cTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());

         node.addSubtag(cTerm_pKaTag);
      }
   }

   if (CollectionUtil.hasValues(mCTerminalSidechainKaMap))
   {
      List<AminoAcid> sortedKeys = new ArrayList<>(mCTerminalSidechainKaMap.keySet());
      Collections.sort(sortedKeys);

      for (AminoAcid cTerminalAA : sortedKeys)
      {
         IonizableGroup ionizableGroup = mCTerminalSidechainKaMap.get(cTerminalAA);

         XMLTag cTermAA_pKaTag = new XMLTag(HfgBioXML.C_TERM_AA_PKA_TAG);
         cTermAA_pKaTag.setAttribute(HfgBioXML.AA_ATT, cTerminalAA.getThreeLetterCode());
         cTermAA_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
         cTermAA_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());

         node.addSubtag(cTermAA_pKaTag);
      }
   }

   return node;
}
//--------------------------------------------------------------------------
/**
 * @return the name of this KaSet, exactly as returned by {@link #name()}
 */
@Override
public String toString()
{
   String label = name();
   return label;
}
//--------------------------------------------------------------------------
/**
 * Two KaSets are equal when {@link #compareTo(KaSet)} reports 0 for them.
 * @param inObj2 the object to compare against
 * @return true if inObj2 is a KaSet that compares as equal to this one
 */
@Override
public boolean equals(Object inObj2)
{
   // instanceof is false for null, so no separate null check is required.
   return (inObj2 instanceof KaSet
           && 0 == compareTo((KaSet) inObj2));
}

//--------------------------------------------------------------------------
/**
 * Hash code consistent with {@link #equals(Object)}: compareTo() only reports 0 when
 * all four pKa maps have matching sizes, so equal KaSets always produce the same
 * value here. The value changes if entries are added to an unlocked KaSet.
 * @return a hash derived from the sizes of the four pKa maps
 */
@Override
public int hashCode()
{
   int hash = mKaMap.size();
   hash = 31 * hash + mNTerminalKaMap.size();
   hash = 31 * hash + mCTerminalKaMap.size();
   hash = 31 * hash + mCTerminalSidechainKaMap.size();

   return hash;
}
//--------------------------------------------------------------------------
/**
 * Orders KaSets by their content. The sizes of the four pKa maps are compared first
 * (side chain, N-terminal, C-terminal, C-terminal sidechain); if all sizes match, the
 * values stored under each of this set's keys are compared with the other set's values
 * for the same key, map by map in the same order. The name, locked flag and default
 * analysis mode take no part in the comparison.
 * @param inObj2 the KaSet to compare against
 * @return -1 if inObj2 is null, 0 if it is this same instance or no difference is found,
 *         otherwise the first non-zero result from CompareUtil
 */
@Override
public int compareTo(KaSet inObj2)
{
   if (null == inObj2)
   {
      return -1;
   }

   if (this == inObj2)
   {
      return 0;
   }

   // Cheap checks first: the number of entries in each map.
   int cmp = CompareUtil.compare(mKaMap.size(), inObj2.mKaMap.size());
   if (cmp != 0)
   {
      return cmp;
   }

   cmp = CompareUtil.compare(mNTerminalKaMap.size(), inObj2.mNTerminalKaMap.size());
   if (cmp != 0)
   {
      return cmp;
   }

   cmp = CompareUtil.compare(mCTerminalKaMap.size(), inObj2.mCTerminalKaMap.size());
   if (cmp != 0)
   {
      return cmp;
   }

   cmp = CompareUtil.compare(mCTerminalSidechainKaMap.size(), inObj2.mCTerminalSidechainKaMap.size());
   if (cmp != 0)
   {
      return cmp;
   }

   // Sizes agree, so compare the stored values key by key.
   for (Molecule key : mKaMap.keySet())
   {
      cmp = CompareUtil.compare(mKaMap.get(key), inObj2.mKaMap.get(key));
      if (cmp != 0)
      {
         return cmp;
      }
   }

   for (Molecule key : mNTerminalKaMap.keySet())
   {
      cmp = CompareUtil.compare(mNTerminalKaMap.get(key), inObj2.mNTerminalKaMap.get(key));
      if (cmp != 0)
      {
         return cmp;
      }
   }

   for (Molecule key : mCTerminalKaMap.keySet())
   {
      cmp = CompareUtil.compare(mCTerminalKaMap.get(key), inObj2.mCTerminalKaMap.get(key));
      if (cmp != 0)
      {
         return cmp;
      }
   }

   for (AminoAcid residue : mCTerminalSidechainKaMap.keySet())
   {
      cmp = CompareUtil.compare(mCTerminalSidechainKaMap.get(residue), inObj2.mCTerminalSidechainKaMap.get(residue));
      if (cmp != 0)
      {
         return cmp;
      }
   }

   return 0;
}
//--------------------------------------------------------------------------
/**
 * Creates an unlocked copy of this KaSet that can be modified without affecting
 * the original. Each map is copied, and the per-molecule lists inside the side chain
 * map are copied as well, so that addpKa() on the copy cannot append to a list still
 * owned by this (possibly locked) KaSet. The IonizableGroup objects themselves are
 * shared between the original and the copy.
 * The copy keeps the same name and is not added to the static name registry.
 * @return an unlocked copy of this KaSet
 */
@Override
public KaSet clone()
{
   KaSet newObj;
   try
   {
      newObj = (KaSet) super.clone();
   }
   catch (CloneNotSupportedException e)
   {
      throw new RuntimeException(e);
   }

   if (mNTerminalKaMap != null)
   {
      newObj.mNTerminalKaMap = new HashMap<>(mNTerminalKaMap);
   }

   if (mKaMap != null)
   {
      // Copying the map alone would leave both KaSets pointing at the same mutable
      // lists, so each list gets its own copy.
      Map<Molecule, List<IonizableGroup>> kaMapCopy = new HashMap<>();
      for (Map.Entry<Molecule, List<IonizableGroup>> entry : mKaMap.entrySet())
      {
         kaMapCopy.put(entry.getKey(), new ArrayList<IonizableGroup>(entry.getValue()));
      }
      newObj.mKaMap = kaMapCopy;
   }

   if (mCTerminalKaMap != null)
   {
      newObj.mCTerminalKaMap = new HashMap<>(mCTerminalKaMap);
   }

   if (mCTerminalSidechainKaMap != null)
   {
      newObj.mCTerminalSidechainKaMap = new HashMap<>(mCTerminalSidechainKaMap);
   }

   // A copy always starts out unlocked, even when taken from a locked KaSet.
   newObj.mLocked = false;

   return newObj;
}
//--------------------------------------------------------------------------
/**
 * Adds a pKa value for the specified amino acid to the set.
 * The value is stored as Ka = 10^(-pKa). Locking and range validation are performed
 * by {@link #addpKa(Molecule, IonizableGroup)}, to which this method delegates.
 * @param inResidue the amino acid that the pKa value should apply to
 * @param inpKa the pKa value to associate with the specified amino acid
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 */
public KaSet addpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
{
   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   return addpKa(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));
}
//--------------------------------------------------------------------------
/**
 * Adds an ionizable group for the specified molecule to the set. This form of addpKa()
 * can be used to assign pKa values to monosaccharide groups in a glycan.
 * A molecule may accumulate several ionizable groups; each call appends one.
 * @param inMolecule the molecule that the ionizable group should apply to
 * @param inIonizableGroup the ionizable group to associate with the given molecule
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if the group's pKa is below 0 or above 14
 */
public KaSet addpKa(Molecule inMolecule, IonizableGroup inIonizableGroup)
{
   if (mLocked)
   {
      throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
   }

   if (inIonizableGroup.getpKa() < 0 || inIonizableGroup.getpKa() > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inIonizableGroup.getpKa() + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   List<IonizableGroup> groupsForMolecule = mKaMap.get(inMolecule);
   if (groupsForMolecule != null)
   {
      groupsForMolecule.add(inIonizableGroup);
   }
   else
   {
      // First group for this molecule: create its list and register it.
      groupsForMolecule = new ArrayList<>(3);
      groupsForMolecule.add(inIonizableGroup);
      mKaMap.put(inMolecule, groupsForMolecule);
   }

   return this;
}
//--------------------------------------------------------------------------
/**
 * Returns the ionizable groups registered for the given molecule through addpKa().
 * The list returned is the one held internally by this KaSet, not a copy.
 * @param inResidue the molecule to look up
 * @return the molecule's ionizable groups, or null if none have been added for it
 */
public List<IonizableGroup> getIonizableGroups(Molecule inResidue)
{
   return mKaMap.get(inResidue);
}
//--------------------------------------------------------------------------
/**
 * Looks up the N-terminal ionizable group. For an unmodified N-terminus a value
 * specific to the N-terminal residue is preferred; if there is none, or the terminus
 * is some other group, the value registered for the terminal group itself is used.
 * @param inNTerminalGroup the N-terminal group of the protein
 * @param inNTerminalResidue the N-terminal amino acid
 * @return the matching ionizable group, or null if nothing is registered
 */
public IonizableGroup getNTerminalKa(NTerminalGroup inNTerminalGroup, AminoAcid inNTerminalResidue)
{
   IonizableGroup residueSpecific = null;
   if (inNTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
   {
      residueSpecific = mNTerminalKaMap.get(inNTerminalResidue);
   }

   if (residueSpecific != null)
   {
      return residueSpecific;
   }

   // Fall back to the value registered for the terminal group as a whole.
   return mNTerminalKaMap.get(inNTerminalGroup);
}
//--------------------------------------------------------------------------
/**
 * Sets the pKa used for the given N-terminal group, replacing any previous value.
 * The value is stored as Ka = 10^(-pKa).
 * @param inNTerminalGroup the N-terminal group that the pKa value should apply to
 * @param inpKa the pKa value; must be between 0 and 14
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if inpKa is below 0 or above 14
 */
public KaSet setNTerminalpKa(NTerminalGroup inNTerminalGroup, float inpKa, Charge inProtonatedForm)
{
   if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

   if (inpKa < 0 || inpKa > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   mNTerminalKaMap.put(inNTerminalGroup, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

   return this;
}
//--------------------------------------------------------------------------
/**
 * Sets the N-terminal pKa used when the given amino acid is the N-terminal residue,
 * replacing any previous value. The value is stored as Ka = 10^(-pKa).
 * @param inResidue the N-terminal amino acid that the pKa value should apply to
 * @param inpKa the pKa value; must be between 0 and 14
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if inpKa is below 0 or above 14
 */
public KaSet setNTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
{
   if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

   if (inpKa < 0 || inpKa > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   mNTerminalKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

   return this;
}
//--------------------------------------------------------------------------
/**
 * Looks up the C-terminal ionizable group. For an unmodified C-terminus a value
 * specific to the C-terminal residue is preferred; if there is none, or the terminus
 * is some other group, the value registered for the terminal group itself is used.
 * @param inCTerminalGroup the C-terminal group of the protein
 * @param inCTerminalResidue the C-terminal amino acid
 * @return the matching ionizable group, or null if nothing is registered
 */
public IonizableGroup getCTerminalKa(CTerminalGroup inCTerminalGroup, AminoAcid inCTerminalResidue)
{
   IonizableGroup residueSpecific = null;
   if (inCTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
   {
      residueSpecific = mCTerminalKaMap.get(inCTerminalResidue);
   }

   if (residueSpecific != null)
   {
      return residueSpecific;
   }

   // Fall back to the value registered for the terminal group as a whole.
   return mCTerminalKaMap.get(inCTerminalGroup);
}
//--------------------------------------------------------------------------
/**
 * Sets the pKa used for the given C-terminal group, replacing any previous value.
 * The value is stored as Ka = 10^(-pKa).
 * @param inCTerminalGroup the C-terminal group that the pKa value should apply to
 * @param inpKa the pKa value; must be between 0 and 14
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if inpKa is below 0 or above 14
 */
public KaSet setCTerminalpKa(CTerminalGroup inCTerminalGroup, float inpKa, Charge inProtonatedForm)
{
   if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

   if (inpKa < 0 || inpKa > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   mCTerminalKaMap.put(inCTerminalGroup, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

   return this;
}
//--------------------------------------------------------------------------
/**
 * Sets the C-terminal pKa used when the given amino acid is the C-terminal residue,
 * replacing any previous value. The value is stored as Ka = 10^(-pKa).
 * @param inResidue the C-terminal amino acid that the pKa value should apply to
 * @param inpKa the pKa value; must be between 0 and 14
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if inpKa is below 0 or above 14
 */
public KaSet setCTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
{
   if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

   if (inpKa < 0 || inpKa > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   mCTerminalKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

   return this;
}
//--------------------------------------------------------------------------
/**
 * Sets the sidechain pKa used for the given amino acid when it is the C-terminal
 * residue, replacing any previous value. The value is stored as Ka = 10^(-pKa).
 * @param inResidue the amino acid whose C-terminal sidechain pKa is being set
 * @param inpKa the pKa value; must be between 0 and 14
 * @param inProtonatedForm the charge of the group's protonated form
 * @return this KaSet to enable method chaining
 * @throws UnmodifyableObjectException if this KaSet has been locked
 * @throws InvalidValueException if inpKa is below 0 or above 14
 */
public KaSet setCTerminalSidechainpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
{
   if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

   if (inpKa < 0 || inpKa > 14)
   {
      throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
   }

   // Float.valueOf replaces the deprecated Float(double) constructor; the narrowing cast is the same one it performed.
   mCTerminalSidechainKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

   return this;
}
//--------------------------------------------------------------------------
/**
 * @param inResidue the C-terminal amino acid to look up
 * @return the C-terminal sidechain ionizable group registered for that amino acid,
 *         or null if none has been set
 */
public IonizableGroup getCTerminalSidechainKa(AminoAcid inResidue)
{
   IonizableGroup sidechainGroup = mCTerminalSidechainKaMap.get(inResidue);
   return sidechainGroup;
}
}