All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.KaSet Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.HashMap;

import com.hfg.bio.glyco.Monosaccharide;
import com.hfg.bio.proteinproperty.ProteinAnalysisMode;
import com.hfg.chem.Charge;
import com.hfg.chem.IonizableGroup;
import com.hfg.chem.Molecule;
import com.hfg.exception.InvalidValueException;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.BooleanUtil;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedMap;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;

//------------------------------------------------------------------------------
/**
 * Encapsulation for a group of Ka (dissociation constant) values.
 * Most commonly used for protein isoelectric point prediction.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public class KaSet implements Cloneable, Comparable
{
   /**
    * Registry of the KaSets created through the name-based constructor, keyed by set name.
    * Backs {@link #values()} and {@link #valueOf(String)}.
    * NOTE(review): OrderedMap presumably preserves insertion order - confirm.
    */
   private static Map<String, KaSet> sUniqueMap = new OrderedMap<>();

   /**
    * pKa values from Bjellqvist et al.
    * <p>
    * Bjellqvist B, Hughes GJ, Pasquali C, et al. The focusing positions of polypeptides
    * in immobilized pH gradients can be predicted from their amino acid sequences.
    * Electrophoresis. 1993;14(10):1023-31.
    * Available at: http://www.ncbi.nlm.nih.gov/pubmed/8125050.
    * <p>
    * Intended for use with reduced and denatured proteins on 2D gels.
    * The values themselves are assigned in the static initializer of this class.
    */
   public static final KaSet BJELLQVIST       = new KaSet("Bjellqvist");

   /**
    * Method from Expasy's Compute pI/Mw tool,
    * which implements the values from Bjellqvist but mistakenly treats C-terminal Asp and Glu
    * sidechain values as C-terminal values.
    * <p>
    * Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet EXPASY           = new KaSet("Expasy");

   /**
    * Method from EMBOSS's iep tool.
    * <p>
    * Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet EMBOSS           = new KaSet("EMBOSS");

   /**
    * pKa values from Sillero and Ribeiro.
    * <p>
    * Sillero A, Ribeiro JM. Isoelectric points of proteins:
    * theoretical determination. Analytical biochemistry. 1989;179(2):319-325.
    * Available at: http://www.ncbi.nlm.nih.gov/pubmed/2774179.
    * <p>
    * Intended for use with reduced and denatured proteins on 2D gels.
    */
   public static final KaSet SILLERO          = new KaSet("Sillero");

   /**
    * Abridged pKa values from Sillero and Ribeiro.
    * <p>
    * Sillero A, Ribeiro JM. Isoelectric points of proteins:
    * theoretical determination. Analytical biochemistry. 1989;179(2):319-325.
    * Available at: http://www.ncbi.nlm.nih.gov/pubmed/2774179.
    * <p>
    * Intended for use with reduced and denatured proteins on 2D gels.
    * An oversimplification for ease of calculation.
    */
   public static final KaSet SILLERO_ABRIDGED = new KaSet("Sillero (abridged)");

   /**
    * Simplified pKa values from Patrickios and Yamasaki.
    * <p>
    * Patrickios, CS, Yamasaki, EN. Polypeptide amino acid composition and isoelectric point.
    * II. Comparison between experiment and theory.
    * Analytical Biochemistry. 1995:231(1):82-91. http://doi.org/10.1006/abio.1995.1506.
    * <p>
    * An oversimplification for ease of calculation.
    */
   public static final KaSet PATRICKIOS_SIMPLE = new KaSet("Patrickios (simple)");

   /**
    * pKa values from Biochemistry by Lubert Stryer, 1995.
    * The static initializer sets this set's default analysis mode to NATIVE.
    */
   public static final KaSet STRYER_1995      = new KaSet("Stryer (1995)");

   /**
    * pKa values from Grimsley et al.
    * <p>
    * Grimsley GR, Scholtz JM, Pace CN (2009). A summary of the measured pK values of the
    * ionizable groups in folded proteins. Protein Science 18(1), 247-251.
    * Note that they do not provide a value for Arginine so a default value of 12 is used.
    * <p>
    * Intended for use with native proteins.
    */
   public static final KaSet GRIMSLEY      = new KaSet("Grimsley");

   /**
    * Intended for use with native proteins.
    * The static initializer sets this set's default analysis mode to NATIVE.
    */
   public static final KaSet TAYLOR_NATIVE      = new KaSet("Taylor");


   private String                         mName;
   private ProteinAnalysisMode            mDefaultProteinAnalysisMode = ProteinAnalysisMode.REDUCED;
   private boolean                        mLocked;
   private Map>  mKaMap                   = new HashMap<>(20);
   private Map  mNTerminalKaMap          = new HashMap<>(10);
   private Map  mCTerminalKaMap          = new HashMap<>(10);
   private Map mCTerminalSidechainKaMap = new HashMap<>(10);

   // Assigns the pKa values of every predefined set and then locks each set
   // so that the shared constants cannot be modified afterwards.
   static
   {
      // ----- Bjellqvist -----
      setBjellqvistNTerminalpKas(BJELLQVIST);
      BJELLQVIST.addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL);
      BJELLQVIST.addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL);
      BJELLQVIST.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL);
      BJELLQVIST.addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL);
      BJELLQVIST.addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE);
      BJELLQVIST.addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE);
      BJELLQVIST.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE);
      BJELLQVIST.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL);
      BJELLQVIST.setCTerminalSidechainpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL);
      BJELLQVIST.setCTerminalSidechainpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL);
      BJELLQVIST.lock();

      // ----- Expasy -----
      setBjellqvistNTerminalpKas(EXPASY);
      EXPASY.addpKa(AminoAcid.ASPARTIC_ACID, 4.05f, Charge.NEUTRAL);
      EXPASY.addpKa(AminoAcid.GLUTAMIC_ACID, 4.45f, Charge.NEUTRAL);
      EXPASY.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL);
      EXPASY.addpKa(AminoAcid.TYROSINE, 10f, Charge.NEUTRAL);
      EXPASY.addpKa(AminoAcid.HISTIDINE, 5.98f, Charge.POSITIVE);
      EXPASY.addpKa(AminoAcid.LYSINE, 10f, Charge.POSITIVE);
      EXPASY.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE);
      EXPASY.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL);
      // Note: Expasy mistakenly treats the C-terminal Asp and Glu sidechain values as C-terminal values.
      EXPASY.setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL);
      EXPASY.setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL);
      EXPASY.lock();

      // ----- EMBOSS -----
      EMBOSS.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.6f, Charge.POSITIVE);
      EMBOSS.addpKa(AminoAcid.ASPARTIC_ACID, 3.9f, Charge.NEUTRAL);
      EMBOSS.addpKa(AminoAcid.GLUTAMIC_ACID, 4.1f, Charge.NEUTRAL);
      EMBOSS.addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL);
      EMBOSS.addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL);
      EMBOSS.addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE);
      EMBOSS.addpKa(AminoAcid.LYSINE, 10.8f, Charge.POSITIVE);
      EMBOSS.addpKa(AminoAcid.ARGININE, 12.5f, Charge.POSITIVE);
      EMBOSS.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.6f, Charge.NEUTRAL);
      EMBOSS.lock();

      // ----- Sillero -----
      SILLERO.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.2f, Charge.POSITIVE);
      SILLERO.addpKa(AminoAcid.ASPARTIC_ACID, 4.0f, Charge.NEUTRAL);
      SILLERO.addpKa(AminoAcid.GLUTAMIC_ACID, 4.5f, Charge.NEUTRAL);
      SILLERO.addpKa(AminoAcid.CYSTEINE, 9.0f, Charge.NEUTRAL);
      SILLERO.addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL);
      SILLERO.addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE);
      SILLERO.addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE);
      SILLERO.addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE);
      SILLERO.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.2f, Charge.NEUTRAL);
      SILLERO.lock();

      // ----- Sillero (abridged) -----
      SILLERO_ABRIDGED.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE);
      SILLERO_ABRIDGED.addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL);
      SILLERO_ABRIDGED.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL);
      SILLERO_ABRIDGED.addpKa(AminoAcid.CYSTEINE, 9.5f, Charge.NEUTRAL);
      SILLERO_ABRIDGED.addpKa(AminoAcid.TYROSINE, 9.5f, Charge.NEUTRAL);
      SILLERO_ABRIDGED.addpKa(AminoAcid.HISTIDINE, 6.4f, Charge.POSITIVE);
      SILLERO_ABRIDGED.addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE);
      SILLERO_ABRIDGED.addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE);
      SILLERO_ABRIDGED.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL);
      SILLERO_ABRIDGED.lock();

      // ----- Patrickios (simple) -----
      PATRICKIOS_SIMPLE.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 11.2f, Charge.POSITIVE);
      PATRICKIOS_SIMPLE.addpKa(AminoAcid.ASPARTIC_ACID, 4.2f, Charge.NEUTRAL);
      PATRICKIOS_SIMPLE.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL);
      PATRICKIOS_SIMPLE.addpKa(AminoAcid.LYSINE, 11.2f, Charge.POSITIVE);
      PATRICKIOS_SIMPLE.addpKa(AminoAcid.ARGININE, 11.2f, Charge.POSITIVE);
      PATRICKIOS_SIMPLE.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 4.2f, Charge.NEUTRAL);
      PATRICKIOS_SIMPLE.lock();

      // ----- Stryer (1995) -----
      STRYER_1995.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE);
      STRYER_1995.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 8.0f, Charge.POSITIVE);
      STRYER_1995.addpKa(AminoAcid.ASPARTIC_ACID, 4.4f, Charge.NEUTRAL);
      STRYER_1995.addpKa(AminoAcid.GLUTAMIC_ACID, 4.4f, Charge.NEUTRAL);
      STRYER_1995.addpKa(AminoAcid.CYSTEINE, 8.5f, Charge.NEUTRAL);
      STRYER_1995.addpKa(AminoAcid.TYROSINE, 10.0f, Charge.NEUTRAL);
      STRYER_1995.addpKa(AminoAcid.HISTIDINE, 6.5f, Charge.POSITIVE);
      STRYER_1995.addpKa(AminoAcid.LYSINE, 10.0f, Charge.POSITIVE);
      STRYER_1995.addpKa(AminoAcid.ARGININE, 12.0f, Charge.POSITIVE);
      STRYER_1995.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.1f, Charge.NEUTRAL);
      STRYER_1995.lock();

      // ----- Grimsley -----
      GRIMSLEY.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE);
      GRIMSLEY.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.7f, Charge.POSITIVE);
      GRIMSLEY.addpKa(AminoAcid.ASPARTIC_ACID, 3.5f, Charge.NEUTRAL);
      GRIMSLEY.addpKa(AminoAcid.GLUTAMIC_ACID, 4.2f, Charge.NEUTRAL);
      GRIMSLEY.addpKa(AminoAcid.CYSTEINE, 6.8f, Charge.NEUTRAL);
      GRIMSLEY.addpKa(AminoAcid.TYROSINE, 10.3f, Charge.NEUTRAL);
      GRIMSLEY.addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE);
      GRIMSLEY.addpKa(AminoAcid.LYSINE, 10.4f, Charge.POSITIVE);
      GRIMSLEY.addpKa(AminoAcid.ARGININE, 12f, Charge.POSITIVE);
      GRIMSLEY.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.3f, Charge.NEUTRAL);
      GRIMSLEY.lock();

      // ----- Taylor (native) -----
      TAYLOR_NATIVE.setDefaultProteinAnalysisMode(ProteinAnalysisMode.NATIVE);
      setBjellqvistNTerminalpKas(TAYLOR_NATIVE);
      TAYLOR_NATIVE.addpKa(AminoAcid.ASPARTIC_ACID, 4.5f, Charge.NEUTRAL);
      TAYLOR_NATIVE.addpKa(AminoAcid.GLUTAMIC_ACID, 4.65f, Charge.NEUTRAL);
      TAYLOR_NATIVE.addpKa(AminoAcid.CYSTEINE, 9f, Charge.NEUTRAL);
      TAYLOR_NATIVE.addpKa(AminoAcid.TYROSINE, 10.1f, Charge.NEUTRAL);
      TAYLOR_NATIVE.addpKa(AminoAcid.HISTIDINE, 6.6f, Charge.POSITIVE);
      TAYLOR_NATIVE.addpKa(AminoAcid.LYSINE, 10.78f, Charge.POSITIVE);
      TAYLOR_NATIVE.addpKa(AminoAcid.ARGININE, 12.25f, Charge.POSITIVE);
      TAYLOR_NATIVE.setCTerminalpKa(CTerminalGroup.UNMODIFIED_C_TERMINUS, 3.55f, Charge.NEUTRAL);
      TAYLOR_NATIVE.setCTerminalpKa(AminoAcid.ASPARTIC_ACID, 4.55f, Charge.NEUTRAL);
      TAYLOR_NATIVE.setCTerminalpKa(AminoAcid.GLUTAMIC_ACID, 4.75f, Charge.NEUTRAL);
      TAYLOR_NATIVE.lock();
   }

   //--------------------------------------------------------------------------
   // Applies the N-terminal pKa table (generic value plus residue-specific overrides)
   // that the Bjellqvist, Expasy, and Taylor sets have in common.
   private static void setBjellqvistNTerminalpKas(KaSet inKaSet)
   {
      inKaSet.setNTerminalpKa(NTerminalGroup.UNMODIFIED_N_TERMINUS, 7.5f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.ALANINE, 7.59f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.METHIONINE, 7.00f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.SERINE, 6.93f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.PROLINE, 8.36f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.THREONIE, 6.82f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.VALINE, 7.44f, Charge.POSITIVE);
      inKaSet.setNTerminalpKa(AminoAcid.GLUTAMIC_ACID, 7.70f, Charge.POSITIVE);
   }

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //--------------------------------------------------------------------------
   /**
    * Creates an empty, unlocked set with the given name and records it in the
    * static registry that backs {@link #values()} and {@link #valueOf(String)}.
    * @param inName the name of the set; also used as the registry key
    */
   public KaSet(String inName)
   {
      this.mName = inName;
      sUniqueMap.put(inName, this);
   }

   //--------------------------------------------------------------------------
   /**
    * Reconstructs a KaSet from the XML structure written by {@link #toXMLNode()}.
    * Reads the name, the optional default analysis mode, the sidechain pKa tags
    * (both the ionizable-group subtag form and the older attribute-only form),
    * the N-terminal, C-terminal, and C-terminal sidechain pKa tags, and finally
    * locks the set if the locked attribute evaluates to true.
    * Unlike the name-based constructor, this constructor does not add the set to the registry.
    * @param inXML the XML node to read; its tag name is verified against the KaSet tag
    */
   public KaSet(XMLNode inXML)
   {
      inXML.verifyTagName(HfgBioXML.KA_SET_TAG);
      mName = inXML.getAttributeValue(HfgBioXML.NAME_ATT);

      XMLTag defaultAnalysisModeTag = inXML.getOptionalSubtagByName(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
      if (defaultAnalysisModeTag != null)
      {
         XMLTag analysisModeTag = defaultAnalysisModeTag.getRequiredSubtagByName(HfgBioXML.PROTEIN_ANALYSIS_MODE_TAG);

         setDefaultProteinAnalysisMode(ProteinAnalysisMode.instantiate(analysisModeTag));
      }

      List<XMLTag> pKaTags = inXML.getSubtagsByName(HfgBioXML.PKA_TAG);
      if (CollectionUtil.hasValues(pKaTags))
      {
         for (XMLTag pKaTag : pKaTags)
         {
            Molecule molecule = null;
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               molecule = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            }
            else if (pKaTag.hasAttribute(HfgBioXML.MONOSACCHARIDE_ATT))
            {
               molecule = Monosaccharide.valueOf(pKaTag.getAttributeValue(HfgBioXML.MONOSACCHARIDE_ATT));
            }

            List<XMLTag> ionizableGroupTags = pKaTag.getSubtagsByName(IonizableGroup.IONIZABLE_GROUP_TAG);
            if (CollectionUtil.hasValues(ionizableGroupTags))
            {
               // Iterate only the ionizable-group subtags that were just looked up,
               // rather than every subtag of the pKa tag.
               for (XMLTag ionizableGroupTag : ionizableGroupTags)
               {
                  addpKa(molecule, new IonizableGroup(ionizableGroupTag));
               }
            }
            else
            {
               // Old style: the pKa value and protonated form are attributes of the pKa tag itself.
               addpKa((AminoAcid) molecule,
                      Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                      Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
            }
         }
      }

      List<XMLTag> nTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.N_TERM_PKA_TAG);
      if (CollectionUtil.hasValues(nTerm_pKaTags))
      {
         for (XMLTag pKaTag : nTerm_pKaTags)
         {
            float pKa = Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT));
            Charge protonatedForm = Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT));

            // A value is keyed either by a specific N-terminal residue or by an N-terminal group.
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
               setNTerminalpKa(aa, pKa, protonatedForm);
            }
            else
            {
               NTerminalGroup nTerminalGroup = NTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.N_TERM_ATT));
               setNTerminalpKa(nTerminalGroup, pKa, protonatedForm);
            }
         }
      }

      List<XMLTag> cTerm_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_PKA_TAG);
      if (CollectionUtil.hasValues(cTerm_pKaTags))
      {
         for (XMLTag pKaTag : cTerm_pKaTags)
         {
            float pKa = Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT));
            Charge protonatedForm = Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT));

            // A value is keyed either by a specific C-terminal residue or by a C-terminal group.
            if (pKaTag.hasAttribute(HfgBioXML.AA_ATT))
            {
               AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
               setCTerminalpKa(aa, pKa, protonatedForm);
            }
            else
            {
               CTerminalGroup cTerminalGroup = CTerminalGroup.valueOf(pKaTag.getAttributeValue(HfgBioXML.C_TERM_ATT));
               setCTerminalpKa(cTerminalGroup, pKa, protonatedForm);
            }
         }
      }

      List<XMLTag> cTermAA_pKaTags = inXML.getSubtagsByName(HfgBioXML.C_TERM_AA_PKA_TAG);
      if (CollectionUtil.hasValues(cTermAA_pKaTags))
      {
         for (XMLTag pKaTag : cTermAA_pKaTags)
         {
            AminoAcid aa = AminoAcid.valueOf(pKaTag.getAttributeValue(HfgBioXML.AA_ATT));
            setCTerminalSidechainpKa(aa,
                                     Float.parseFloat(pKaTag.getAttributeValue(HfgBioXML.VALUE_ATT)),
                                     Charge.valueOf(pKaTag.getAttributeValue(HfgBioXML.PROTONATED_FORM_ATT)));
         }
      }

      // Lock last so that the population calls above are not rejected.
      if (BooleanUtil.valueOf(inXML.getAttributeValue(HfgBioXML.LOCKED_ATT)))
      {
         lock();
      }
   }


   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //--------------------------------------------------------------------------
   /**
    * Returns a read-only view of the KaSets currently held in the registry.
    * @return an unmodifiable collection of the registered KaSets
    */
   public static Collection<KaSet> values()
   {
      return Collections.unmodifiableCollection(sUniqueMap.values());
   }

   //--------------------------------------------------------------------------
   /**
    * Looks up a registered KaSet by name.
    * @param inName the name the set was registered under
    * @return the result of the registry lookup for that name
    */
   public static KaSet valueOf(String inName)
   {
      KaSet registeredSet = sUniqueMap.get(inName);
      return registeredSet;
   }

   //--------------------------------------------------------------------------
   /**
    * @return the name of this set
    */
   public String name()
   {
      return this.mName;
   }

   //--------------------------------------------------------------------------
   /**
    * Renames this set. Only the name field is changed; the static registry is not updated here.
    * @param inValue the new name
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    */
   public KaSet setName(String inValue)
   {
      if (! mLocked)
      {
         mName = inValue;
         return this;
      }

      throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");
   }

   //--------------------------------------------------------------------------
   /**
    * @return true if {@link #lock()} has been called on this set
    */
   public boolean isLocked()
   {
      return this.mLocked;
   }

   //--------------------------------------------------------------------------
   /**
    * Marks this set as locked, after which its setters and adders throw
    * UnmodifyableObjectException. This class offers no corresponding unlock;
    * {@link #clone()} yields an unlocked copy.
    * @return this KaSet to enable method chaining
    */
   public KaSet lock()
   {
      this.mLocked = true;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the analysis mode returned by {@link #getDefaultProteinAnalysisMode()}.
    * @param inValue the analysis mode to use as this set's default
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    */
   public KaSet setDefaultProteinAnalysisMode(ProteinAnalysisMode inValue)
   {
      if (mLocked)
      {
         String message = name() + " is locked and cannot be modified!";
         throw new UnmodifyableObjectException(message);
      }

      this.mDefaultProteinAnalysisMode = inValue;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * @return the default analysis mode of this set (REDUCED unless it has been changed)
    */
   public ProteinAnalysisMode getDefaultProteinAnalysisMode()
   {
      return this.mDefaultProteinAnalysisMode;
   }


   //--------------------------------------------------------------------------
   /**
    * Serializes this set to XML: the name and locked flag as attributes, the default
    * analysis mode as a subtag, and one subtag per sidechain, N-terminal, C-terminal,
    * and C-terminal sidechain entry. Keys are sorted before being written so that the
    * output order does not depend on hash map iteration order.
    * @return the XML representation of this set
    */
   public XMLNode toXMLNode()
   {
      XMLNode node = new XMLTag(HfgBioXML.KA_SET_TAG);

      if (StringUtil.isSet(name())) node.setAttribute(HfgBioXML.NAME_ATT, name());

      if (isLocked()) node.setAttribute(HfgBioXML.LOCKED_ATT, "true");

      if (getDefaultProteinAnalysisMode() != null)
      {
         XMLTag subtag = new XMLTag(HfgBioXML.DEFAULT_ANALYSIS_MODE_ATT);
         node.addSubtag(subtag);
         subtag.addSubtag(getDefaultProteinAnalysisMode().toXMLTag());
      }

      if (CollectionUtil.hasValues(mKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule molecule : sortedKeys)
         {
            List<IonizableGroup> ionizableGroups = mKaMap.get(molecule);
            XMLTag pKaTag = new XMLTag(HfgBioXML.PKA_TAG);
            if (molecule instanceof AminoAcid)
            {
               pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) molecule).getThreeLetterCode());
            }
            else if (molecule instanceof Monosaccharide)
            {
               pKaTag.setAttribute(HfgBioXML.MONOSACCHARIDE_ATT, molecule.name());
            }

            // Each ionizable group of the molecule becomes its own subtag.
            for (IonizableGroup ionizableGroup : ionizableGroups)
            {
               pKaTag.addSubtag(ionizableGroup.toXMLNode());
            }

            node.addSubtag(pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mNTerminalKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mNTerminalKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule nTerminalGroup : sortedKeys)
         {
            IonizableGroup ionizableGroup = mNTerminalKaMap.get(nTerminalGroup);

            XMLTag nTerm_pKaTag = new XMLTag(HfgBioXML.N_TERM_PKA_TAG);

            // Residue-specific entries are identified by amino acid; all others by group name.
            if (nTerminalGroup instanceof AminoAcid)
            {
               nTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) nTerminalGroup).getThreeLetterCode());
            }
            else
            {
               nTerm_pKaTag.setAttribute(HfgBioXML.N_TERM_ATT, nTerminalGroup.name());
            }

            nTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            nTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(nTerm_pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mCTerminalKaMap))
      {
         List<Molecule> sortedKeys = new ArrayList<>(mCTerminalKaMap.keySet());
         Collections.sort(sortedKeys);
         for (Molecule cTerminalGroup : sortedKeys)
         {
            IonizableGroup ionizableGroup = mCTerminalKaMap.get(cTerminalGroup);

            XMLTag cTerm_pKaTag = new XMLTag(HfgBioXML.C_TERM_PKA_TAG);

            // Residue-specific entries are identified by amino acid; all others by group name.
            if (cTerminalGroup instanceof AminoAcid)
            {
               cTerm_pKaTag.setAttribute(HfgBioXML.AA_ATT, ((AminoAcid) cTerminalGroup).getThreeLetterCode());
            }
            else
            {
               cTerm_pKaTag.setAttribute(HfgBioXML.C_TERM_ATT, cTerminalGroup.name());
            }

            cTerm_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            cTerm_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(cTerm_pKaTag);
         }
      }

      if (CollectionUtil.hasValues(mCTerminalSidechainKaMap))
      {
         List<AminoAcid> sortedKeys = new ArrayList<>(mCTerminalSidechainKaMap.keySet());
         Collections.sort(sortedKeys);
         for (AminoAcid cTerminalAA : sortedKeys)
         {
            IonizableGroup ionizableGroup = mCTerminalSidechainKaMap.get(cTerminalAA);

            XMLTag cTermAA_pKaTag = new XMLTag(HfgBioXML.C_TERM_AA_PKA_TAG);
            cTermAA_pKaTag.setAttribute(HfgBioXML.AA_ATT, cTerminalAA.getThreeLetterCode());
            cTermAA_pKaTag.setAttribute(HfgBioXML.VALUE_ATT, ionizableGroup.getpKa());
            cTermAA_pKaTag.setAttribute(HfgBioXML.PROTONATED_FORM_ATT, ionizableGroup.getProtonatedForm());
            node.addSubtag(cTermAA_pKaTag);
         }
      }

      return node;
   }

   //--------------------------------------------------------------------------
   /**
    * @return the name of this set, exactly as reported by {@link #name()}
    */
   @Override
   public String toString()
   {
      String label = name();
      return label;
   }

   //--------------------------------------------------------------------------
   /**
    * Two KaSets are equal when {@link #compareTo(KaSet)} reports 0, i.e. when their
    * pKa contents match. The name, lock state, and default analysis mode are not considered.
    * @param inObj2 the object to compare against
    * @return true if inObj2 is a KaSet whose contents compare as equal to this set's
    */
   @Override
   public boolean equals(Object inObj2)
   {
      // instanceof is false for null, so no separate null check is needed.
      return (inObj2 instanceof KaSet)
             && 0 == compareTo((KaSet) inObj2);
   }

   //--------------------------------------------------------------------------
   /**
    * Hash code consistent with {@link #equals(Object)}. compareTo() can only return 0
    * when all four map sizes match, so hashing the sizes guarantees that equal sets
    * produce equal hash codes. The value changes if entries are added to an unlocked set.
    * @return a hash code derived from the sizes of the four pKa maps
    */
   @Override
   public int hashCode()
   {
      int hash = mKaMap.size();
      hash = 31 * hash + mNTerminalKaMap.size();
      hash = 31 * hash + mCTerminalKaMap.size();
      hash = 31 * hash + mCTerminalSidechainKaMap.size();
      return hash;
   }

   //--------------------------------------------------------------------------
   /**
    * Orders KaSets by content: first by the sizes of the sidechain, N-terminal, C-terminal,
    * and C-terminal sidechain maps, then by the stored values themselves.
    * Values are compared key by key over the sorted union of both sets' keys, so the
    * result does not depend on hash map iteration order and a.compareTo(b) is the
    * mirror image of b.compareTo(a) even when the two sets hold different keys.
    * The name, lock state, and default analysis mode are not considered.
    * @param inObj2 the set to compare against
    * @return 0 if the contents match, -1 if inObj2 is null, otherwise the first non-zero comparison result
    */
   @Override
   public int compareTo(KaSet inObj2)
   {
      if (null == inObj2)
      {
         return -1;
      }

      if (this == inObj2)
      {
         return 0;
      }

      int result = CompareUtil.compare(mKaMap.size(), inObj2.mKaMap.size());

      if (0 == result)
      {
         result = CompareUtil.compare(mNTerminalKaMap.size(), inObj2.mNTerminalKaMap.size());
      }

      if (0 == result)
      {
         result = CompareUtil.compare(mCTerminalKaMap.size(), inObj2.mCTerminalKaMap.size());
      }

      if (0 == result)
      {
         result = CompareUtil.compare(mCTerminalSidechainKaMap.size(), inObj2.mCTerminalSidechainKaMap.size());
      }

      if (0 == result)
      {
         for (Molecule molecule : sortedKeyUnion(mKaMap.keySet(), inObj2.mKaMap.keySet()))
         {
            result = CompareUtil.compare(mKaMap.get(molecule), inObj2.mKaMap.get(molecule));
            if (result != 0)
            {
               break;
            }
         }
      }

      if (0 == result)
      {
         for (Molecule molecule : sortedKeyUnion(mNTerminalKaMap.keySet(), inObj2.mNTerminalKaMap.keySet()))
         {
            result = CompareUtil.compare(mNTerminalKaMap.get(molecule), inObj2.mNTerminalKaMap.get(molecule));
            if (result != 0)
            {
               break;
            }
         }
      }

      if (0 == result)
      {
         for (Molecule molecule : sortedKeyUnion(mCTerminalKaMap.keySet(), inObj2.mCTerminalKaMap.keySet()))
         {
            result = CompareUtil.compare(mCTerminalKaMap.get(molecule), inObj2.mCTerminalKaMap.get(molecule));
            if (result != 0)
            {
               break;
            }
         }
      }

      if (0 == result)
      {
         for (AminoAcid aa : sortedKeyUnion(mCTerminalSidechainKaMap.keySet(), inObj2.mCTerminalSidechainKaMap.keySet()))
         {
            result = CompareUtil.compare(mCTerminalSidechainKaMap.get(aa), inObj2.mCTerminalSidechainKaMap.get(aa));
            if (result != 0)
            {
               break;
            }
         }
      }

      return result;
   }

   //--------------------------------------------------------------------------
   // Returns the distinct keys of both collections in sorted order. A null key, if present
   // in either collection, is kept out of the sort and placed first.
   private static <T extends Comparable<? super T>> List<T> sortedKeyUnion(Collection<T> inKeys1, Collection<T> inKeys2)
   {
      boolean hasNullKey = false;
      List<T> keys = new ArrayList<>(inKeys1.size() + inKeys2.size());

      for (T key : inKeys1)
      {
         if (null == key)
         {
            hasNullKey = true;
         }
         else
         {
            keys.add(key);
         }
      }

      for (T key : inKeys2)
      {
         if (null == key)
         {
            hasNullKey = true;
         }
         else if (! inKeys1.contains(key))
         {
            keys.add(key);
         }
      }

      Collections.sort(keys);

      if (hasNullKey)
      {
         keys.add(0, null);
      }

      return keys;
   }

   //--------------------------------------------------------------------------
   /**
    * Creates an unlocked copy of this set that can be modified independently.
    * All four maps are copied, and the per-molecule lists inside the sidechain map are
    * copied as well, so adding a pKa to the clone cannot alter this (possibly locked) set.
    * The IonizableGroup instances themselves are shared between the original and the copy.
    * The copy is not added to the static registry.
    * @return an unlocked copy of this set
    */
   @Override
   public KaSet clone()
   {
      KaSet newObj;
      try
      {
         newObj = (KaSet) super.clone();
      }
      catch (CloneNotSupportedException e)
      {
         throw new RuntimeException(e);
      }

      if (mNTerminalKaMap != null)
      {
         newObj.mNTerminalKaMap = new HashMap<>(mNTerminalKaMap);
      }

      if (mKaMap != null)
      {
         Map<Molecule, List<IonizableGroup>> kaMapCopy = new HashMap<>(mKaMap);

         // A plain map copy would still share the value lists with the original,
         // and addpKa() appends to those lists in place.
         for (Map.Entry<Molecule, List<IonizableGroup>> entry : kaMapCopy.entrySet())
         {
            if (entry.getValue() != null)
            {
               entry.setValue(new ArrayList<>(entry.getValue()));
            }
         }

         newObj.mKaMap = kaMapCopy;
      }

      if (mCTerminalKaMap != null)
      {
         newObj.mCTerminalKaMap = new HashMap<>(mCTerminalKaMap);
      }

      if (mCTerminalSidechainKaMap != null)
      {
         newObj.mCTerminalSidechainKaMap = new HashMap<>(mCTerminalSidechainKaMap);
      }

      newObj.mLocked = false;

      return newObj;
   }

   //--------------------------------------------------------------------------
   /**
    * Adds a pKa value for the specified amino acid to the set.
    * The value is converted to a Ka (Ka = 10^(-pKa)) and wrapped in an IonizableGroup
    * before being passed to {@link #addpKa(Molecule, IonizableGroup)}, which performs
    * the lock and range checks.
    * @param inResidue the amino acid that the pKa value should apply to
    * @param inpKa the pKa value to associate with the specified amino acid
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    */
   public KaSet addpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      Float ka = Float.valueOf((float) Math.pow(10, -inpKa));

      return addpKa(inResidue, new IonizableGroup(ka, inProtonatedForm));
   }

   //--------------------------------------------------------------------------
   /**
    * Adds an ionizable group for the specified molecule to the set. This form of addpKa()
    * can be used to assign pKa values to monosaccharide groups in a glycan.
    * A molecule may have several groups; each call appends to the molecule's list.
    * @param inMolecule the molecule (amino acid or monosaccharide) that the group should apply to
    * @param inIonizableGroup the ionizable group to associate with the given molecule
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if the group's pKa is below 0 or above 14
    */
   public KaSet addpKa(Molecule inMolecule, IonizableGroup inIonizableGroup)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      if (inIonizableGroup.getpKa() < 0 || inIonizableGroup.getpKa() > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inIonizableGroup.getpKa() + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      List<IonizableGroup> ionizableGroups = mKaMap.get(inMolecule);
      if (null == ionizableGroups)
      {
         // First group for this molecule: create its list on demand.
         ionizableGroups = new ArrayList<>(3);
         mKaMap.put(inMolecule, ionizableGroups);
      }

      ionizableGroups.add(inIonizableGroup);

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Returns the ionizable groups registered for the specified molecule.
    * The returned list is the set's own internal list, not a copy, so callers should treat it as read-only.
    * @param inResidue the molecule to look up
    * @return the molecule's ionizable groups, or null if none have been added
    */
   public List<IonizableGroup> getIonizableGroups(Molecule inResidue)
   {
      return mKaMap.get(inResidue);
   }


   //--------------------------------------------------------------------------
   /**
    * Retrieves the ionizable group to use for a chain's N-terminus.
    * For an unmodified N-terminus a value stored for the specific N-terminal residue
    * takes precedence; otherwise (or if no residue-specific value exists) the value
    * stored for the N-terminal group itself is returned.
    * @param inNTerminalGroup the N-terminal group of the chain
    * @param inNTerminalResidue the first residue of the chain
    * @return the matching ionizable group, or null if neither lookup finds one
    */
   public IonizableGroup getNTerminalKa(NTerminalGroup inNTerminalGroup, AminoAcid inNTerminalResidue)
   {
      if (inNTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
      {
         IonizableGroup residueSpecific = mNTerminalKaMap.get(inNTerminalResidue);
         if (residueSpecific != null)
         {
            return residueSpecific;
         }
      }

      return mNTerminalKaMap.get(inNTerminalGroup);
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the pKa for the specified N-terminal group, replacing any value previously stored for it.
    * The value is stored as a Ka (Ka = 10^(-pKa)).
    * @param inNTerminalGroup the N-terminal group that the pKa value should apply to
    * @param inpKa the pKa value; must be a number between 0 and 14
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if inpKa is NaN, below 0, or above 14
    */
   public KaSet setNTerminalpKa(NTerminalGroup inNTerminalGroup, float inpKa, Charge inProtonatedForm)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      // NaN fails neither "< 0" nor "> 14", so it has to be rejected explicitly.
      if (Float.isNaN(inpKa) || inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      mNTerminalKaMap.put(inNTerminalGroup, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the N-terminal pKa to use when the specified amino acid is the N-terminal residue,
    * replacing any value previously stored for it. The value is stored as a Ka (Ka = 10^(-pKa)).
    * @param inResidue the N-terminal amino acid that the pKa value should apply to
    * @param inpKa the pKa value; must be a number between 0 and 14
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if inpKa is NaN, below 0, or above 14
    */
   public KaSet setNTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      // NaN fails neither "< 0" nor "> 14", so it has to be rejected explicitly.
      if (Float.isNaN(inpKa) || inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      mNTerminalKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

      return this;
   }




   //--------------------------------------------------------------------------
   /**
    * Retrieves the ionizable group to use for a chain's C-terminus.
    * For an unmodified C-terminus a value stored for the specific C-terminal residue
    * takes precedence; otherwise (or if no residue-specific value exists) the value
    * stored for the C-terminal group itself is returned.
    * @param inCTerminalGroup the C-terminal group of the chain
    * @param inCTerminalResidue the last residue of the chain
    * @return the matching ionizable group, or null if neither lookup finds one
    */
   public IonizableGroup getCTerminalKa(CTerminalGroup inCTerminalGroup, AminoAcid inCTerminalResidue)
   {
      if (inCTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
      {
         IonizableGroup residueSpecific = mCTerminalKaMap.get(inCTerminalResidue);
         if (residueSpecific != null)
         {
            return residueSpecific;
         }
      }

      return mCTerminalKaMap.get(inCTerminalGroup);
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the pKa for the specified C-terminal group, replacing any value previously stored for it.
    * The value is stored as a Ka (Ka = 10^(-pKa)).
    * @param inCTerminalGroup the C-terminal group that the pKa value should apply to
    * @param inpKa the pKa value; must be a number between 0 and 14
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if inpKa is NaN, below 0, or above 14
    */
   public KaSet setCTerminalpKa(CTerminalGroup inCTerminalGroup, float inpKa, Charge inProtonatedForm)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      // NaN fails neither "< 0" nor "> 14", so it has to be rejected explicitly.
      if (Float.isNaN(inpKa) || inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      mCTerminalKaMap.put(inCTerminalGroup, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the C-terminal pKa to use when the specified amino acid is the C-terminal residue,
    * replacing any value previously stored for it. The value is stored as a Ka (Ka = 10^(-pKa)).
    * @param inResidue the C-terminal amino acid that the pKa value should apply to
    * @param inpKa the pKa value; must be a number between 0 and 14
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if inpKa is NaN, below 0, or above 14
    */
   public KaSet setCTerminalpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      // NaN fails neither "< 0" nor "> 14", so it has to be rejected explicitly.
      if (Float.isNaN(inpKa) || inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      mCTerminalKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Sets the sidechain pKa to use for the specified amino acid when it is the C-terminal
    * residue, replacing any value previously stored for it.
    * The value is stored as a Ka (Ka = 10^(-pKa)).
    * @param inResidue the C-terminal amino acid whose sidechain the pKa value should apply to
    * @param inpKa the pKa value; must be a number between 0 and 14
    * @param inProtonatedForm the charge of the group's protonated form
    * @return this KaSet to enable method chaining
    * @throws UnmodifyableObjectException if this set has been locked
    * @throws InvalidValueException if inpKa is NaN, below 0, or above 14
    */
   public KaSet setCTerminalSidechainpKa(AminoAcid inResidue, float inpKa, Charge inProtonatedForm)
   {
      if (mLocked) throw new UnmodifyableObjectException(name() + " is locked and cannot be modified!");

      // NaN fails neither "< 0" nor "> 14", so it has to be rejected explicitly.
      if (Float.isNaN(inpKa) || inpKa < 0 || inpKa > 14)
      {
         throw new InvalidValueException(StringUtil.singleQuote(inpKa + "") + " is not a valid value. The pKa must be between 0 and 14!");
      }

      // Float.valueOf() replaces the deprecated Float(double) constructor; the narrowing to float is the same.
      mCTerminalSidechainKaMap.put(inResidue, new IonizableGroup(Float.valueOf((float) Math.pow(10, -inpKa)), inProtonatedForm));

      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Retrieves the C-terminal sidechain value stored for the specified amino acid.
    * @param inResidue the C-terminal amino acid to look up
    * @return the stored ionizable group, or null if none has been set for the residue
    */
   public IonizableGroup getCTerminalSidechainKa(AminoAcid inResidue)
   {
      IonizableGroup sidechainGroup = mCTerminalSidechainKaMap.get(inResidue);
      return sidechainGroup;
   }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy