com.hfg.bio.proteinproperty.IsoelectricPoint Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.proteinproperty;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import com.hfg.bio.*;
import com.hfg.bio.glyco.Glycan;
import com.hfg.bio.seq.AminoAcidComposition;
import com.hfg.bio.seq.Protein;
import com.hfg.chem.IonizableGroup;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedMap;
//------------------------------------------------------------------------------
/**
Isoelectric point calculation packaged as a protein property for ease of integration
with other protein properties.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class IsoelectricPoint extends SimpleProteinProperty
{
// The KaSet this instance reads its ionizable group values from.
private KaSet mKaSet;
// Registry of instances keyed by name; the (String, KaSet) constructor, which the other constructors delegate to, adds each new instance here.
// NOTE(review): the generic type arguments on this declaration look truncated ("Map>") — confirm against the upstream source.
private static final Map> sUniqueMap = new OrderedMap<>(10);
// Predefined instances, each built from the like-named KaSet constant.
// Where a second argument is given, it is the literature reference passed to setReference().
public static final IsoelectricPoint BJELLQVIST = new IsoelectricPoint<>(KaSet.BJELLQVIST,
"Bjellqvist B, Hughes GJ, Pasquali C, et al. (1993). \"The focusing positions of polypeptides in immobilized pH gradients can be predicted from their amino acid sequences\". Electrophoresis. 14(10):1023-31.");
public static final IsoelectricPoint EXPASY = new IsoelectricPoint<>(KaSet.EXPASY);
public static final IsoelectricPoint EMBOSS = new IsoelectricPoint<>(KaSet.EMBOSS);
public static final IsoelectricPoint SILLERO = new IsoelectricPoint<>(KaSet.SILLERO,
"Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins: theoretical determination\". Analytical biochemistry. 179(2):319-325.");
// Uses the same reference text as SILLERO.
public static final IsoelectricPoint SILLERO_ABRIDGED = new IsoelectricPoint<>(KaSet.SILLERO_ABRIDGED,
"Sillero A, Ribeiro JM (1989). \"Isoelectric points of proteins: theoretical determination\". Analytical biochemistry. 179(2):319-325.");
public static final IsoelectricPoint PATRICKIOS_SIMPLE = new IsoelectricPoint<>(KaSet.PATRICKIOS_SIMPLE,
"Patrickios CS, Yamasaki EN (1995). \"Polypeptide amino acid composition and isoelectric point. II. Comparison between experiment and theory\". Analytical biochemistry. 231(1):82-91.");
public static final IsoelectricPoint STRYER_1995 = new IsoelectricPoint<>(KaSet.STRYER_1995,
"Stryer L (1995) \"Biochemistry\"");
public static final IsoelectricPoint GRIMSLEY = new IsoelectricPoint<>(KaSet.GRIMSLEY,
"Grimsley GR, Scholtz JM, Pace CN (2009). \"A summary of the measured pK values of the ionizable groups in folded proteins\". Protein Science. 18(1), 247-251.");
public static final IsoelectricPoint TAYLOR_NATIVE = new IsoelectricPoint<>(KaSet.TAYLOR_NATIVE);
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//--------------------------------------------------------------------------
/**
Creates an isoelectric point property with an explicit name and adds it to the static registry.
@param inName the name handed to the superclass constructor and used as the registry key
@param inKaSet the KaSet to use for this instance's calculations
*/
public IsoelectricPoint(String inName, KaSet inKaSet)
{
super(inName);
mKaSet = inKaSet;
// Registration happens during construction; Map.put replaces any instance already stored under the same name.
sUniqueMap.put(inName, (IsoelectricPoint) this);
}
//--------------------------------------------------------------------------
/**
Creates an isoelectric point property named after the specified KaSet.
@param inKaSet the KaSet to use; its name() becomes the property name, so it must not be null
*/
public IsoelectricPoint(KaSet inKaSet)
{
this(inKaSet.name(), inKaSet);
}
//--------------------------------------------------------------------------
/**
 Creates an isoelectric point property named after the specified KaSet and records
 a literature reference for it.
 @param inKaSet the KaSet to use; its name() becomes the property name, so it must not be null
 @param inReference the reference text passed to setReference()
 */
public IsoelectricPoint(KaSet inKaSet, String inReference)
{
   // The single-argument constructor derives the name from the KaSet and registers the instance.
   this(inKaSet);
   setReference(inReference);
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
/**
 Returns the first registered instance whose KaSet equals the specified one,
 creating a new instance when none is registered.
 @param inKaSet the KaSet to look up
 @return the matching registered instance, or a newly constructed one
 */
public static IsoelectricPoint valueOf(KaSet inKaSet)
{
   for (IsoelectricPoint candidate : sUniqueMap.values())
   {
      if (candidate.getKaSet().equals(inKaSet))
      {
         return candidate;
      }
   }

   // Nothing registered for this KaSet yet; the constructor adds the new instance to the registry.
   return new IsoelectricPoint<>(inKaSet);
}
//---------------------------------------------------------------------------
/**
Returns the values collection of the name-to-instance registry (the map's own values() result, not a copy),
or null if the registry reference is null.
NOTE(review): the generic type arguments of the return type look truncated ("Collection>") — confirm against the upstream source.
*/
public static Collection> values()
{
return sUniqueMap != null ? sUniqueMap.values() : null;
}
//--------------------------------------------------------------------------
/**
Returns the type label for this property.
@return the string "pI"
*/
public String getType()
{
return "pI";
}
//--------------------------------------------------------------------------
/**
 Determines the isoelectric point (the pH at which the net charge is zero) for the protein.
 <p>
 The search starts at pH 7 with a step of one pH unit. Each pass moves the pH up when the
 net charge is positive and down otherwise. Whenever two consecutive net charges are not both
 positive or both negative, the step is first reduced tenfold. The search ends once the step
 is below 0.1 and the absolute net charge is below 0.0001.
 @param inProtein the protein on which the calculation should be performed
 @param inSettings settings for the calculation
 @return the isoelectric point rounded to two decimal places, or null when
         CollectionUtil.hasValues() reports no ionizable groups for the protein
 */
public Float calculate(Protein inProtein, S inSettings)
{
   Map groupCounts = constructIonizableGroupMap(inProtein, inSettings);
   if (! CollectionUtil.hasValues(groupCounts))
   {
      return null;
   }

   double step = 1.0;
   double pH = 7.0;

   // Seed the comparison with the charge at neutral pH, then take the first probe one step lower.
   double previousCharge = getNetCharge(pH, groupCounts);
   pH -= step;

   for (;;)
   {
      double charge = getNetCharge(pH, groupCounts);

      if (step < 0.1
          && Math.abs(charge) < 0.0001)
      {
         break;
      }

      boolean sameSign = (charge > 0 && previousCharge > 0)
                         || (charge < 0 && previousCharge < 0);
      if (! sameSign)
      {
         // The net charge changed sign (or hit zero): refine the step before moving on.
         step = 0.1 * step;
      }

      // A positive net charge means the pH is still below the isoelectric point.
      if (charge > 0)
      {
         pH += step;
      }
      else
      {
         pH -= step;
      }

      previousCharge = charge;
   }

   return (float) (Math.round(pH * 100) / 100.0);
}
//--------------------------------------------------------------------------
/**
 Estimates the protein's net charge at the specified pH.
 The ionizable groups are tallied afresh from the protein on every call.
 @param pH the pH at which the calculation should be performed
 @param inProtein the protein on which the calculation should be performed
 @param inSettings settings for the calculation
 @return the calculated net charge
 */
public double getNetCharge(double pH, Protein inProtein, S inSettings)
{
   Map groupCounts = constructIonizableGroupMap(inProtein, inSettings);

   return getNetCharge(pH, groupCounts);
}
//--------------------------------------------------------------------------
/**
Replaces the KaSet used by this instance.
The registry entry is keyed by name and is not touched by this method.
@param inValue the KaSet to use from now on
*/
protected void setKaSet(KaSet inValue)
{
mKaSet = inValue;
}
//--------------------------------------------------------------------------
/**
Returns the KaSet currently used by this instance.
@return the KaSet given at construction or last set via setKaSet()
*/
protected KaSet getKaSet()
{
return mKaSet;
}
//--------------------------------------------------------------------------
/**
 Tallies the ionizable groups present in the protein.
 <p>
 For a protein with chains, each chain is tallied recursively and the counts are summed.
 In native analysis mode the count of the KaSet's first cysteine group is then replaced by the
 protein's total number of free cysteines.
 <p>
 For a protein without chains and with a length greater than zero, the tally covers the
 side chains (falling back to the amino acid's own Ka values when the KaSet has none), the
 N- and C-terminal groups, and any glycans. Terminal and glycan groups are only added in this
 chainless branch.
 @param inProtein the protein whose ionizable groups should be counted
 @param inSettings settings supplying the protein analysis mode; must not be null
 @return a map of ionizable group to occurrence count; empty when the protein has neither chains nor residues
 */
protected Map<IonizableGroup, Integer> constructIonizableGroupMap(Protein inProtein, S inSettings)
{
   Map<IonizableGroup, Integer> ionizableGroupMap = new HashMap<>(25);

   boolean nativeMode = (inSettings.getProteinAnalysisMode() instanceof NativeAnalysisMode);

   if (CollectionUtil.hasValues(inProtein.getChains()))
   {
      for (Protein chain : inProtein.getChains())
      { // TODO: Force reducing?
         Map<IonizableGroup, Integer> chainMap = constructIonizableGroupMap(chain, inSettings);
         for (Map.Entry<IonizableGroup, Integer> chainEntry : chainMap.entrySet())
         {
            addToGroupCount(ionizableGroupMap, chainEntry.getKey(), chainEntry.getValue());
         }
      }

      if (nativeMode)
      {
         // Exclude disulfide-linked cysteines
         List<IonizableGroup> cysGroups = mKaSet.getIonizableGroups(AminoAcid.CYSTEINE);
         // hasValues() rather than a null check so that an empty list cannot make get(0) throw.
         if (CollectionUtil.hasValues(cysGroups))
         {
            ionizableGroupMap.put(cysGroups.get(0), inProtein.getTotalNumFreeCysteines());
         }
      }
   }
   else if (inProtein.length() > 0)
   {
      AminoAcid cTerminalResidue = inProtein.aminoAcidAt(inProtein.length());

      AminoAcidComposition aaComposition = inProtein.getAminoAcidComposition();
      for (AminoAcid aa : aaComposition.keySet())
      {
         Integer aaCount = aaComposition.get(aa);
         if (null == aaCount || aaCount <= 0)
         {
            continue;
         }

         if (aa == cTerminalResidue)
         {
            // An unmodified C-terminal residue may carry its own side chain Ka; count that one
            // residue under the special group and remove it from the regular tally.
            IonizableGroup cTerminalSidechainGroup = mKaSet.getCTerminalSidechainKa(cTerminalResidue);
            if (cTerminalSidechainGroup != null
                && inProtein.getAminoAcidSet().getCTerminalGroup().equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
            {
               ionizableGroupMap.put(cTerminalSidechainGroup, 1);
               aaCount--;
            }
         }

         Collection<IonizableGroup> sidechainGroups = mKaSet.getIonizableGroups(aa);
         if (null == sidechainGroups)
         {
            // The KaSet didn't provide values for this amino acid. Include default values (may also be null).
            sidechainGroups = aa.getSidechainKas();
         }

         if (null == sidechainGroups)
         {
            continue;
         }

         if (aa.equals(AminoAcid.CYSTEINE))
         {
            if (nativeMode)
            {
               // Exclude disulfide-linked cysteines
               // NOTE(review): this overwrites the count even when a C-terminal cysteine was
               // already tallied separately above — confirm that this is intended.
               aaCount = inProtein.getTotalNumFreeCysteines();
            }
            else if (inSettings.getProteinAnalysisMode() instanceof ReducedAnalysisMode)
            {
               // Only a reduced analysis mode can specify an alkylated cysteine. Any other (or missing)
               // mode leaves the cysteines unmodified instead of failing on the cast.
               ReducedAnalysisMode analysisMode = (ReducedAnalysisMode) inSettings.getProteinAnalysisMode();
               if (analysisMode.getAlkylatedCysteine() != null)
               {
                  List<IonizableGroup> alkCysGroups = mKaSet.getIonizableGroups(analysisMode.getAlkylatedCysteine());
                  if (CollectionUtil.hasValues(alkCysGroups))
                  {
                     for (IonizableGroup alkCysGroup : alkCysGroups)
                     {
                        ionizableGroupMap.put(alkCysGroup, aaCount);
                     }
                  }

                  // All remaining cysteines are alkylated, so none contribute the plain cysteine groups.
                  aaCount = 0;
               }
            }
         }

         for (IonizableGroup sidechainGroup : sidechainGroups)
         {
            addToGroupCount(ionizableGroupMap, sidechainGroup, aaCount);
         }
      }

      IonizableGroup nTerminalGroup = mKaSet.getNTerminalKa(inProtein.getAminoAcidSet().getNTerminalGroup(), inProtein.aminoAcidAt(1));
      if (nTerminalGroup != null)
      {
         addToGroupCount(ionizableGroupMap, nTerminalGroup, 1);
      }

      IonizableGroup cTerminalGroup = mKaSet.getCTerminalKa(inProtein.getAminoAcidSet().getCTerminalGroup(), inProtein.aminoAcidAt(inProtein.length()));
      if (cTerminalGroup != null)
      {
         addToGroupCount(ionizableGroupMap, cTerminalGroup, 1);
      }

      if (CollectionUtil.hasValues(inProtein.getGlycans()))
      {
         for (Glycan glycan : inProtein.getGlycans())
         {
            List<IonizableGroup> glycanGroups = mKaSet.getIonizableGroups(glycan);
            if (null == glycanGroups)
            {
               // Default Ka values?
               glycanGroups = glycan.getKas();
            }

            if (CollectionUtil.hasValues(glycanGroups))
            {
               for (IonizableGroup glycanGroup : glycanGroups)
               {
                  addToGroupCount(ionizableGroupMap, glycanGroup, 1);
               }
            }
         }
      }
   }

   return ionizableGroupMap;
}

//--------------------------------------------------------------------------
/**
 Adds the specified amount to the tally for an ionizable group, starting from zero when
 the group has no entry yet. The entry is written even if the resulting count is zero.
 */
private static void addToGroupCount(Map<IonizableGroup, Integer> inGroupCounts, IonizableGroup inGroup, int inAmount)
{
   Integer existingCount = inGroupCounts.get(inGroup);
   inGroupCounts.put(inGroup, (existingCount != null ? existingCount : 0) + inAmount);
}
//--------------------------------------------------------------------------
/**
 Estimates the net charge at the specified pH for a tally of ionizable groups.
 The hydrogen ion concentration is taken as 10^-pH, and each group's contribution
 comes from IonizableGroup.getCharge() given its count and that concentration.
 @param pH the pH at which the calculation should be performed
 @param inIonizableGroupMap map of ionizable group to occurrence count; may be null
 @return the sum of the group charges, or 0 if the map is null or empty
 */
private double getNetCharge(double pH, Map<IonizableGroup, Integer> inIonizableGroupMap)
{
   double netCharge = 0;

   if (inIonizableGroupMap != null)
   {
      double concOfHIons = Math.pow(10, -pH);

      // Iterate the entries directly instead of looking each key up again.
      for (Map.Entry<IonizableGroup, Integer> groupEntry : inIonizableGroupMap.entrySet())
      {
         netCharge += groupEntry.getKey().getCharge(groupEntry.getValue(), concOfHIons);
      }
   }

   return netCharge;
}
}