// com.hfg.bio.Protease
// Part of com_hfg: the com.hfg xml, html, svg, and bioinformatics utility library.
package com.hfg.bio;
import java.util.*;
import java.io.Reader;
import java.io.IOException;
import com.hfg.bio.seq.Protein;
import com.hfg.bio.seq.ProteinXLink;
import com.hfg.bio.seq.ProteinXLinkType;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;
//------------------------------------------------------------------------------
/**
* Chemical or biological proteolytic agent which can be used to theoretically
* digest a Protein.
*
* @author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
/*
What to do if...
- X-links are defined?
- Should the DigestSettings include an optional cys-alkylation form? This would
allow the user to specify whether the digest should be a 'native digest' or
whether the cysteines (including those specified as x-linked) should be considered
reduced and alkylated.
- Multiple identical chains exist?
- If the fragments aren't x-linked, concatenate the chain ids in the DigestFragment: 'H1/H2'
*/
public class Protease implements Comparable
{
//##########################################################################
// PRIVATE FIELDS
//##########################################################################
// Registry of every Protease constructed so far; backs values() and valueOf().
// The element type is spelled out so the registry can be iterated as Protease and
// converted to a Protease[] without casts.
// NOTE: this declaration must stay textually above the public Protease constants,
// because their constructors add to the registry during static initialization.
private static final Set<Protease> sValues = new OrderedSet<>();
// Name given at construction; equals(), hashCode() and compareTo() are based on it.
private String mName;
// Residues that mark a cleavage site when found in the P1 position
// (the last residue of the fragment being closed). Empty means "none".
private String mP1Specificity = "";
// P1' residues that veto a site selected through mP1Specificity.
private String mExcludedP1PrimeResidues = "";
// Residues that mark a cleavage site when found in the P1' position
// (the first residue of the fragment being opened). Empty means "none".
private String mP1PrimeSpecifity = "";
// P1 residues that veto a site selected through mP1PrimeSpecifity.
private String mExcludedP1Residues = "";
// Once true, every setter throws (see checkLock()).
private boolean mLocked;
//##########################################################################
// PUBLIC FIELDS
//##########################################################################
// Built-in proteases. Each one is created unconfigured here; the static
// initializer below assigns its cleavage rule and then locks it.
public static final Protease TRYPSIN      = new Protease("Trypsin");
public static final Protease LYS_C        = new Protease("Lys-C");
public static final Protease CHYMOTRYPSIN = new Protease("Chymotrypsin");
public static final Protease GLU_C        = new Protease("Glu-C");
public static final Protease ASP_N        = new Protease("Asp-N");
public static final Protease ASP_N_DE     = new Protease("Asp-N (DE)");
static
{
    // Trypsin: P1 is K or R, unless P1' is P.
    TRYPSIN.setP1Specificity("KR");
    TRYPSIN.setExcludedP1PrimeResidues("P");
    TRYPSIN.lock();

    // Lys-C: P1 is K.
    LYS_C.setP1Specificity("K");
    LYS_C.lock();

    // Chymotrypsin: P1 is Y, F, W or L.
    CHYMOTRYPSIN.setP1Specificity("YFWL");
    CHYMOTRYPSIN.lock();

    // Glu-C: P1 is E.
    GLU_C.setP1Specificity("E");
    GLU_C.lock();

    // Asp-N: P1' is D.
    ASP_N.setP1PrimeSpecificity("D");
    ASP_N.lock();

    // Asp-N (DE): P1' is D or E.
    ASP_N_DE.setP1PrimeSpecificity("DE");
    ASP_N_DE.lock();
}
//##########################################################################
// CONSTRUCTORS
//##########################################################################
//--------------------------------------------------------------------------
/**
 * Creates a protease with the given name and adds it to the static registry
 * read by {@link #values()} and {@link #valueOf(String)}.
 * The new instance starts with empty specificity strings and is not locked.
 *
 * @param inName the name returned by {@link #name()} and {@link #toString()}
 */
public Protease(String inName)
{
// Assign the name before registering: equals()/hashCode() are name-based and
// the registry presumably consults them while adding (verify against OrderedSet).
mName = inName;
sValues.add(this);
}
//##########################################################################
// PUBLIC METHODS
//##########################################################################
//--------------------------------------------------------------------------
/**
 * Returns all proteases that have been constructed so far.
 *
 * @return a newly allocated array holding the contents of the registry
 */
public static Protease[] values()
{
    Protease[] registered = new Protease[sValues.size()];
    return sValues.toArray(registered);
}
//--------------------------------------------------------------------------
/**
 * Looks up a registered protease by name. The supplied name is trimmed and
 * compared case-insensitively against {@link #name()}.
 *
 * @param inName the name to look for
 * @return the first registered protease whose name matches, or null if
 *         StringUtil.isSet() rejects the name or nothing matches
 */
public static Protease valueOf(String inName)
{
    if (! StringUtil.isSet(inName))
    {
        return null;
    }

    String wantedName = inName.trim();
    for (Protease candidate : sValues)
    {
        if (candidate.name().equalsIgnoreCase(wantedName))
        {
            return candidate;
        }
    }

    return null;
}
//--------------------------------------------------------------------------
/**
 * Hash code derived solely from the protease name.
 *
 * @return the hash code of {@link #name()}
 */
@Override
public int hashCode()
{
    String key = name();
    return key.hashCode();
}
//--------------------------------------------------------------------------
/**
 * Two proteases are equal when {@link #compareTo(Object)} reports 0.
 * Any argument that is not a Protease (including null) is unequal.
 *
 * @param inObj2 the object to compare with
 * @return true if compareTo() returns 0 for the argument
 */
@Override
public boolean equals(Object inObj2)
{
    int comparison = compareTo(inObj2);
    return comparison == 0;
}
//--------------------------------------------------------------------------
/**
 * Orders proteases by name, delegating to CompareUtil.compare().
 *
 * @param inObj2 the object to compare with
 * @return the CompareUtil result for the two names, or -1 if the argument
 *         is not a Protease
 */
@Override
public int compareTo(Object inObj2)
{
    if (! (inObj2 instanceof Protease))
    {
        return -1;
    }

    Protease other = (Protease) inObj2;
    return CompareUtil.compare(name(), other.name());
}
//--------------------------------------------------------------------------
/**
 * @return the name this protease was constructed with
 */
public String name()
{
    return this.mName;
}
//--------------------------------------------------------------------------
/**
 * @return the same value as {@link #name()}
 */
@Override
public String toString()
{
    return this.name();
}
//--------------------------------------------------------------------------
/**
 * Sets the residues that mark a cleavage site when they occupy the P1 position.
 *
 * @param inValue the residue characters, concatenated into one string
 * @return this protease, to allow call chaining
 * @throws UnmodifyableObjectException if this protease has been locked
 */
public Protease setP1Specificity(String inValue)
{
    checkLock();
    this.mP1Specificity = inValue;
    return this;
}
//--------------------------------------------------------------------------
/**
 * @return the residues recognized in the P1 position (empty string by default)
 */
public String getP1Specificity()
{
    return this.mP1Specificity;
}
//--------------------------------------------------------------------------
/**
 * Sets the P1' residues that prevent cleavage at a site matched by the P1 specificity.
 *
 * @param inValue the residue characters, concatenated into one string
 * @return this protease, to allow call chaining
 * @throws UnmodifyableObjectException if this protease has been locked
 */
public Protease setExcludedP1PrimeResidues(String inValue)
{
    checkLock();
    this.mExcludedP1PrimeResidues = inValue;
    return this;
}
//--------------------------------------------------------------------------
/**
 * @return the P1' residues that veto a P1-specified cleavage (empty string by default)
 */
public String getExcludedP1PrimeResidues()
{
    return this.mExcludedP1PrimeResidues;
}
//--------------------------------------------------------------------------
/**
 * Sets the residues that mark a cleavage site when they occupy the P1' position.
 *
 * @param inValue the residue characters, concatenated into one string
 * @return this protease, to allow call chaining
 * @throws UnmodifyableObjectException if this protease has been locked
 */
public Protease setP1PrimeSpecificity(String inValue)
{
    checkLock();
    this.mP1PrimeSpecifity = inValue;
    return this;
}
//--------------------------------------------------------------------------
/**
 * @return the residues recognized in the P1' position (empty string by default)
 */
public String getP1PrimeSpecificity()
{
    return this.mP1PrimeSpecifity;
}
//--------------------------------------------------------------------------
/**
 * Sets the P1 residues that prevent cleavage at a site matched by the P1' specificity.
 *
 * @param inValue the residue characters, concatenated into one string
 * @return this protease, to allow call chaining
 * @throws UnmodifyableObjectException if this protease has been locked
 */
public Protease setExcludedP1Residues(String inValue)
{
    checkLock();
    this.mExcludedP1Residues = inValue;
    return this;
}
//--------------------------------------------------------------------------
/**
 * @return the P1 residues that veto a P1'-specified cleavage (empty string by default)
 */
public String getExcludedP1Residues()
{
    return this.mExcludedP1Residues;
}
//--------------------------------------------------------------------------
/**
 * Marks this protease as locked. After this call every setter throws
 * instead of changing the object. No unlock operation is provided here.
 *
 * @return this protease, to allow call chaining
 */
public Protease lock()
{
    this.mLocked = true;
    return this;
}
//--------------------------------------------------------------------------
/**
 * Theoretically digests a protein with this protease.
 * <p>
 * If the settings specify an alkylated-cysteine form, the digest runs on a
 * clone of the protein whose disulfide x-links are removed and whose 'c'/'C'
 * amino-acid mappings are replaced by that form; the same amino acid set is
 * then applied to every resulting fragment. Otherwise the protein is digested
 * as-is ("native" digest) with its x-links taken into account.
 *
 * @param inProtein  the protein to digest; it is not modified by this method
 * @param inSettings the digest settings; must not be null
 * @return the fragments produced by the digest
 */
public List<DigestFragment> digest(Protein inProtein, DigestSettings inSettings)
{
    if (null == inSettings.getAlkylatedCys())
    {
        // Native digest. Deal with X-links
        return complexDigestion(inProtein, inSettings);
    }

    // Work on a copy so the caller's protein keeps its disulfides and amino acid set.
    Protein proteinCopy = inProtein.clone();
    proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE);

    // Reflect that the cysteines in the protein are alkylated.
    AminoAcidSet aaSet = new AminoAcidSet(inProtein.getAminoAcidSet());
    aaSet.setMapping('c', inSettings.getAlkylatedCys());
    aaSet.setMapping('C', inSettings.getAlkylatedCys());
    proteinCopy.setAminoAcidSet(aaSet);

    // Typed so that the fragments can be iterated as DigestFragment below.
    List<DigestFragment> outFrags;

    // Are there other types of x-links?
    if (CollectionUtil.hasValues(proteinCopy.getXLinks()))
    {
        outFrags = complexDigestion(proteinCopy, inSettings);
    }
    else
    {
        outFrags = simpleDigestion(proteinCopy, inSettings);
    }

    for (DigestFragment frag : outFrags)
    {
        frag.setAminoAcidSet(aaSet);
    }

    return outFrags;
}
//--------------------------------------------------------------------------
/**
 * Decides whether this protease cleaves between two adjacent residues.
 * Both residues are upper-cased before being looked up. A site qualifies if
 * either rule holds:
 * <ul>
 *   <li>the P1 residue is in the P1 specificity and the P1' residue is not
 *       among the excluded P1' residues, or</li>
 *   <li>the P1' residue is in the P1' specificity and the P1 residue is not
 *       among the excluded P1 residues.</li>
 * </ul>
 *
 * @param inP1Residue      the residue immediately before the candidate bond
 * @param inP1PrimeResidue the residue immediately after the candidate bond
 * @return true if the bond between the two residues is a cleavage site
 */
public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue)
{
    char p1 = Character.toUpperCase(inP1Residue);
    char p1Prime = Character.toUpperCase(inP1PrimeResidue);

    boolean p1RuleMatches = mP1Specificity.indexOf(p1) >= 0
                            && mExcludedP1PrimeResidues.indexOf(p1Prime) < 0;
    if (p1RuleMatches)
    {
        return true;
    }

    return mP1PrimeSpecifity.indexOf(p1Prime) >= 0
           && mExcludedP1Residues.indexOf(p1) < 0;
}
//##########################################################################
// PRIVATE METHODS
//##########################################################################
//--------------------------------------------------------------------------
/**
 * Guard called at the top of every setter.
 *
 * @throws UnmodifyableObjectException if {@link #lock()} has been called
 */
private void checkLock()
{
    if (! mLocked)
    {
        return;
    }

    throw new UnmodifyableObjectException("This object is locked and cannot be modified.");
}
//--------------------------------------------------------------------------
/**
 * Digests a protein without regard to x-links.
 * <p>
 * A protein that has chains is handled by digesting each chain recursively.
 * Otherwise the sequence is streamed one residue at a time; each time a
 * cleavage site is found the fragment collected so far is pushed into a
 * {@link SlidingWindow}, which emits the fragments (including those spanning
 * missed cleavages). Finally, fragments that fail
 * {@code inSettings.meetsCriteria()} are dropped.
 *
 * @param inProtein  the protein or chain to digest
 * @param inSettings the digest settings
 * @return a new, mutable list of the fragments that meet the settings' criteria
 * @throws RuntimeException wrapping any IOException raised while reading the sequence
 */
private List<DigestFragment> simpleDigestion(Protein inProtein, DigestSettings inSettings)
{
    List<DigestFragment> outFrags = new ArrayList<>();

    if (CollectionUtil.hasValues(inProtein.getChains()))
    {
        for (Protein chain : inProtein.getChains())
        {
            outFrags.addAll(simpleDigestion(chain, inSettings));
        }
    }
    else
    {
        // try-with-resources closes the reader on every exit path.
        try (Reader seqReader = inProtein.getSequenceReader())
        {
            SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), inProtein.getAminoAcidSet(), inSettings);

            int p1Residue = seqReader.read();
            if (p1Residue != -1)
            {
                // Note: append() rather than new StringBuilder(char), which would
                // select the int-capacity constructor.
                StringBuilder frag = new StringBuilder();
                frag.append((char) p1Residue);

                int p1PrimeResidue;
                while ((p1PrimeResidue = seqReader.read()) != -1)
                {
                    if (isCleavageSite((char) p1Residue, (char) p1PrimeResidue))
                    {
                        // Close the current fragment; the P1' residue starts the next one.
                        List<DigestFragment> fragments = fragWindow.push(frag.toString());
                        if (fragments != null)
                        {
                            outFrags.addAll(fragments);
                        }
                        frag.setLength(0);
                    }

                    frag.append((char) p1PrimeResidue);
                    p1Residue = p1PrimeResidue;
                }

                // Flush the final fragment and whatever is still held in the window.
                List<DigestFragment> fragments = fragWindow.lastPush(frag.toString());
                if (fragments != null)
                {
                    outFrags.addAll(fragments);
                }
            }
        }
        catch (IOException e)
        {
            throw new RuntimeException(e);
        }
    }

    // Apply the DigestSettings limits.
    for (Iterator<DigestFragment> fragIter = outFrags.iterator(); fragIter.hasNext(); )
    {
        if (! inSettings.meetsCriteria(fragIter.next()))
        {
            fragIter.remove();
        }
    }

    return outFrags;
}
//--------------------------------------------------------------------------
/**
 * Digests a protein and then joins the resulting fragments according to the
 * protein's x-links.
 * <p>
 * The raw fragments are produced by {@link #simpleDigestion} using a clone of
 * the settings with the length and mass limits cleared, so that a fragment is
 * not discarded before it has been linked. Then, for each x-link:
 * <ul>
 *   <li>a fragment containing both the donor and the acceptor site receives
 *       the x-link directly (wrapped in a container fragment if it has no
 *       chains yet);</li>
 *   <li>otherwise every fragment containing the donor site is combined with
 *       every fragment containing the acceptor site into a new linked
 *       fragment, and the unlinked originals are removed from the pool.</li>
 * </ul>
 * The caller's limits are applied to the final pool.
 *
 * @param inProtein  the protein to digest
 * @param inSettings the digest settings; not modified (a clone is adjusted instead)
 * @return a mutable list of the fragments that meet the settings' criteria
 */
private List<DigestFragment> complexDigestion(Protein inProtein,
                                              DigestSettings inSettings)
{
    // Initially disable limits when finding fragments.
    // We'll apply the desired settings after all the fragments have been linked up.
    DigestSettings settingsWithoutLimits = inSettings.clone();
    settingsWithoutLimits.setMinFragmentLength(null);
    settingsWithoutLimits.setMaxFragmentLength(null);
    settingsWithoutLimits.setMinFragmentMass(null);
    settingsWithoutLimits.setMaxFragmentMass(null);

    List<DigestFragment> rawFrags = simpleDigestion(inProtein, settingsWithoutLimits);

    // digest() calls this method for native digests without first checking that
    // the protein has any x-links, so guard the iteration here.
    if (CollectionUtil.hasValues(inProtein.getXLinks()))
    {
        // For ea. x-link, bind it to the combinations of raw fragments
        for (ProteinXLink xlink : inProtein.getXLinks())
        {
            List<DigestFragment> donorFrags = new ArrayList<>();
            List<DigestFragment> acceptorFrags = new ArrayList<>();
            // Pool changes are deferred so rawFrags is not modified while iterating it.
            Set<DigestFragment> fragsToAdd = new HashSet<>();
            Set<DigestFragment> fragsToRemove = new HashSet<>();

            for (DigestFragment frag : rawFrags)
            {
                DigestFragment donorChain = null;
                if (CollectionUtil.hasValues(frag.getChains()))
                {
                    donorChain = (DigestFragment) frag.getChain(xlink.getDonorChainId());
                }
                else if (xlink.getDonorChainId().equals(frag.getID()))
                {
                    donorChain = frag;
                }

                // Donor site is within this frag?
                boolean hasDonorSite = donorChain != null
                                       && xlink.getDonorPosition() >= donorChain.getBegin()
                                       && xlink.getDonorPosition() <= donorChain.getEnd();

                DigestFragment acceptorChain = null;
                if (CollectionUtil.hasValues(frag.getChains()))
                {
                    acceptorChain = (DigestFragment) frag.getChain(xlink.getAcceptorChainId());
                }
                else if (xlink.getAcceptorChainId().equals(frag.getID()))
                {
                    acceptorChain = frag;
                }

                // Acceptor site is within this frag?
                boolean hasAcceptorSite = acceptorChain != null
                                          && xlink.getAcceptorPosition() >= acceptorChain.getBegin()
                                          && xlink.getAcceptorPosition() <= acceptorChain.getEnd();

                if (hasDonorSite && hasAcceptorSite)
                {
                    // Both ends of the x-link fall within this one fragment.
                    DigestFragment linkedFrag;
                    if (CollectionUtil.hasValues(frag.getChains()))
                    {
                        linkedFrag = frag;
                    }
                    else
                    {
                        linkedFrag = new DigestFragment();
                        linkedFrag.addChain(frag.clone());

                        // Add it back to the pool
                        fragsToAdd.add(linkedFrag);
                        fragsToRemove.add(frag);
                    }

                    linkedFrag.addXLink(xlink);
                }
                else
                {
                    if (hasDonorSite) donorFrags.add(frag);
                    if (hasAcceptorSite) acceptorFrags.add(frag);
                }
            }

            // Link the donors & acceptors in all possible combinations
            for (DigestFragment donorFrag : donorFrags)
            {
                for (DigestFragment acceptorFrag : acceptorFrags)
                {
                    DigestFragment linkedFrag;
                    if (CollectionUtil.hasValues(donorFrag.getChains()))
                    {
                        linkedFrag = (DigestFragment) donorFrag.clone();
                    }
                    else
                    {
                        linkedFrag = new DigestFragment();
                        linkedFrag.addChain(donorFrag.clone());
                    }

                    // Add the acceptor chain (if it isn't already present)
                    Protein acceptorChain = linkedFrag.getChain(xlink.getAcceptorChainId());
                    if (null == acceptorChain)
                    {
                        linkedFrag.addChain(acceptorFrag.clone());
                    }

                    linkedFrag.addXLink(xlink);

                    // Add it back to the pool
                    rawFrags.add(linkedFrag);
                }
            }

            // Now remove the raw frags that were linked.
            for (DigestFragment donorFrag : donorFrags)
            {
                rawFrags.remove(donorFrag);
            }

            for (DigestFragment acceptorFrag : acceptorFrags)
            {
                rawFrags.remove(acceptorFrag);
            }

            for (DigestFragment frag : fragsToRemove)
            {
                rawFrags.remove(frag);
            }

            rawFrags.addAll(fragsToAdd);
        }
    }

    // Apply the DigestSettings limits.
    for (Iterator<DigestFragment> fragIter = rawFrags.iterator(); fragIter.hasNext(); )
    {
        if (! inSettings.meetsCriteria(fragIter.next()))
        {
            fragIter.remove();
        }
    }

    return rawFrags;
}
//##########################################################################
// INNER CLASS
//##########################################################################
/**
 * Fixed-size window over consecutive fully-cleaved fragments of one chain.
 * <p>
 * Fragments are pushed in sequence order. Slot 0 holds the fragment before the
 * "current" one, slot 1 the current fragment, and the remaining slots the
 * fragments that follow it. Once the window is full, each push shifts it by
 * one and emits the current fragment plus its extensions over following
 * fragments (missed cleavages), limited by the digest settings.
 */
protected class SlidingWindow
{
    private String mChainId;
    private AminoAcidSet mAminoAcidSet;
    private StringBuilder[] mFrags;
    private DigestSettings mDigestSettings;
    // Resolved once at construction (null settings / null value -> 0) so that the
    // window size and the flush count in lastPush() always agree.
    private int mMaxMissedCleavages;
    // 1-based sequence position at which the current fragment (slot 1) begins.
    private int mIndex = 1;
    // Number of slots of mFrags filled so far.
    private int mLength;
    // Ordinal of the current fragment; incremented on every shift.
    private int mCurrentFragIndex = 0;

    //-----------------------------------------------------------------------
    /**
     * @param inChainId  the ID assigned to every fragment produced
     * @param inAASet    the amino acid set assigned to every fragment produced
     * @param inSettings the digest settings; may be null, in which case no
     *                   missed cleavages and no length limits are applied
     */
    public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings)
    {
        mChainId = inChainId;
        mAminoAcidSet = inAASet;
        mDigestSettings = inSettings;

        int maxMissedCleavages = 0;
        if (inSettings != null
            && inSettings.getMaxMissedCleavages() != null)
        {
            maxMissedCleavages = inSettings.getMaxMissedCleavages();
        }
        mMaxMissedCleavages = maxMissedCleavages;

        mFrags = new StringBuilder[mMaxMissedCleavages + 3];
        // The first two slots start out as empty placeholders, so the first real
        // fragment moves into slot 1 on the first shift.
        mFrags[0] = new StringBuilder();
        mFrags[1] = new StringBuilder();
        mLength = 2;
    }

    //-----------------------------------------------------------------------
    /**
     * Adds the next fully-cleaved fragment to the window.
     *
     * @param inFrag the fragment's sequence
     * @return the digest fragments that start at the window's current fragment,
     *         or null if the window is still filling or nothing qualified
     */
    public List<DigestFragment> push(String inFrag)
    {
        if (mLength < mFrags.length)
        {
            // Still filling the window
            mFrags[mLength++] = new StringBuilder(inFrag);
            return null;
        }

        // Shift everything down one slot and reuse the evicted builder for the new fragment.
        StringBuilder recycled = mFrags[0];
        System.arraycopy(mFrags, 1, mFrags, 0, mFrags.length - 1);
        recycled.setLength(0);
        recycled.append(inFrag);
        mFrags[mFrags.length - 1] = recycled;

        // Slot 0 now holds the fragment that was current; step past it.
        mIndex += mFrags[0].length();
        mCurrentFragIndex++;

        return evaluateCurrentFrag();
    }

    //-----------------------------------------------------------------------
    /**
     * Adds the final fragment of the sequence and then pushes empty fragments
     * until every fragment still held in the window has been evaluated.
     *
     * @param inFrag the final fragment's sequence
     * @return all digest fragments emitted while flushing, or null if there were none
     */
    public List<DigestFragment> lastPush(String inFrag)
    {
        List<DigestFragment> outFrags = push(inFrag);

        for (int i = 0; i <= mMaxMissedCleavages; i++)
        {
            List<DigestFragment> frags = push("");
            if (frags != null)
            {
                if (null == outFrags)
                {
                    outFrags = frags;
                }
                else
                {
                    outFrags.addAll(frags);
                }
            }
        }

        return outFrags;
    }

    //-----------------------------------------------------------------------
    /**
     * Builds the digest fragments that begin with the current fragment (slot 1):
     * the fragment itself and its concatenations with the following slots, up
     * to but excluding the last slot. Stops at the first empty slot or once
     * the concatenation exceeds the max fragment length; concatenations
     * shorter than the min fragment length are skipped.
     *
     * @return a new list of digest fragments, or null if none qualified
     */
    private List<DigestFragment> evaluateCurrentFrag()
    {
        // If the current fragment alone is already too long, so is every extension of it.
        if (mDigestSettings != null
            && mDigestSettings.getMaxFragmentLength() != null
            && mFrags[1].length() > mDigestSettings.getMaxFragmentLength())
        {
            return null;
        }

        List<DigestFragment> fragments = null;
        StringBuilder frag = new StringBuilder();

        for (int i = 1; i < mFrags.length - 1; i++)
        {
            if (mFrags[i].length() == 0) break;

            frag.append(mFrags[i]);

            // Too short so far? Keep extending.
            if (mDigestSettings != null
                && mDigestSettings.getMinFragmentLength() != null
                && frag.length() < mDigestSettings.getMinFragmentLength())
            {
                continue;
            }

            // Don't get too big
            if (mDigestSettings != null
                && mDigestSettings.getMaxFragmentLength() != null
                && frag.length() > mDigestSettings.getMaxFragmentLength())
            {
                break;
            }

            DigestFragment digestFrag = allocateNewDigestFragment();
            digestFrag.setSequence(frag.toString());
            digestFrag.setBegin(mIndex);
            digestFrag.setEnd(mIndex + frag.length() - 1);
            // i - 1 following fragments have been appended to the current one.
            digestFrag.setNumUncleavedSites(i - 1);
            digestFrag.setBeginFragIndex(mCurrentFragIndex);
            digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1));

            if (null == fragments)
            {
                fragments = new ArrayList<>(mMaxMissedCleavages + 1);
            }

            fragments.add(digestFrag);
        }

        return fragments;
    }

    //-----------------------------------------------------------------------
    /**
     * @return a new DigestFragment carrying this window's chain ID and amino acid set
     */
    private DigestFragment allocateNewDigestFragment()
    {
        DigestFragment frag = new DigestFragment();
        frag.setID(mChainId);
        frag.setAminoAcidSet(mAminoAcidSet);

        return frag;
    }
}
}