com.hfg.bio.seq.ProteinDigest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of com_hfg Show documentation
Show all versions of com_hfg Show documentation
com.hfg xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import com.hfg.bio.AminoAcidSet;
import com.hfg.bio.CTerminalGroup;
import com.hfg.bio.DigestFragment;
import com.hfg.bio.DigestSettings;
import com.hfg.bio.NTerminalGroup;
import com.hfg.bio.Protease;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;
//------------------------------------------------------------------------------
/**
Performs a digestion of a protein via protease(s).
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class ProteinDigest
{
private DigestSettings mSettings;
private Protein mProtein;
private Set mProteases;
//###########################################################################
// CONSTRUCTORS
//###########################################################################
//---------------------------------------------------------------------------
public ProteinDigest(Protein inProtein, DigestSettings inSettings)
{
mProtein = inProtein;
mSettings = inSettings;
mProteases = inSettings.getProteases();
if (! CollectionUtil.hasValues(mProteases))
{
throw new RuntimeException("No protease(s) specified!");
}
}
//###########################################################################
// PUBLIC METHODS
//###########################################################################
//---------------------------------------------------------------------------
public DigestSettings getSettings()
{
return mSettings;
}
//---------------------------------------------------------------------------
public Protein getProtein()
{
return mProtein;
}
//---------------------------------------------------------------------------
public List getFragments()
{
List outFrags;
if (getSettings().getAlkylatedCys() != null)
{
Protein proteinCopy = getProtein().clone();
proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE);
// Reflect that the cysteines in the protein are alkylated.
AminoAcidSet aaSet = new AminoAcidSet(getProtein().getAminoAcidSet());
aaSet.setMapping('c', getSettings().getAlkylatedCys());
aaSet.setMapping('C', getSettings().getAlkylatedCys());
proteinCopy.setAminoAcidSet(aaSet);
// Are there other types of x-links?
if (CollectionUtil.hasValues(proteinCopy.getXLinks()))
{
outFrags = complexDigestion(proteinCopy, getSettings());
}
else
{
outFrags = simpleDigestion(proteinCopy, getSettings());
}
}
else
{
// Native digest. Deal with X-links
outFrags = complexDigestion(getProtein(), getSettings());
}
/* TODO: What was this for?
Set fragSet = new OrderedSet<>(outFrags);
outFrags.clear();
outFrags.addAll(fragSet);
*/
return outFrags;
}
//###########################################################################
// PRIVATE METHODS
//###########################################################################
//--------------------------------------------------------------------------
public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue)
{
char p1Residue = Character.toUpperCase(inP1Residue);
char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue);
boolean isCleavageSite = false;
for (Protease protease : mProteases)
{
isCleavageSite = ((protease.getP1Specificity().indexOf(p1Residue) >= 0
&& protease.getExcludedP1PrimeResidues().indexOf(p1PrimeResidue) == -1)
|| (protease.getP1PrimeSpecificity().indexOf(p1PrimeResidue) >= 0
&& protease.getExcludedP1Residues().indexOf(p1Residue) == -1));
if (isCleavageSite) break;
}
return isCleavageSite;
}
//--------------------------------------------------------------------------
private void addPrecedingAndTrailingResidues(List inFragments, Protein inProtein)
{
for (DigestFragment fragment : inFragments)
{
int position = fragment.getBegin() - 1;
if (position > 0)
{
fragment.setPrecedingResidue(inProtein.residueAt(position));
}
position = fragment.getEnd() + 1;
if (position <= inProtein.length())
{
fragment.setTrailingResidue(inProtein.residueAt(position));
}
}
}
//--------------------------------------------------------------------------
private List simpleDigestion(Protein inProtein, DigestSettings inSettings)
{
List outFrags = new ArrayList<>();
if (CollectionUtil.hasValues(inProtein.getChains()))
{
for (Protein chain : inProtein.getChains())
{
outFrags.addAll(simpleDigestion(chain, inSettings));
}
}
else
{
AminoAcidSet aaSet;
if (inSettings.getAlkylatedCys() != null)
{
aaSet = new AminoAcidSet(inProtein.getAminoAcidSet());
aaSet.setMapping('c', inSettings.getAlkylatedCys());
aaSet.setMapping('C', inSettings.getAlkylatedCys());
}
else
{
aaSet = inProtein.getAminoAcidSet().clone();
}
NTerminalGroup nTerminalGroup = inProtein.getAminoAcidSet().getNTerminalGroup();
if (! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
{
aaSet.setNTerminalGroup(NTerminalGroup.UNMODIFIED_N_TERMINUS);
}
CTerminalGroup cTerminalGroup = inProtein.getAminoAcidSet().getCTerminalGroup();
if (! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
{
aaSet.setCTerminalGroup(CTerminalGroup.UNMODIFIED_C_TERMINUS);
}
try
{
Reader seqReader = null;
try
{
seqReader = inProtein.getSequenceReader();
SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), aaSet, inSettings);
int p1Residue = seqReader.read();
if (p1Residue != -1)
{
StringBuilder frag = new StringBuilder((char)p1Residue + "");
int p1PrimeResidue;
while ((p1PrimeResidue = seqReader.read()) != -1)
{
if (isCleavageSite((char)p1Residue, (char)p1PrimeResidue))
{
List fragments = fragWindow.push(frag.toString());
if (fragments != null)
{
for (DigestFragment fragment : fragments)
{
fragment.setSrcChainId(inProtein.getID());
// Make sure any custom termini are passed along
if (1 == fragment.getBegin()
&& ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
{
AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone();
fragAASet.setNTerminalGroup(nTerminalGroup);
fragment.setAminoAcidSet(fragAASet);
}
if (inProtein.length() == fragment.getEnd()
&& ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
{
AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone();
fragAASet.setCTerminalGroup(cTerminalGroup);
fragment.setAminoAcidSet(fragAASet);
}
}
addPrecedingAndTrailingResidues(fragments, inProtein);
outFrags.addAll(fragments);
}
frag.setLength(0);
frag.append((char)p1PrimeResidue);
}
else
{
frag.append((char)p1PrimeResidue);
}
p1Residue = p1PrimeResidue;
}
List fragments = fragWindow.lastPush(frag.toString());
if (fragments != null)
{
for (DigestFragment fragment : fragments)
{
fragment.setSrcChainId(inProtein.getID());
// Make sure any custom termini are passed along
if (1 == fragment.getBegin()
&& ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS))
{
AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone();
fragAASet.setNTerminalGroup(nTerminalGroup);
fragment.setAminoAcidSet(fragAASet);
}
if (inProtein.length() == fragment.getEnd()
&& ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS))
{
AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone();
fragAASet.setCTerminalGroup(cTerminalGroup);
fragment.setAminoAcidSet(fragAASet);
}
}
addPrecedingAndTrailingResidues(fragments, inProtein);
outFrags.addAll(fragments);
}
}
}
finally
{
if (seqReader != null) seqReader.close();
}
}
catch (IOException e)
{
throw new RuntimeException(e);
}
}
// Apply the DigestSettings limits.
for (int i = 0; i < outFrags.size(); i++)
{
if (! inSettings.meetsCriteria(outFrags.get(i)))
{
outFrags.remove(i--);
}
}
return outFrags;
}
//--------------------------------------------------------------------------
private List complexDigestion(Protein inProtein, DigestSettings inSettings)
{
// Initially disable limits when finding fragments.
// We'll apply the desired settings after all the fragments have been linked up.
DigestSettings settingsWithoutLimits = inSettings.clone();
settingsWithoutLimits.setMinFragmentLength(null);
settingsWithoutLimits.setMaxFragmentLength(null);
settingsWithoutLimits.setMinFragmentMass(null);
settingsWithoutLimits.setMaxFragmentMass(null);
List rawFrags = simpleDigestion(inProtein, settingsWithoutLimits);
if (CollectionUtil.hasValues(inProtein.getXLinks()))
{
// For ea. x-link, bind it to the combinations of raw fragments
List orderedXLinks = new ArrayList<>(inProtein.getXLinks());
Collections.sort(orderedXLinks);
for (ProteinXLink xlink : orderedXLinks)
{
List donorFragIndexes = new ArrayList<>(5);
List acceptorFragIndexes = new ArrayList<>(5);
Set fragsToAdd = new HashSet<>(10);
Set fragsIndexesToRemove = new HashSet<>(10);
for (int fragIdx = 0; fragIdx < rawFrags.size(); fragIdx++)
{
DigestFragment frag = rawFrags.get(fragIdx);
boolean hasDonorSite = false;
if (frag.hasChains())
{
for (DigestFragment fragChain : (List) (Object) frag.getChains())
{
if (fragChain.getSrcChainId().equals(xlink.getDonorChainId())
&& xlink.getDonorPosition() >= fragChain.getBegin()
&& xlink.getDonorPosition() <= fragChain.getEnd())
{
// Donor site is within this frag.
hasDonorSite = true;
break;
}
}
}
else if (frag.getSrcChainId().equals(xlink.getDonorChainId())
&& xlink.getDonorPosition() >= frag.getBegin()
&& xlink.getDonorPosition() <= frag.getEnd())
{
hasDonorSite = true;
}
boolean hasAcceptorSite = false;
if (CollectionUtil.hasValues(frag.getChains()))
{
for (DigestFragment fragChain : (List) (Object) frag.getChains())
{
if (fragChain.getSrcChainId().equals(xlink.getAcceptorChainId())
&& xlink.getAcceptorPosition() >= fragChain.getBegin()
&& xlink.getAcceptorPosition() <= fragChain.getEnd())
{
// Acceptor site is within this frag.
hasAcceptorSite = true;
break;
}
}
}
else if (frag.getSrcChainId().equals(xlink.getAcceptorChainId())
&& xlink.getAcceptorPosition() >= frag.getBegin()
&& xlink.getAcceptorPosition() <= frag.getEnd())
{
hasAcceptorSite = true;
}
if (hasDonorSite && hasAcceptorSite)
{
DigestFragment linkedFrag;
if (CollectionUtil.hasValues(frag.getChains()))
{
linkedFrag = frag;
}
else
{
linkedFrag = new DigestFragment();
linkedFrag.addChain(frag.clone());
// Add it back to the pool
fragsToAdd.add(linkedFrag);
// Flag the original unlinked frag for removal
fragsIndexesToRemove.add(fragIdx);
}
String donorFragId = null;
String acceptorFragId = null;
for (DigestFragment fragChain : (List) (Object) linkedFrag.getChains())
{
if (fragChain.getSrcChainId().equals(xlink.getDonorChainId())
&& xlink.getDonorPosition() >= fragChain.getBegin()
&& xlink.getDonorPosition() <= fragChain.getEnd())
{
donorFragId = fragChain.getID();
}
if (fragChain.getSrcChainId().equals(xlink.getAcceptorChainId())
&& xlink.getAcceptorPosition() >= fragChain.getBegin()
&& xlink.getAcceptorPosition() <= fragChain.getEnd())
{
acceptorFragId = fragChain.getID();
}
}
linkedFrag.addXLink(xlink.clone()
.setDonorChainId(donorFragId)
.setAcceptorChainId(acceptorFragId));
}
else
{
if (hasDonorSite) donorFragIndexes.add(fragIdx);
if (hasAcceptorSite) acceptorFragIndexes.add(fragIdx);
}
}
// Link the donors & acceptors in all possible combinations
for (int donorFragIndex : donorFragIndexes)
{
DigestFragment donorFrag = rawFrags.get(donorFragIndex);
String donorFragId = null;
if (CollectionUtil.hasValues(donorFrag.getChains()))
{
for (DigestFragment linkedChain : (Collection) (Object) donorFrag.getChains())
{
if (linkedChain.getSrcChainId().equals(xlink.getDonorChainId())
&& xlink.getDonorPosition() >= linkedChain.getBegin()
&& xlink.getDonorPosition() <= linkedChain.getEnd())
{
donorFragId = linkedChain.getID();
break;
}
}
}
else
{
donorFragId = donorFrag.getID();
}
for (int acceptorFragIndex : acceptorFragIndexes)
{
DigestFragment acceptorFrag = rawFrags.get(acceptorFragIndex);
String acceptorFragId = null;
DigestFragment linkedFrag;
if (CollectionUtil.hasValues(donorFrag.getChains()))
{
linkedFrag = (DigestFragment) donorFrag.clone();
}
else
{
linkedFrag = new DigestFragment();
donorFrag = (DigestFragment) donorFrag.clone()
.setID(donorFrag.getID() + ":" + donorFrag.getBeginFragIndex() + (donorFrag.getEndFragIndex() == donorFrag.getBeginFragIndex() ? "" : ".." + donorFrag.getEndFragIndex()));
donorFragId = donorFrag.getID();
linkedFrag.addChain(donorFrag);
}
// Add the acceptor chain (if it isn't already present)
boolean containsAcceptorFrag = false;
for (DigestFragment linkedChain : (Collection) (Object) linkedFrag.getChains())
{
if (linkedChain.getSrcChainId().equals(xlink.getAcceptorChainId())
&& acceptorFrag.getBegin() == linkedChain.getBegin()
&& acceptorFrag.getEnd() == linkedChain.getEnd())
{
acceptorFragId = linkedChain.getID();
containsAcceptorFrag = true;
break;
}
}
if (! containsAcceptorFrag)
{
acceptorFrag = (DigestFragment) acceptorFrag.clone()
.setID(acceptorFrag.getID() + ":" + acceptorFrag.getBeginFragIndex() + (acceptorFrag.getEndFragIndex() == acceptorFrag.getBeginFragIndex() ? "" : ".." + acceptorFrag.getEndFragIndex()));
acceptorFragId = acceptorFrag.getID();
linkedFrag.addChain(acceptorFrag);
}
linkedFrag.addXLink(xlink.clone()
.setDonorChainId(donorFragId)
.setAcceptorChainId(acceptorFragId));
// Add it back to the pool
rawFrags.add(linkedFrag);
}
}
// Now remove the raw frags that were linked.
// We have to remove them in reverse order for all the indices to be valid
fragsIndexesToRemove.addAll(donorFragIndexes);
fragsIndexesToRemove.addAll(acceptorFragIndexes);
if (CollectionUtil.hasValues(fragsIndexesToRemove))
{
List orderedFragIndices = new ArrayList<>(fragsIndexesToRemove);
Collections.sort(orderedFragIndices);
Collections.reverse(orderedFragIndices);
for (int fragIndex : orderedFragIndices)
{
rawFrags.remove(fragIndex);
}
}
rawFrags.addAll(fragsToAdd);
}
}
// Apply the DigestSettings limits.
for (int i = 0; i < rawFrags.size(); i++)
{
if (! inSettings.meetsCriteria(rawFrags.get(i)))
{
rawFrags.remove(i--);
}
}
return rawFrags;
}
//##########################################################################
// INNER CLASS
//##########################################################################
protected class SlidingWindow
{
private String mChainId;
private AminoAcidSet mAminoAcidSet;
private StringBuilder[] mFrags;
private DigestSettings mDigestSettings;
private int mIndex = 1;
private int mLength;
private int mCurrentFragIndex = 0;
//-----------------------------------------------------------------------
public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings)
{
mChainId = inChainId;
mAminoAcidSet = inAASet;
mDigestSettings = inSettings;
mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3];
mFrags[0] = new StringBuilder();
mFrags[1] = new StringBuilder();
mLength = 2;
}
//-----------------------------------------------------------------------
public List push(String inFrag)
{
List outFrags = null;
if (mLength < mFrags.length)
{
// Still filling the window
mFrags[mLength++] = new StringBuilder(inFrag);
}
else
{
StringBuilder tmp = mFrags[0];
for (int i = 1; i < mFrags.length; i++)
{
mFrags[i - 1] = mFrags[i];
}
mFrags[mFrags.length - 1] = tmp;
mFrags[mFrags.length - 1].setLength(0);
mFrags[mFrags.length - 1].append(inFrag);
mIndex += mFrags[0].length();
mCurrentFragIndex++;
List frags = evaluateCurrentFrag();
if (frags != null)
{
outFrags = new ArrayList<>(frags);
}
}
return outFrags;
}
//-----------------------------------------------------------------------
public List lastPush(String inFrag)
{
List outFrags = null;
List frags = push(inFrag);
if (frags != null)
{
outFrags = new ArrayList<>(frags);
}
for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++)
{
frags = push("");
if (frags != null)
{
if (null == outFrags)
{
outFrags = new ArrayList<>(frags.size());
}
outFrags.addAll(frags);
}
}
return outFrags;
}
//-----------------------------------------------------------------------
private List evaluateCurrentFrag()
{
List fragments = null;
int maxMissedCleavages = 0;
if (mDigestSettings != null
&& mDigestSettings.getMaxMissedCleavages() != null)
{
maxMissedCleavages = mDigestSettings.getMaxMissedCleavages();
}
if (null == mDigestSettings
|| null == mDigestSettings.getMaxFragmentLength()
|| mFrags[1].length() <= mDigestSettings.getMaxFragmentLength())
{
StringBuilder frag = new StringBuilder();
for (int i = 1; i < mFrags.length - 1; i++)
{
if (mFrags[i].length() == 0) break;
frag.append(mFrags[i]);
if (null == mDigestSettings
|| null == mDigestSettings.getMinFragmentLength()
|| frag.length() >= mDigestSettings.getMinFragmentLength())
{
// Don't get too big
if (mDigestSettings != null
&& mDigestSettings.getMaxFragmentLength() != null
&& frag.length() > mDigestSettings.getMaxFragmentLength())
{
break;
}
DigestFragment digestFrag = allocateNewDigestFragment();
digestFrag.setSequence(frag.toString());
digestFrag.setBegin(mIndex);
digestFrag.setEnd(mIndex + frag.length() - 1);
digestFrag.setNumUncleavedSites(i - 1);
digestFrag.setBeginFragIndex(mCurrentFragIndex);
digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1));
if (null == fragments)
{
fragments = new ArrayList<>(maxMissedCleavages);
}
fragments.add(digestFrag);
}
}
}
return fragments;
}
//-----------------------------------------------------------------------
private DigestFragment allocateNewDigestFragment()
{
DigestFragment frag = new DigestFragment();
frag.setID(mChainId);
frag.setAminoAcidSet(mAminoAcidSet);
return frag;
}
}
}