All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.Protease Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio;

import java.util.*;
import java.io.Reader;
import java.io.IOException;

import com.hfg.bio.seq.Protein;
import com.hfg.bio.seq.ProteinXLink;
import com.hfg.bio.seq.ProteinXLinkType;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;

//------------------------------------------------------------------------------
/**
 * Chemical or biological proteolytic agent which can be used to theoretically
 * digest a Protein.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

/*

   What to do if...

      - X-links are defined?

          - Should the DigestSettings include an optional cys-alkylation form? This would
            allow the user to specify whether the digest should be a 'native digest' or
            whether the cysteines (including those specified as x-linked) should be considered
            reduced and alkylated.

      - Multiple identical chains exist?

          - If the fragments aren't x-linked, concatenate the chain ids in the DigestFragment: 'H1/H2'

*/

public class Protease implements Comparable
{
   //##########################################################################
   // PRIVATE FIELDS
   //##########################################################################

   private static final Set sValues = new OrderedSet<>(12);

   private final String mName;

   private String mP6Specificity = "";
   private String mExcludedP6Residues = "";

   private String mP5Specificity = "";
   private String mExcludedP5Residues = "";

   private String mP4Specificity = "";
   private String mExcludedP4Residues = "";

   private String mP3Specificity = "";
   private String mExcludedP3Residues = "";

   private String mP2Specificity = "";
   private String mExcludedP2Residues = "";
   
   private String mP1Specificity = "";
   private String mExcludedP1Residues = "";
   
   
   private String mP1PrimeSpecificity   = "";
   private String mExcludedP1PrimeResidues = "";
   
   private String mP2PrimeSpecificity   = "";
   private String mExcludedP2PrimeResidues = "";
   
   private String mP3PrimeSpecificity   = "";
   private String mExcludedP3PrimeResidues = "";

   private boolean mLocked;

   private static final int NUM_N_TERMINAL_POSITIONS = 6;
   private static final int NUM_C_TERMINAL_POSITIONS = 3;
   
   //##########################################################################
   // PUBLIC FIELDS
   //##########################################################################

   public static final Protease TRYPSIN      = new Protease("Trypsin");
   public static final Protease LYS_C        = new Protease("Lys-C");
   public static final Protease CHYMOTRYPSIN = new Protease("Chymotrypsin");
   public static final Protease GLU_C        = new Protease("Glu-C");
   public static final Protease ASP_N        = new Protease("Asp-N");
   public static final Protease ASP_N_DE     = new Protease("Asp-N (DE)");

   /**
    Human thrombin protease.
    
Uses data from Gallwitz, M. et. al. (2012). "The extended cleavage specificity of human thrombin." PLoS ONE, 7 (2).
Note: Experiments suggest a preference for P in P2 and R in P3'. */ public static final Protease THROMBIN = new Protease("Thrombin"); /** Tobacco Etch virus protease.
Uses data from Kostallas, G. et. al. (2011). "Substrate profiling of tobacco Etch virus protease using a novel Fluorescence-Assisted whole-cell assay." PLoS ONE, 6 (1).
*/ public static final Protease TEV = new Protease("TEV"); /** Human rhinovirus 3C protease.
Uses data from Cordingley, M.G. et. al. (1990). "Substrate requirements of human rhinovirus 3C protease for peptide cleavage in vitro." Journal of Biological Chemistry, 265 (16), 9062-9065.
*/ public static final Protease HRV_3C = new Protease("HRV 3C"); /** Factor Xa protease.
Uses data from Harris, J. et. al. (2000). "Rapid and general profiling of protease specificity by using combinatorial fluorogenic substrate libraries." PNAS, 97 (14), 7754-7759.
*/ public static final Protease FACTOR_Xa = new Protease("Factor Xa"); public static final Protease ENTEROKINASE = new Protease("Enterokinase"); static { TRYPSIN.setP1Specificity("KR") .setExcludedP1PrimeResidues("P") .lock(); LYS_C.setP1Specificity("K") .lock(); CHYMOTRYPSIN.setP1Specificity("YFWL") .lock(); GLU_C.setP1Specificity("E") .lock(); ASP_N.setP1PrimeSpecificity("D") .lock(); ASP_N_DE.setP1PrimeSpecificity("DE") .lock(); THROMBIN.setP1Specificity("R") .setP1PrimeSpecificity("SAGT") .setExcludedP2PrimeResidues("DE") .lock(); TEV.setP6Specificity("E") .setP5Specificity("RSGLVWCEAPQKN") .setP4Specificity("VLGARESWDTIKMNY") .setP3Specificity("Y") .setP2Specificity("FGASVCREQTHLPWDIN") .setP1Specificity("Q") .setP1PrimeSpecificity("GAVSRDECKMLQIT") .lock(); HRV_3C.setP5Specificity("EQRIDHF") .setP4Specificity("TAVDF") .setP3Specificity("L") .setP2Specificity("F") .setP1Specificity("Q") .setP1PrimeSpecificity("G") .setP2PrimeSpecificity("P") .lock(); FACTOR_Xa.setP4Specificity("ILPV") .setExcludedP3Residues("P") .setP2Specificity("GAFPSWY") .setP1Specificity("R") .lock(); ENTEROKINASE .setP5Specificity("D") .setP4Specificity("D") .setP3Specificity("D") .setP2Specificity("D") .setP1Specificity("K") .lock(); } //########################################################################## // CONSTRUCTORS //########################################################################## //-------------------------------------------------------------------------- public Protease(String inName) { mName = inName; sValues.add(this); } //########################################################################## // PUBLIC METHODS //########################################################################## //-------------------------------------------------------------------------- public static Protease[] values() { return sValues.toArray(new Protease[0]); } //-------------------------------------------------------------------------- public static Protease valueOf(String 
inName) { Protease requestedProtease = null; if (StringUtil.isSet(inName)) { for (Protease protease : sValues) { if (protease.name().equalsIgnoreCase(inName.trim())) { requestedProtease = protease; break; } } } return requestedProtease; } //-------------------------------------------------------------------------- @Override public int hashCode() { return name().hashCode(); } //-------------------------------------------------------------------------- @Override public boolean equals(Object inObj2) { return 0 == compareTo(inObj2); } //-------------------------------------------------------------------------- @Override public int compareTo(Object inObj2) { return inObj2 instanceof Protease ? CompareUtil.compare(name(), ((Protease)inObj2).name()) : -1; } //-------------------------------------------------------------------------- public String name() { return mName; } //-------------------------------------------------------------------------- @Override public String toString() { return name(); } //-------------------------------------------------------------------------- public Protease setP6Specificity(String inValue) { checkLock(); mP6Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP6Specificity() { return mP6Specificity; } //-------------------------------------------------------------------------- public Protease setExcludedP6Residues(String inValue) { checkLock(); mExcludedP6Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP6Residues() { return mExcludedP6Residues; } //-------------------------------------------------------------------------- public Protease setP5Specificity(String inValue) { checkLock(); mP5Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP5Specificity() { return mP5Specificity; } 
//-------------------------------------------------------------------------- public Protease setExcludedP5Residues(String inValue) { checkLock(); mExcludedP5Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP5Residues() { return mExcludedP5Residues; } //-------------------------------------------------------------------------- public Protease setP4Specificity(String inValue) { checkLock(); mP4Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP4Specificity() { return mP4Specificity; } //-------------------------------------------------------------------------- public Protease setExcludedP4Residues(String inValue) { checkLock(); mExcludedP4Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP4Residues() { return mExcludedP4Residues; } //-------------------------------------------------------------------------- public Protease setP3Specificity(String inValue) { checkLock(); mP3Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP3Specificity() { return mP3Specificity; } //-------------------------------------------------------------------------- public Protease setExcludedP3Residues(String inValue) { checkLock(); mExcludedP3Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP3Residues() { return mExcludedP3Residues; } //-------------------------------------------------------------------------- public Protease setP2Specificity(String inValue) { checkLock(); mP2Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP2Specificity() { return mP2Specificity; } 
//-------------------------------------------------------------------------- public Protease setExcludedP2Residues(String inValue) { checkLock(); mExcludedP2Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP2Residues() { return mExcludedP2Residues; } //-------------------------------------------------------------------------- public Protease setP1Specificity(String inValue) { checkLock(); mP1Specificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP1Specificity() { return mP1Specificity; } //-------------------------------------------------------------------------- public Protease setExcludedP1PrimeResidues(String inValue) { checkLock(); mExcludedP1PrimeResidues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP1PrimeResidues() { return mExcludedP1PrimeResidues; } //-------------------------------------------------------------------------- public Protease setP1PrimeSpecificity(String inValue) { checkLock(); mP1PrimeSpecificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP1PrimeSpecificity() { return mP1PrimeSpecificity; } //-------------------------------------------------------------------------- public Protease setExcludedP1Residues(String inValue) { checkLock(); mExcludedP1Residues = inValue; return this; } //-------------------------------------------------------------------------- public String getExcludedP1Residues() { return mExcludedP1Residues; } //-------------------------------------------------------------------------- public Protease setExcludedP2PrimeResidues(String inValue) { checkLock(); mExcludedP2PrimeResidues = inValue; return this; } //-------------------------------------------------------------------------- public String 
getExcludedP2PrimeResidues() { return mExcludedP2PrimeResidues; } //-------------------------------------------------------------------------- public Protease setP2PrimeSpecificity(String inValue) { checkLock(); mP2PrimeSpecificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP2PrimeSpecificity() { return mP2PrimeSpecificity; } //-------------------------------------------------------------------------- public Protease setP3PrimeSpecificity(String inValue) { checkLock(); mP3PrimeSpecificity = inValue; return this; } //-------------------------------------------------------------------------- public String getP3PrimeSpecificity() { return mP3PrimeSpecificity; } //-------------------------------------------------------------------------- public Protease lock() { mLocked = true; return this; } //-------------------------------------------------------------------------- public List digest(Protein inProtein, DigestSettings inSettings) { List outFrags; if (inSettings.getAlkylatedCys() != null) { Protein proteinCopy = inProtein.clone(); proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE); // Reflect that the cysteines in the protein are alkylated. AminoAcidSet aaSet = new AminoAcidSet(inProtein.getAminoAcidSet()); aaSet.setMapping('c', inSettings.getAlkylatedCys()); aaSet.setMapping('C', inSettings.getAlkylatedCys()); proteinCopy.setAminoAcidSet(aaSet); // Are there other types of x-links? if (CollectionUtil.hasValues(proteinCopy.getXLinks())) { outFrags = complexDigestion(proteinCopy, inSettings); } else { outFrags = simpleDigestion(proteinCopy, inSettings); } for (DigestFragment frag : outFrags) { frag.setAminoAcidSet(aaSet); } } else { // Native digest. 
Deal with X-links outFrags = complexDigestion(inProtein, inSettings); } return outFrags; } //-------------------------------------------------------------------------- public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue) { char p1Residue = Character.toUpperCase(inP1Residue); char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue); return ((mP1Specificity.indexOf(p1Residue) >= 0 && mExcludedP1PrimeResidues.indexOf(p1PrimeResidue) == -1) || (mP1PrimeSpecificity.indexOf(p1PrimeResidue) >= 0 && mExcludedP1Residues.indexOf(p1Residue) == -1)); } //-------------------------------------------------------------------------- public boolean isCleavageSite(CharSequence inNTerminalResidues, CharSequence inCTerminalResidues) { String nTerminalResidues = inNTerminalResidues.toString().toUpperCase(); String cTerminalResidues = inCTerminalResidues.toString().toUpperCase(); return ( ((mP6Specificity.length() == 0 && mExcludedP6Residues.length() == 0 ) || nTerminalResidues.length() < 6 || mP6Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) >= 0 || (mExcludedP6Residues.length() > 0 && mExcludedP6Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 6)) == -1)) && ((mP5Specificity.length() == 0 && mExcludedP5Residues.length() == 0 ) || nTerminalResidues.length() < 5 || mP5Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) >= 0 || (mExcludedP5Residues.length() > 0 && mExcludedP5Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 5)) == -1)) && ((mP4Specificity.length() == 0 && mExcludedP4Residues.length() == 0 ) || nTerminalResidues.length() < 4 || mP4Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) >= 0 || (mExcludedP4Residues.length() > 0 && mExcludedP4Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 4)) == -1)) && ((mP3Specificity.length() == 0 && mExcludedP3Residues.length() == 0 ) || nTerminalResidues.length() < 3 || 
mP3Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) >= 0 || (mExcludedP3Residues.length() > 0 && mExcludedP3Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 3)) == -1)) && ((mP2Specificity.length() == 0 && mExcludedP2Residues.length() == 0 ) || nTerminalResidues.length() < 2 || mP2Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) >= 0 || (mExcludedP2Residues.length() > 0 && mExcludedP2Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 2)) == -1)) && ((mP1Specificity.length() == 0 && mExcludedP1Residues.length() == 0 ) || nTerminalResidues.length() < 1 || (mP1Specificity.length() > 0 && mP1Specificity.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) >= 0) || (mExcludedP1Residues.length() > 0 && mExcludedP1Residues.indexOf(nTerminalResidues.charAt(nTerminalResidues.length() - 1)) == -1)) && ((mP1PrimeSpecificity.length() == 0 && mExcludedP1PrimeResidues.length() == 0 ) || cTerminalResidues.length() < 1 || (mP1PrimeSpecificity.length() > 0 && mP1PrimeSpecificity.indexOf(cTerminalResidues.charAt(0)) >= 0) || (mExcludedP1PrimeResidues.length() > 0 && mExcludedP1PrimeResidues.indexOf(cTerminalResidues.charAt(0)) == -1)) && ((mP2PrimeSpecificity.length() == 0 && mExcludedP2PrimeResidues.length() == 0) || cTerminalResidues.length() < 2 || (mP2PrimeSpecificity.length() > 0 && mP2PrimeSpecificity.indexOf(cTerminalResidues.charAt(1)) >= 0) || (mExcludedP2PrimeResidues.length() > 0 && mExcludedP2PrimeResidues.indexOf(cTerminalResidues.charAt(1)) == -1)) && ((mP3PrimeSpecificity.length() == 0 && mExcludedP3PrimeResidues.length() == 0) || cTerminalResidues.length() < 3 || (mP3PrimeSpecificity.length() > 0 && mP3PrimeSpecificity.indexOf(cTerminalResidues.charAt(2)) >= 0) || (mExcludedP3PrimeResidues.length() > 0 && mExcludedP3PrimeResidues.indexOf(cTerminalResidues.charAt(2)) == -1))); } //########################################################################## // 
PRIVATE METHODS //########################################################################## //-------------------------------------------------------------------------- private void checkLock() { if (mLocked) throw new UnmodifyableObjectException("This object is locked and cannot be modified."); } //-------------------------------------------------------------------------- private List simpleDigestion(Protein inProtein, DigestSettings inSettings) { List outFrags = new ArrayList<>(); if (CollectionUtil.hasValues(inProtein.getChains())) { for (Protein chain : inProtein.getChains()) { outFrags.addAll(simpleDigestion(chain, inSettings)); } } else { try { Reader seqReader = null; try { seqReader = inProtein.getSequenceReader(); SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), inProtein.getAminoAcidSet(), inSettings); int bufferSize = NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; int[] site = new int[bufferSize]; for (int i = 0; i < bufferSize; i++) { site[i] = -1; } StringBuilder nTerminalResidues = new StringBuilder(); StringBuilder cTerminalResidues = new StringBuilder(); int residue = seqReader.read(); if (residue != -1) { StringBuilder frag = new StringBuilder((char) residue + ""); site[bufferSize - 1] = residue; // Drops in on the right side and slides left while ((residue = seqReader.read()) != -1) { for (int i = 0; i < bufferSize - 1; i++) { site[i] = site[i + 1]; } site[bufferSize - 1] = residue; if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1]) { continue; } nTerminalResidues.setLength(0); for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) { nTerminalResidues.append((char) site[i]); } cTerminalResidues.setLength(0); for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) { cTerminalResidues.append((char) site[i]); } if (isCleavageSite(nTerminalResidues, cTerminalResidues)) { List fragments = fragWindow.push(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } frag.setLength(0); 
frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } else { frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } } // Process residues remaining in the site buffer while (hasUnprocessedBufferContent(site)) { for (int i = 0; i < bufferSize - 1; i++) { site[i] = site[i + 1]; } site[bufferSize - 1] = -1; if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1] || -1 == site[NUM_N_TERMINAL_POSITIONS]) { continue; } nTerminalResidues.setLength(0); for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) { int aa = site[i]; nTerminalResidues.append(aa != -1 ? (char) aa : ""); } cTerminalResidues.setLength(0); for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) { int aa = site[i]; cTerminalResidues.append(aa != -1 ? (char) aa : ""); } if (isCleavageSite(nTerminalResidues, cTerminalResidues)) { List fragments = fragWindow.push(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } frag.setLength(0); frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } else { frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } } List fragments = fragWindow.lastPush(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } } } finally { if (seqReader != null) seqReader.close(); } } catch (IOException e) { throw new RuntimeException(e); } } // Apply the DigestSettings limits. for (int i = 0; i < outFrags.size(); i++) { if (! 
inSettings.meetsCriteria(outFrags.get(i))) { outFrags.remove(i--); } } return outFrags; } //-------------------------------------------------------------------------- private boolean hasUnprocessedBufferContent(int[] inBuffer) { boolean hasUnprocessedContent = false; for (int i = NUM_N_TERMINAL_POSITIONS - 1; i < NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; i++) { if (inBuffer[i] != -1) { hasUnprocessedContent = true; break; } } return hasUnprocessedContent; } //-------------------------------------------------------------------------- private List complexDigestion(Protein inProtein, DigestSettings inSettings) { // Initially disable limits when finding fragments. // We'll apply the desired settings after all the fragments have been linked up. DigestSettings settingsWithoutLimits = inSettings.clone(); settingsWithoutLimits.setMinFragmentLength(null); settingsWithoutLimits.setMaxFragmentLength(null); settingsWithoutLimits.setMinFragmentMass(null); settingsWithoutLimits.setMaxFragmentMass(null); List rawFrags = simpleDigestion(inProtein, settingsWithoutLimits); // For ea. x-link, bind it to the combinations of raw fragments for (ProteinXLink xlink : inProtein.getXLinks()) { List donorFrags = new ArrayList<>(); List acceptorFrags = new ArrayList<>(); Set fragsToAdd = new HashSet<>(); Set fragsToRemove = new HashSet<>(); for (DigestFragment frag : rawFrags) { DigestFragment donorChain = null; if (CollectionUtil.hasValues(frag.getChains())) { donorChain = (DigestFragment) frag.getChain(xlink.getDonorChainId()); } else if (xlink.getDonorChainId().equals(frag.getID())) { donorChain = frag; } boolean hasDonorSite = false; if (donorChain != null && xlink.getDonorPosition() >= donorChain.getBegin() && xlink.getDonorPosition() <= donorChain.getEnd()) { // Donor site is within this frag. 
hasDonorSite = true; } DigestFragment acceptorChain = null; if (CollectionUtil.hasValues(frag.getChains())) { acceptorChain = (DigestFragment) frag.getChain(xlink.getAcceptorChainId()); } else if (xlink.getAcceptorChainId().equals(frag.getID())) { acceptorChain = frag; } boolean hasAcceptorSite = false; if (acceptorChain != null && xlink.getAcceptorPosition() >= acceptorChain.getBegin() && xlink.getAcceptorPosition() <= acceptorChain.getEnd()) { // Acceptor site is within this frag. hasAcceptorSite = true; } if (hasDonorSite && hasAcceptorSite) { DigestFragment linkedFrag; if (CollectionUtil.hasValues(frag.getChains())) { linkedFrag = frag; } else { linkedFrag = new DigestFragment(); linkedFrag.addChain(frag.clone()); // Add it back to the pool fragsToAdd.add(linkedFrag); fragsToRemove.add(frag); } linkedFrag.addXLink(xlink); } else { if (hasDonorSite) donorFrags.add(frag); if (hasAcceptorSite) acceptorFrags.add(frag); } } // Link the donors & acceptors in all possible combinations for (DigestFragment donorFrag : donorFrags) { for (DigestFragment acceptorFrag : acceptorFrags) { DigestFragment linkedFrag; if (CollectionUtil.hasValues(donorFrag.getChains())) { linkedFrag = (DigestFragment) donorFrag.clone(); } else { linkedFrag = new DigestFragment(); linkedFrag.addChain(donorFrag.clone()); } // Add the acceptor chain (if it isn't already present) Protein acceptorChain = linkedFrag.getChain(xlink.getAcceptorChainId()); if (null == acceptorChain) { linkedFrag.addChain(acceptorFrag.clone()); } linkedFrag.addXLink(xlink); // Add it back to the pool rawFrags.add(linkedFrag); } } // Now remove the raw frags that were linked. for (DigestFragment donorFrag : donorFrags) { rawFrags.remove(donorFrag); } for (DigestFragment acceptorFrag : acceptorFrags) { rawFrags.remove(acceptorFrag); } for (DigestFragment frag : fragsToRemove) { rawFrags.remove(frag); } rawFrags.addAll(fragsToAdd); } // Apply the DigestSettings limits. for (int i = 0; i < rawFrags.size(); i++) { if (! 
inSettings.meetsCriteria(rawFrags.get(i))) { rawFrags.remove(i--); } } return rawFrags; } //########################################################################## // INNER CLASS //########################################################################## protected class SlidingWindow { private String mChainId; private AminoAcidSet mAminoAcidSet; private StringBuilder[] mFrags; private DigestSettings mDigestSettings; private int mIndex = 1; private int mLength; private int mCurrentFragIndex = 0; //----------------------------------------------------------------------- public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings) { mChainId = inChainId; mAminoAcidSet = inAASet; mDigestSettings = inSettings; mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3]; mFrags[0] = new StringBuilder(); mFrags[1] = new StringBuilder(); mLength = 2; } //----------------------------------------------------------------------- public List push(String inFrag) { List outFrags = null; if (mLength < mFrags.length) { // Still filling the window mFrags[mLength++] = new StringBuilder(inFrag); } else { StringBuilder tmp = mFrags[0]; for (int i = 1; i < mFrags.length; i++) { mFrags[i - 1] = mFrags[i]; } mFrags[mFrags.length - 1] = tmp; mFrags[mFrags.length - 1].setLength(0); mFrags[mFrags.length - 1].append(inFrag); mIndex += mFrags[0].length(); mCurrentFragIndex++; List frags = evaluateCurrentFrag(); if (frags != null) { outFrags = new ArrayList<>(frags); } } return outFrags; } //----------------------------------------------------------------------- public List lastPush(String inFrag) { List outFrags = null; List frags = push(inFrag); if (frags != null) { outFrags = new ArrayList<>(frags); } for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++) { frags = push(""); if (frags != null) { if (null == outFrags) { outFrags = new ArrayList<>(frags.size()); } outFrags.addAll(frags); } } return outFrags; } 
//----------------------------------------------------------------------- private List evaluateCurrentFrag() { List fragments = null; int maxMissedCleavages = 0; if (mDigestSettings != null && mDigestSettings.getMaxMissedCleavages() != null) { maxMissedCleavages = mDigestSettings.getMaxMissedCleavages(); } if (null == mDigestSettings || null == mDigestSettings.getMaxFragmentLength() || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength()) { StringBuilder frag = new StringBuilder(); for (int i = 1; i < mFrags.length - 1; i++) { if (mFrags[i].length() == 0) break; frag.append(mFrags[i]); if (null == mDigestSettings || null == mDigestSettings.getMinFragmentLength() || frag.length() >= mDigestSettings.getMinFragmentLength()) { // Don't get too big if (mDigestSettings != null && mDigestSettings.getMaxFragmentLength() != null && frag.length() > mDigestSettings.getMaxFragmentLength()) { break; } DigestFragment digestFrag = allocateNewDigestFragment(); digestFrag.setSequence(frag.toString()); digestFrag.setBegin(mIndex); digestFrag.setEnd(mIndex + frag.length() - 1); digestFrag.setNumUncleavedSites(i - 1); digestFrag.setBeginFragIndex(mCurrentFragIndex); digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1)); if (null == fragments) { fragments = new ArrayList<>(maxMissedCleavages); } fragments.add(digestFrag); } } } return fragments; } //----------------------------------------------------------------------- private DigestFragment allocateNewDigestFragment() { DigestFragment frag = new DigestFragment(); frag.setID(mChainId); frag.setAminoAcidSet(mAminoAcidSet); return frag; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy