All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.Protease Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio;

import java.util.*;
import java.io.Reader;
import java.io.IOException;

import com.hfg.bio.seq.Protein;
import com.hfg.bio.seq.ProteinXLink;
import com.hfg.bio.seq.ProteinXLinkType;
import com.hfg.exception.UnmodifyableObjectException;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;


//------------------------------------------------------------------------------
/**
 * Chemical or biological proteolytic agent which can be used to theoretically
 * digest a Protein.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

/*

   What to do if...

      - X-links are defined?

          - Should the DigestSettings include an optional cys-alkylation form? This would
            allow the user to specify whether the digest should be a 'native digest' or
            whether the cysteines (including those specified as x-linked) should be considered
            reduced and alkylated.

      - Multiple identical chains exist?

          - If the fragments aren't x-linked, concatenate the chain ids in the DigestFragment: 'H1/H2'

*/

public class Protease implements Comparable
{
   //##########################################################################
   // PRIVATE FIELDS
   //##########################################################################

   // Registry of every Protease that has been constructed. It is declared ahead
   // of the public constants below because static initializers run in textual
   // order and each constructor call registers the new instance here.
   private static Set<Protease> sValues = new OrderedSet<>();

   private final String mName;

   // Residues after which (P1 position) the protease cleaves, and the P1'
   // residues which block that cleavage.
   private String mP1Specificity = "";
   private String mExcludedP1PrimeResidues = "";
   // Residues before which (P1' position) the protease cleaves (ex: Asp-N),
   // and the P1 residues which block that cleavage.
   private String mP1PrimeSpecificity = "";
   private String mExcludedP1Residues = "";

   // Once set, every setter throws via checkLock().
   private boolean mLocked;

   //##########################################################################
   // PUBLIC FIELDS
   //##########################################################################

   public static final Protease TRYPSIN      = new Protease("Trypsin");
   public static final Protease LYS_C        = new Protease("Lys-C");
   public static final Protease CHYMOTRYPSIN = new Protease("Chymotrypsin");
   public static final Protease GLU_C        = new Protease("Glu-C");
   public static final Protease ASP_N        = new Protease("Asp-N");
   public static final Protease ASP_N_DE     = new Protease("Asp-N (DE)");

   static
   {
      TRYPSIN.setP1Specificity("KR")
            .setExcludedP1PrimeResidues("P")
            .lock();

      LYS_C.setP1Specificity("K")
            .lock();

      CHYMOTRYPSIN.setP1Specificity("YFWL")
            .lock();

      GLU_C.setP1Specificity("E")
            .lock();

      ASP_N.setP1PrimeSpecificity("D")
            .lock();

      ASP_N_DE.setP1PrimeSpecificity("DE")
            .lock();
   }


   //##########################################################################
   // CONSTRUCTORS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    * Creates a new, unlocked protease without any cleavage specificity and
    * registers it so that it is visible to {@link #values()} and {@link #valueOf(String)}.
    *
    * @param inName the name of the protease
    */
   public Protease(String inName)
   {
      mName = inName;
      sValues.add(this);
   }

   //##########################################################################
   // PUBLIC METHODS
   //##########################################################################

   //--------------------------------------------------------------------------
   /**
    * @return a new array holding every registered protease
    */
   public static Protease[] values()
   {
      return sValues.toArray(new Protease[0]);
   }

   //--------------------------------------------------------------------------
   /**
    * Looks up a registered protease by name. The comparison ignores case and
    * the requested name is trimmed first.
    *
    * @param inName the name of the desired protease
    * @return the first registered protease with a matching name or null if the
    *         name isn't set or nothing matches
    */
   public static Protease valueOf(String inName)
   {
      if (! StringUtil.isSet(inName))
      {
         return null;
      }

      // Trim once instead of on every iteration.
      String requestedName = inName.trim();
      for (Protease protease : sValues)
      {
         if (protease.name().equalsIgnoreCase(requestedName))
         {
            return protease;
         }
      }

      return null;
   }

   //--------------------------------------------------------------------------
   @Override
   public int hashCode()
   {
      return name().hashCode();
   }

   //--------------------------------------------------------------------------
   /**
    * Two proteases are considered equal when {@link #compareTo(Object)} reports 0
    * (i.e. when their names compare as equal).
    */
   @Override
   public boolean equals(Object inObj2)
   {
      return 0 == compareTo(inObj2);
   }

   //--------------------------------------------------------------------------
   /**
    * Orders proteases by name. Anything that isn't a Protease (including null)
    * yields -1 instead of an exception; equals() relies on that.
    * The raw Comparable / Object signature is retained on purpose so that
    * existing callers are unaffected.
    */
   @Override
   public int compareTo(Object inObj2)
   {
      if (! (inObj2 instanceof Protease))
      {
         return -1;
      }

      return CompareUtil.compare(name(), ((Protease) inObj2).name());
   }

   //--------------------------------------------------------------------------
   public String name()
   {
      return mName;
   }

   //--------------------------------------------------------------------------
   @Override
   public String toString()
   {
      return name();
   }

   //--------------------------------------------------------------------------
   /**
    * Specifies the residues after which this protease cleaves.
    * isCleavageSite() upper-cases the sequence residues before testing them
    * against this value, so the residues should be given in upper case.
    *
    * @param inValue the P1 residues as a string of 1-letter codes. Null is stored as ""
    * @return this object to allow method chaining
    * @throws UnmodifyableObjectException if this object has been locked
    */
   public Protease setP1Specificity(String inValue)
   {
      checkLock();
      mP1Specificity = nullToEmpty(inValue);
      return this;
   }

   //--------------------------------------------------------------------------
   public String getP1Specificity()
   {
      return mP1Specificity;
   }


   //--------------------------------------------------------------------------
   /**
    * Specifies the P1' residues which prevent cleavage after a P1 residue.
    *
    * @param inValue the blocking P1' residues (upper case). Null is stored as ""
    * @return this object to allow method chaining
    * @throws UnmodifyableObjectException if this object has been locked
    */
   public Protease setExcludedP1PrimeResidues(String inValue)
   {
      checkLock();
      mExcludedP1PrimeResidues = nullToEmpty(inValue);
      return this;
   }

   //--------------------------------------------------------------------------
   public String getExcludedP1PrimeResidues()
   {
      return mExcludedP1PrimeResidues;
   }


   //--------------------------------------------------------------------------
   /**
    * Specifies the residues before which this protease cleaves.
    *
    * @param inValue the P1' residues (upper case). Null is stored as ""
    * @return this object to allow method chaining
    * @throws UnmodifyableObjectException if this object has been locked
    */
   public Protease setP1PrimeSpecificity(String inValue)
   {
      checkLock();
      mP1PrimeSpecificity = nullToEmpty(inValue);
      return this;
   }

   //--------------------------------------------------------------------------
   public String getP1PrimeSpecificity()
   {
      return mP1PrimeSpecificity;
   }


   //--------------------------------------------------------------------------
   /**
    * Specifies the P1 residues which prevent cleavage before a P1' residue.
    *
    * @param inValue the blocking P1 residues (upper case). Null is stored as ""
    * @return this object to allow method chaining
    * @throws UnmodifyableObjectException if this object has been locked
    */
   public Protease setExcludedP1Residues(String inValue)
   {
      checkLock();
      mExcludedP1Residues = nullToEmpty(inValue);
      return this;
   }

   //--------------------------------------------------------------------------
   public String getExcludedP1Residues()
   {
      return mExcludedP1Residues;
   }


   //--------------------------------------------------------------------------
   /**
    * Makes this protease read-only. Subsequent setter calls will throw an
    * UnmodifyableObjectException.
    *
    * @return this object to allow method chaining
    */
   public Protease lock()
   {
      mLocked = true;
      return this;
   }

   //--------------------------------------------------------------------------
   /**
    * Theoretically digests the specified protein.
    * <p>
    * If the settings specify an alkylated cysteine form, a copy of the protein
    * is digested with its disulfide x-links removed and with 'c' / 'C' mapped to
    * the alkylated form. Otherwise the protein is digested natively and its
    * x-links are used to join fragments.
    * </p>
    *
    * @param inProtein  the protein to digest. It is not modified by this method.
    * @param inSettings the digest settings (must not be null)
    * @return the fragments which meet the criteria of the settings
    */
   public List<DigestFragment> digest(Protein inProtein, DigestSettings inSettings)
   {
      if (null == inSettings.getAlkylatedCys())
      {
         // Native digest. Deal with X-links
         return complexDigestion(inProtein, inSettings);
      }

      Protein proteinCopy = inProtein.clone();
      proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE);

      // Reflect that the cysteines in the protein are alkylated.
      AminoAcidSet aaSet = new AminoAcidSet(inProtein.getAminoAcidSet());
      aaSet.setMapping('c', inSettings.getAlkylatedCys());
      aaSet.setMapping('C', inSettings.getAlkylatedCys());
      proteinCopy.setAminoAcidSet(aaSet);

      // Are there other types of x-links?
      List<DigestFragment> outFrags;
      if (CollectionUtil.hasValues(proteinCopy.getXLinks()))
      {
         outFrags = complexDigestion(proteinCopy, inSettings);
      }
      else
      {
         outFrags = simpleDigestion(proteinCopy, inSettings);
      }

      for (DigestFragment frag : outFrags)
      {
         frag.setAminoAcidSet(aaSet);
      }

      return outFrags;
   }

   //--------------------------------------------------------------------------
   /**
    * Tests whether this protease cleaves between the two specified residues.
    * Both residues are upper-cased before testing. The bond is cleavable if
    * the P1 residue is in the P1 specificity and the P1' residue isn't an
    * excluded P1' residue, or if the P1' residue is in the P1' specificity and
    * the P1 residue isn't an excluded P1 residue.
    *
    * @param inP1Residue      the residue on the N-terminal side of the bond
    * @param inP1PrimeResidue the residue on the C-terminal side of the bond
    * @return whether the bond between the residues is a cleavage site
    */
   public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue)
   {
      char p1Residue      = Character.toUpperCase(inP1Residue);
      char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue);

      boolean cleavesAfterP1 = mP1Specificity.indexOf(p1Residue) >= 0
                               && mExcludedP1PrimeResidues.indexOf(p1PrimeResidue) == -1;

      boolean cleavesBeforeP1Prime = mP1PrimeSpecificity.indexOf(p1PrimeResidue) >= 0
                                     && mExcludedP1Residues.indexOf(p1Residue) == -1;

      return cleavesAfterP1 || cleavesBeforeP1Prime;
   }

   //##########################################################################
   // PRIVATE METHODS
   //##########################################################################

   //--------------------------------------------------------------------------
   private void checkLock()
   {
      if (mLocked) throw new UnmodifyableObjectException("This object is locked and cannot be modified.");
   }

   //--------------------------------------------------------------------------
   // The specificity fields default to "" and isCleavageSite() calls indexOf()
   // on them, so a null must never be stored.
   private static String nullToEmpty(String inValue)
   {
      return (inValue != null ? inValue : "");
   }

   //--------------------------------------------------------------------------
   // Removes (in place) the fragments which don't meet the settings' criteria.
   private static void applyLimits(List<DigestFragment> inFrags, DigestSettings inSettings)
   {
      Iterator<DigestFragment> fragIterator = inFrags.iterator();
      while (fragIterator.hasNext())
      {
         if (! inSettings.meetsCriteria(fragIterator.next()))
         {
            fragIterator.remove();
         }
      }
   }

   //--------------------------------------------------------------------------
   /**
    * Digests the protein without regard to x-links. A protein with chains is
    * digested chain by chain; otherwise the sequence is streamed once, cut at
    * each cleavage site, and the pieces are fed through a SlidingWindow which
    * builds the fragments (including those with missed cleavages).
    *
    * @param inProtein  the protein or chain to digest
    * @param inSettings the digest settings
    * @return a new, modifiable list of the fragments which meet the settings' criteria
    */
   private List<DigestFragment> simpleDigestion(Protein inProtein, DigestSettings inSettings)
   {
      List<DigestFragment> outFrags = new ArrayList<>();

      if (CollectionUtil.hasValues(inProtein.getChains()))
      {
         for (Protein chain : inProtein.getChains())
         {
            outFrags.addAll(simpleDigestion(chain, inSettings));
         }
      }
      else
      {
         try (Reader seqReader = inProtein.getSequenceReader())
         {
            SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), inProtein.getAminoAcidSet(), inSettings);

            int p1Residue = seqReader.read();
            if (p1Residue != -1)
            {
               StringBuilder frag = new StringBuilder().append((char) p1Residue);
               int p1PrimeResidue;
               while ((p1PrimeResidue = seqReader.read()) != -1)
               {
                  if (isCleavageSite((char) p1Residue, (char) p1PrimeResidue))
                  {
                     // The piece is complete. Hand it to the window and start the next one.
                     List<DigestFragment> fragments = fragWindow.push(frag.toString());
                     if (fragments != null)
                     {
                        outFrags.addAll(fragments);
                     }
                     frag.setLength(0);
                  }

                  frag.append((char) p1PrimeResidue);
                  p1Residue = p1PrimeResidue;
               }

               // Push the final piece and flush what remains in the window.
               List<DigestFragment> fragments = fragWindow.lastPush(frag.toString());
               if (fragments != null)
               {
                  outFrags.addAll(fragments);
               }
            }
         }
         catch (IOException e)
         {
            throw new RuntimeException(e);
         }
      }

      // Apply the DigestSettings limits.
      applyLimits(outFrags, inSettings);

      return outFrags;
   }

   //--------------------------------------------------------------------------
   /**
    * Digests the protein and then joins the resulting fragments according to
    * the protein's x-links.
    *
    * @param inProtein  the protein to digest
    * @param inSettings the digest settings. They are not modified; a clone with
    *                   the length and mass limits cleared is used while linking.
    * @return a modifiable list of the (possibly linked) fragments which meet the settings' criteria
    */
   private List<DigestFragment> complexDigestion(Protein inProtein,
                                                 DigestSettings inSettings)
   {
      // Initially disable limits when finding fragments.
      // We'll apply the desired settings after all the fragments have been linked up.
      DigestSettings settingsWithoutLimits = inSettings.clone();
      settingsWithoutLimits.setMinFragmentLength(null);
      settingsWithoutLimits.setMaxFragmentLength(null);
      settingsWithoutLimits.setMinFragmentMass(null);
      settingsWithoutLimits.setMaxFragmentMass(null);

      List<DigestFragment> rawFrags = simpleDigestion(inProtein, settingsWithoutLimits);

      // A native digest always comes through here, even for a protein without
      // x-links, so guard the same way digest() does before iterating.
      if (CollectionUtil.hasValues(inProtein.getXLinks()))
      {
         // For each x-link, bind it to the combinations of raw fragments
         for (ProteinXLink xlink : inProtein.getXLinks())
         {
            applyXLink(xlink, rawFrags);
         }
      }

      // Apply the DigestSettings limits.
      applyLimits(rawFrags, inSettings);

      return rawFrags;
   }

   //--------------------------------------------------------------------------
   // Updates the fragment pool for a single x-link: fragments holding both ends
   // of the link get the link attached; fragments holding only one end are
   // replaced by every donor / acceptor combination joined through the link.
   private static void applyXLink(ProteinXLink inXLink, List<DigestFragment> inFrags)
   {
      List<DigestFragment> donorFrags    = new ArrayList<>();
      List<DigestFragment> acceptorFrags = new ArrayList<>();
      // Pool changes are deferred because the pool can't be modified while iterating over it.
      Set<DigestFragment>  fragsToAdd    = new HashSet<>();
      Set<DigestFragment>  fragsToRemove = new HashSet<>();

      for (DigestFragment frag : inFrags)
      {
         // A fragment with chains is the product of a previously applied x-link.
         boolean hasChains = CollectionUtil.hasValues(frag.getChains());

         DigestFragment donorChain = null;
         if (hasChains)
         {
            donorChain = (DigestFragment) frag.getChain(inXLink.getDonorChainId());
         }
         else if (inXLink.getDonorChainId().equals(frag.getID()))
         {
            donorChain = frag;
         }

         // Is the donor site within this frag?
         boolean hasDonorSite = (donorChain != null
                                 && inXLink.getDonorPosition() >= donorChain.getBegin()
                                 && inXLink.getDonorPosition() <= donorChain.getEnd());

         DigestFragment acceptorChain = null;
         if (hasChains)
         {
            acceptorChain = (DigestFragment) frag.getChain(inXLink.getAcceptorChainId());
         }
         else if (inXLink.getAcceptorChainId().equals(frag.getID()))
         {
            acceptorChain = frag;
         }

         // Is the acceptor site within this frag?
         boolean hasAcceptorSite = (acceptorChain != null
                                    && inXLink.getAcceptorPosition() >= acceptorChain.getBegin()
                                    && inXLink.getAcceptorPosition() <= acceptorChain.getEnd());

         if (hasDonorSite && hasAcceptorSite)
         {
            // Both ends of the link are within this one fragment.
            DigestFragment linkedFrag;
            if (hasChains)
            {
               linkedFrag = frag;
            }
            else
            {
               // Wrap the plain fragment in a container fragment that can hold the x-link.
               linkedFrag = new DigestFragment();
               linkedFrag.addChain(frag.clone());
               // Add it back to the pool
               fragsToAdd.add(linkedFrag);

               fragsToRemove.add(frag);
            }

            linkedFrag.addXLink(inXLink);
         }
         else
         {
            if (hasDonorSite)    donorFrags.add(frag);
            if (hasAcceptorSite) acceptorFrags.add(frag);
         }
      }

      // Link the donors & acceptors in all possible combinations
      for (DigestFragment donorFrag : donorFrags)
      {
         for (DigestFragment acceptorFrag : acceptorFrags)
         {
            DigestFragment linkedFrag;
            if (CollectionUtil.hasValues(donorFrag.getChains()))
            {
               linkedFrag = (DigestFragment) donorFrag.clone();
            }
            else
            {
               linkedFrag = new DigestFragment();
               linkedFrag.addChain(donorFrag.clone());
            }

            // Add the acceptor chain (if it isn't already present)
            Protein acceptorChain = linkedFrag.getChain(inXLink.getAcceptorChainId());
            if (null == acceptorChain)
            {
               linkedFrag.addChain(acceptorFrag.clone());
            }

            linkedFrag.addXLink(inXLink);
            // Add it back to the pool
            inFrags.add(linkedFrag);
         }
      }

      // Now remove the raw frags that were linked.
      for (DigestFragment donorFrag : donorFrags)
      {
         inFrags.remove(donorFrag);
      }

      for (DigestFragment acceptorFrag : acceptorFrags)
      {
         inFrags.remove(acceptorFrag);
      }

      for (DigestFragment frag : fragsToRemove)
      {
         inFrags.remove(frag);
      }

      inFrags.addAll(fragsToAdd);
   }

   //##########################################################################
   // INNER CLASS
   //##########################################################################

   /**
    * Fixed-size window over consecutive fully-cleaved pieces of a chain.
    * Slot 0 holds the piece which most recently left the window, slot 1 holds
    * the piece currently being evaluated and the following slots hold the
    * look-ahead pieces used to build fragments containing missed cleavages.
    */
   protected class SlidingWindow
   {
      private String          mChainId;
      private AminoAcidSet    mAminoAcidSet;
      private StringBuilder[] mFrags;
      private DigestSettings  mDigestSettings;
      // Resolved once in the constructor. 0 if the settings or their value is null.
      private int             mMaxMissedCleavages;
      // 1-based sequence position of the first residue of the piece in slot 1.
      private int             mIndex = 1;
      // Number of window slots in use while the window is still filling.
      private int             mLength;
      // 1-based ordinal of the piece in slot 1.
      private int             mCurrentFragIndex = 0;

      //-----------------------------------------------------------------------
      /**
       * @param inChainId  the id to assign to the generated fragments
       * @param inAASet    the amino acid set to assign to the generated fragments
       * @param inSettings the digest settings. Null settings or a null max
       *                   missed cleavages value are treated as 0 missed cleavages.
       */
      public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings)
      {
         mChainId        = inChainId;
         mAminoAcidSet   = inAASet;
         mDigestSettings = inSettings;

         if (inSettings != null
             && inSettings.getMaxMissedCleavages() != null)
         {
            mMaxMissedCleavages = inSettings.getMaxMissedCleavages();
         }

         // Slot 0 + the current piece + one look-ahead piece per allowed missed
         // cleavage + the slot for the piece which was just pushed.
         mFrags    = new StringBuilder[mMaxMissedCleavages + 3];
         mFrags[0] = new StringBuilder();
         mFrags[1] = new StringBuilder();
         mLength   = 2;
      }

      //-----------------------------------------------------------------------
      /**
       * Adds the next fully-cleaved piece to the window.
       *
       * @param inFrag the sequence of the next piece
       * @return the fragments beginning at the piece which moved into slot 1,
       *         or null if the window is still filling or nothing qualified
       */
      public List<DigestFragment> push(String inFrag)
      {
         if (mLength < mFrags.length)
         {
            // Still filling the window
            mFrags[mLength++] = new StringBuilder(inFrag);
            return null;
         }

         // Slide everything down one slot and recycle the oldest buffer for the new piece.
         StringBuilder recycled = mFrags[0];
         System.arraycopy(mFrags, 1, mFrags, 0, mFrags.length - 1);

         recycled.setLength(0);
         recycled.append(inFrag);
         mFrags[mFrags.length - 1] = recycled;

         // Slot 0 now holds the piece which was just evaluated. Step past it.
         mIndex += mFrags[0].length();

         mCurrentFragIndex++;

         return evaluateCurrentFrag();
      }

      //-----------------------------------------------------------------------
      /**
       * Adds the final piece and then pushes empty pieces until every piece
       * has passed through slot 1.
       *
       * @param inFrag the sequence of the final piece
       * @return the remaining fragments or null if there were none
       */
      public List<DigestFragment> lastPush(String inFrag)
      {
         List<DigestFragment> outFrags = null;

         List<DigestFragment> frags = push(inFrag);
         if (frags != null)
         {
            outFrags = new ArrayList<>(frags);
         }

         for (int i = 0; i <= mMaxMissedCleavages; i++)
         {
            frags = push("");
            if (frags != null)
            {
               if (null == outFrags)
               {
                  outFrags = new ArrayList<>(frags.size());
               }
               outFrags.addAll(frags);
            }
         }

         return outFrags;
      }

      //-----------------------------------------------------------------------
      // Builds the fragments which begin at the piece in slot 1: the piece by
      // itself and then extended by each successive look-ahead piece, subject
      // to the min / max fragment length settings. Returns null if none qualify.
      private List<DigestFragment> evaluateCurrentFrag()
      {
         List<DigestFragment> fragments = null;

         if (null == mDigestSettings
             || null == mDigestSettings.getMaxFragmentLength()
             || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength())
         {
            StringBuilder frag = new StringBuilder();
            // The last slot is excluded. It holds the piece which was just pushed.
            for (int i = 1; i < mFrags.length - 1; i++)
            {
               // An empty piece means that we have run out of sequence.
               if (mFrags[i].length() == 0) break;

               frag.append(mFrags[i]);

               if (null == mDigestSettings
                   || null == mDigestSettings.getMinFragmentLength()
                   || frag.length() >= mDigestSettings.getMinFragmentLength())
               {
                  // Don't get too big
                  if (mDigestSettings != null
                      && mDigestSettings.getMaxFragmentLength() != null
                      && frag.length() > mDigestSettings.getMaxFragmentLength())
                  {
                     break;
                  }

                  DigestFragment digestFrag = allocateNewDigestFragment();
                  digestFrag.setSequence(frag.toString());
                  digestFrag.setBegin(mIndex);
                  digestFrag.setEnd(mIndex + frag.length() - 1);
                  digestFrag.setNumUncleavedSites(i - 1);
                  digestFrag.setBeginFragIndex(mCurrentFragIndex);
                  digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1));

                  if (null == fragments)
                  {
                     // At most one fragment per allowed number of missed cleavages (0..max).
                     fragments = new ArrayList<>(mMaxMissedCleavages + 1);
                  }
                  fragments.add(digestFrag);
               }
            }
         }

         return fragments;
      }

      //-----------------------------------------------------------------------
      private DigestFragment allocateNewDigestFragment()
      {
         DigestFragment frag = new DigestFragment();
         frag.setID(mChainId);
         frag.setAminoAcidSet(mAminoAcidSet);

         return frag;
      }

   }
}
































































© 2015 - 2024 Weber Informatics LLC | Privacy Policy