All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.seq.ProteinDigest Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.hfg.bio.AminoAcidSet;
import com.hfg.bio.CTerminalGroup;
import com.hfg.bio.DigestFragment;
import com.hfg.bio.DigestSettings;
import com.hfg.bio.NTerminalGroup;
import com.hfg.bio.Protease;
import com.hfg.util.collection.CollectionUtil;
import com.hfg.util.collection.OrderedSet;

//------------------------------------------------------------------------------
/**
 Performs a digestion of a protein via protease(s).
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class ProteinDigest { private DigestSettings mSettings; private Protein mProtein; private Set mProteases; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public ProteinDigest(Protein inProtein, DigestSettings inSettings) { mProtein = inProtein; mSettings = inSettings; mProteases = inSettings.getProteases(); if (! CollectionUtil.hasValues(mProteases)) { throw new RuntimeException("No protease(s) specified!"); } } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public DigestSettings getSettings() { return mSettings; } //--------------------------------------------------------------------------- public Protein getProtein() { return mProtein; } //--------------------------------------------------------------------------- public List getFragments() { List outFrags; if (getSettings().getAlkylatedCys() != null) { Protein proteinCopy = getProtein().clone(); proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE); // Reflect that the cysteines in the protein are alkylated. AminoAcidSet aaSet = new AminoAcidSet(getProtein().getAminoAcidSet()); aaSet.setMapping('c', getSettings().getAlkylatedCys()); aaSet.setMapping('C', getSettings().getAlkylatedCys()); proteinCopy.setAminoAcidSet(aaSet); // Are there other types of x-links? if (CollectionUtil.hasValues(proteinCopy.getXLinks())) { outFrags = complexDigestion(proteinCopy, getSettings()); } else { outFrags = simpleDigestion(proteinCopy, getSettings()); } } else { // Native digest. Deal with X-links outFrags = complexDigestion(getProtein(), getSettings()); } Set fragSet = new OrderedSet<>(outFrags); outFrags.clear(); outFrags.addAll(fragSet); return outFrags; } //########################################################################### // PRIVATE METHODS //########################################################################### //-------------------------------------------------------------------------- public boolean isCleavageSite(char inP1Residue, char inP1PrimeResidue) { char p1Residue = Character.toUpperCase(inP1Residue); char p1PrimeResidue = Character.toUpperCase(inP1PrimeResidue); boolean isCleavageSite = false; for (Protease protease : mProteases) { isCleavageSite = ((protease.getP1Specificity().indexOf(p1Residue) >= 0 && protease.getExcludedP1PrimeResidues().indexOf(p1PrimeResidue) == -1) || (protease.getP1PrimeSpecificity().indexOf(p1PrimeResidue) >= 0 && protease.getExcludedP1Residues().indexOf(p1Residue) == -1)); if (isCleavageSite) break; } return isCleavageSite; } //-------------------------------------------------------------------------- private void addPrecedingAndTrailingResidues(List inFragments, Protein inProtein) { for (DigestFragment fragment : inFragments) { int position = fragment.getBegin() - 1; if (position > 0) { fragment.setPrecedingResidue(inProtein.residueAt(position)); } position = fragment.getEnd() + 1; if (position <= inProtein.length()) { fragment.setTrailingResidue(inProtein.residueAt(position)); } } } //-------------------------------------------------------------------------- private List simpleDigestion(Protein inProtein, DigestSettings inSettings) { List outFrags = new ArrayList<>(); if (CollectionUtil.hasValues(inProtein.getChains())) { for (Protein chain : inProtein.getChains()) { outFrags.addAll(simpleDigestion(chain, inSettings)); } } else { AminoAcidSet aaSet; if (inSettings.getAlkylatedCys() != null) { aaSet = new AminoAcidSet(getProtein().getAminoAcidSet()); aaSet.setMapping('c', getSettings().getAlkylatedCys()); aaSet.setMapping('C', getSettings().getAlkylatedCys()); } else { aaSet = inProtein.getAminoAcidSet().clone(); } NTerminalGroup nTerminalGroup = inProtein.getAminoAcidSet().getNTerminalGroup(); if (! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) { aaSet.setNTerminalGroup(NTerminalGroup.UNMODIFIED_N_TERMINUS); } CTerminalGroup cTerminalGroup = inProtein.getAminoAcidSet().getCTerminalGroup(); if (! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) { aaSet.setCTerminalGroup(CTerminalGroup.UNMODIFIED_C_TERMINUS); } try { Reader seqReader = null; try { seqReader = inProtein.getSequenceReader(); SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), aaSet, inSettings); int p1Residue = seqReader.read(); if (p1Residue != -1) { StringBuilder frag = new StringBuilder((char)p1Residue + ""); int p1PrimeResidue; while ((p1PrimeResidue = seqReader.read()) != -1) { if (isCleavageSite((char)p1Residue, (char)p1PrimeResidue)) { List fragments = fragWindow.push(frag.toString()); if (fragments != null) { // Make sure any custom termini are passed along for (DigestFragment fragment : fragments) { if (1 == fragment.getBegin() && ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) { AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); fragAASet.setNTerminalGroup(nTerminalGroup); fragment.setAminoAcidSet(fragAASet); } if (inProtein.length() == fragment.getEnd() && ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) { AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); fragAASet.setCTerminalGroup(cTerminalGroup); fragment.setAminoAcidSet(fragAASet); } } addPrecedingAndTrailingResidues(fragments, inProtein); outFrags.addAll(fragments); } frag.setLength(0); frag.append((char)p1PrimeResidue); } else { frag.append((char)p1PrimeResidue); } p1Residue = p1PrimeResidue; } List fragments = fragWindow.lastPush(frag.toString()); if (fragments != null) { // Make sure any custom termini are passed along for (DigestFragment fragment : fragments) { if (1 == fragment.getBegin() && ! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) { AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); fragAASet.setNTerminalGroup(nTerminalGroup); fragment.setAminoAcidSet(fragAASet); } if (inProtein.length() == fragment.getEnd() && ! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) { AminoAcidSet fragAASet = fragment.getAminoAcidSet().clone(); fragAASet.setCTerminalGroup(cTerminalGroup); fragment.setAminoAcidSet(fragAASet); } } addPrecedingAndTrailingResidues(fragments, inProtein); outFrags.addAll(fragments); } } } finally { if (seqReader != null) seqReader.close(); } } catch (IOException e) { throw new RuntimeException(e); } } // Apply the DigestSettings limits. for (int i = 0; i < outFrags.size(); i++) { if (! inSettings.meetsCriteria(outFrags.get(i))) { outFrags.remove(i--); } } return outFrags; } //-------------------------------------------------------------------------- private List complexDigestion(Protein inProtein, DigestSettings inSettings) { // Initially disable limits when finding fragments. // We'll apply the desired settings after all the fragments have been linked up. DigestSettings settingsWithoutLimits = inSettings.clone(); settingsWithoutLimits.setMinFragmentLength(null); settingsWithoutLimits.setMaxFragmentLength(null); settingsWithoutLimits.setMinFragmentMass(null); settingsWithoutLimits.setMaxFragmentMass(null); List rawFrags = simpleDigestion(inProtein, settingsWithoutLimits); // For ea. x-link, bind it to the combinations of raw fragments List orderedXLinks = new ArrayList<>(inProtein.getXLinks()); Collections.sort(orderedXLinks); for (ProteinXLink xlink : orderedXLinks) { List donorFragIndexes = new ArrayList<>(); List acceptorFragIndexes = new ArrayList<>(); Set fragsToAdd = new HashSet<>(); Set fragsIndexesToRemove = new HashSet<>(); for (int fragIdx = 0; fragIdx < rawFrags.size(); fragIdx++) { DigestFragment frag = rawFrags.get(fragIdx); DigestFragment donorChain = null; if (frag.hasChains()) { for (DigestFragment fragChain : (List) (Object) frag.getChains()) { if (fragChain.getID().startsWith(xlink.getDonorChainId() + ":")) { donorChain = fragChain; break; } } } else if (xlink.getDonorChainId().equals(frag.getID())) { donorChain = frag; } boolean hasDonorSite = false; if (donorChain != null && xlink.getDonorPosition() >= donorChain.getBegin() && xlink.getDonorPosition() <= donorChain.getEnd()) { // Donor site is within this frag. hasDonorSite = true; } DigestFragment acceptorChain = null; if (CollectionUtil.hasValues(frag.getChains())) { for (DigestFragment fragChain : (List) (Object) frag.getChains()) { if (fragChain.getID().startsWith(xlink.getAcceptorChainId() + ":")) { acceptorChain = fragChain; break; } } } else if (xlink.getAcceptorChainId().equals(frag.getID())) { acceptorChain = frag; } boolean hasAcceptorSite = false; if (acceptorChain != null && xlink.getAcceptorPosition() >= acceptorChain.getBegin() && xlink.getAcceptorPosition() <= acceptorChain.getEnd()) { // Acceptor site is within this frag. hasAcceptorSite = true; } if (hasDonorSite && hasAcceptorSite) { DigestFragment linkedFrag; if (CollectionUtil.hasValues(frag.getChains())) { linkedFrag = frag; } else { linkedFrag = new DigestFragment(); linkedFrag.addChain(frag.clone()); // Add it back to the pool fragsToAdd.add(linkedFrag); fragsIndexesToRemove.add(fragIdx); } String donorFragId = null; String acceptorFragId = null; for (DigestFragment fragChain : (List) (Object) linkedFrag.getChains()) { if (fragChain.getID().startsWith(xlink.getDonorChainId() + ":")) { donorFragId = fragChain.getID(); } if (fragChain.getID().startsWith(xlink.getAcceptorChainId() + ":")) { acceptorFragId = fragChain.getID(); } } xlink = xlink.clone(); xlink.setDonorChainId(donorFragId); xlink.setAcceptorChainId(acceptorFragId); linkedFrag.addXLink(xlink); } else { if (hasDonorSite) donorFragIndexes.add(fragIdx); if (hasAcceptorSite) acceptorFragIndexes.add(fragIdx); } } // Link the donors & acceptors in all possible combinations for (int donorFragIndex : donorFragIndexes) { DigestFragment donorFrag = rawFrags.get(donorFragIndex); for (int acceptorFragIndex : acceptorFragIndexes) { DigestFragment acceptorFrag = rawFrags.get(acceptorFragIndex); DigestFragment linkedFrag; if (CollectionUtil.hasValues(donorFrag.getChains())) { linkedFrag = (DigestFragment) donorFrag.clone(); } else { linkedFrag = new DigestFragment(); donorFrag = (DigestFragment) donorFrag.clone(); donorFrag.setID(donorFrag.getID() + ":" + donorFrag.getBeginFragIndex() + (donorFrag.getEndFragIndex() == donorFrag.getBeginFragIndex() ? "" : ".." + donorFrag.getEndFragIndex())); linkedFrag.addChain(donorFrag); } // Add the acceptor chain (if it isn't already present) DigestFragment acceptorChain = (DigestFragment) linkedFrag.getChain(xlink.getAcceptorChainId()); if (null == acceptorChain || xlink.getAcceptorPosition() < acceptorChain.getBegin() || xlink.getAcceptorPosition() > acceptorChain.getEnd()) { acceptorFrag = (DigestFragment) acceptorFrag.clone(); acceptorFrag.setID(acceptorFrag.getID() + ":" + acceptorFrag.getBeginFragIndex() + (acceptorFrag.getEndFragIndex() == acceptorFrag.getBeginFragIndex() ? "" : ".." + acceptorFrag.getEndFragIndex())); linkedFrag.addChain(acceptorFrag); } xlink = xlink.clone(); xlink.setDonorChainId(donorFrag.getID()); xlink.setAcceptorChainId(acceptorFrag.getID()); linkedFrag.addXLink(xlink); // Add it back to the pool rawFrags.add(linkedFrag); } } // Now remove the raw frags that were linked. // We have to remove them in reverse order for all the indices to be valid fragsIndexesToRemove.addAll(donorFragIndexes); fragsIndexesToRemove.addAll(acceptorFragIndexes); List orderedFragIndices = new ArrayList<>(fragsIndexesToRemove); Collections.sort(orderedFragIndices); Collections.reverse(orderedFragIndices); for (int fragIndex : orderedFragIndices) { rawFrags.remove(fragIndex); } rawFrags.addAll(fragsToAdd); } // Apply the DigestSettings limits. for (int i = 0; i < rawFrags.size(); i++) { if (! inSettings.meetsCriteria(rawFrags.get(i))) { rawFrags.remove(i--); } } return rawFrags; } //########################################################################## // INNER CLASS //########################################################################## protected class SlidingWindow { private String mChainId; private AminoAcidSet mAminoAcidSet; private StringBuilder[] mFrags; private DigestSettings mDigestSettings; private int mIndex = 1; private int mLength; private int mCurrentFragIndex = 0; //----------------------------------------------------------------------- public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings) { mChainId = inChainId; mAminoAcidSet = inAASet; mDigestSettings = inSettings; mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3]; mFrags[0] = new StringBuilder(); mFrags[1] = new StringBuilder(); mLength = 2; } //----------------------------------------------------------------------- public List push(String inFrag) { List outFrags = null; if (mLength < mFrags.length) { // Still filling the window mFrags[mLength++] = new StringBuilder(inFrag); } else { StringBuilder tmp = mFrags[0]; for (int i = 1; i < mFrags.length; i++) { mFrags[i - 1] = mFrags[i]; } mFrags[mFrags.length - 1] = tmp; mFrags[mFrags.length - 1].setLength(0); mFrags[mFrags.length - 1].append(inFrag); mIndex += mFrags[0].length(); mCurrentFragIndex++; List frags = evaluateCurrentFrag(); if (frags != null) { outFrags = new ArrayList<>(frags); } } return outFrags; } //----------------------------------------------------------------------- public List lastPush(String inFrag) { List outFrags = null; List frags = push(inFrag); if (frags != null) { outFrags = new ArrayList<>(frags); } for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++) { frags = push(""); if (frags != null) { if (null == outFrags) { outFrags = new ArrayList<>(frags.size()); } outFrags.addAll(frags); } } return outFrags; } //----------------------------------------------------------------------- private List evaluateCurrentFrag() { List fragments = null; int maxMissedCleavages = 0; if (mDigestSettings != null && mDigestSettings.getMaxMissedCleavages() != null) { maxMissedCleavages = mDigestSettings.getMaxMissedCleavages(); } if (null == mDigestSettings || null == mDigestSettings.getMaxFragmentLength() || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength()) { StringBuilder frag = new StringBuilder(); for (int i = 1; i < mFrags.length - 1; i++) { if (mFrags[i].length() == 0) break; frag.append(mFrags[i]); if (null == mDigestSettings || null == mDigestSettings.getMinFragmentLength() || frag.length() >= mDigestSettings.getMinFragmentLength()) { // Don't get too big if (mDigestSettings != null && mDigestSettings.getMaxFragmentLength() != null && frag.length() > mDigestSettings.getMaxFragmentLength()) { break; } DigestFragment digestFrag = allocateNewDigestFragment(); digestFrag.setSequence(frag.toString()); digestFrag.setBegin(mIndex); digestFrag.setEnd(mIndex + frag.length() - 1); digestFrag.setNumUncleavedSites(i - 1); digestFrag.setBeginFragIndex(mCurrentFragIndex); digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1)); if (null == fragments) { fragments = new ArrayList<>(maxMissedCleavages); } fragments.add(digestFrag); } } } return fragments; } //----------------------------------------------------------------------- private DigestFragment allocateNewDigestFragment() { DigestFragment frag = new DigestFragment(); frag.setID(mChainId); frag.setAminoAcidSet(mAminoAcidSet); return frag; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy