All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hfg.bio.seq.ProteinDigest Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq;

import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import com.hfg.bio.AminoAcidSet;
import com.hfg.bio.CTerminalGroup;
import com.hfg.bio.DigestFragment;
import com.hfg.bio.DigestSettings;
import com.hfg.bio.NTerminalGroup;
import com.hfg.bio.Protease;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.util.collection.CollectionUtil;

//------------------------------------------------------------------------------
/**
 Performs a digestion of a protein via protease(s).
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class ProteinDigest { private DigestSettings mSettings; private Protein mProtein; private Set mProteases; private static final int NUM_N_TERMINAL_POSITIONS = 6; private static final int NUM_C_TERMINAL_POSITIONS = 3; //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public ProteinDigest(Protein inProtein, DigestSettings inSettings) { mProtein = inProtein; mSettings = inSettings; mProteases = inSettings.getProteases(); if (! CollectionUtil.hasValues(mProteases)) { throw new RuntimeException("No protease(s) specified!"); } } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- public DigestSettings getSettings() { return mSettings; } //--------------------------------------------------------------------------- public Protein getProtein() { return mProtein; } //--------------------------------------------------------------------------- public List getFragments() { List outFrags; DigestSettings settings = getSettings(); if (settings.getAlkylatedCys() != null) { Protein proteinCopy = getProtein().clone(); proteinCopy.removeXLinks(ProteinXLinkType.DISULFIDE); // Reflect that the cysteines in the protein are alkylated. AminoAcidSet aaSet = new AminoAcidSet(getProtein().getAminoAcidSet()); aaSet.setMapping('c', settings.getAlkylatedCys()); aaSet.setMapping('C', settings.getAlkylatedCys()); proteinCopy.setAminoAcidSet(aaSet); // Are there other types of x-links? if (CollectionUtil.hasValues(proteinCopy.getXLinks())) { outFrags = complexDigestion(proteinCopy, settings); } else { outFrags = simpleDigestion(proteinCopy, settings); } } else { // Native digest. Deal with X-links outFrags = complexDigestion(getProtein(), settings); } /* TODO: What was this for? Set fragSet = new OrderedSet<>(outFrags); outFrags.clear(); outFrags.addAll(fragSet); */ if (settings.getUniqueSeqs()) { reduceToUniqueFragments(outFrags); } return outFrags; } //########################################################################### // PRIVATE METHODS //########################################################################### //-------------------------------------------------------------------------- public boolean isCleavageSite(CharSequence inNTerminalResidues, CharSequence inCTerminalResidues) { boolean isCleavageSite = false; for (Protease protease : mProteases) { isCleavageSite = protease.isCleavageSite(inNTerminalResidues, inCTerminalResidues); if (isCleavageSite) break; } return isCleavageSite; } //-------------------------------------------------------------------------- private void addPrecedingAndTrailingResidues(List inFragments, Protein inProtein) { for (DigestFragment fragment : inFragments) { int position = fragment.getBegin() - 1; if (position > 0) { fragment.setPrecedingResidue(inProtein.residueAt(position)); } position = fragment.getEnd() + 1; if (position <= inProtein.length()) { fragment.setTrailingResidue(inProtein.residueAt(position)); } } } //-------------------------------------------------------------------------- private List simpleDigestion(Protein inProtein, DigestSettings inSettings) { List outFrags = new ArrayList<>(); if (CollectionUtil.hasValues(inProtein.getChains())) { for (Protein chain : inProtein.getChains()) { outFrags.addAll(simpleDigestion(chain, inSettings)); } } else { AminoAcidSet aaSet; if (inSettings.getAlkylatedCys() != null) { aaSet = new AminoAcidSet(inProtein.getAminoAcidSet()); aaSet.setMapping('c', inSettings.getAlkylatedCys()); aaSet.setMapping('C', inSettings.getAlkylatedCys()); } else { aaSet = inProtein.getAminoAcidSet().clone(); } // We don't want for custom N- or C-terminal groups to get used on anything but the terminal peptides NTerminalGroup nTerminalGroup = inProtein.getAminoAcidSet().getNTerminalGroup(); if (! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) { aaSet.setNTerminalGroup(NTerminalGroup.UNMODIFIED_N_TERMINUS); } CTerminalGroup cTerminalGroup = inProtein.getAminoAcidSet().getCTerminalGroup(); if (! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) { aaSet.setCTerminalGroup(CTerminalGroup.UNMODIFIED_C_TERMINUS); } try { Reader seqReader = null; try { seqReader = inProtein.getSequenceReader(); SlidingWindow fragWindow = new SlidingWindow(inProtein.getID(), aaSet, inSettings); int bufferSize = NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; int[] site = new int[bufferSize]; for (int i = 0; i < bufferSize; i++) { site[i] = -1; } StringBuilder nTerminalResidues = new StringBuilder(); StringBuilder cTerminalResidues = new StringBuilder(); int residue = seqReader.read(); if (residue != -1) { StringBuilder frag = new StringBuilder((char) residue + ""); site[bufferSize - 1] = residue; // Drops in on the right side and slides left while ((residue = seqReader.read()) != -1) { for (int i = 0; i < bufferSize - 1; i++) { site[i] = site[i + 1]; } site[bufferSize - 1] = residue; if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1]) { continue; } nTerminalResidues.setLength(0); for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) { nTerminalResidues.append((char) site[i]); } cTerminalResidues.setLength(0); for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) { cTerminalResidues.append((char) site[i]); } if (isCleavageSite(nTerminalResidues, cTerminalResidues)) { List fragments = fragWindow.push(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } frag.setLength(0); frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } else { frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } } // Process residues remaining in the site buffer while (hasUnprocessedBufferContent(site)) { for (int i = 0; i < bufferSize - 1; i++) { site[i] = site[i + 1]; } site[bufferSize - 1] = -1; if (-1 == site[NUM_N_TERMINAL_POSITIONS - 1] || -1 == site[NUM_N_TERMINAL_POSITIONS]) { continue; } nTerminalResidues.setLength(0); for (int i = 0; i < NUM_N_TERMINAL_POSITIONS; i++) { int aa = site[i]; nTerminalResidues.append(aa != -1 ? (char) aa : ""); } cTerminalResidues.setLength(0); for (int i = NUM_N_TERMINAL_POSITIONS; i < bufferSize; i++) { int aa = site[i]; cTerminalResidues.append(aa != -1 ? (char) aa : ""); } if (isCleavageSite(nTerminalResidues, cTerminalResidues)) { List fragments = fragWindow.push(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } frag.setLength(0); frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } else { frag.append((char)site[NUM_N_TERMINAL_POSITIONS]); } } List fragments = fragWindow.lastPush(frag.toString()); if (fragments != null) { outFrags.addAll(fragments); } } addPrecedingAndTrailingResidues(outFrags, inProtein); // Restore N-terminal group if (! nTerminalGroup.equals(NTerminalGroup.UNMODIFIED_N_TERMINUS)) { for (DigestFragment frag : outFrags) { if (1 == frag.getBegin()) { frag.setAminoAcidSet(frag.getAminoAcidSet().clone().setNTerminalGroup(nTerminalGroup)); } } } // Restore C-terminal group if (! cTerminalGroup.equals(CTerminalGroup.UNMODIFIED_C_TERMINUS)) { for (DigestFragment frag : outFrags) { if (inProtein.length() == frag.getEnd()) { frag.setAminoAcidSet(frag.getAminoAcidSet().clone().setCTerminalGroup(cTerminalGroup)); } } } } finally { if (seqReader != null) seqReader.close(); } } catch (IOException e) { throw new RuntimeException(e); } } // Apply the DigestSettings limits. for (int i = 0; i < outFrags.size(); i++) { if (! inSettings.meetsCriteria(outFrags.get(i))) { outFrags.remove(i--); } } return outFrags; } //-------------------------------------------------------------------------- private static boolean hasUnprocessedBufferContent(int[] inBuffer) { boolean hasUnprocessedContent = false; for (int i = NUM_N_TERMINAL_POSITIONS - 1; i < NUM_N_TERMINAL_POSITIONS + NUM_C_TERMINAL_POSITIONS; i++) { if (inBuffer[i] != -1) { hasUnprocessedContent = true; break; } } return hasUnprocessedContent; } //-------------------------------------------------------------------------- private List complexDigestion(Protein inProtein, DigestSettings inSettings) { // Initially disable limits when finding fragments. // We'll apply the desired settings after all the fragments have been linked up. DigestSettings settingsWithoutLimits = inSettings.clone(); settingsWithoutLimits.setMinFragmentLength(null); settingsWithoutLimits.setMaxFragmentLength(null); settingsWithoutLimits.setMinFragmentMass(null); settingsWithoutLimits.setMaxFragmentMass(null); List rawFrags = simpleDigestion(inProtein, settingsWithoutLimits); if (CollectionUtil.hasValues(inProtein.getXLinks())) { // For ea. x-link, bind it to the combinations of raw fragments List orderedXLinks = new ArrayList<>(inProtein.getXLinks()); Collections.sort(orderedXLinks); for (ProteinXLink xlink : orderedXLinks) { List donorFragIndexes = new ArrayList<>(5); List acceptorFragIndexes = new ArrayList<>(5); Set fragsToAdd = new HashSet<>(10); Set fragsIndexesToRemove = new HashSet<>(10); for (int fragIdx = 0; fragIdx < rawFrags.size(); fragIdx++) { DigestFragment frag = rawFrags.get(fragIdx); boolean hasDonorSite = containsDonorSite(frag, xlink); boolean hasAcceptorSite = containsAcceptorSite(frag, xlink); if (hasDonorSite && hasAcceptorSite) { // This X-link is fully contained within this fragment DigestFragment linkedFrag; if (CollectionUtil.hasValues(frag.getChains())) { linkedFrag = frag; } else { linkedFrag = new DigestFragment(); linkedFrag.addChain(frag.clone()); // Add it back to the pool fragsToAdd.add(linkedFrag); // Flag the original unlinked frag for removal fragsIndexesToRemove.add(fragIdx); } String donorFragId = getFragIdForDonorSite(linkedFrag, xlink); String acceptorFragId = getFragIdForAcceptorSite(linkedFrag, xlink); linkedFrag.addXLink(xlink.clone() .setDonorChainId(donorFragId) .setAcceptorChainId(acceptorFragId)); } else { if (hasDonorSite) donorFragIndexes.add(fragIdx); if (hasAcceptorSite) acceptorFragIndexes.add(fragIdx); } } // Link the donors & acceptors in all possible combinations for (int donorFragIndex : donorFragIndexes) { DigestFragment donorFrag = rawFrags.get(donorFragIndex); String donorFragId = null; if (CollectionUtil.hasValues(donorFrag.getChains())) { donorFragId = getFragIdForDonorSite(donorFrag, xlink); } else { donorFragId = donorFrag.getID(); } for (int acceptorFragIndex : acceptorFragIndexes) { DigestFragment acceptorFrag = rawFrags.get(acceptorFragIndex); String acceptorFragId = null; DigestFragment linkedFrag; if (CollectionUtil.hasValues(donorFrag.getChains())) { linkedFrag = donorFrag.clone(); } else { linkedFrag = new DigestFragment(); donorFrag = (DigestFragment) donorFrag.clone() .setID(donorFrag.getID() + ":" + donorFrag.getBeginFragIndex() + (donorFrag.getEndFragIndex() == donorFrag.getBeginFragIndex() ? "" : ".." + donorFrag.getEndFragIndex())); donorFragId = donorFrag.getID(); linkedFrag.addChain(donorFrag); } // Add the acceptor chain (if it isn't already present) acceptorFragId = getFragIdForAcceptorSite(linkedFrag, xlink); if (! StringUtil.isSet(acceptorFragId)) { acceptorFrag = (DigestFragment) acceptorFrag.clone() .setID(acceptorFrag.getID() + ":" + acceptorFrag.getBeginFragIndex() + (acceptorFrag.getEndFragIndex() == acceptorFrag.getBeginFragIndex() ? "" : ".." + acceptorFrag.getEndFragIndex())); acceptorFragId = getFragIdForAcceptorSite(acceptorFrag, xlink); if (acceptorFrag.hasChains()) { linkedFrag.addChains(acceptorFrag.getChains()); if (CollectionUtil.hasValues(acceptorFrag.getXLinks())) { for (ProteinXLink acceptorXlink : acceptorFrag.getXLinks()) { linkedFrag.addXLink(acceptorXlink); } } } else { linkedFrag.addChain(acceptorFrag); } } linkedFrag.addXLink(xlink.clone() .setDonorChainId(donorFragId) .setAcceptorChainId(acceptorFragId)); // Add it back to the pool rawFrags.add(linkedFrag); } } // Now remove the raw frags that were linked. // We have to remove them in reverse order for all the indices to be valid fragsIndexesToRemove.addAll(donorFragIndexes); fragsIndexesToRemove.addAll(acceptorFragIndexes); if (CollectionUtil.hasValues(fragsIndexesToRemove)) { List orderedFragIndices = new ArrayList<>(fragsIndexesToRemove); Collections.sort(orderedFragIndices); Collections.reverse(orderedFragIndices); for (int fragIndex : orderedFragIndices) { rawFrags.remove(fragIndex); } } rawFrags.addAll(fragsToAdd); } } // Apply the DigestSettings limits. for (int i = 0; i < rawFrags.size(); i++) { if (! inSettings.meetsCriteria(rawFrags.get(i))) { rawFrags.remove(i--); } } return rawFrags; } //-------------------------------------------------------------------------- private boolean containsDonorSite(DigestFragment inFrag, ProteinXLink inXLink) { DigestFragment donorFrag = getFragWithSite(inFrag, new SeqLocation(inXLink.getDonorPosition(), inXLink.getDonorPosition()).setChainId(inXLink.getDonorChainId())); return (donorFrag != null); } //-------------------------------------------------------------------------- private boolean containsAcceptorSite(DigestFragment inFrag, ProteinXLink inXLink) { DigestFragment acceptorFrag = getFragWithSite(inFrag, new SeqLocation(inXLink.getAcceptorPosition(), inXLink.getAcceptorPosition()).setChainId(inXLink.getAcceptorChainId())); return (acceptorFrag != null); } //-------------------------------------------------------------------------- private String getFragIdForDonorSite(DigestFragment inFrag, ProteinXLink inXLink) { DigestFragment donorFrag = getFragWithSite(inFrag, new SeqLocation(inXLink.getDonorPosition(), inXLink.getDonorPosition()).setChainId(inXLink.getDonorChainId())); return (donorFrag != null ? donorFrag.getID() : null); } //-------------------------------------------------------------------------- private String getFragIdForAcceptorSite(DigestFragment inFrag, ProteinXLink inXLink) { DigestFragment acceptorFrag = getFragWithSite(inFrag, new SeqLocation(inXLink.getAcceptorPosition(), inXLink.getAcceptorPosition()).setChainId(inXLink.getAcceptorChainId())); return (acceptorFrag != null ? acceptorFrag.getID() : null); } //-------------------------------------------------------------------------- // Recursive function needed to unroll complex fragments. private DigestFragment getFragWithSite(DigestFragment inFrag, SeqLocation inLoc) { DigestFragment fragWithSite = null; if (CollectionUtil.hasValues(inFrag.getChains())) { for (DigestFragment fragChain : (List) (Object) inFrag.getChains()) { fragWithSite = getFragWithSite(fragChain, inLoc); if (fragWithSite != null) { break; } } } else if (inFrag.getSrcChainId().equals(inLoc.getChainId()) && inLoc.getStart() >= inFrag.getBegin() && inLoc.getStart() <= inFrag.getEnd()) { fragWithSite = inFrag; } return fragWithSite; } //-------------------------------------------------------------------------- // TODO: Get this to work for complex digests. private static void reduceToUniqueFragments(List inFrags) { Map uniqueMap = new HashMap<>(inFrags.size()); for (int i = 0; i < inFrags.size(); i++) { DigestFragment frag = inFrags.get(i); String seqString; if (frag.hasChains()) { StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter("/"); for (DigestFragment chain : (List) (Object) frag.getChains()) { buffer.delimitedAppend(chain.getSequence()); } seqString = buffer.toString(); } else { seqString = frag.getSequence(); } DigestFragment firstFrag = uniqueMap.get(seqString); if (null == firstFrag) { uniqueMap.put(seqString, frag); } else { firstFrag.addIdenticalFragment(frag); inFrags.remove(i--); } } } //########################################################################## // INNER CLASS //########################################################################## protected class SlidingWindow { private String mChainId; private AminoAcidSet mAminoAcidSet; private StringBuilder[] mFrags; private DigestSettings mDigestSettings; private int mIndex = 1; private int mLength; private int mCurrentFragIndex = 0; //----------------------------------------------------------------------- public SlidingWindow(String inChainId, AminoAcidSet inAASet, DigestSettings inSettings) { mChainId = inChainId; mAminoAcidSet = inAASet; mDigestSettings = inSettings; mFrags = new StringBuilder[inSettings.getMaxMissedCleavages() + 3]; mFrags[0] = new StringBuilder(); mFrags[1] = new StringBuilder(); mLength = 2; } //----------------------------------------------------------------------- public List push(String inFrag) { List outFrags = null; if (mLength < mFrags.length) { // Still filling the window mFrags[mLength++] = new StringBuilder(inFrag); } else { StringBuilder tmp = mFrags[0]; for (int i = 1; i < mFrags.length; i++) { mFrags[i - 1] = mFrags[i]; } mFrags[mFrags.length - 1] = tmp; mFrags[mFrags.length - 1].setLength(0); mFrags[mFrags.length - 1].append(inFrag); mIndex += mFrags[0].length(); mCurrentFragIndex++; List frags = evaluateCurrentFrag(); if (frags != null) { outFrags = new ArrayList<>(frags); } } return outFrags; } //----------------------------------------------------------------------- public List lastPush(String inFrag) { List outFrags = null; List frags = push(inFrag); if (frags != null) { outFrags = new ArrayList<>(frags); } for (int i = 0; i <= mDigestSettings.getMaxMissedCleavages(); i++) { frags = push(""); if (frags != null) { if (null == outFrags) { outFrags = new ArrayList<>(frags.size()); } outFrags.addAll(frags); } } return outFrags; } //----------------------------------------------------------------------- private List evaluateCurrentFrag() { List fragments = null; int maxMissedCleavages = 0; if (mDigestSettings != null && mDigestSettings.getMaxMissedCleavages() != null) { maxMissedCleavages = mDigestSettings.getMaxMissedCleavages(); } if (null == mDigestSettings || null == mDigestSettings.getMaxFragmentLength() || mFrags[1].length() <= mDigestSettings.getMaxFragmentLength()) { StringBuilder frag = new StringBuilder(); for (int i = 1; i < mFrags.length - 1; i++) { if (mFrags[i].length() == 0) break; frag.append(mFrags[i]); if (null == mDigestSettings || null == mDigestSettings.getMinFragmentLength() || frag.length() >= mDigestSettings.getMinFragmentLength()) { // Don't get too big if (mDigestSettings != null && mDigestSettings.getMaxFragmentLength() != null && frag.length() > mDigestSettings.getMaxFragmentLength()) { break; } DigestFragment digestFrag = allocateNewDigestFragment(); digestFrag.setSequence(frag.toString()); digestFrag.setBegin(mIndex); digestFrag.setEnd(mIndex + frag.length() - 1); digestFrag.setNumUncleavedSites(i - 1); digestFrag.setBeginFragIndex(mCurrentFragIndex); digestFrag.setEndFragIndex(mCurrentFragIndex + (i - 1)); if (null == fragments) { fragments = new ArrayList<>(maxMissedCleavages); } fragments.add(digestFrag); } } } return fragments; } //----------------------------------------------------------------------- private DigestFragment allocateNewDigestFragment() { DigestFragment frag = new DigestFragment(); frag.setID(mChainId); frag.setSrcChainId(mChainId); frag.setAminoAcidSet(mAminoAcidSet); return frag; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy