All downloads are free. Search and download functionality uses the official Maven repository.

com.hfg.bio.seq.alignment.PairwiseSeqAlignment Maven / Gradle / Ivy

There is a newer version: 20240423
Show newest version
package com.hfg.bio.seq.alignment;

import java.util.HashSet;
import java.util.Set;

import com.hfg.bio.HfgBioXML;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix;
import com.hfg.exception.ProgrammingException;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;

//------------------------------------------------------------------------------
/**
 Pairwise sequence alignment container.
 
@author J. Alex Taylor, hairyfatguy.com
*/ //------------------------------------------------------------------------------ // com.hfg XML/HTML Coding Library // // This library is free software; you can redistribute it and/or // modify it under the terms of the GNU Lesser General Public // License as published by the Free Software Foundation; either // version 2.1 of the License, or (at your option) any later version. // // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU // Lesser General Public License for more details. // // You should have received a copy of the GNU Lesser General Public // License along with this library; if not, write to the Free Software // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA // // J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com // [email protected] //------------------------------------------------------------------------------ public class PairwiseSeqAlignment implements Cloneable, Comparable { private PairwiseSettings mSettings; private AlignedQuery mAlignedQuery; private AlignedSubject mAlignedSubject; private Integer mAlignmentLength; private AlignmentScoring mScoring = new AlignmentScoring(); //########################################################################### // CONSTRUCTORS //########################################################################### //--------------------------------------------------------------------------- public PairwiseSeqAlignment(AlignedQuery inAlignedQuery, AlignedSubject inAlignedSubject) { mAlignedQuery = inAlignedQuery; mAlignedSubject = inAlignedSubject; } //--------------------------------------------------------------------------- public PairwiseSeqAlignment(XMLNode inXMLNode) { if (! 
inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT)) { throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!"); } mAlignedQuery = new AlignedQuery(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_QUERY)); mAlignedSubject = new AlignedSubject(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_SUBJECT)); String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT); if (StringUtil.isSet(rawScoreString)) { setScore(Float.parseFloat(rawScoreString)); } } //########################################################################### // PUBLIC METHODS //########################################################################### //--------------------------------------------------------------------------- @Override public PairwiseSeqAlignment clone() { PairwiseSeqAlignment cloneObj; try { cloneObj = (PairwiseSeqAlignment) super.clone(); } catch (Exception e) { throw new ProgrammingException(e); } if (mSettings != null) { cloneObj.mSettings = mSettings.clone(); } if (mAlignedQuery != null) { cloneObj.mAlignedQuery = mAlignedQuery.clone(); } if (mAlignedSubject != null) { cloneObj.mAlignedSubject = mAlignedSubject.clone(); } return cloneObj; } //-------------------------------------------------------------------------- @Override public boolean equals(Object inObj2) { return (inObj2 != null && inObj2 instanceof PairwiseSeqAlignment && 0 == compareTo((PairwiseSeqAlignment) inObj2)); } //-------------------------------------------------------------------------- @Override public int hashCode() { int hashCode = 0; if (getScore() != null) { hashCode += getScore().hashCode(); } if (getNumIdentities() != null) { hashCode += 31 * getNumIdentities().hashCode(); } return hashCode; } //-------------------------------------------------------------------------- @Override public int compareTo(PairwiseSeqAlignment inObj2) { int score = - CompareUtil.compare(getScore(), inObj2.getScore()); if (0 == 
score) { score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities()); } return score; } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setSettings(PairwiseSettings inValue) { mSettings = inValue; return this; } //--------------------------------------------------------------------------- public PairwiseSettings getSettings() { return mSettings; } //--------------------------------------------------------------------------- public void clearCachedValues() { mAlignmentLength = null; mScoring.clear(); } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setAlignedQuery(AlignedQuery inValue) { mAlignedQuery = inValue; clearCachedValues(); return this; } //--------------------------------------------------------------------------- public AlignedQuery getAlignedQuery() { return mAlignedQuery; } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setAlignedSubject(AlignedSubject inValue) { mAlignedSubject = inValue; clearCachedValues(); return this; } //--------------------------------------------------------------------------- public AlignedSubject getAlignedSubject() { return mAlignedSubject; } //--------------------------------------------------------------------------- public Integer length() { if (null == mAlignmentLength) { mAlignmentLength = getAlignedQuery().getAlignedSeq().length(); } return mAlignmentLength; } //--------------------------------------------------------------------------- /** Returns the alignment scoring object. 
* @return the alignment scoring object */ public AlignmentScoring getScoring() { ensureScoringIdentityStatsAreCalculated(); return mScoring; } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setScore(Float inValue) { mScoring.setScore(inValue); return this; } //--------------------------------------------------------------------------- public Float getScore() { return mScoring.getScore(); } //--------------------------------------------------------------------------- public Integer getNumIdentities() { ensureScoringIdentityStatsAreCalculated(); return mScoring.getNumIdentities(); } //--------------------------------------------------------------------------- public Integer getComparisonLength() { ensureScoringIdentityStatsAreCalculated(); return mScoring.getComparisonLength(); } //--------------------------------------------------------------------------- /** Doesn't penalize sequence's terminal gaps. @return the adjusted comparison length */ public Integer getAdjustedComparisonLength() { ensureScoringIdentityStatsAreCalculated(); return mScoring.getAdjustedComparisonLength(); } //--------------------------------------------------------------------------- public Float getPctIdentity() { ensureScoringIdentityStatsAreCalculated(); return mScoring.getPctIdentity(); } //--------------------------------------------------------------------------- /** Doesn't penalize sequence's terminal gaps. 
@return the adjusted percent identity */ public Float getAdjustedPctIdentity() { ensureScoringIdentityStatsAreCalculated(); return mScoring.getAdjustedPctIdentity(); } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setEValue(Float inValue) { mScoring.setEValue(inValue); return this; } //--------------------------------------------------------------------------- public Float getEValue() { return mScoring.getEValue(); } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setPValue(Float inValue) { mScoring.setPValue(inValue); return this; } //--------------------------------------------------------------------------- public Float getPValue() { return mScoring.getPValue(); } //--------------------------------------------------------------------------- public PairwiseSeqAlignment setZScore(Float inValue) { mScoring.setZScore(inValue); return this; } //--------------------------------------------------------------------------- public Float getZScore() { return mScoring.getZScore(); } //--------------------------------------------------------------------------- /** Method helpful for understanding the per-residue scoring breakdown. @return ASCII display of the per-residue scoring */ public String showScoring() { SubstitutionMatrix substitutionMatrix = getSettings().getSubstitutionMatrix(getAlignedQuery().getSeq().getType()); StringBuilderPlus buffer = new StringBuilderPlus(); boolean queryGapOpen = false; boolean subjectGapOpen = false; Set queryGapExclusionIndices = new HashSet<>(); if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.LEFT)) { for (int i = 0; i < length(); i++) { if (getAlignedQuery().getAlignedSeq().charAt(i) != '-') { break; } queryGapExclusionIndices.add(i); } } if (! 
getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.RIGHT)) { for (int i = length() - 1; i > 0; i--) { if (getAlignedQuery().getAlignedSeq().charAt(i) != '-') { break; } queryGapExclusionIndices.add(i); } } Set subjectGapExclusionIndices = new HashSet<>(); if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.LEFT)) { for (int i = 0; i < length(); i++) { if (getAlignedSubject().getAlignedSeq().charAt(i) != '-') { break; } subjectGapExclusionIndices.add(i); } } if (! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.RIGHT)) { for (int i = length() - 1; i > 0; i--) { if (getAlignedSubject().getAlignedSeq().charAt(i) != '-') { break; } subjectGapExclusionIndices.add(i); } } Set subjectGapScoreIndices = new HashSet<>(); if (getSettings().getSubjectTemplateMode()) { StringBuilderPlus origSubject = new StringBuilderPlus(getAlignedSubject().getSeq().getSequence()); for (int i = 0; i < length(); i++) { if (getAlignedSubject().getAlignedSeq().charAt(i) == '-') { if (origSubject.charAt(i) == '-') { subjectGapScoreIndices.add(i); } else { origSubject.insert(i, '-'); } } } } GapPenalties queryGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.QUERY); GapPenalties subjectGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.SUBJECT); float totalScore = 0; for (int i = 0; i < length(); i++) { buffer.append(String.format("%3d. ", i + 1)); char queryResidue = getAlignedQuery().getAlignedSeq().charAt(i); char subjectResidue = getAlignedSubject().getAlignedSeq().charAt(i); float score = 0; if (queryResidue != '-') { queryGapOpen = false; if (subjectResidue != '-') { subjectGapOpen = false; score = substitutionMatrix.score(queryResidue, subjectResidue); } else { if (subjectGapScoreIndices.contains(i)) { score = substitutionMatrix.score(queryResidue, subjectResidue); } else if (! subjectGapExclusionIndices.contains(i)) { score = (subjectGapOpen ? 
subjectGapPenalties.getExtensionPenalty() : subjectGapPenalties.getOpenPenalty()); } subjectGapOpen = true; } } else { if (subjectGapScoreIndices.contains(i)) { score = substitutionMatrix.score(queryResidue, subjectResidue); } else if (! queryGapExclusionIndices.contains(i)) { score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() : queryGapPenalties.getOpenPenalty()); } queryGapOpen = true; } buffer.append(String.format("%c %5.1f %c", queryResidue, score, subjectResidue)); buffer.appendln(); totalScore += score; } buffer.appendln(String.format("Total:%6.1f", totalScore)); buffer.appendln(); return buffer.toString(); } //--------------------------------------------------------------------------- @Override public String toString() { StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter(", "); if (null == getSettings() || getSettings().getQueryAlignmentType().equals(PairwiseAlignmentType.LOCAL)) { buffer.append(String.format("Query Range: %s", getAlignedQuery().getSeqLocation().toString())); } if (null == getSettings() || getSettings().getSubjectAlignmentType().equals(PairwiseAlignmentType.LOCAL)) { buffer.delimitedAppend(String.format("Subject Range: %s", getAlignedSubject().getSeqLocation().toString())); } buffer.delimitedAppend(String.format("Score: %.1f\n", getScore())); buffer.appendln(getAlignedQuery().getAlignedSeq()); for (int i = 0; i < length(); i++) { char queryChar = getAlignedQuery().getAlignedSeq().charAt(i); buffer.append(queryChar != '-' && queryChar == getAlignedSubject().getAlignedSeq().charAt(i) ? 
'|' : ' '); } buffer.appendln(); buffer.appendln(getAlignedSubject().getAlignedSeq()); return buffer.toString(); } //-------------------------------------------------------------------------- public XMLNode toXMLNode() { XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT); if (getScore() != null) { tag.setAttribute(HfgBioXML.SCORE_ATT, getScore()); } tag.addSubtag(getAlignedQuery().toXMLNode()); tag.addSubtag(getAlignedSubject().toXMLNode()); return tag; } //--------------------------------------------------------------------------- public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart) { return getSubalignment(inZeroIndexedStart, length()); } //--------------------------------------------------------------------------- public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart, int inLimit) { AlignedQuery subalignedQuery = new AlignedQuery(mAlignedQuery.getSeq(), mAlignedQuery.getAlignedSeq().substring(inZeroIndexedStart, inLimit), mAlignedQuery.getLinearPosition(inZeroIndexedStart)); AlignedSubject subalignedSubject = null; if (mAlignedSubject != null) { subalignedSubject = new AlignedSubject(mAlignedSubject.getSeq(), mAlignedSubject.getAlignedSeq().substring(inZeroIndexedStart, inLimit), mAlignedSubject.getLinearPosition(inZeroIndexedStart)); } return new PairwiseSeqAlignment(subalignedQuery, subalignedSubject); } //--------------------------------------------------------------------------- /** In the case of a local query vs. a global subject, this method can be used to extend the local query in both directions to match where subject residues are present. 
*/ public void extendAlignedQuery() { BioSequence query = getAlignedQuery().getSeq(); if (query != null) { int queryMatchStart = getAlignedQuery().getSeqLocation().getStart(); // Don't leave terminal gaps on the query when there are residues to give and the germline has a residue if (getAlignedQuery().getNTerminalGapLength() > 0 && queryMatchStart > 1) { StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq()); StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq()); int queryGapSize = getAlignedQuery().getNTerminalGapLength(); int germlineGapSize = getAlignedSubject().getNTerminalGapLength(); int adjustedQueryMatchStart = queryMatchStart; if (queryGapSize > germlineGapSize) { for (int index = queryGapSize - 1; index >= 0; index--) { if (germlineBuffer.charAt(index) != '-' && '-' == queryBuffer.charAt(index)) { queryBuffer.setCharAt(index, query.residueAt(adjustedQueryMatchStart - queryMatchStart)); adjustedQueryMatchStart--; if (1 == adjustedQueryMatchStart) { break; } } } } setAlignedQuery(new AlignedQuery(query, queryBuffer, adjustedQueryMatchStart)); } if (getAlignedQuery().getCTerminalGapLength() > 0) { StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq()); StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq()); int queryGapSize = getAlignedQuery().getCTerminalGapLength(); int germlineGapSize = getAlignedSubject().getCTerminalGapLength(); int matchEnd = getAlignedQuery().getSeqLocation().getEnd(); if (queryGapSize > germlineGapSize && matchEnd < query.length()) { for (int index = queryBuffer.length() - queryGapSize; index < queryBuffer.length(); index++) { if (germlineBuffer.charAt(index) != '-' && '-' == queryBuffer.charAt(index)) { queryBuffer.setCharAt(index, query.residueAt(matchEnd + 1)); matchEnd++; if (matchEnd == query.length()) { break; } } } } setAlignedQuery(new AlignedQuery(query, queryBuffer, 
getAlignedQuery().getSeqLocation().getStart())); } } } //--------------------------------------------------------------------------- /** Swaps the query and the subject. */ public void invert() { AlignedQuery newQuery = new AlignedQuery(getAlignedSubject().getSeq(), getAlignedSubject().getAlignedSeq(), getAlignedSubject().getSeqLocation().getStart()); AlignedSubject newSubject = new AlignedSubject(getAlignedQuery().getSeq(), getAlignedQuery().getAlignedSeq(), getAlignedQuery().getSeqLocation().getStart()); setAlignedQuery(newQuery); setAlignedSubject(newSubject); } //########################################################################### // PRIVATE METHODS //########################################################################### //--------------------------------------------------------------------------- private void ensureScoringIdentityStatsAreCalculated() { if (null == mScoring.getNumIdentities()) { mScoring.calculate(this); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy