// com.hfg.bio.seq.alignment.PairwiseSeqAlignment
// Part of com.hfg: xml, html, svg, and bioinformatics utility library
package com.hfg.bio.seq.alignment;
import java.util.HashSet;
import java.util.Set;
import com.hfg.bio.HfgBioXML;
import com.hfg.bio.seq.BioSequence;
import com.hfg.bio.seq.alignment.matrix.SubstitutionMatrix;
import com.hfg.exception.ProgrammingException;
import com.hfg.util.CompareUtil;
import com.hfg.util.StringBuilderPlus;
import com.hfg.util.StringUtil;
import com.hfg.xml.XMLNode;
import com.hfg.xml.XMLTag;
//------------------------------------------------------------------------------
/**
Pairwise sequence alignment container.
@author J. Alex Taylor, hairyfatguy.com
*/
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------
public class PairwiseSeqAlignment implements Cloneable, Comparable<PairwiseSeqAlignment>
{
   // Settings used to produce the alignment. May be null: toString() tolerates that,
   // showScoring() dereferences it unconditionally.
   private PairwiseSettings mSettings;
   private AlignedQuery     mAlignedQuery;
   private AlignedSubject   mAlignedSubject;
   // Lazily computed by length(); reset to null by clearCachedValues().
   private Integer          mAlignmentLength;
   private AlignmentScoring mScoring = new AlignmentScoring();

   //###########################################################################
   // CONSTRUCTORS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Creates an alignment from an already-aligned query and subject.
    @param inAlignedQuery the aligned query
    @param inAlignedSubject the aligned subject
    */
   public PairwiseSeqAlignment(AlignedQuery inAlignedQuery, AlignedSubject inAlignedSubject)
   {
      mAlignedQuery = inAlignedQuery;
      mAlignedSubject = inAlignedSubject;
   }

   //---------------------------------------------------------------------------
   /**
    Creates an alignment from its XML representation (see {@link #toXMLNode()}).
    The aligned query and aligned subject subtags are required; the score attribute is optional.
    @param inXMLNode the alignment tag
    @throws RuntimeException if the tag name of inXMLNode is not HfgBioXML.ALIGNMENT
    */
   public PairwiseSeqAlignment(XMLNode inXMLNode)
   {
      if (! inXMLNode.getTagName().equals(HfgBioXML.ALIGNMENT))
      {
         throw new RuntimeException("Cannot construct a " + this.getClass().getSimpleName() + " from a " + inXMLNode.getTagName() + " tag!");
      }

      mAlignedQuery = new AlignedQuery(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_QUERY));
      mAlignedSubject = new AlignedSubject(inXMLNode.getRequiredSubtagByName(HfgBioXML.ALIGNED_SUBJECT));

      String rawScoreString = inXMLNode.getAttributeValue(HfgBioXML.SCORE_ATT);
      if (StringUtil.isSet(rawScoreString))
      {
         setScore(Float.parseFloat(rawScoreString));
      }
   }

   //###########################################################################
   // PUBLIC METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   /**
    Returns a copy of this alignment in which the settings, the aligned query, and
    the aligned subject are themselves cloned.
    @return the cloned alignment
    */
   @Override
   public PairwiseSeqAlignment clone()
   {
      PairwiseSeqAlignment cloneObj;
      try
      {
         cloneObj = (PairwiseSeqAlignment) super.clone();
      }
      catch (Exception e)
      {
         throw new ProgrammingException(e);
      }

      if (mSettings != null)
      {
         cloneObj.mSettings = mSettings.clone();
      }

      if (mAlignedQuery != null)
      {
         cloneObj.mAlignedQuery = mAlignedQuery.clone();
      }

      if (mAlignedSubject != null)
      {
         cloneObj.mAlignedSubject = mAlignedSubject.clone();
      }

      // NOTE(review): mScoring is not re-assigned here, so after the shallow super.clone()
      // the clone and the original reference the same AlignmentScoring instance.
      // Confirm whether that sharing is intended; if not, the scoring object needs copying too.

      return cloneObj;
   }

   //--------------------------------------------------------------------------
   /**
    Two alignments are considered equal when {@link #compareTo(PairwiseSeqAlignment)} returns 0,
    i.e. when their scores and numbers of identities compare as equal.
    @param inObj2 the object to compare against
    @return whether inObj2 is a PairwiseSeqAlignment that compares as equal to this one
    */
   @Override
   public boolean equals(Object inObj2)
   {
      // instanceof is false for null, so no separate null check is required.
      return (inObj2 instanceof PairwiseSeqAlignment
              && 0 == compareTo((PairwiseSeqAlignment) inObj2));
   }

   //--------------------------------------------------------------------------
   /**
    Hash code built from the same two values used by compareTo(): the score and the number of identities.
    @return the hash code
    */
   @Override
   public int hashCode()
   {
      int hashCode = 0;

      if (getScore() != null)
      {
         hashCode += getScore().hashCode();
      }

      if (getNumIdentities() != null)
      {
         hashCode += 31 * getNumIdentities().hashCode();
      }

      return hashCode;
   }

   //--------------------------------------------------------------------------
   /**
    Orders alignments by score and then by number of identities. Both comparisons are
    negated, reversing the order produced by CompareUtil.compare().
    @param inObj2 the alignment to compare against
    @return the negated comparison result of the scores or, if those tie, of the identity counts
    */
   @Override
   public int compareTo(PairwiseSeqAlignment inObj2)
   {
      int score = - CompareUtil.compare(getScore(), inObj2.getScore());

      if (0 == score)
      {
         score = - CompareUtil.compare(getNumIdentities(), inObj2.getNumIdentities());
      }

      return score;
   }

   //---------------------------------------------------------------------------
   /**
    Specifies the settings associated with this alignment.
    @param inValue the settings
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setSettings(PairwiseSettings inValue)
   {
      mSettings = inValue;
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the settings associated with this alignment (may be null)
    */
   public PairwiseSettings getSettings()
   {
      return mSettings;
   }

   //---------------------------------------------------------------------------
   /**
    Discards the cached alignment length and calls clear() on the scoring object.
    */
   public void clearCachedValues()
   {
      mAlignmentLength = null;
      mScoring.clear();
   }

   //---------------------------------------------------------------------------
   /**
    Replaces the aligned query and clears the cached values.
    @param inValue the new aligned query
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setAlignedQuery(AlignedQuery inValue)
   {
      mAlignedQuery = inValue;
      clearCachedValues();
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the aligned query
    */
   public AlignedQuery getAlignedQuery()
   {
      return mAlignedQuery;
   }

   //---------------------------------------------------------------------------
   /**
    Replaces the aligned subject and clears the cached values.
    @param inValue the new aligned subject
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setAlignedSubject(AlignedSubject inValue)
   {
      mAlignedSubject = inValue;
      clearCachedValues();
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the aligned subject
    */
   public AlignedSubject getAlignedSubject()
   {
      return mAlignedSubject;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the alignment length, which is the length of the aligned query sequence.
    The value is cached until {@link #clearCachedValues()} is called.
    @return the alignment length
    */
   public Integer length()
   {
      if (null == mAlignmentLength)
      {
         mAlignmentLength = getAlignedQuery().getAlignedSeq().length();
      }

      return mAlignmentLength;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the alignment scoring object, after making sure its identity statistics have been calculated.
    @return the alignment scoring object
    */
   public AlignmentScoring getScoring()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring;
   }

   //---------------------------------------------------------------------------
   /**
    Stores the alignment score on the scoring object.
    @param inValue the score
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setScore(Float inValue)
   {
      mScoring.setScore(inValue);
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the score held by the scoring object
    */
   public Float getScore()
   {
      return mScoring.getScore();
   }

   //---------------------------------------------------------------------------
   /**
    @return the number of identities reported by the scoring object
    */
   public Integer getNumIdentities()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring.getNumIdentities();
   }

   //---------------------------------------------------------------------------
   /**
    @return the comparison length reported by the scoring object
    */
   public Integer getComparisonLength()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring.getComparisonLength();
   }

   //---------------------------------------------------------------------------
   /**
    Doesn't penalize sequence's terminal gaps.
    @return the adjusted comparison length
    */
   public Integer getAdjustedComparisonLength()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring.getAdjustedComparisonLength();
   }

   //---------------------------------------------------------------------------
   /**
    @return the percent identity reported by the scoring object
    */
   public Float getPctIdentity()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring.getPctIdentity();
   }

   //---------------------------------------------------------------------------
   /**
    Doesn't penalize sequence's terminal gaps.
    @return the adjusted percent identity
    */
   public Float getAdjustedPctIdentity()
   {
      ensureScoringIdentityStatsAreCalculated();
      return mScoring.getAdjustedPctIdentity();
   }

   //---------------------------------------------------------------------------
   /**
    Stores the E-value on the scoring object.
    @param inValue the E-value
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setEValue(Float inValue)
   {
      mScoring.setEValue(inValue);
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the E-value held by the scoring object
    */
   public Float getEValue()
   {
      return mScoring.getEValue();
   }

   //---------------------------------------------------------------------------
   /**
    Stores the P-value on the scoring object.
    @param inValue the P-value
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setPValue(Float inValue)
   {
      mScoring.setPValue(inValue);
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the P-value held by the scoring object
    */
   public Float getPValue()
   {
      return mScoring.getPValue();
   }

   //---------------------------------------------------------------------------
   /**
    Stores the Z-score on the scoring object.
    @param inValue the Z-score
    @return this alignment, to allow method chaining
    */
   public PairwiseSeqAlignment setZScore(Float inValue)
   {
      mScoring.setZScore(inValue);
      return this;
   }

   //---------------------------------------------------------------------------
   /**
    @return the Z-score held by the scoring object
    */
   public Float getZScore()
   {
      return mScoring.getZScore();
   }

   //---------------------------------------------------------------------------
   /**
    Method helpful for understanding the per-residue scoring breakdown.
    Emits one line per alignment column (1-based column number, query residue, column score,
    subject residue) followed by a "Total:" line with the summed score.
    <p>
    The settings must have been set (see {@link #setSettings(PairwiseSettings)}); they are
    dereferenced unconditionally. When the aligned subject has a PSSM it is used for scoring,
    otherwise the substitution matrix from the settings is used.
    </p>
    @return ASCII display of the per-residue scoring
    */
   public String showScoring()
   {
      // A substitution matrix is only looked up when the subject carries no PSSM.
      SubstitutionMatrix substitutionMatrix = null;
      if (null == getAlignedSubject().getPSSM())
      {
         substitutionMatrix = getSettings().getSubstitutionMatrix(getAlignedQuery().getSeq().getType());
      }

      StringBuilderPlus buffer = new StringBuilderPlus();

      boolean queryGapOpen = false;
      boolean subjectGapOpen = false;

      // Terminal gap columns that the settings say should not be penalized.
      Set<Integer> queryGapExclusionIndices = findUnpenalizedTerminalGapIndices(
            getAlignedQuery().getAlignedSeq(),
            ! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.LEFT),
            ! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.QUERY, PairwiseSeqTerminus.RIGHT));

      Set<Integer> subjectGapExclusionIndices = findUnpenalizedTerminalGapIndices(
            getAlignedSubject().getAlignedSeq(),
            ! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.LEFT),
            ! getSettings().getPenalizeTerminalGaps(PairwiseSeqType.SUBJECT, PairwiseSeqTerminus.RIGHT));

      // In subject template mode, a gap column of the aligned subject that lines up with a '-'
      // already present in the subject's own sequence is scored via the matrix / PSSM instead of
      // as a gap. Gaps introduced by the alignment are inserted into the working copy so that
      // its indices stay in register with the alignment columns.
      Set<Integer> subjectGapScoreIndices = new HashSet<>();
      if (getSettings().getSubjectTemplateMode())
      {
         StringBuilderPlus origSubject = new StringBuilderPlus(getAlignedSubject().getSeq().getSequence());
         for (int i = 0; i < length(); i++)
         {
            if (getAlignedSubject().getAlignedSeq().charAt(i) == '-')
            {
               if (origSubject.charAt(i) == '-')
               {
                  subjectGapScoreIndices.add(i);
               }
               else
               {
                  origSubject.insert(i, '-');
               }
            }
         }
      }

      GapPenalties queryGapPenalties   = getSettings().getGapPenalties(PairwiseSeqType.QUERY);
      GapPenalties subjectGapPenalties = getSettings().getGapPenalties(PairwiseSeqType.SUBJECT);

      float totalScore = 0;

      for (int i = 0; i < length(); i++)
      {
         buffer.append(String.format("%3d. ", i + 1));

         char queryResidue   = getAlignedQuery().getAlignedSeq().charAt(i);
         char subjectResidue = getAlignedSubject().getAlignedSeq().charAt(i);

         float score = 0;

         if (queryResidue != '-')
         {
            queryGapOpen = false;

            if (subjectResidue != '-')
            {
               // Residue vs. residue
               subjectGapOpen = false;
               score = scoreResiduePair(substitutionMatrix, queryResidue, subjectResidue, i);
            }
            else
            {
               // Residue vs. subject gap
               if (subjectGapScoreIndices.contains(i))
               {
                  score = scoreResiduePair(substitutionMatrix, queryResidue, subjectResidue, i);
               }
               else if (! subjectGapExclusionIndices.contains(i))
               {
                  score = (subjectGapOpen ? subjectGapPenalties.getExtensionPenalty() : subjectGapPenalties.getOpenPenalty());
               }

               subjectGapOpen = true;
            }
         }
         else
         {
            // Query gap
            if (subjectGapScoreIndices.contains(i))
            {
               score = scoreResiduePair(substitutionMatrix, queryResidue, subjectResidue, i);
            }
            else if (! queryGapExclusionIndices.contains(i))
            {
               if (substitutionMatrix != null)
               {
                  score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() : queryGapPenalties.getOpenPenalty());
               }
               else
               {
                  // With a PSSM the gap penalties are multiplied by the PSSM's position-specific gap scores.
                  score = (queryGapOpen ? queryGapPenalties.getExtensionPenalty() * getAlignedSubject().getPSSM().getGapExtScore(i + 1)
                                        : queryGapPenalties.getOpenPenalty() * getAlignedSubject().getPSSM().getGapOpenScore(i + 1));
               }
            }

            queryGapOpen = true;
         }

         buffer.append(String.format("%c %5.1f %c", queryResidue, score, subjectResidue));
         buffer.appendln();

         totalScore += score;
      }

      buffer.appendln(String.format("Total:%6.1f", totalScore));
      buffer.appendln();

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Builds a text rendering of the alignment: a header line (query / subject ranges when the
    settings are null or the respective alignment type is LOCAL, followed by the score), the
    aligned query, a match line with '|' under case-insensitively identical non-gap columns,
    and the aligned subject.
    @return the text rendering of the alignment
    */
   @Override
   public String toString()
   {
      StringBuilderPlus buffer = new StringBuilderPlus().setDelimiter(", ");

      if (null == getSettings()
          || getSettings().getQueryAlignmentType().equals(PairwiseAlignmentType.LOCAL))
      {
         buffer.append(String.format("Query Range: %s", getAlignedQuery().getSeqLocation().toString()));
      }

      if (null == getSettings()
          || getSettings().getSubjectAlignmentType().equals(PairwiseAlignmentType.LOCAL))
      {
         buffer.delimitedAppend(String.format("Subject Range: %s", getAlignedSubject().getSeqLocation().toString()));
      }

      buffer.delimitedAppend(String.format("Score: %.1f\n", getScore()));

      // Show the aligned sequences with the query on top
      buffer.appendln(getAlignedQuery().getAlignedSeq());

      // Generate the comparison string. Characters are lower-cased one at a time with
      // Character.toLowerCase(), which does not depend on the JVM's default locale
      // (String.toLowerCase() would, for example, map 'I' to a dotless i under a Turkish locale).
      String querySeq   = getAlignedQuery().getAlignedSeq();
      String subjectSeq = getAlignedSubject().getAlignedSeq();
      for (int i = 0; i < length(); i++)
      {
         char queryChar   = Character.toLowerCase(querySeq.charAt(i));
         char subjectChar = Character.toLowerCase(subjectSeq.charAt(i));

         buffer.append(queryChar != '-' && queryChar == subjectChar ? '|' : ' ');
      }
      buffer.appendln();

      // Now show the aligned subject on the bottom
      buffer.appendln(getAlignedSubject().getAlignedSeq());

      return buffer.toString();
   }

   //--------------------------------------------------------------------------
   /**
    Builds the XML representation of this alignment: an alignment tag carrying the score
    attribute (when a score is set) and the aligned query and aligned subject as subtags.
    @return the XML representation
    */
   public XMLNode toXMLNode()
   {
      XMLTag tag = new XMLTag(HfgBioXML.ALIGNMENT);

      if (getScore() != null)
      {
         tag.setAttribute(HfgBioXML.SCORE_ATT, getScore());
      }

      tag.addSubtag(getAlignedQuery().toXMLNode());
      tag.addSubtag(getAlignedSubject().toXMLNode());

      return tag;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the portion of this alignment from the specified column through the end.
    @param inZeroIndexedStart the zero-indexed first alignment column to include
    @return the subalignment
    */
   public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart)
   {
      return getSubalignment(inZeroIndexedStart, length());
   }

   //---------------------------------------------------------------------------
   /**
    Returns the portion of this alignment between the specified columns. The bounds are passed
    straight to substring(), so inLimit is exclusive. The returned alignment is built from the
    aligned query and subject only; settings and scores are not copied onto it.
    @param inZeroIndexedStart the zero-indexed first alignment column to include
    @param inLimit the zero-indexed alignment column at which to stop (exclusive)
    @return the subalignment
    */
   public PairwiseSeqAlignment getSubalignment(int inZeroIndexedStart, int inLimit)
   {
      AlignedQuery subalignedQuery = new AlignedQuery(mAlignedQuery.getSeq(),
                                                      mAlignedQuery.getAlignedSeq().substring(inZeroIndexedStart, inLimit),
                                                      mAlignedQuery.getLinearPosition(inZeroIndexedStart));

      AlignedSubject subalignedSubject = null;
      if (mAlignedSubject != null)
      {
         subalignedSubject = new AlignedSubject(mAlignedSubject.getSeq(),
                                                mAlignedSubject.getAlignedSeq().substring(inZeroIndexedStart, inLimit),
                                                mAlignedSubject.getLinearPosition(inZeroIndexedStart));
      }

      return new PairwiseSeqAlignment(subalignedQuery, subalignedSubject);
   }

   //---------------------------------------------------------------------------
   /**
    In the case of a local query vs. a global subject, this method can be used to extend the local query in both directions
    to match where subject residues are present.
    Does nothing if the aligned query has no underlying sequence.
    */
   public void extendAlignedQuery()
   {
      BioSequence query = getAlignedQuery().getSeq();
      if (query != null)
      {
         int queryLength = query.length(); // Pulled out for performance

         int queryMatchStart = getAlignedQuery().getSeqLocation().getStart();

         // Don't leave terminal gaps on the query when there are residues to give and the germline has a residue
         if (getAlignedQuery().getNTerminalGapLength() > 0
             && queryMatchStart > 1)
         {
            StringBuilder queryBuffer    = new StringBuilder(getAlignedQuery().getAlignedSeq());
            StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq());

            int queryGapSize    = getAlignedQuery().getNTerminalGapLength();
            int germlineGapSize = getAlignedSubject().getNTerminalGapLength();

            int adjustedQueryMatchStart = queryMatchStart;

            if (queryGapSize > germlineGapSize)
            {
               // Walk leftwards from the column just before the match, filling query gaps
               // that sit opposite a germline residue.
               for (int index = queryGapSize - 1; index >= 0; index--)
               {
                  if (germlineBuffer.charAt(index) != '-'
                      && '-' == queryBuffer.charAt(index))
                  {
                     // Positions are 1-based (the C-terminal branch below reads residueAt(matchEnd + 1)),
                     // so the residue immediately preceding the current match start is at start - 1.
                     queryBuffer.setCharAt(index, query.residueAt(adjustedQueryMatchStart - 1));
                     adjustedQueryMatchStart--;

                     if (1 == adjustedQueryMatchStart)
                     {
                        // The query has no more residues to give.
                        break;
                     }
                  }
               }
            }

            setAlignedQuery(new AlignedQuery(query, queryBuffer, adjustedQueryMatchStart));
         }

         if (getAlignedQuery().getCTerminalGapLength() > 0)
         {
            StringBuilder queryBuffer = new StringBuilder(getAlignedQuery().getAlignedSeq());
            int queryBufferLength = queryBuffer.length(); // Pulled out for performance
            StringBuilder germlineBuffer = new StringBuilder(getAlignedSubject().getAlignedSeq());

            int queryGapSize    = getAlignedQuery().getCTerminalGapLength();
            int germlineGapSize = getAlignedSubject().getCTerminalGapLength();

            int matchEnd = getAlignedQuery().getSeqLocation().getEnd();

            if (queryGapSize > germlineGapSize
                && matchEnd < queryLength)
            {
               // Walk rightwards from the first C-terminal gap column, filling query gaps
               // that sit opposite a germline residue.
               for (int index = queryBufferLength - queryGapSize; index < queryBufferLength; index++)
               {
                  if (germlineBuffer.charAt(index) != '-'
                      && '-' == queryBuffer.charAt(index))
                  {
                     queryBuffer.setCharAt(index, query.residueAt(matchEnd + 1));
                     matchEnd++;

                     if (matchEnd == queryLength)
                     {
                        // The query has no more residues to give.
                        break;
                     }
                  }
               }
            }

            setAlignedQuery(new AlignedQuery(query, queryBuffer, getAlignedQuery().getSeqLocation().getStart()));
         }
      }
   }

   //---------------------------------------------------------------------------
   /**
    Swaps the query and the subject.
    */
   public void invert()
   {
      // Both replacements are built before either setter runs so that each is derived from the pre-swap state.
      AlignedQuery newQuery = new AlignedQuery(getAlignedSubject().getSeq(), getAlignedSubject().getAlignedSeq(), getAlignedSubject().getSeqLocation().getStart());
      AlignedSubject newSubject = new AlignedSubject(getAlignedQuery().getSeq(), getAlignedQuery().getAlignedSeq(), getAlignedQuery().getSeqLocation().getStart());

      setAlignedQuery(newQuery);
      setAlignedSubject(newSubject);
   }

   //###########################################################################
   // PRIVATE METHODS
   //###########################################################################

   //---------------------------------------------------------------------------
   // Triggers calculation of the scoring object's identity statistics if its
   // comparison length is still zero.
   private void ensureScoringIdentityStatsAreCalculated()
   {
      if (0 == mScoring.getComparisonLength())
      {
         mScoring.calculate(this);
      }
   }

   //---------------------------------------------------------------------------
   // Collects the alignment column indices of the leading and/or trailing run of '-'
   // characters in the specified aligned sequence. Used by showScoring() to find
   // terminal gaps that should not be penalized.
   private Set<Integer> findUnpenalizedTerminalGapIndices(String inAlignedSeq, boolean inExcludeLeft, boolean inExcludeRight)
   {
      Set<Integer> indices = new HashSet<>();

      int alignmentLength = length();

      if (inExcludeLeft)
      {
         for (int i = 0; i < alignmentLength; i++)
         {
            if (inAlignedSeq.charAt(i) != '-')
            {
               break;
            }

            indices.add(i);
         }
      }

      if (inExcludeRight)
      {
         // Scan all the way down to index 0 so that a sequence consisting solely of
         // gap characters is covered in full, mirroring the left-hand scan.
         for (int i = alignmentLength - 1; i >= 0; i--)
         {
            if (inAlignedSeq.charAt(i) != '-')
            {
               break;
            }

            indices.add(i);
         }
      }

      return indices;
   }

   //---------------------------------------------------------------------------
   // Scores one alignment column for showScoring(): via the substitution matrix when one
   // is given (honoring the case-insensitivity setting), otherwise via the subject's PSSM
   // at the 1-based column position.
   private float scoreResiduePair(SubstitutionMatrix inSubstitutionMatrix, char inQueryResidue, char inSubjectResidue, int inZeroIndexedColumn)
   {
      if (inSubstitutionMatrix != null)
      {
         return (getSettings().getScoreCaseInsensitive() ?
                 inSubstitutionMatrix.scoreCaseInsensitive(inQueryResidue, inSubjectResidue) :
                 inSubstitutionMatrix.score(inQueryResidue, inSubjectResidue));
      }

      return getAlignedSubject().getPSSM().score(inZeroIndexedColumn + 1, inQueryResidue);
   }
}