// net.maizegenetics.analysis.gbs.neobio.PairwiseAlignment Maven / Gradle / Ivy
// Show all versions of tassel Show documentation
/*
* PairwiseAlignment.java
*
* Copyright 2003 Sergio Anibal de Carvalho Junior
*
* This file is part of NeoBio.
*
* NeoBio is free software; you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with NeoBio;
* if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Proper attribution of the author as the source of the software would be appreciated.
*
* Sergio Anibal de Carvalho Junior mailto:[email protected]
* Department of Computer Science http://www.dcs.kcl.ac.uk
* King's College London, UK http://www.kcl.ac.uk
*
* Please visit http://neobio.sourceforge.net
*
* This project was supervised by Professor Maxime Crochemore.
*
*/
package net.maizegenetics.analysis.gbs.neobio;
import java.io.Serializable;
/**
* This class is the product of a pairwise alignment, generated by one subclasses of
* {@linkplain PairwiseAlignmentAlgorithm}. It contains the two sequences strings with
* gaps, a score tag line, and a score value. It is typically displayed in three rows as
* in the following example of an alignment between parts of two protein sequences:
*
*
* MDEIHQLEDMFTVDSETLRKVVKHFILPHD-----MRTTKHQEELWSFIAELDSLKDFMVEQE // sequence 1
* M +I E +FTV +ETL+ V KHFILP D MRTT++ +ELW FIA DSLK F+ EQ // score tag line
* MQQIENFEKIFTVPTETLQAVTKHFILP-DATETLMRTTQNPDELWEFIA--DSLKAFIDEQF // sequence 2
*
*
* Each column has one character of each sequence and a score tag. The same character
* is displayed in all three rows when a column has an exact match (character of sequences
* 1 and 2 are equal). When a mismatch occurs (substitution of different characters), the
* score tag is left blank. A '+' in the score line signals a partial match (a
* substitution of similar characters). The difference between a partial match and a
* mismatch is that the score of a partial match is positive whereas the score of a
* mismatch is zero or negative (each case is determined by the scoring scheme).
*
* Gaps are usually represented by dashes ('-') and have a blank score tag. Insertions
* have dashes in sequence 1 and the inserted character in sequence 2. Deletions, by
* contrast, have the deleted character in sequence 1 and dashes in sequence 2.
*
* Each column carries a score value for the corresponding operation (as defined by the
* scoring scheme). The overall score of a pairwise alignment is the sum of all columns
* scores values.
*
* When the scoring schemes does not support partial matches, a match is usually
* signaled by a '|' character.
*
*
Note that these special characters are defined by the
* PairwiseAlignmentAlgorithm
class. Consult that class specification for the
* actual configuration. For instance, an alignment between two DNA fragmens may look like
* this:
*
*
* A--C--TAAAAAGCA--TT-AATAATAAA-A
* | | |||| ||| || ||||| ||| |
* AAGCCCTAAACCGCAAGTTTAATAA-AAATA
*
*
* This class is serializable, so it can be saved to a file (or any other output). It
* overrides the default equals
method of the Object
class to
* allow a proper comparsion of alignments produced by different algorithms or even
* different runs of the same algorithm. However, it does not override the
* hashCode
method as it is generally the case to maintain the contract for
* the hashCode
method (which states that equal objects must have equal hash
* codes). Hence, as it is, its use in a hash table is not supported.
*
* @author Sergio A. de Carvalho Jr.
* @see PairwiseAlignmentAlgorithm
* @see PairwiseAlignmentAlgorithm#MATCH_TAG
* @see PairwiseAlignmentAlgorithm#APPROXIMATE_MATCH_TAG
* @see PairwiseAlignmentAlgorithm#MISMATCH_TAG
* @see PairwiseAlignmentAlgorithm#GAP_TAG
* @see PairwiseAlignmentAlgorithm#GAP_CHARACTER
* @see ScoringScheme
* @see ScoringScheme#isPartialMatchSupported
*/
public class PairwiseAlignment implements Serializable
{
/**
* First gapped sequence.
*
* @serial
*/
protected String gapped_seq1;
/**
* The score tag line.
*
* @serial
*/
protected String score_tag_line;
/**
* Second gapped sequence.
*
* @serial
*/
protected String gapped_seq2;
/**
* The overall score value for this alignment.
*
* @serial
*/
protected int score;
/**
* Basically, the longer sequence is stored in rows,
* the shorter on is stored in the columns.
*
* The beginning ROW of matrix that was aligned.
* If seq1 is the same length or shorter then seq2,
* this correlates to the position on seq2 where alignment started.
*
* If seq2 is shorter than seq1, this correlates to the
* position on seq1 where alignment started.
*/
protected int rowStart;
/**
* The beginning COLUMN of the matrix that was aligned.
* If seq1 is the same length or shorter then seq1,
* this correlates to the position on seq1 where alignment started.
*
* If seq2 is shorter than seq1, this correlates to the
* position on seq2 where alignment started.
*/
protected int colStart;
/**
* Creates a PairwiseAlignment
instance with the specified gapped
* sequences, score tag line and score value.
*
* @param gapped_seq1 the first gapped sequence
* @param score_tag_line the score tag line
* @param gapped_seq2 the second gapped sequence
* @param score the overall score value for this alignment
*/
public PairwiseAlignment (String gapped_seq1, String score_tag_line,
String gapped_seq2, int score)
{
this.gapped_seq1 = gapped_seq1;
this.score_tag_line = score_tag_line;
this.gapped_seq2 = gapped_seq2;
this.score = score;
}
/**
* Creates a PairwiseAlignment
instance with the specified gapped
* sequences, score tag line and score value.
*
* @param gapped_seq1 the first gapped sequence
* @param score_tag_line the score tag line
* @param gapped_seq2 the second gapped sequence
* @param score the overall score value for this alignment
*/
public PairwiseAlignment (String gapped_seq1, String score_tag_line,
String gapped_seq2, int score, int row, int col)
{
this.gapped_seq1 = gapped_seq1;
this.score_tag_line = score_tag_line;
this.gapped_seq2 = gapped_seq2;
this.score = score;
this.rowStart = row;
this.colStart = col;
}
/**
* Returns the first gapped sequence.
*
* @return first gapped sequence
*/
public String getGappedSequence1 ()
{
return gapped_seq1;
}
/**
* Returns the score tag line.
*
* @return score tag line
*/
public String getScoreTagLine ()
{
return score_tag_line;
}
/**
* Returns the second gapped sequence.
*
* @return second gapped sequence
*/
public String getGappedSequence2 ()
{
return gapped_seq2;
}
/**
* Returns the score for this alignment.
*
* @return overall score for this alignment
*/
public int getScore ()
{
return score;
}
/**
* Returns the matrix row position where the paried alignement starts
*
* @return seq1 start for this alignment
*/
public int getRowStart ()
{
return rowStart;
}
/**
* Returns the matrix column position where the paired alignment starts
*
* @return seq2 start for this alignment
*/
public int getColStart ()
{
return colStart;
}
/**
* Returns a four-line String representation of this alignment in the following
* order: first gapped sequence, score tag line, second gapped sequence and the
* score value.
*
* @return a String representation of this scoring matrix
*/
public String toString ()
{
return gapped_seq1 + "\n" + score_tag_line + "\n"
+ gapped_seq2 + "\nScore: " + score;
}
/**
* Compares this object to the specified object. The result is true
if
* and only if the argument is not null
and is an
* PairwiseAlignment
object that contains the same values as this object,
* i.e. the same gapped sequences, the same score tag line and the same score.
*
* @param obj the object to compare with
* @return true
if objects are the same, false
otherwise
*/
public boolean equals (Object obj)
{
if (!(obj instanceof PairwiseAlignment))
return false;
PairwiseAlignment another_pa = (PairwiseAlignment) obj;
if (this.score != another_pa.score)
return false;
if (!this.gapped_seq1.equals(another_pa.gapped_seq1))
return false;
if (!this.score_tag_line.equals(another_pa.score_tag_line))
return false;
if (!this.gapped_seq2.equals(another_pa.gapped_seq2))
return false;
return true;
}
}