net.maizegenetics.analysis.gbs.neobio.CrochemoreLandauZivUkelsonGlobalAlignment Maven / Gradle / Ivy
/*
* CrochemoreLandauZivUkelsonGlobalAlignment.java
*
* Copyright 2003 Sergio Anibal de Carvalho Junior
*
* This file is part of NeoBio.
*
* NeoBio is free software; you can redistribute it and/or modify it under the terms of
* the GNU General Public License as published by the Free Software Foundation; either
* version 2 of the License, or (at your option) any later version.
*
* NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
* without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
* PURPOSE. See the GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License along with NeoBio;
* if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
* Boston, MA 02111-1307, USA.
*
* Proper attribution of the author as the source of the software would be appreciated.
*
* Sergio Anibal de Carvalho Junior mailto:[email protected]
* Department of Computer Science http://www.dcs.kcl.ac.uk
* King's College London, UK http://www.kcl.ac.uk
*
* Please visit http://neobio.sourceforge.net
*
* This project was supervised by Professor Maxime Crochemore.
*
*/
package net.maizegenetics.analysis.gbs.neobio;
/**
* This class implements the global pairwise sequence alignment algorithm (with
* linear gap penalty function) due to Maxime Crochemore, Gad Landau and Michal
* Ziv-Ukelson (2002).
*
* This implementation derives from the paper of M.Crochemore, G.Landau and
* M.Ziv-Ukelson, A Sub-quadratic Sequence Alignment Algorithm for Unrestricted Scoring
* Matrices (available here as
* PDF or
* Postscript).
*
* For a general description of the algorithm, please refer to the specification of the
* abstract {@linkplain CrochemoreLandauZivUkelson} superclass.
*
* This class consist mainly of methods that:
*
*
* create and compute all information of a block (see {@link #createBlock createBlock}
* and its variants);
* compute the output border of a block (see {@link #computeOutputBorder
* computeOutputBorder};
* locate the score of a high scoring global alignment in the block table (see {@link
* #locateScore locateScore};
* build an optimal global alignment from the information stored in the block table
* (see {@link #buildOptimalAlignment buildOptimalAlignment}.
*
*
* @see CrochemoreLandauZivUkelson
* @see CrochemoreLandauZivUkelsonLocalAlignment
* @author Sergio A. de Carvalho Jr.
*/
public class CrochemoreLandauZivUkelsonGlobalAlignment extends CrochemoreLandauZivUkelson
{
/**
* Creates and computes all information of an alignment block. Its main job is to
* compute the DIST column for the block. It then request the
* computeOutputBorder
method to compute the block's output border.
*
* @param factor1 factor of the first sequence
* @param factor2 factor of the second sequence
* @param row row index of the block in the block table
* @param col column index of the block in the block table
* @return the computed block
* @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
* with the sequences being aligned
*/
protected AlignmentBlock createBlock (Factor factor1, Factor factor2, int row,
int col) throws IncompatibleScoringSchemeException
{
AlignmentBlock block, left_prefix, diag_prefix, top_prefix;
int size, lr, lc, score_ins, score_sub, score_del, ins, del, sub, max;
lr = factor1.length();
lc = factor2.length();
size = lr + lc + 1;
block = new AlignmentBlock (factor1, factor2, size);
// set up pointers to prefixes
left_prefix = getLeftPrefix (block);
diag_prefix = getDiagonalPrefix (block);
top_prefix = getTopPrefix (block);
// compute scores
score_ins = scoreInsertion (factor2.getNewChar());
score_sub = scoreSubstitution (factor1.getNewChar(), factor2.getNewChar());
score_del = scoreDeletion (factor1.getNewChar());
// compute dist column and direction
for (int i = 0; i < size; i++)
{
// compute optimal path to
// input border's ith position
ins = sub = del = Integer.MIN_VALUE;
if (i < size - 1)
ins = left_prefix.dist_column[i] + score_ins;
if ((i > 0) && (i < size - 1))
sub = diag_prefix.dist_column[i - 1] + score_sub;
if (i > 0)
del = top_prefix.dist_column[i - 1] + score_del;
block.dist_column[i] = max = max (ins, sub, del);
// record the direction to of the optimal
// path to input border's ith position
if (max == ins)
block.direction[i] = LEFT_DIRECTION;
else if (max == sub)
block.direction[i] = DIAGONAL_DIRECTION;
else
block.direction[i] = TOP_DIRECTION;
}
computeOutputBorder (block, row, col, size, lc, lr);
return block;
}
/**
* Creates the root block. This is a special case of the createBlock
* method. No information is actually computed.
*
* @param factor1 factor of the first sequence
* @param factor2 factor of the second sequence
* @return the root block
*/
protected AlignmentBlock createRootBlock (Factor factor1, Factor factor2)
{
return new AlignmentBlock (factor1, factor2);
}
/**
* Creates and computes all information of an alignment block of the first row of the
* block table. This is a special case of the createBlock
method.
*
* @param factor1 factor of the first sequence
* @param factor2 factor of the second sequence
* @param col column index of the block in the block table
* @return the computed block
* @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
* with the sequences being aligned
* @see #createBlock createBlock
*/
protected AlignmentBlock createFirstRowBlock (Factor factor1, Factor factor2, int col)
throws IncompatibleScoringSchemeException
{
AlignmentBlock block, left_prefix;
int size, lr, lc, score_ins;
lr = 0; // factor1.length();
lc = factor2.length();
size = lr + lc + 1;
block = new AlignmentBlock (factor1, factor2, size);
// set up pointer to left prefix
left_prefix = getLeftPrefix (block);
// compute insertion's score
score_ins = scoreInsertion (factor2.getNewChar());
// compute dist column and direction
for (int i = 0; i < lc; i++)
{
block.dist_column[i] = left_prefix.dist_column[i] + score_ins;
block.direction[i] = LEFT_DIRECTION;
}
// last position
block.dist_column[lc] = 0;
block.direction[lc] = STOP_DIRECTION;
computeOutputBorder (block, 0, col, size, lc, lr);
return block;
}
/**
* Creates and computes all information of an alignment block of the first column of
* the block table. This is a special case of the createBlock
method.
*
* @param factor1 factor of the first sequence
* @param factor2 factor of the second sequence
* @param row row index of the block in the block table
* @return the computed block
* @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible
* with the sequences being aligned
* @see #createBlock createBlock
*/
protected AlignmentBlock createFirstColumnBlock (Factor factor1, Factor factor2,
int row) throws IncompatibleScoringSchemeException
{
AlignmentBlock block, top_prefix;
int size, lr, lc, score_del;
lr = factor1.length();
lc = 0; // factor2.length();
size = lr + lc + 1;
block = new AlignmentBlock (factor1, factor2, size);
// set up pointer to top prefix
top_prefix = getTopPrefix (block);
// compute deletion's score
score_del = scoreDeletion (factor1.getNewChar());
// first position
block.dist_column[0] = 0;
block.direction[0] = STOP_DIRECTION;
// compute dist column and direction
for (int i = 1; i < size; i++)
{
block.dist_column[i] = top_prefix.dist_column[i - 1] + score_del;
block.direction[i] = TOP_DIRECTION;
}
computeOutputBorder (block, row, 0, size, lc, lr);
return block;
}
/**
* Computes the output border of a block. This is performed in five steps:
*
*
* Retrieve the block's input border;
* Retrieve the block's complete DIST matrix;
* Create an interface to the {@linkplain OutMatrix OUT} matrix from the input
* border and DIST matrix;
* Use {@linkplain Smawk SMAWK} to compute all column maxima of the OUT matrix
* (SMAWK finds the index of the row that contains the maximum value of a column);
* Assemble the output border by extracting the maximum values of each column of
* the OUT matrix using the information obtained in the previous step.
*
*
* @param block the block for which the output border is to be computed
* @param row row index of the block in the block table
* @param col column index of the block in the block table
* @param dim dimension of the output border
* @param lc number of columns of the block
* @param lr number of row of the block
*/
protected void computeOutputBorder (AlignmentBlock block, int row, int col, int dim,
int lc, int lr)
{
int[] input = assembleInputBorder (dim, row, col, lr);
int[][] dist = assembleDistMatrix (block, dim, row, col, lc);
// update the interface to the OUT matrix
out_matrix.setData (dist, input, dim, lc);
// compute source_path using Smawk
smawk.computeColumnMaxima (out_matrix, block.source_path);
// update output border
for (int i = 0; i < dim; i++)
block.output_border[i] = out_matrix.valueAt(block.source_path[i], i);
}
/**
* Builds an optimal global alignment between the loaded sequences after the block
* table has been computed. This method traces a path back in the block table, from
* the last block to the first.
*
* @return an optimal global alignment
* @throws IncompatibleScoringSchemeException If the scoring scheme is not compatible
* with the loaded sequences.
* @see CrochemoreLandauZivUkelson#traverseBlock
*/
protected PairwiseAlignment buildOptimalAlignment ()
throws IncompatibleScoringSchemeException
{
AlignmentBlock block, ancestor;
StringBuffer gapped_seq1, tag_line, gapped_seq2;
int source, dest, ancestor_source;
int row, col;
gapped_seq1 = new StringBuffer();
tag_line = new StringBuffer();
gapped_seq2 = new StringBuffer();
// start at the last row, last column of block table
row = num_rows - 1; col = num_cols - 1;
block = block_table[row][col];
dest = block.factor2.length();
while (row > 0 || col > 0)
{
block = block_table[row][col];
source = block.source_path[dest];
ancestor = block.ancestor[dest];
ancestor_source = source;
if (dest > block.factor2.length())
ancestor_source -= (block.factor1.length() - ancestor.factor1.length());
traverseBlock (ancestor, ancestor_source, gapped_seq1, tag_line, gapped_seq2);
if (row == 0)
{
col = col - 1;
dest = block_table[row][col].factor2.length();
}
else if (col == 0)
{
row = row - 1;
dest = 0;
}
else
{
if (source < block.factor1.length())
{
col = col - 1;
dest = block_table[row][col].factor2.length() + source;
}
else if (source == block.factor1.length())
{
row = row - 1; col = col - 1;
dest = block_table[row][col].factor2.length();
}
else
{
row = row - 1;
dest = source - block.factor1.length();
}
}
}
return new PairwiseAlignment (gapped_seq1.toString(), tag_line.toString(),
gapped_seq2.toString(), locateScore());
}
/**
* Locate the score of the highest scoring global alignment in the block table. This
* value is found in the output border of the last block (last row, last column).
*
* @return the score of the highest scoring global alignment
*/
protected int locateScore ()
{
AlignmentBlock last_block = block_table[num_rows - 1][num_cols - 1];
return last_block.output_border[last_block.factor2.length()];
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy