All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.gbs.neobio.CrochemoreLandauZivUkelsonGlobalAlignment Maven / Gradle / Ivy

/*
 * CrochemoreLandauZivUkelsonGlobalAlignment.java
 *
 * Copyright 2003 Sergio Anibal de Carvalho Junior
 *
 * This file is part of NeoBio.
 *
 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with NeoBio;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Proper attribution of the author as the source of the software would be appreciated.
 *
 * Sergio Anibal de Carvalho Junior		mailto:[email protected]
 * Department of Computer Science		http://www.dcs.kcl.ac.uk
 * King's College London, UK			http://www.kcl.ac.uk
 *
 * Please visit http://neobio.sourceforge.net
 *
 * This project was supervised by Professor Maxime Crochemore.
 *
 */

package net.maizegenetics.analysis.gbs.neobio;

/**
 * This class implements the global pairwise sequence alignment algorithm (with
 * linear gap penalty function) due to Maxime Crochemore, Gad Landau and Michal
 * Ziv-Ukelson (2002).
 *
 * 

This implementation derives from the paper of M.Crochemore, G.Landau and * M.Ziv-Ukelson, A Sub-quadratic Sequence Alignment Algorithm for Unrestricted Scoring * Matrices (available here as * PDF or * Postscript).

* *

For a general description of the algorithm, please refer to the specification of the * abstract {@linkplain CrochemoreLandauZivUkelson} superclass.

* *

This class consist mainly of methods that:

* * *
  • create and compute all information of a block (see {@link #createBlock createBlock} * and its variants); *
  • compute the output border of a block (see {@link #computeOutputBorder * computeOutputBorder}; *
  • locate the score of a high scoring global alignment in the block table (see {@link * #locateScore locateScore}; *
  • build an optimal global alignment from the information stored in the block table * (see {@link #buildOptimalAlignment buildOptimalAlignment}. * * * @see CrochemoreLandauZivUkelson * @see CrochemoreLandauZivUkelsonLocalAlignment * @author Sergio A. de Carvalho Jr. */ public class CrochemoreLandauZivUkelsonGlobalAlignment extends CrochemoreLandauZivUkelson { /** * Creates and computes all information of an alignment block. Its main job is to * compute the DIST column for the block. It then request the * computeOutputBorder method to compute the block's output border. * * @param factor1 factor of the first sequence * @param factor2 factor of the second sequence * @param row row index of the block in the block table * @param col column index of the block in the block table * @return the computed block * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned */ protected AlignmentBlock createBlock (Factor factor1, Factor factor2, int row, int col) throws IncompatibleScoringSchemeException { AlignmentBlock block, left_prefix, diag_prefix, top_prefix; int size, lr, lc, score_ins, score_sub, score_del, ins, del, sub, max; lr = factor1.length(); lc = factor2.length(); size = lr + lc + 1; block = new AlignmentBlock (factor1, factor2, size); // set up pointers to prefixes left_prefix = getLeftPrefix (block); diag_prefix = getDiagonalPrefix (block); top_prefix = getTopPrefix (block); // compute scores score_ins = scoreInsertion (factor2.getNewChar()); score_sub = scoreSubstitution (factor1.getNewChar(), factor2.getNewChar()); score_del = scoreDeletion (factor1.getNewChar()); // compute dist column and direction for (int i = 0; i < size; i++) { // compute optimal path to // input border's ith position ins = sub = del = Integer.MIN_VALUE; if (i < size - 1) ins = left_prefix.dist_column[i] + score_ins; if ((i > 0) && (i < size - 1)) sub = diag_prefix.dist_column[i - 1] + score_sub; if (i > 0) del = top_prefix.dist_column[i - 1] + score_del; block.dist_column[i] = max = max (ins, sub, del); // record the direction to of the optimal // path to input border's ith position if (max == ins) block.direction[i] = LEFT_DIRECTION; else if (max == sub) block.direction[i] = DIAGONAL_DIRECTION; else block.direction[i] = TOP_DIRECTION; } computeOutputBorder (block, row, col, size, lc, lr); return block; } /** * Creates the root block. This is a special case of the createBlock * method. No information is actually computed. * * @param factor1 factor of the first sequence * @param factor2 factor of the second sequence * @return the root block */ protected AlignmentBlock createRootBlock (Factor factor1, Factor factor2) { return new AlignmentBlock (factor1, factor2); } /** * Creates and computes all information of an alignment block of the first row of the * block table. This is a special case of the createBlock method. * * @param factor1 factor of the first sequence * @param factor2 factor of the second sequence * @param col column index of the block in the block table * @return the computed block * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned * @see #createBlock createBlock */ protected AlignmentBlock createFirstRowBlock (Factor factor1, Factor factor2, int col) throws IncompatibleScoringSchemeException { AlignmentBlock block, left_prefix; int size, lr, lc, score_ins; lr = 0; // factor1.length(); lc = factor2.length(); size = lr + lc + 1; block = new AlignmentBlock (factor1, factor2, size); // set up pointer to left prefix left_prefix = getLeftPrefix (block); // compute insertion's score score_ins = scoreInsertion (factor2.getNewChar()); // compute dist column and direction for (int i = 0; i < lc; i++) { block.dist_column[i] = left_prefix.dist_column[i] + score_ins; block.direction[i] = LEFT_DIRECTION; } // last position block.dist_column[lc] = 0; block.direction[lc] = STOP_DIRECTION; computeOutputBorder (block, 0, col, size, lc, lr); return block; } /** * Creates and computes all information of an alignment block of the first column of * the block table. This is a special case of the createBlock method. * * @param factor1 factor of the first sequence * @param factor2 factor of the second sequence * @param row row index of the block in the block table * @return the computed block * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned * @see #createBlock createBlock */ protected AlignmentBlock createFirstColumnBlock (Factor factor1, Factor factor2, int row) throws IncompatibleScoringSchemeException { AlignmentBlock block, top_prefix; int size, lr, lc, score_del; lr = factor1.length(); lc = 0; // factor2.length(); size = lr + lc + 1; block = new AlignmentBlock (factor1, factor2, size); // set up pointer to top prefix top_prefix = getTopPrefix (block); // compute deletion's score score_del = scoreDeletion (factor1.getNewChar()); // first position block.dist_column[0] = 0; block.direction[0] = STOP_DIRECTION; // compute dist column and direction for (int i = 1; i < size; i++) { block.dist_column[i] = top_prefix.dist_column[i - 1] + score_del; block.direction[i] = TOP_DIRECTION; } computeOutputBorder (block, row, 0, size, lc, lr); return block; } /** * Computes the output border of a block. This is performed in five steps: * * *
  • Retrieve the block's input border; *
  • Retrieve the block's complete DIST matrix; *
  • Create an interface to the {@linkplain OutMatrix OUT} matrix from the input * border and DIST matrix; *
  • Use {@linkplain Smawk SMAWK} to compute all column maxima of the OUT matrix * (SMAWK finds the index of the row that contains the maximum value of a column); *
  • Assemble the output border by extracting the maximum values of each column of * the OUT matrix using the information obtained in the previous step. * * * @param block the block for which the output border is to be computed * @param row row index of the block in the block table * @param col column index of the block in the block table * @param dim dimension of the output border * @param lc number of columns of the block * @param lr number of row of the block */ protected void computeOutputBorder (AlignmentBlock block, int row, int col, int dim, int lc, int lr) { int[] input = assembleInputBorder (dim, row, col, lr); int[][] dist = assembleDistMatrix (block, dim, row, col, lc); // update the interface to the OUT matrix out_matrix.setData (dist, input, dim, lc); // compute source_path using Smawk smawk.computeColumnMaxima (out_matrix, block.source_path); // update output border for (int i = 0; i < dim; i++) block.output_border[i] = out_matrix.valueAt(block.source_path[i], i); } /** * Builds an optimal global alignment between the loaded sequences after the block * table has been computed. This method traces a path back in the block table, from * the last block to the first. * * @return an optimal global alignment * @throws IncompatibleScoringSchemeException If the scoring scheme is not compatible * with the loaded sequences. * @see CrochemoreLandauZivUkelson#traverseBlock */ protected PairwiseAlignment buildOptimalAlignment () throws IncompatibleScoringSchemeException { AlignmentBlock block, ancestor; StringBuffer gapped_seq1, tag_line, gapped_seq2; int source, dest, ancestor_source; int row, col; gapped_seq1 = new StringBuffer(); tag_line = new StringBuffer(); gapped_seq2 = new StringBuffer(); // start at the last row, last column of block table row = num_rows - 1; col = num_cols - 1; block = block_table[row][col]; dest = block.factor2.length(); while (row > 0 || col > 0) { block = block_table[row][col]; source = block.source_path[dest]; ancestor = block.ancestor[dest]; ancestor_source = source; if (dest > block.factor2.length()) ancestor_source -= (block.factor1.length() - ancestor.factor1.length()); traverseBlock (ancestor, ancestor_source, gapped_seq1, tag_line, gapped_seq2); if (row == 0) { col = col - 1; dest = block_table[row][col].factor2.length(); } else if (col == 0) { row = row - 1; dest = 0; } else { if (source < block.factor1.length()) { col = col - 1; dest = block_table[row][col].factor2.length() + source; } else if (source == block.factor1.length()) { row = row - 1; col = col - 1; dest = block_table[row][col].factor2.length(); } else { row = row - 1; dest = source - block.factor1.length(); } } } return new PairwiseAlignment (gapped_seq1.toString(), tag_line.toString(), gapped_seq2.toString(), locateScore()); } /** * Locate the score of the highest scoring global alignment in the block table. This * value is found in the output border of the last block (last row, last column). * * @return the score of the highest scoring global alignment */ protected int locateScore () { AlignmentBlock last_block = block_table[num_rows - 1][num_cols - 1]; return last_block.output_border[last_block.factor2.length()]; } }




  • © 2015 - 2025 Weber Informatics LLC | Privacy Policy