All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.maizegenetics.analysis.gbs.neobio.PairwiseAlignmentAlgorithm Maven / Gradle / Ivy

Go to download

TASSEL is a software package to evaluate traits associations, evolutionary patterns, and linkage disequilibrium.

There is a newer version: 5.2.94
Show newest version
/*
 * PairwiseAlignmentAlgorithm.java
 *
 * Copyright 2003 Sergio Anibal de Carvalho Junior
 *
 * This file is part of NeoBio.
 *
 * NeoBio is free software; you can redistribute it and/or modify it under the terms of
 * the GNU General Public License as published by the Free Software Foundation; either
 * version 2 of the License, or (at your option) any later version.
 *
 * NeoBio is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY;
 * without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
 * PURPOSE. See the GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along with NeoBio;
 * if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330,
 * Boston, MA 02111-1307, USA.
 *
 * Proper attribution of the author as the source of the software would be appreciated.
 *
 * Sergio Anibal de Carvalho Junior		mailto:[email protected]
 * Department of Computer Science		http://www.dcs.kcl.ac.uk
 * King's College London, UK			http://www.kcl.ac.uk
 *
 * Please visit http://neobio.sourceforge.net
 *
 * This project was supervised by Professor Maxime Crochemore.
 *
 */

package net.maizegenetics.analysis.gbs.neobio;

import java.io.Reader;
import java.io.IOException;

/**
 * This abstract class is the superclass of all classes implementing pairwise sequence
 * alignment algorithms. Subclasses are required to provide methods to build a high
 * scoring alignment between two sequences and compute its score with a given scoring
 * scheme.
 *
 * 

Clients are required to set a scoring scheme and load two sequences before * requesting an alignment or the computation of its score. They typically make the * following sequence of method calls:

* *
 * // prepare
 * PairwiseAlignmentAlgorithm algorithm = new SomePairwiseAlignmentAlgorith ();
 * algorithm.setScoringScheme (some_scoring_scheme);
 * algorithm.loadSequences (sequence1, sequence2);
 *
 * // now compute the alignment
 * PairwiseAlignment alignment = algorithm.getPairwiseAlignment();
 * int score = algorithm.getScore();
 * 
* * @author Sergio A. de Carvalho Jr. * @see PairwiseAlignment */ public abstract class PairwiseAlignmentAlgorithm { /** * Tag character that signals a match in the score tag line of an alignment. Its use * is conditioned by the use_match_tag flag. * * @see #use_match_tag * @see #useMatchTag */ protected static final char MATCH_TAG = '|'; /** * Tag character that signals an approximate match in the score tag line of an * alignment. */ protected static final char APPROXIMATE_MATCH_TAG = '+'; /** * Character that signals a mismatch in the score tag line of an alignment. */ protected static final char MISMATCH_TAG = ' '; /** * Character that signals a gap in the score tag line of an alignment. */ protected static final char GAP_TAG = ' '; /** * Character that signals a gap in sequence. */ protected static final char GAP_CHARACTER = '-'; /** * Indicates if the MATCH_TAG tag should be used or not. If it is * true, the alignment algorithm should write the MATCH_TAG * tag in the score tag line of the alignment whenever a match occurs between * characters of the two sequences. If it is false the matching character * should be written instead. This flag is updated whenever a scoring scheme is set to * this PairwiseAlignmentAlgorithm by the setScoringScheme * method. * * @see #MATCH_TAG * @see #useMatchTag * @see #setScoringScheme */ protected boolean use_match_tag; /** * The scoring scheme used to compute a pairwise sequence alignment. It must be set * before performing the alignment, and if a new scoring scheme is set, any alignment * or score already computed is lost. */ protected ScoringScheme scoring; /** * Stores the product of the last pairwise alignment performed. It contains a string * representation of a highest scoring alignment between the two sequences and its * score. It is set after a successful execution of the * computePairwiseAlignment method that subclasses must implement. It is * set to null if new sequences are loaded or a new scoring scheme is set. */ protected PairwiseAlignment alignment; /** * This field stores just the score of the last pairwise alignment performed (if the * score_computed flag is set to true). It is useful when just the score * is needed (and not the alignment itselft). Its value is set after a successful * execution of both computePairwiseAlignment or * computeScore methods that subclasses must implement. If new sequences * are loaded or a new scoring scheme is set, the score_computed flag is * set to false, and this field's value becomes undefined. */ protected int score; /** * Flags whether the score of the alignment between the last two loaded sequences has * already been computed. It is set to true after a successful execution of both * computePairwiseAlignment or computeScore methods that * subclasses must implement. It is set to falsef if new sequences are loaded or a new * scoring scheme is set. */ protected boolean score_computed = false; /** * Flags whether sequences have been loaded. It is set to true after subclasses * successfully load two sequences. */ protected boolean sequences_loaded = false; /** * Sets the scoring scheme to be used for the next alignments. Any alignment or score * already computed is lost. If the scoring scheme supports partial matches, this * PairwiseAlignmentAlgorithm is set not to use the * MATCH_TAG tag because in this case the score tag line be confusing. * If the scoring scheme does not support partial matches, then the use of the * MATCH_TAG tag is enabled. * * @param scoring Scoring scheme to be used * @see #MATCH_TAG * @see ScoringScheme#isPartialMatchSupported */ public void setScoringScheme (ScoringScheme scoring) { if (scoring == null) throw new IllegalArgumentException ("Null scoring scheme object."); this.scoring = scoring; // if the scoring scheme supports partial matches, // disable the use of the MATCH_TAG character if (scoring.isPartialMatchSupported()) this.use_match_tag = false; else this.use_match_tag = true; // when a new scoring scheme is set, // the alignment needs to be recomputed this.alignment = null; this.score_computed = false; } /** * Tells wether the MATCH_TAG tag should be used or not. If it returns * true, the alignment algorithm should write the MATCH_TAG * tag in the score tag line of the alignment produced whenever a match occurs between * characters of the two sequences. If it returns false the matching * character should be written instead. The value returned is conditioned by the * use_match_tag flag, which is updated whenever a scoring scheme is set * to this PairwiseAlignmentAlgorithm by the * setScoringScheme method. * * @return trueMATCH_TAG
tag should be used, * false otherwise * @see #MATCH_TAG * @see #use_match_tag * @see #setScoringScheme */ protected boolean useMatchTag () { return use_match_tag; } /** * Request subclasses to load the sequences according to their own needs. Any * alignment and score already computed is lost. If no exception is raised, the loaded * flag is set to true. Subclasses typically store the sequences in instances of an * appropiate class and each can have its own contract, so check each algorithm to see * what kind of sequences it produces. Input can come from any source provided they * are encapsulated with a proper Reader. They must be ready to be read, i.e. the next * read operation must return the sequence's first character. * * @param input1 First sequence * @param input2 Second sequence * @throws IOException If an I/O error occurs when reading the sequences * @throws InvalidSequenceException If the sequences are not valid */ public void loadSequences (Reader input1, Reader input2) throws IOException, InvalidSequenceException { // when new sequences are loaded, the // alignment and score needs to be recomputed this.alignment = null; this.score_computed = false; // make sure that if an exception is raised // the sequences_loaded flag is false this.sequences_loaded = false; // request subclasses to load sequences loadSequencesInternal (input1, input2); // if no exception is raised, // set the loaded flag to true this.sequences_loaded = true; } /** * Frees pointer to loaded sequences and computed alignments (if any) so that their * data can be garbage collected. */ public void unloadSequences () { // allow any alignment already computed // to be garbage collected this.alignment = null; this.score_computed = false; // request subclasses to unload sequences unloadSequencesInternal (); this.sequences_loaded = false; } /** * Return the last pairwise alignment computed (if any) or request subclasses to * compute one and return the result by calling the * computePairwiseAlignment method. The sequences must already be loaded * and a scoring scheme must already be set. * * @return a pairwise alignment between the loaded sequences * @throws IncompatibleScoringSchemeException If the scoring scheme * is not compatible with the loaded sequences * @see #computePairwiseAlignment */ public PairwiseAlignment getPairwiseAlignment () throws IncompatibleScoringSchemeException { if (!sequences_loaded) throw new IllegalStateException ("Sequences have not been loaded."); if (scoring == null) throw new IllegalStateException ("Scoring scheme has not been set."); if (this.alignment == null) { // make sure the scoring scheme won't be changed // in the middle of the alignment computation synchronized (scoring) { // compute the alignment if it hasn't been computed yet this.alignment = computePairwiseAlignment(); } // store the score as well this.score = this.alignment.getScore(); this.score_computed = true; } return this.alignment; } /** * Returns the score of the last alignment computed (if any) or request subclasses to * compute one and return the result by calling the computeScore method. * The sequences must already be loaded and a scoring scheme must already be set. * * @return the score of the alignment between the loaded sequences * @throws IncompatibleScoringSchemeException If the scoring scheme * is not compatible with the loaded sequences * @see #computeScore */ public int getScore () throws IncompatibleScoringSchemeException { if (!sequences_loaded) throw new IllegalStateException ("Sequences have not been loaded."); if (scoring == null) throw new IllegalStateException ("Scoring scheme has not been set."); if (!score_computed) { // make sure the scoring scheme won't be changed // in the middle of the alignment computation synchronized (scoring) { // compute the alignment's score if it hasn't been computed yet this.score = computeScore(); } this.score_computed = true; } return this.score; } /** * Subclasses must implement this method to load sequences according to their own * needs and throw an exception in case of any failure. If no exception is raised, the * loaded flag is set to true by the public method and the sequences are believed to * be loaded (so an alignment or score can be requested). * * @param input1 First sequence * @param input2 Second sequence * @throws IOException If an I/O error occurs when reading the sequences * @throws InvalidSequenceException If the sequences are not valid * @see #loadSequences * @see CharSequence * @see FactorSequence */ protected abstract void loadSequencesInternal (Reader input1, Reader input2) throws IOException, InvalidSequenceException; /** * Subclasses must implement this method to unload sequences according to their own * storage, freeing pointers to sequences and any intermediate data so that they can * be garbage collected. This methid is called by the public * unloadSequences method. * * @see #unloadSequences */ protected abstract void unloadSequencesInternal (); /** * Subclasses must implement this method to compute an alignment between the loaded * sequences using the scoring scheme previously set. This methid is called by the * getPairwiseAlignment method when needed. * * @return a pairwise alignment between the loaded sequences * @throws IncompatibleScoringSchemeException If the scoring scheme * is not compatible with the loaded sequences * @see #getPairwiseAlignment */ protected abstract PairwiseAlignment computePairwiseAlignment () throws IncompatibleScoringSchemeException; /** * Subclasses must implement this method to compute the score of the alignment between * the loaded sequences using the scoring scheme previously set. This methid is called * by the getScore method when needed. * * @return the score of the alignment between the loaded sequences * @throws IncompatibleScoringSchemeException If the scoring scheme * is not compatible with the loaded sequences * @see #getScore */ protected abstract int computeScore () throws IncompatibleScoringSchemeException; /** * Helper method to invoke the scoreSubstitution method of the scoring * scheme set to this algorithm. * * @param a first character * @param b second character * @return score of substitution of a for b * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned * @see ScoringScheme#scoreSubstitution */ protected final int scoreSubstitution (char a, char b) throws IncompatibleScoringSchemeException { return scoring.scoreSubstitution (a, b); } /** * Helper method to invoke the scoreInsertion method of the scoring * scheme set to this algorithm. * * @param a the character to be inserted * @return score of insertion of a * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned * @see ScoringScheme#scoreInsertion */ protected final int scoreInsertion (char a) throws IncompatibleScoringSchemeException { return scoring.scoreInsertion (a); } /** * Helper method to invoke the scoreDeletion method of the scoring scheme * set to this algorithm. * * @param a the character to be deleted * @return score of deletion of a * @throws IncompatibleScoringSchemeException if the scoring scheme is not compatible * with the sequences being aligned * @see ScoringScheme#scoreDeletion */ protected final int scoreDeletion (char a) throws IncompatibleScoringSchemeException { return scoring.scoreDeletion (a); } /** * Helper method to compute the the greater of two values. * * @param v1 first value * @param v2 second value * @return the larger of v1 and v2 */ protected final int max (int v1, int v2) { return (v1 >= v2) ? v1 : v2; } /** * Helper method to compute the the greater of three values. * * @param v1 first value * @param v2 second value * @param v3 third value * @return the larger of v1, v2 and v3 */ protected final int max (int v1, int v2, int v3) { return (v1 >= v2) ? ((v1 >= v3)? v1 : v3) : ((v2 >= v3)? v2 : v3); } /** * Helper method to compute the the greater of four values. * * @param v1 first value * @param v2 second value * @param v3 third value * @param v4 fourth value * @return the larger of v1, v2 v3 and * v4 */ protected final int max (int v1, int v2, int v3, int v4) { int m1 = ((v1 >= v2) ? v1 : v2); int m2 = ((v3 >= v4) ? v3 : v4); return (m1 >= m2) ? m1 : m2; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy