All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.biojava.nbio.alignment.template.AbstractMatrixAligner Maven / Gradle / Ivy

The newest version!
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 * Created on July 2, 2010
 * Author: Mark Chapman
 */

package org.biojava.nbio.alignment.template;

import org.biojava.nbio.core.alignment.template.Profile;
import org.biojava.nbio.core.alignment.template.SubstitutionMatrix;
import org.biojava.nbio.alignment.routines.AlignerHelper.Anchor;
import org.biojava.nbio.alignment.routines.AlignerHelper.Last;
import org.biojava.nbio.alignment.routines.AlignerHelper.Subproblem;
import org.biojava.nbio.core.alignment.template.AlignedSequence.Step;
import org.biojava.nbio.core.sequence.template.Compound;
import org.biojava.nbio.core.sequence.template.CompoundSet;
import org.biojava.nbio.core.sequence.template.Sequence;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import static org.biojava.nbio.alignment.routines.AlignerHelper.setScoreVector;
import static org.biojava.nbio.alignment.routines.AlignerHelper.setSteps;

/**
 * Implements common code for an {@link Aligner} which builds a score matrix during computation.
 *
 * @author Mark Chapman
 * @author Daniel Cameron
 * @param  each element of the alignment {@link Profile} is of type S
 * @param  each element of an {@link AlignedSequence} is a {@link Compound} of type C
 */
public abstract class AbstractMatrixAligner, C extends Compound> extends AbstractScorer
		implements MatrixAligner {

	// input fields
	protected GapPenalty gapPenalty;
	private SubstitutionMatrix subMatrix;
	private boolean local, storingScoreMatrix;
	protected List anchors = new ArrayList<>();
	protected int cutsPerSection;

	// output fields
	protected Profile profile;
	/**
	 * Start position of the aligned sequence in the query and target respectively
	 */
	protected int[] xyStart;
	/**
	 * End position of the aligned sequence in the query and target respectively
	 */
	protected int[] xyMax;
	protected int max, min, score;
	/**
	 * Dynamic programming score matrix
	 * The first dimension has the length of the first (query) sequence + 1
	 * The second has the length of the second (target) sequence + 1
	 * The third has length 1 for linear gap penalty and 3 for affine/constant gap
	 * (one each for match/substitution, deletion, insertion)
	 */
	protected int[][][] scores;
	/**
	 * Friendly name of each copy of the scoring matrix.
	 * The number of elements must match the number of elements in third dimension of @see scores
	 */
	private String[] types;
	protected long time = -1;

	/**
	 * Before running an alignment, data must be sent in via calls to {@link #setGapPenalty(GapPenalty)} and
	 * {@link #setSubstitutionMatrix(SubstitutionMatrix)}.
	 */
	protected AbstractMatrixAligner() {
	}

	/**
	 * Prepares for an alignment.
	 *
	 * @param gapPenalty the gap penalties used during alignment
	 * @param subMatrix the set of substitution scores used during alignment
	 */
	protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix subMatrix) {
		this(gapPenalty, subMatrix, false);
	}

	/**
	 * Prepares for an alignment.
	 *
	 * @param gapPenalty the gap penalties used during alignment
	 * @param subMatrix the set of substitution scores used during alignment
	 * @param local if true, find a region of similarity rather than aligning every compound
	 */
	protected AbstractMatrixAligner(GapPenalty gapPenalty, SubstitutionMatrix subMatrix, boolean local) {
		this.gapPenalty = gapPenalty;
		this.subMatrix = subMatrix;
		this.local = local;
		reset();
	}

	/**
	 * Returns the gap penalties.
	 *
	 * @return the gap penalties used during alignment
	 */
	public GapPenalty getGapPenalty() {
		return gapPenalty;
	}

	/**
	 * Returns the substitution matrix.
	 *
	 * @return the set of substitution scores used during alignment
	 */
	public SubstitutionMatrix getSubstitutionMatrix() {
		return subMatrix;
	}

	/**
	 * Returns whether alignment finds a region of similarity rather than aligning every compound.
	 *
	 * @return true if alignment finds a region of similarity rather than aligning every compound
	 */
	public boolean isLocal() {
		return local;
	}

	/**
	 * Returns choice to cache the score matrix or to save memory by deleting score matrix after alignment.
	 *
	 * @return choice to cache the score matrix
	 */
	public boolean isStoringScoreMatrix() {
		return storingScoreMatrix;
	}

	/**
	 * Sets the gap penalties.
	 *
	 * @param gapPenalty the gap penalties used during alignment
	 */
	public void setGapPenalty(GapPenalty gapPenalty) {
		this.gapPenalty = gapPenalty;
		reset();
	}

	/**
	 * Sets the substitution matrix.
	 *
	 * @param subMatrix the set of substitution scores used during alignment
	 */
	public void setSubstitutionMatrix(SubstitutionMatrix subMatrix) {
		this.subMatrix = subMatrix;
		reset();
	}

	/**
	 * Sets choice to cache the score matrix or to save memory by deleting score matrix after alignment.
	 *
	 * @param storingScoreMatrix choice to cache the score matrix
	 */
	public void setStoringScoreMatrix(boolean storingScoreMatrix) {
		this.storingScoreMatrix = storingScoreMatrix;
		if (!storingScoreMatrix) {
			scores = null;
		}
	}

	// methods for MatrixAligner

	@Override
	public int[][][] getScoreMatrix() {
		boolean tempStoringScoreMatrix = storingScoreMatrix;
		if (scores == null) {
			storingScoreMatrix = true;
			align();
			if (scores == null) {
				return null;
			}
		}
		int[][][] copy = scores;
		if (tempStoringScoreMatrix) {
			copy = new int[scores.length][scores[0].length][];
			for (int i = 0; i < copy.length; i++) {
				for (int j = 0; j < copy[0].length; j++) {
					copy[i][j] = Arrays.copyOf(scores[i][j], scores[i][j].length);
				}
			}
		}
		setStoringScoreMatrix(tempStoringScoreMatrix);
		return copy;
	}

	@Override
	public String getScoreMatrixAsString() {
		int[][][] scores = getScoreMatrix();
		return scoreMatrixToString(scores);
	}
	private String scoreMatrixToString(int[][][] scores) {
		StringBuilder s = new StringBuilder();
		CompoundSet compoundSet = getCompoundSet();
		int lengthCompound = compoundSet.getMaxSingleCompoundStringLength(), lengthRest =
				Math.max(Math.max(Integer.toString(min).length(), Integer.toString(max).length()), lengthCompound) + 1;
		String padCompound = "%" + Integer.toString(lengthCompound) + "s",
				padRest = "%" + Integer.toString(lengthRest);
		List query = getCompoundsOfQuery(), target = getCompoundsOfTarget();
		for (int type = 0; type < scores[0][0].length; type++) {
			if (type > 0) {
				s.append(String.format("%n"));
			}
			if (types[type] != null) {
				s.append(String.format("%s%n", types[type]));
			}
			s.append(String.format(padCompound, ""));
			s.append(String.format(padRest + "s", ""));
			for (C col : target) {
				s.append(String.format(padRest + "s", compoundSet.getStringForCompound(col)));
			}
			s.append(String.format("%n"));
			for (int x = 0; x < scores.length; x++) {
				s.append(String.format(padCompound, (x == 0) ? "" :
					compoundSet.getStringForCompound(query.get(x - 1))));
				for (int y = 0; y < scores[0].length; y++) {
					s.append(scores[x][y][type] >= min ? String.format(padRest + "d", scores[x][y][type]) :
							String.format(padRest + "s", "-\u221E"));
				}
				s.append(String.format("%n"));
			}
		}
		return s.toString();
	}

	// methods for Aligner
	@Override
	public long getComputationTime() {
		if (profile == null) {
			align();
		}
		return time;
	}

	@Override
	public Profile getProfile() {
		if (profile == null) {
			align();
		}
		return profile;
	}

	// methods for Scorer

	@Override
	public double getMaxScore() {
		if (profile == null) {
			align();
		}
		return max;
	}

	@Override
	public double getMinScore() {
		if (profile == null) {
			align();
		}
		return min;
	}

	@Override
	public double getScore() {
		if (profile == null) {
			align();
		}
		return score;
	}

	// helper methods

	/**
	 * Performs alignment
	 */
	protected void align() {
		if (!isReady()) {
			return;
		}

		long timeStart = System.nanoTime();

		int[] dim = getScoreMatrixDimensions();
		if (storingScoreMatrix) {
			scores = new int[dim[0]][dim[1]][dim[2]];
		} else {
			scores = new int[dim[0]][][];
			scores[0] = new int[dim[1]][dim[2]];
			scores[1] = new int[dim[1]][dim[2]];
		}
		boolean linear = (gapPenalty.getType() == GapPenalty.Type.LINEAR);
		Last[][][] traceback = new Last[dim[0]][][];
		List sx = new ArrayList<>(), sy = new ArrayList<>();

		if (!local) {
			xyMax = new int[] { dim[0] - 1, dim[1] - 1 };
			xyStart = new int[] { 0, 0 };
			score = 0;
			List problems = Subproblem.getSubproblems(anchors, xyMax[0], xyMax[1]);
			assert problems.size() == anchors.size() + 1;
			for (int i = 0; i < problems.size(); i++) {
				Subproblem subproblem = problems.get(i);
				for (int x = subproblem.getQueryStartIndex(); x <= subproblem.getQueryEndIndex(); x++) {

					traceback[x] =

					  linear ?
						setScoreVector(x, subproblem, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores) :

						setScoreVector(x, subproblem, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x, subproblem), storingScoreMatrix, scores);
				}
			}
			setSteps(traceback, scores, sx, sy);
			score = Integer.MIN_VALUE;
			int[] finalScore = scores[xyMax[0]][xyMax[1]];
			for (int z = 0; z < finalScore.length; z++) {
				score = Math.max(score, finalScore[z]);
			}
		} else {
			for (int x = 0; x < dim[0]; x++) {

				traceback[x] =

				  linear ?
					setScoreVector(x, gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score) :

					setScoreVector(x, gapPenalty.getOpenPenalty(), gapPenalty.getExtensionPenalty(), getSubstitutionScoreVector(x), storingScoreMatrix, scores, xyMax, score);

				if (xyMax[0] == x) {
					score = scores[x][xyMax[1]][0];
				}
			}
			xyStart = local ? setSteps(traceback, xyMax, sx, sy) : setSteps(traceback, scores, sx, sy);
		}

		setProfile(sx, sy);

		if (!storingScoreMatrix) {
			scores = null;
		}

		time = System.nanoTime() - timeStart;
	}

	/**
	 * Returns score for the alignment of the query column to all target columns
	 * @param queryColumn
	 * @return
	 */
	protected int[] getSubstitutionScoreVector(int queryColumn) {
		return getSubstitutionScoreVector(queryColumn, new Subproblem(0, 0, scores.length - 1, scores[0].length - 1));
	}

	/**
	 * Returns score for the alignment of the query column to all target columns
	 * @param queryColumn
	 * @param subproblem
	 * @return
	 */
	protected int[] getSubstitutionScoreVector(int queryColumn, Subproblem subproblem) {
		int[] subs = new int[subproblem.getTargetEndIndex() + 1];
		if (queryColumn > 0) {
			for (int y = Math.max(1, subproblem.getTargetStartIndex()); y <= subproblem.getTargetEndIndex(); y++) {
				subs[y] = getSubstitutionScore(queryColumn, y);
			}
		}
		return subs;
	}

	/**
	 * Resets output fields; should be overridden to set max and min
	 */
	protected void reset() {
		xyMax = new int[] {0, 0};
		xyStart = new int[] {0, 0};
		scores = null;
		types = (gapPenalty == null || gapPenalty.getType() == GapPenalty.Type.LINEAR) ? new String[] { null } :
				new String[] { "Substitution", "Deletion", "Insertion" };
		time = -1;
		profile = null;
	}
	// abstract methods

	// returns compound set of sequences
	protected abstract CompoundSet getCompoundSet();

	// returns compounds in query sequence/profile
	protected abstract List getCompoundsOfQuery();

	// returns compounds in target sequence/profile
	protected abstract List getCompoundsOfTarget();

	// returns the 3 score matrix dimensions
	protected abstract int[] getScoreMatrixDimensions();

	// returns score for the alignment of two columns
	protected abstract int getSubstitutionScore(int queryColumn, int targetColumn);

	// prepares for alignment; returns true if everything is set to run the alignment
	protected abstract boolean isReady();

	// sets profile following the given alignment path
	protected abstract void setProfile(List sx, List sy);

}