de.citec.tcs.alignment.StrictAllOptimalAlgorithm Maven / Gradle / Ivy

Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.Random;
import java.util.Stack;

/**
 * This is an implementation of the Needleman-Wunsch-Algorithm for sequence
 * alignment. It returns all optimal paths until a limit of k paths, where k is
 * given per default by DEFAULTPATHLIMIT. The scoring scheme is equivalent to
 * the StrictAlignmentScoreAlgorithm.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class StrictAllOptimalAlgorithm
		extends AbstractGapAlignmentAlgorithm {

	public static final int DEFAULTPATHLIMIT = 100;
	private int pathLimit = DEFAULTPATHLIMIT;

	/**
	 * This sets up an AlignmentAlgorithm instance according to the given
	 * specification.
	 *
	 * @param alignmentSpecification an AlignmentSpecification.
	 */
	public StrictAllOptimalAlgorithm(AlignmentSpecification alignmentSpecification) {
		super(alignmentSpecification, FullMatrixEntry.class, PathList.class);
	}

	/**
	 * Please refer to the corresponding setter-method for more information.
	 *
	 * @return a maximum number of co-optimal paths that shall be calculated.
	 */
	public int getPathLimit() {
		return pathLimit;
	}

	/**
	 * The total number of co-optimal alignments can be very huge as multiple
	 * co-optimal operations lead to a combinatorial explosion of possible
	 * alignments. Thus this option enables you to limit the number of paths
	 * that are actually calculated to keep the calculation time controlled.
	 *
	 * As soon as the limit is reached random choices of operations are used
	 * instead of exploring all co-optimal solutions.
	 *
	 * The default for this is DEFAULTPATHLIMIT.
	 *
	 * @param pathLimit a maximum number of co-optimal paths that shall be
	 * calculated.
	 */
	public void setPathLimit(int pathLimit) {
		if (pathLimit < 1) {
			throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
		}
		this.pathLimit = pathLimit;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public FullMatrixEntry createInitial() {
		return new FullMatrixEntry(0, 0, 0, 0, 0, 0);
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public FullMatrixEntry createDelInitial(FullMatrixEntry delOld, int i, double delLocal) {
		return new FullMatrixEntry(delOld.getScore() + delLocal, delLocal, 0, 0, i, 0);
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public FullMatrixEntry createInsInitial(FullMatrixEntry insOld, int j, double insLocal) {
		return new FullMatrixEntry(insOld.getScore() + insLocal, 0, insLocal, 0, 0, j);
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public FullMatrixEntry createNewEntry(FullMatrixEntry delOld,
			FullMatrixEntry insOld,
			FullMatrixEntry repOld,
			int i, int j,
			double delLocal, double insLocal, double repLocal) {
		final double delTotal = delOld.getScore() + delLocal;
		final double insTotal = insOld.getScore() + insLocal;
		final double repTotal = repOld.getScore() + repLocal;
		if (delTotal < insTotal) {
			if (delTotal < repTotal) {
				return new FullMatrixEntry(delTotal, delLocal, insLocal, repLocal, i, j);
			} else {
				return new FullMatrixEntry(repTotal, delLocal, insLocal, repLocal, i, j);
			}
		} else {
			if (insTotal < repTotal) {
				return new FullMatrixEntry(insTotal, delLocal, insLocal, repLocal, i, j);
			} else {
				return new FullMatrixEntry(repTotal, delLocal, insLocal, repLocal, i, j);
			}
		}
	}

	/**
	 * {@inheritDoc }
	 */
	public PathList transformToResult(FullMatrixEntry[][] alignmentMatrix, Sequence a, Sequence b) {
		final int M = a.getNodes().size();
		final int N = b.getNodes().size();

		final FullMatrixEntry last = alignmentMatrix[M][N];
		/*
		 * In each step of the alignment we had a distance of 1 at most. Thus we
		 * can normalize the score to the range [0,1] if we just divide the raw
		 * score by the worst case, which would be to delete a entirely and
		 * insert b entirely => worst case score = m+n.
		 */
		final int normalization = Math.max(1, M + N);
		final double normalizedScore = last.getScore() / normalization;
		//Now backtrace to reconstruct the paths.
		final Stack stack = new Stack();
		final PathList pathlist = new PathList();
		stack.push(new TmpPath(M, N, new AlignmentPath(getSpecification(), a, b, normalizedScore)));
		TmpPath current;
		int pathCounter = 1;
		while (!stack.empty()) {
			current = stack.pop();
			//we are finished if we are at the start.
			if (current.i == 0 && current.j == 0) {
				//in that case we transform the temporary path to an actual one.
				while (!current.opStack.empty()) {
					current.rawPath.getOperations().add(current.opStack.pop());
				}
				pathlist.add(current.rawPath);
				continue;
			}
			//otherwise we still have work to do.
			Node left, right;
			OperationType type;
			double[] local;
			//If we are in the first column we can only delete.
			if (current.j == 0) {
				left = a.getNodes().get(current.i - 1);
				right = null;
				local = getSpecification().calculateDeletionCosts(left);
				type = OperationType.DELETION;
				final Operation del = new Operation(left, right, type, current.rawPath);
				del.setComparatorDistances(local);
				current.opStack.push(del);
				current.i--;
				stack.push(current);
				continue;
			}
			//if we are in the first row we can only insert.
			if (current.i == 0) {
				left = null;
				right = b.getNodes().get(current.j - 1);
				local = getSpecification().calculateInsertionCosts(right);
				type = OperationType.INSERTION;
				final Operation ins = new Operation(left, right, type, current.rawPath);
				ins.setComparatorDistances(local);
				current.opStack.push(ins);
				current.j--;
				stack.push(current);
				continue;
			}
			//if we are inside the matrix we consider all operations that are co-optimal, until we
			//reached the limit.
			final FullMatrixEntry delOld = alignmentMatrix[current.i - 1][current.j];
			final FullMatrixEntry insOld = alignmentMatrix[current.i][current.j - 1];
			final FullMatrixEntry repOld = alignmentMatrix[current.i - 1][current.j - 1];
			final double delTotal = delOld.getScore()
					+ alignmentMatrix[current.i][current.j].getDelLocal();
			final double insTotal = insOld.getScore()
					+ alignmentMatrix[current.i][current.j].getInsLocal();
			final double repTotal = repOld.getScore()
					+ alignmentMatrix[current.i][current.j].getRepLocal();
			final double optimum = alignmentMatrix[current.i][current.j].getScore();

			boolean[] cooptimal = {delTotal == optimum, insTotal == optimum, repTotal == optimum};
			if (pathCounter == pathLimit) {
				//if we have reached the limit we choose one continuation of the path at random.
				int counter = 0;
				for (final boolean coop : cooptimal) {
					if (coop) {
						counter++;
					}
				}
				if (counter > 1) {
					final Random random = new Random();
					int winnerIdx = random.nextInt(counter);
					int i = 0;
					for (int j = 0; j < cooptimal.length; j++) {
						if (cooptimal[j]) {
							if (i != winnerIdx) {
								cooptimal[j] = false;
							}
							i++;
						}
					}
				}
			}

			//continue the backtracing with a deletion if that is co-optimal.
			if (cooptimal[0]) {
				left = a.getNodes().get(current.i - 1);
				right = null;
				local = getSpecification().calculateDeletionCosts(left);
				type = OperationType.DELETION;
				final Operation del = new Operation(left, right, type, current.rawPath);
				del.setComparatorDistances(local);
				current.opStack.push(del);
				current.i--;
				stack.push(current);
			}
			//continue the backtracing with an insertion if that is co-optimal.
			if (cooptimal[1]) {
				//if we already did continue the path using a deletion, we make a clone before we proceed.
				if (cooptimal[0]) {
					try {
						//reverse the changes made by a deletion
						current = (TmpPath) current.clone();
						current.i++;
						current.opStack.pop();
						pathCounter++;
					} catch (CloneNotSupportedException ex) {
						throw new RuntimeException(ex);
					}
				}
				left = null;
				right = b.getNodes().get(current.j - 1);
				local = getSpecification().calculateInsertionCosts(right);
				type = OperationType.INSERTION;
				final Operation ins = new Operation(left, right, type, current.rawPath);
				ins.setComparatorDistances(local);
				current.opStack.push(ins);
				current.j--;
				stack.push(current);
			}
			//continue the backtracing with a replacement if that is co-optimal.
			if (cooptimal[2]) {
				//if we already did continue the path, we make a clone before we proceed.
				if (cooptimal[0] || cooptimal[1]) {
					try {
						current = (TmpPath) current.clone();
						pathCounter++;
						//if we continued with an insertion, reverse the changes made by the insertion
						if (cooptimal[1]) {
							current.j++;
						} else {
							//if we used a deletion, reverse those changes.
							current.i++;
						}
						current.opStack.pop();
					} catch (CloneNotSupportedException ex) {
						throw new RuntimeException(ex);
					}
				}
				left = a.getNodes().get(current.i - 1);
				right = b.getNodes().get(current.j - 1);
				local = getSpecification().calculateReplacementCosts(left, right);
				type = OperationType.REPLACEMENT;
				final Operation rep = new Operation(left, right, type, current.rawPath);
				rep.setComparatorDistances(local);
				current.opStack.push(rep);
				current.i--;
				current.j--;
				stack.push(current);
			}
		}
		return pathlist;
	}

	private static class TmpPath {

		private Stack opStack = new Stack<>();
		private int i;
		private int j;
		private final AlignmentPath rawPath;

		public TmpPath(int i, int j, AlignmentPath rawPath) {
			this.i = i;
			this.j = j;
			this.rawPath = rawPath;
		}

		@Override
		protected Object clone() throws CloneNotSupportedException {
			final TmpPath clone = new TmpPath(i, j, (AlignmentPath) rawPath.clone());
			for (final Operation op : opStack) {
				clone.opStack.push(op);
			}
			return clone;
		}
	}

	public static class FullMatrixEntry implements AlignmentMatrixEntry {

		private final double score;
		private final double delLocal;
		private final double insLocal;
		private final double repLocal;
		private final int i;
		private final int j;

		public FullMatrixEntry(double score, double delLocal, double insLocal, double repLocal,
				int i, int j) {
			this.score = score;
			this.delLocal = delLocal;
			this.insLocal = insLocal;
			this.repLocal = repLocal;
			this.i = i;
			this.j = j;
		}

		public double getScore() {
			return score;
		}

		public double getDelLocal() {
			return delLocal;
		}

		public double getInsLocal() {
			return insLocal;
		}

		public double getRepLocal() {
			return repLocal;
		}

		public int getI() {
			return i;
		}

		public int getJ() {
			return j;
		}
	}
}