de.citec.tcs.alignment.StrictAlignmentAllOptimalAlgorithm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithms-lib Show documentation
This module contains standard implementations of AlignmentAlgorithms. In contrast to the adp module, these implementations are hand-tailored for some specific algorithms and thus achieve a somewhat faster runtime (a constant factor of maybe 30-50 percent).
The newest version!
/* 
 * TCS Alignment Toolbox Version 3
 * 
 * Copyright (C) 2016
 * Benjamin Paaßen
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import lombok.Getter;
import lombok.NonNull;

/**
 * This is an implementation of the Needleman-Wunsch-Algorithm for sequence alignment. It returns
 * all optimal Alignments until a limit of k, where k is given per default by
 * DEFAULT_ALIGNMENT_LIMIT.
 * The scoring scheme is equivalent to the StrictAlignmentScoreAlgorithm.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 * @param  the class of the elements in the left input sequence.
 * @param  the class of the elements in the right input sequence.
 */
public class StrictAlignmentAllOptimalAlgorithm
		extends AbstractStrictAlignmentAlgorithm {

	public static final int DEFAULT_ALIGNMENT_LIMIT = 100;
	/**
	 * The total number of co-optimal alignments can be very huge as multiple co-optimal operations
	 * lead to a combinatorial explosion of possible alignments. Thus this option enables you to
	 * limit the number of paths that are actually calculated to keep the calculation time
	 * controlled.
	 *
	 * As soon as the limit is reached random choices of operations are used instead of exploring
	 * all co-optimal solutions.
	 *
	 * @return The limit of co-optimal alignments that are stored in the returned PathList.
	 */
	@Getter
	private int pathLimit = DEFAULT_ALIGNMENT_LIMIT;

	public StrictAlignmentAllOptimalAlgorithm(@NonNull Comparator comparator) {
		super(comparator, AlignmentList.class);
	}

	/**
	 * The total number of co-optimal alignments can be very huge as multiple co-optimal operations
	 * lead to a combinatorial explosion of possible alignments. Thus this option enables you to
	 * limit the number of paths that are actually calculated to keep the calculation time
	 * controlled.
	 *
	 * As soon as the limit is reached random choices of operations are used instead of exploring
	 * all co-optimal solutions.
	 *
	 * The default for this is DEFAULT_ALIGNMENT_LIMIT.
	 *
	 * @param pathLimit a maximum number of co-optimal paths that shall be calculated.
	 */
	public void setPathLimit(int pathLimit) {
		if (pathLimit < 1) {
			throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
		}
		this.pathLimit = pathLimit;
	}

	@Override
	public AlignmentList transformToResult(double[][] alignMat,
			@NonNull double[][] repCosts, @NonNull double[] delCosts, @NonNull double[] insCosts,
			List a, List b) {
		final int m = a.size();
		final int n = b.size();
		final double normalizedDistance = normalizeDissimilarity(alignMat[m][n], a, b);
		//Now backtrace to reconstruct the paths.
		final Stack stack = new Stack<>();
		final AlignmentList pathlist = new AlignmentList<>();
		stack.push(new TmpPath(m, n));
		TmpPath current;
		int pathCounter = 1;
		while (!stack.empty()) {
			current = stack.pop();
			//we are finished if we are at the start.
			if (current.i == 0 && current.j == 0) {
				//in that case we transform the temporary path to an actual one.
				final Alignment alignment = new Alignment<>(
						getComparator(), a, b, normalizedDistance);
				while (!current.opStack.empty()) {
					alignment.add(current.opStack.pop());
				}
				pathlist.add(alignment);
				continue;
			}
			//otherwise we still have work to do.
			final X left;
			final Y right;

			//If we are in the first column we can only delete.
			if (current.j == 0) {
				left = a.get(current.i - 1);
				right = null;
				final OperationType type = OperationType.DELETION;
				final Operation del = new Operation<>(left, right, type, delCosts[current.i - 1]);
				current.opStack.push(del);
				current.i--;
				stack.push(current);
				continue;
			}
			//if we are in the first row we can only insert.
			if (current.i == 0) {
				left = null;
				right = b.get(current.j - 1);
				final OperationType type = OperationType.INSERTION;
				final Operation ins = new Operation<>(left, right, type, insCosts[current.j - 1]);
				current.opStack.push(ins);
				current.j--;
				stack.push(current);
				continue;
			}
			//if we are inside the matrix we consider all operations that are co-optimal, until we
			//reached the limit.
			left = a.get(current.i - 1);
			right = b.get(current.j - 1);

			final double delTotal = alignMat[current.i - 1][current.j] + delCosts[current.i - 1];
			final double insTotal = alignMat[current.i][current.j - 1] + insCosts[current.j - 1];
			final double repTotal = alignMat[current.i - 1][current.j - 1] + repCosts[current.i - 1][current.j - 1];
			final double optimum = alignMat[current.i][current.j];

			boolean[] cooptimal = {delTotal == optimum, insTotal == optimum, repTotal == optimum};
			if (pathCounter == pathLimit) {
				//if we have reached the limit we choose one continuation of the path at random.
				int counter = 0;
				for (final boolean coop : cooptimal) {
					if (coop) {
						counter++;
					}
				}
				if (counter > 1) {
					final Random random = new Random();
					int winnerIdx = random.nextInt(counter);
					int i = 0;
					for (int j = 0; j < cooptimal.length; j++) {
						if (cooptimal[j]) {
							if (i != winnerIdx) {
								cooptimal[j] = false;
							}
							i++;
						}
					}
				}
			}

			//continue the backtracing with a deletion if that is co-optimal.
			if (cooptimal[0]) {
				final Operation del = new Operation<>(left, null, OperationType.DELETION, delCosts[current.i - 1]);
				current.opStack.push(del);
				current.i--;
				stack.push(current);
			}
			//continue the backtracing with an insertion if that is co-optimal.
			if (cooptimal[1]) {
				//if we already did continue the path using a deletion, we make a copy before we proceed.
				if (cooptimal[0]) {
					//reverse the changes made by a deletion
					current = new TmpPath(current);
					current.i++;
					current.opStack.pop();
					pathCounter++;
				}
				final Operation ins = new Operation<>(null, right, OperationType.INSERTION, insCosts[current.j - 1]);
				current.opStack.push(ins);
				current.j--;
				stack.push(current);
			}
			//continue the backtracing with a replacement if that is co-optimal.
			if (cooptimal[2]) {
				//if we already did continue the path, we make a clone before we proceed.
				if (cooptimal[0] || cooptimal[1]) {
					current = new TmpPath(current);
					pathCounter++;
					//if we continued with an insertion, reverse the changes made by the insertion
					if (cooptimal[1]) {
						current.j++;
					} else {
						//if we used a deletion, reverse those changes.
						current.i++;
					}
					current.opStack.pop();
				}
				final Operation rep = new Operation(left, right, OperationType.REPLACEMENT, repCosts[current.i - 1][current.j - 1]);
				current.opStack.push(rep);
				current.i--;
				current.j--;
				stack.push(current);
			}
		}
		return pathlist;
	}

	private class TmpPath {

		private Stack> opStack = new Stack<>();
		private int i;
		private int j;

		public TmpPath(int i, int j) {
			this.i = i;
			this.j = j;
		}

		public TmpPath(TmpPath copy) {
			this.i = copy.i;
			this.j = copy.j;
			for (final Operation op : copy.opStack) {
				this.opStack.push(op);
			}
		}
	}
}