de.citec.tcs.alignment.StrictAlignmentFullAlgorithm Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithms-lib Show documentation
This module contains standard implementations of AlignmentAlgorithms. In contrast to the adp module, these implementations are hand-tailored for some specific algorithms and thus achieve a somewhat faster runtime (a constant factor of maybe 30-50 percent).
The newest version!
/* 
 * TCS Alignment Toolbox Version 3
 * 
 * Copyright (C) 2016
 * Benjamin Paaßen
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import lombok.NonNull;

/**
 * This is an implementation of the Needleman-Wunsch-Algorithm for sequence alignment. It returns an
 * optimal Alignment. The scoring scheme is equivalent to the StrictAlignmentScoreAlgorithm.
 *
 * If a cell of the alignment matrix has several co-optimal predecessors, one of them is drawn
 * uniformly at random at that backtracing step, so repeated calls on the same input may return
 * different alignments.
 *
 * NOTE(review): the type arguments of the superclass, of Comparator and of Operation were
 * reconstructed from the usages in this class; confirm them against the respective declarations.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 * @param <X> the class of the elements in the left input sequence.
 * @param <Y> the class of the elements in the right input sequence.
 */
public class StrictAlignmentFullAlgorithm<X, Y>
		extends AbstractStrictAlignmentAlgorithm<X, Y, Alignment> {

	/**
	 * Creates a new instance of this algorithm.
	 *
	 * @param comparator the comparator that is handed to the super class and later attached to
	 * the resulting Alignment.
	 */
	public StrictAlignmentFullAlgorithm(@NonNull Comparator<X, Y> comparator) {
		super(comparator, Alignment.class);
	}

	/**
	 * Backtraces through the given alignment matrix, starting at cell (a.size(), b.size()) and
	 * ending at cell (0, 0), and returns the operations on that path in forward order.
	 *
	 * @param alignMat the alignment matrix; it is indexed from [0][0] to [a.size()][b.size()].
	 * @param repCosts the replacement costs; repCosts[i][j] is used for replacing a.get(i) with
	 * b.get(j).
	 * @param delCosts the deletion costs; delCosts[i] is used for deleting a.get(i).
	 * @param insCosts the insertion costs; insCosts[j] is used for inserting b.get(j).
	 * @param a the left input sequence.
	 * @param b the right input sequence.
	 *
	 * @return an Alignment containing one Operation per backtracing step, annotated with the
	 * normalized value of alignMat[a.size()][b.size()].
	 *
	 * @throws IllegalStateException if an inner cell of alignMat can not be explained by any of
	 * its three predecessors.
	 */
	@Override
	public Alignment<X, Y> transformToResult(@NonNull double[][] alignMat,
			@NonNull double[][] repCosts, @NonNull double[] delCosts, @NonNull double[] insCosts,
			@NonNull List<X> a, @NonNull List<Y> b) {

		final int m = a.size();
		final int n = b.size();
		// construct the normalized distance.
		final double normalizedDistance = normalizeDissimilarity(alignMat[m][n], a, b);

		final Alignment<X, Y> path = new Alignment<>(getComparator(), a, b, normalizedDistance);

		// Backtracing visits the operations last-to-first, so they are buffered on a stack.
		// Every step decrements i, j or both, so there are at most m + n operations.
		final Deque<Operation<X, Y>> reversedOps = new ArrayDeque<>(m + n);
		final Random rand = new Random();
		int i = m;
		int j = n;
		while (i > 0 || j > 0) {
			final OperationType type;
			if (j == 0) {
				// in the first column we can only go upwards.
				type = OperationType.DELETION;
			} else if (i == 0) {
				// in the first row we can only go left.
				type = OperationType.INSERTION;
			} else {
				type = chooseCoOptimalMove(alignMat, repCosts, delCosts, insCosts, i, j, rand);
			}

			// The indices are decremented first, such that i and j directly address the consumed
			// sequence elements and their cost entries.
			final X left;
			final Y right;
			final double cost;
			if (type == OperationType.DELETION) {
				i--;
				left = a.get(i);
				right = null;
				cost = delCosts[i];
			} else if (type == OperationType.INSERTION) {
				j--;
				left = null;
				right = b.get(j);
				cost = insCosts[j];
			} else {
				i--;
				j--;
				left = a.get(i);
				right = b.get(j);
				cost = repCosts[i][j];
			}
			reversedOps.push(new Operation<X, Y>(left, right, type, cost));
		}

		// popping restores the forward order of the operations.
		while (!reversedOps.isEmpty()) {
			path.add(reversedOps.pop());
		}
		return path;
	}

	/**
	 * Selects the backtracing move for the inner matrix cell (i, j) with i &gt; 0 and j &gt; 0. If
	 * more than one predecessor is co-optimal, one of them is drawn uniformly at random.
	 *
	 * @return OperationType.DELETION (go upwards), OperationType.REPLACEMENT (go diagonal) or
	 * OperationType.INSERTION (go left).
	 *
	 * @throws IllegalStateException if no predecessor reproduces alignMat[i][j].
	 */
	private static OperationType chooseCoOptimalMove(double[][] alignMat, double[][] repCosts,
			double[] delCosts, double[] insCosts, int i, int j, Random rand) {
		// The comparison is exact on purpose: this presumably relies on alignMat[i][j] having been
		// computed as the minimum over exactly these three sums - TODO confirm against the code
		// that fills the matrix.
		final double opt = alignMat[i][j];
		final OperationType[] candidates = new OperationType[3];
		int numCandidates = 0;
		if (alignMat[i - 1][j] + delCosts[i - 1] == opt) {
			candidates[numCandidates++] = OperationType.DELETION;
		}
		if (alignMat[i - 1][j - 1] + repCosts[i - 1][j - 1] == opt) {
			candidates[numCandidates++] = OperationType.REPLACEMENT;
		}
		if (alignMat[i][j - 1] + insCosts[j - 1] == opt) {
			candidates[numCandidates++] = OperationType.INSERTION;
		}
		if (numCandidates == 0) {
			throw new IllegalStateException(
					"Internal error: No co-optimal state during backtracing!");
		}
		if (numCandidates == 1) {
			return candidates[0];
		}
		return candidates[rand.nextInt(numCandidates)];
	}
}