
de.citec.tcs.alignment.StrictAlignmentAllOptimalAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithms-lib Show documentation
Show all versions of algorithms-lib Show documentation
This module contains standard implementations of
AlignmentAlgorithms. In contrast to the adp module these implementations
are hand-tailored for some specific algorithms and thus achieve somewhat
faster runtime (a constant factor of maybe 30-50 percent).
The newest version!
/*
* TCS Alignment Toolbox Version 3
*
* Copyright (C) 2016
* Benjamin Paaßen
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import java.util.List;
import java.util.Random;
import java.util.Stack;
import lombok.Getter;
import lombok.NonNull;
/**
* This is an implementation of the Needleman-Wunsch-Algorithm for sequence alignment. It returns
* all optimal Alignments until a limit of k, where k is given per default by
* DEFAULT_ALIGNMENT_LIMIT.
* The scoring scheme is equivalent to the StrictAlignmentScoreAlgorithm.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
* @param the class of the elements in the left input sequence.
* @param the class of the elements in the right input sequence.
*/
public class StrictAlignmentAllOptimalAlgorithm
extends AbstractStrictAlignmentAlgorithm {
public static final int DEFAULT_ALIGNMENT_LIMIT = 100;
/**
* The total number of co-optimal alignments can be very huge as multiple co-optimal operations
* lead to a combinatorial explosion of possible alignments. Thus this option enables you to
* limit the number of paths that are actually calculated to keep the calculation time
* controlled.
*
* As soon as the limit is reached random choices of operations are used instead of exploring
* all co-optimal solutions.
*
* @return The limit of co-optimal alignments that are stored in the returned PathList.
*/
@Getter
private int pathLimit = DEFAULT_ALIGNMENT_LIMIT;
public StrictAlignmentAllOptimalAlgorithm(@NonNull Comparator comparator) {
super(comparator, AlignmentList.class);
}
/**
* The total number of co-optimal alignments can be very huge as multiple co-optimal operations
* lead to a combinatorial explosion of possible alignments. Thus this option enables you to
* limit the number of paths that are actually calculated to keep the calculation time
* controlled.
*
* As soon as the limit is reached random choices of operations are used instead of exploring
* all co-optimal solutions.
*
* The default for this is DEFAULT_ALIGNMENT_LIMIT.
*
* @param pathLimit a maximum number of co-optimal paths that shall be calculated.
*/
public void setPathLimit(int pathLimit) {
if (pathLimit < 1) {
throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
}
this.pathLimit = pathLimit;
}
@Override
public AlignmentList transformToResult(double[][] alignMat,
@NonNull double[][] repCosts, @NonNull double[] delCosts, @NonNull double[] insCosts,
List a, List b) {
final int m = a.size();
final int n = b.size();
final double normalizedDistance = normalizeDissimilarity(alignMat[m][n], a, b);
//Now backtrace to reconstruct the paths.
final Stack stack = new Stack<>();
final AlignmentList pathlist = new AlignmentList<>();
stack.push(new TmpPath(m, n));
TmpPath current;
int pathCounter = 1;
while (!stack.empty()) {
current = stack.pop();
//we are finished if we are at the start.
if (current.i == 0 && current.j == 0) {
//in that case we transform the temporary path to an actual one.
final Alignment alignment = new Alignment<>(
getComparator(), a, b, normalizedDistance);
while (!current.opStack.empty()) {
alignment.add(current.opStack.pop());
}
pathlist.add(alignment);
continue;
}
//otherwise we still have work to do.
final X left;
final Y right;
//If we are in the first column we can only delete.
if (current.j == 0) {
left = a.get(current.i - 1);
right = null;
final OperationType type = OperationType.DELETION;
final Operation del = new Operation<>(left, right, type, delCosts[current.i - 1]);
current.opStack.push(del);
current.i--;
stack.push(current);
continue;
}
//if we are in the first row we can only insert.
if (current.i == 0) {
left = null;
right = b.get(current.j - 1);
final OperationType type = OperationType.INSERTION;
final Operation ins = new Operation<>(left, right, type, insCosts[current.j - 1]);
current.opStack.push(ins);
current.j--;
stack.push(current);
continue;
}
//if we are inside the matrix we consider all operations that are co-optimal, until we
//reached the limit.
left = a.get(current.i - 1);
right = b.get(current.j - 1);
final double delTotal = alignMat[current.i - 1][current.j] + delCosts[current.i - 1];
final double insTotal = alignMat[current.i][current.j - 1] + insCosts[current.j - 1];
final double repTotal = alignMat[current.i - 1][current.j - 1] + repCosts[current.i - 1][current.j - 1];
final double optimum = alignMat[current.i][current.j];
boolean[] cooptimal = {delTotal == optimum, insTotal == optimum, repTotal == optimum};
if (pathCounter == pathLimit) {
//if we have reached the limit we choose one continuation of the path at random.
int counter = 0;
for (final boolean coop : cooptimal) {
if (coop) {
counter++;
}
}
if (counter > 1) {
final Random random = new Random();
int winnerIdx = random.nextInt(counter);
int i = 0;
for (int j = 0; j < cooptimal.length; j++) {
if (cooptimal[j]) {
if (i != winnerIdx) {
cooptimal[j] = false;
}
i++;
}
}
}
}
//continue the backtracing with a deletion if that is co-optimal.
if (cooptimal[0]) {
final Operation del = new Operation<>(left, null, OperationType.DELETION, delCosts[current.i - 1]);
current.opStack.push(del);
current.i--;
stack.push(current);
}
//continue the backtracing with an insertion if that is co-optimal.
if (cooptimal[1]) {
//if we already did continue the path using a deletion, we make a copy before we proceed.
if (cooptimal[0]) {
//reverse the changes made by a deletion
current = new TmpPath(current);
current.i++;
current.opStack.pop();
pathCounter++;
}
final Operation ins = new Operation<>(null, right, OperationType.INSERTION, insCosts[current.j - 1]);
current.opStack.push(ins);
current.j--;
stack.push(current);
}
//continue the backtracing with a replacement if that is co-optimal.
if (cooptimal[2]) {
//if we already did continue the path, we make a clone before we proceed.
if (cooptimal[0] || cooptimal[1]) {
current = new TmpPath(current);
pathCounter++;
//if we continued with an insertion, reverse the changes made by the insertion
if (cooptimal[1]) {
current.j++;
} else {
//if we used a deletion, reverse those changes.
current.i++;
}
current.opStack.pop();
}
final Operation rep = new Operation(left, right, OperationType.REPLACEMENT, repCosts[current.i - 1][current.j - 1]);
current.opStack.push(rep);
current.i--;
current.j--;
stack.push(current);
}
}
return pathlist;
}
private class TmpPath {
private Stack> opStack = new Stack<>();
private int i;
private int j;
public TmpPath(int i, int j) {
this.i = i;
this.j = j;
}
public TmpPath(TmpPath copy) {
this.i = copy.i;
this.j = copy.j;
for (final Operation op : copy.opStack) {
this.opStack.push(op);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy