
de.citec.tcs.alignment.StrictAffineAlignmentFullAlgorithm Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.ArrayList;
import java.util.EnumMap;
import java.util.EnumSet;
import java.util.Random;
/**
* This implements a local affine alignment similar to Smith-Waterman
* and Gotoh. More information about this alignment can be found in the
* AbstractLocalAlignmentAlgorithm.
*
* Note that you are allowed to specify higher deletion, insertion or comparison
* costs than 1 in your comparators here, because it is still possible to skip
* both sequences entirely in the worst case. It is, however, still recommended
* to set the local comparator costs between 0 and 1 as well.
*
* The end result is a path of an exemplary, optimal alignment. If more than
* one optimal path exists, a random one is chosen.
*
* @author Benjamin Paassen - [email protected]
*/
public class StrictAffineAlignmentFullAlgorithm
extends AbstractAffineAlignmentAlgorithm {
private OperationType[] preferredCooptimal = null;
/**
* This allows to specify an order in which operations shall be preferred if
* cooptimal alignments occur. The first elements in the input array will be
* counted as preferred. If this is set to null co-optimal operations will
* be chosen randomly.
*
* @param preferredCooptimal
*/
public void setPreferredCooptimal(OperationType[] preferredCooptimal) {
// ensure that this contains all operations.
final EnumSet all = EnumSet.allOf(OperationType.class);
for (final OperationType t : preferredCooptimal) {
if (!all.remove(t)) {
throw new IllegalArgumentException("The given preference array "
+ "contianed " + t + " multiple times!");
}
}
for (final OperationType r : all) {
throw new IllegalArgumentException("A preference array of Operation "
+ "types must contain all OperationTypes but " + r + " was missing!");
}
this.preferredCooptimal = preferredCooptimal;
}
/**
* This allows to specify an order in which operations shall be preferred if
* cooptimal alignments occur. The first elements in the input array will be
* counted as preferred. If this is set to null co-optimal operations will
* be chosen randomly.
*
* @return
*/
public OperationType[] getPreferredCooptimal() {
return preferredCooptimal;
}
public StrictAffineAlignmentFullAlgorithm(
AlignmentSpecification alignmentSpecification) {
super(AlignmentPath.class, alignmentSpecification);
}
/**
* This implements the strict maximum of the given input doubles.
*
* @return the strict maximum of the given input doubles.
*/
@Override
public double choice(double... choices) {
if (choices == null || choices.length == 0) {
throw new IllegalArgumentException("Choice input was invalid!");
}
double min = choices[0];
for (int i = 1; i < choices.length; i++) {
if (choices[i] < min) {
min = choices[i];
}
}
return min;
}
/**
* {@inheritDoc }
*/
@Override
public AlignmentPath transformToResult(EnumMap dp_tables,
double[][] compareMatrix,
double[] deletionMatrix, double[] insertionMatrix,
double[] skipDeletionMatrix, double[] skipInsertionMatrix,
Sequence a, Sequence b) {
final double accum_score = dp_tables.get(Recurrence.SKIPDEL_START)[0][0];
final double score;
if (accum_score > 0) {
//normalize it by the cost for skipping both sequences entirely.
double worstScore = 0;
for (int i = 0; i < skipDeletionMatrix.length; i++) {
worstScore += skipDeletionMatrix[i];
}
for (int j = 0; j < skipInsertionMatrix.length; j++) {
worstScore += skipInsertionMatrix[j];
}
score = Math.min(1, accum_score / worstScore);
} else {
score = 0;
}
/*
* We reconstruct an optimal alignment path by backtracing. I can not
* explain everything in detail here, because the local alignment is a
* fairly complicated algorithm, but again I will refer to the
* Bellman's Gap code that i wrote and highlight the important passages.
*/
final AlignmentPath path = new AlignmentPath(getSpecification(),
a, b, score);
int i = 0;
int j = 0;
final int M = a.getNodes().size();
final int N = b.getNodes().size();
Recurrence current = Recurrence.SKIPDEL_START;
final Random random = new Random();
double currentScore;
while (i < M || j < N) {
currentScore = dp_tables.get(current)[i][j];
//determine co-optimals.
final ArrayList cooptimals = new ArrayList<>();
final ArrayList cooptimalOps = new ArrayList<>();
final ArrayList cooptimalCounts = new ArrayList<>();
switch (current) {
/*
* SKIPDEL_START = skip_del(, SKIPDEL_START) |
* SKIPINS_START #h;
*/
case SKIPDEL_START: {
if (j > 0) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "SKIPDEL_START was used in an "
+ "ill-defined region!");
}
if (i < M) {
//skip_del(, SKIPDEL_START)
final double skipDelOld
= dp_tables.get(Recurrence.SKIPDEL_START)[i + 1][j];
if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
cooptimals.add(Recurrence.SKIPDEL_START);
cooptimalOps.add(OperationType.SKIPDELETION);
cooptimalCounts.add(1);
}
}
//SKIPINS_START
if (dp_tables.get(Recurrence.SKIPINS_START)[i][j] == currentScore) {
cooptimals.add(Recurrence.SKIPINS_START);
cooptimalOps.add(null);
cooptimalCounts.add(0);
}
}
break;
/*
* SKIPINS_START = skip_ins(, SKIPINS_START) |
* rep(, ALI) |
* nil() #h;
*/
case SKIPINS_START: {
if (j == N && i < M) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "SKIPINS_START was used in an "
+ "ill-defined region!");
}
if (j < N) {
//skip_ins(, SKIPINS_START)
if (j < N - 1 || i == M) {
final double skipInsOld
= dp_tables.get(Recurrence.SKIPINS_START)[i][j + 1];
if (skipInsOld + skipInsertionMatrix[j] == currentScore) {
cooptimals.add(Recurrence.SKIPINS_START);
cooptimalOps.add(OperationType.SKIPINSERTION);
cooptimalCounts.add(1);
}
}
//rep(, ALI)
if (i < M) {
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
}
}
//nil does not have to be considered here.
}
break;
/*
* ALI = del(, DEL) | ins(, INS) |
* rep(, ALI) |
* SKIPDEL_MIDDLE | SKIPINS_MIDDLE | SKIPDEL_END #h;
*
*/
case ALI: {
if (i < M && j < N) {
//del(, DEL)
if (i < M - 1) {
final double delOld
= dp_tables.get(Recurrence.DEL)[i + 1][j];
if (delOld + deletionMatrix[i] == currentScore) {
cooptimals.add(Recurrence.DEL);
cooptimalOps.add(OperationType.DELETION);
cooptimalCounts.add(1);
}
}
//ins(, INS)
if (j < N - 1) {
final double insOld
= dp_tables.get(Recurrence.INS)[i][j + 1];
if (insOld + insertionMatrix[j] == currentScore) {
cooptimals.add(Recurrence.INS);
cooptimalOps.add(OperationType.INSERTION);
cooptimalCounts.add(1);
}
}
//rep(, ALI)
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
/*
* SKIPDEL_MIDDLE or, better said:
* skip_del(,
* skip_del(,
* skip_del(,
* skip_del(,
* SKIPDEL_MIDDLE_LOOP))))
*/
if (i < M - getMinMiddleSkips()) {
double skipDelMiddleCost
= dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + getMinMiddleSkips()][j];
for (int i2 = 0; i2 < getMinMiddleSkips(); i2++) {
skipDelMiddleCost += skipDeletionMatrix[i + i2];
}
if (skipDelMiddleCost == currentScore) {
cooptimals.add(Recurrence.SKIPDEL_MIDDLE);
cooptimalOps.add(OperationType.SKIPDELETION);
cooptimalCounts.add(getMinMiddleSkips());
}
}
/*
* SKIPINS_MIDDLE or, better said:
* skip_ins(,
* skip_ins(,
* skip_ins(,
* skip_ins(,
* SKIPINS_MIDDLE_LOOP)))) #h;
*/
if (j < N - getMinMiddleSkips()) {
double skipInsMiddleCost
= dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + getMinMiddleSkips()];
for (int j2 = 0; j2 < getMinMiddleSkips(); j2++) {
skipInsMiddleCost += skipInsertionMatrix[j + j2];
}
if (skipInsMiddleCost == currentScore) {
cooptimals.add(Recurrence.SKIPINS_MIDDLE);
cooptimalOps.add(OperationType.SKIPINSERTION);
cooptimalCounts.add(getMinMiddleSkips());
}
}
}
//SKIPDEL_END
if (dp_tables.get(Recurrence.SKIPDEL_END)[i][j] == currentScore) {
cooptimals.add(Recurrence.SKIPDEL_END);
cooptimalOps.add(null);
cooptimalCounts.add(0);
}
}
break;
/*
* DEL = del(, DEL) | ins(, INS) |
* rep(, ALI) #h;
*/
case DEL: {
if (i == M || j == N) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "DEL was used in an "
+ "ill-defined region!");
}
if (i < M && j < N) {
//del(, DEL)
if (i < M - 1) {
final double delOld
= dp_tables.get(Recurrence.DEL)[i + 1][j];
if (delOld + deletionMatrix[i] == currentScore) {
cooptimals.add(Recurrence.DEL);
cooptimalOps.add(OperationType.DELETION);
cooptimalCounts.add(1);
}
}
//ins(, INS)
if (j < N - 1) {
final double insOld
= dp_tables.get(Recurrence.INS)[i][j + 1];
if (insOld + insertionMatrix[j] == currentScore) {
cooptimals.add(Recurrence.INS);
cooptimalOps.add(OperationType.INSERTION);
cooptimalCounts.add(1);
}
}
//rep(, ALI)
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
}
}
break;
/*
* INS = ins(, INS) | rep(, ALI) #h;
*/
case INS: {
if (i == M || j == N) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "INS was used in an "
+ "ill-defined region!");
}
if (i < M && j < N) {
//ins(, INS)
if (j < N - 1) {
final double insOld
= dp_tables.get(Recurrence.INS)[i][j + 1];
if (insOld + insertionMatrix[j] == currentScore) {
cooptimals.add(Recurrence.INS);
cooptimalOps.add(OperationType.INSERTION);
cooptimalCounts.add(1);
}
}
//rep(, ALI)
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
}
}
break;
/*
* SKIPDEL_MIDDLE_LOOP = skip_del(,
* SKIPDEL_MIDDLE_LOOP) |
* rep(,ALI) #h;
*/
case SKIPDEL_MIDDLE: {
if (i == M || j == N) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "SKIPDEL_MIDDLE was used in an "
+ "ill-defined region!");
}
//skip_del(, SKIPDEL_MIDDLE_LOOP)
if (i < M && j < N) {
if (i < M - 1) {
final double skipDelOld
= dp_tables.get(Recurrence.SKIPDEL_MIDDLE)[i + 1][j];
if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
cooptimals.add(Recurrence.SKIPDEL_MIDDLE);
cooptimalOps.add(OperationType.SKIPDELETION);
cooptimalCounts.add(1);
}
}
//rep(,ALI)
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
}
}
break;
/*
* SKIPINS_MIDDLE_LOOP = skip_ins(,
* SKIPINS_MIDDLE_LOOP) |
* rep(,ALI) #h;
*/
case SKIPINS_MIDDLE: {
if (i == M || j == N) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "SKIPINS_MIDDLE was used in an "
+ "ill-defined region!");
}
if (i < M && j < N) {
if (j < N - 1) {
//skip_ins(, SKIPINS_MIDDLE_LOOP)
final double skipInsOld
= dp_tables.get(Recurrence.SKIPINS_MIDDLE)[i][j + 1];
if (skipInsOld + skipInsertionMatrix[j] == currentScore) {
cooptimals.add(Recurrence.SKIPINS_MIDDLE);
cooptimalOps.add(OperationType.SKIPINSERTION);
cooptimalCounts.add(1);
}
}
//rep(,ALI)
final double repOld
= dp_tables.get(Recurrence.ALI)[i + 1][j + 1];
if (repOld + compareMatrix[i][j] == currentScore) {
cooptimals.add(Recurrence.ALI);
cooptimalOps.add(OperationType.REPLACEMENT);
cooptimalCounts.add(1);
}
}
}
break;
/*
* SKIPDEL_END = skip_del(, SKIPDEL_END) |
* SKIPINS_END #h;
*/
case SKIPDEL_END: {
if (i < M) {
final double skipDelOld
= dp_tables.get(Recurrence.SKIPDEL_END)[i + 1][j];
if (skipDelOld + skipDeletionMatrix[i] == currentScore) {
cooptimals.add(Recurrence.SKIPDEL_END);
cooptimalOps.add(OperationType.SKIPDELETION);
cooptimalCounts.add(1);
}
}
if (dp_tables.get(Recurrence.SKIPINS_END)[i][j] == currentScore) {
cooptimals.add(Recurrence.SKIPINS_END);
cooptimalOps.add(null);
cooptimalCounts.add(0);
}
}
break;
case SKIPINS_END: {
if (i < M) {
throw new UnsupportedOperationException(
"Unexpected internal error: "
+ "SKIPINS_END was used in an "
+ "ill-defined region!");
}
cooptimals.add(Recurrence.SKIPINS_END);
cooptimalOps.add(OperationType.SKIPINSERTION);
cooptimalCounts.add(1);
}
break;
default:
throw new UnsupportedOperationException(
"Unknown recurrence! " + current);
}
if (cooptimals.isEmpty()) {
throw new RuntimeException("Unexpected internal error!"
+ "No alignment choices!");
}
//choose a cooptimal to continue.
final Recurrence next;
final OperationType nextType;
final int nextCount;
if (cooptimals.size() > 1) {
if (preferredCooptimal == null) {
// choose randomly when we have no explicit preference.
final int r = random.nextInt(cooptimals.size());
next = cooptimals.get(r);
nextType = cooptimalOps.get(r);
nextCount = cooptimalCounts.get(r);
} else {
// otherwise choose according to preference.
int pref = 0;
int prefIdx = preferredCooptimal.length + 1;
for (int r = 0; r < cooptimals.size(); r++) {
final OperationType op = cooptimalOps.get(r);
if (op == null) {
continue;
}
int p = 0;
while (preferredCooptimal[p] != op) {
p++;
}
if (p < prefIdx) {
prefIdx = p;
pref = r;
}
}
next = cooptimals.get(pref);
nextType = cooptimalOps.get(pref);
nextCount = cooptimalCounts.get(pref);
}
} else {
next = cooptimals.get(0);
nextType = cooptimalOps.get(0);
nextCount = cooptimalCounts.get(0);
}
for (int s = 0; s < nextCount; s++) {
final double[] localCosts;
final Node left;
final Node right;
switch (nextType) {
case SKIPDELETION:
left = a.getNodes().get(i + s);
right = null;
localCosts = getSpecification().
calculateSkipDeletionCosts(left);
break;
case DELETION:
left = a.getNodes().get(i + s);
right = null;
localCosts = getSpecification().
calculateDeletionCosts(left);
break;
case SKIPINSERTION:
left = null;
right = b.getNodes().get(j + s);
localCosts = getSpecification().
calculateSkipInsertionCosts(right);
break;
case INSERTION:
left = null;
right = b.getNodes().get(j + s);
localCosts = getSpecification().
calculateInsertionCosts(right);
break;
case REPLACEMENT:
left = a.getNodes().get(i + s);
right = b.getNodes().get(j + s);
localCosts = getSpecification().
calculateReplacementCosts(left, right);
break;
default:
throw new UnsupportedOperationException(
"Unknown operation! " + nextType);
}
final Operation op = new Operation(left, right,
nextType, path);
op.setComparatorDistances(localCosts);
path.getOperations().add(op);
}
//adjust position
if (nextType != null) {
switch (nextType) {
case SKIPDELETION:
case DELETION:
i += nextCount;
break;
case SKIPINSERTION:
case INSERTION:
j += nextCount;
break;
case REPLACEMENT:
i += nextCount;
j += nextCount;
break;
}
}
//continue calculation.
current = next;
}
return path;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy