
de.citec.tcs.alignment.AbstractStrictDTWAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of algorithms-lib Show documentation
Show all versions of algorithms-lib Show documentation
This module contains standard implementations of
AlignmentAlgorithms. In contrast to the adp module these implementations
are hand-tailored for some specific algorithms and thus achieve somewhat
faster runtime (a constant factor of maybe 30-50 percent).
The newest version!
/*
* TCS Alignment Toolbox Version 3
*
* Copyright (C) 2016
* Benjamin Paaßen
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import java.util.List;
import lombok.NonNull;
/**
 * This is an abstract super class implementing the dynamic time warping (DTW) dissimilarity.
 *
 * See also: https://en.wikipedia.org/wiki/Dynamic_time_warping
 *
 * Given two sequences (x_1, ..., x_M) ∈ X^* and (y_1, ..., y_N) ∈ Y^* as well as a
 * Comparator d, this distance D is defined via four (recursive) equations:
 *
 * <ol>
 * <li>D((x_1), (y_1)) := d(x_1, y_1)</li>
 * <li>D((x_1, ..., x_i), (y_1)) := D((x_1, ..., x_i-1), (y_1)) + d(x_i, y_1)</li>
 * <li>D((x_1), (y_1, ..., y_j)) := D((x_1), (y_1, ..., y_j-1)) + d(x_1, y_j)</li>
 * <li>D((x_1, ..., x_i), (y_1, ..., y_j)) := min{
 * D((x_1, ..., x_i-1), (y_1, ..., y_j)) + d(x_i, y_j),
 * D((x_1, ..., x_i), (y_1, ..., y_j-1)) + d(x_i, y_j),
 * D((x_1, ..., x_i-1), (y_1, ..., y_j-1)) + d(x_i, y_j)
 * }</li>
 * </ol>
 *
 * Equations (2) and (3) as well as the first two minimum contributions in equation (4) correspond
 * to prolonging one of the input sequences. This construction also implies that the
 * triangular inequality does not hold for the DTW distance.
 *
 * Note that d(x_i, y_j) contributes to any element in the minimum set in equation (4) such that, in
 * principle, we can compute it outside of the minimum. However, this does only hold if the
 * comparator d is consistent across the three operations REPLACEMENT, DELETIONREPLACEMENT and
 * INSERTIONREPLACEMENT. This implementation therefore keeps one cost matrix per operation and
 * only shares a single matrix if the comparator reports a coherent replacement cost.
 *
 * Further note that in the worst case d(x_i, y_j) may return 1 for any input (as per definition of
 * a Comparator). In that case the minimum DTW distance is given by replacing all elements in the
 * left input sequence with all elements in the right input sequence and replacing the rest with
 * gaps. Thus, the worst-case alignment distance is max{M, N}. In order to obtain an alignment
 * distance between 0 and 1 we can just return D as specified above, divided by max{M, N}.
 *
 * Instances keep the matrix of the last calculation in a plain, unsynchronized field, so
 * concurrent calls of {@link #calculateAlignment(List, List)} on the same instance overwrite each
 * other's {@link #getLastDTWMatrix()} value.
 *
 * @author Benjamin Paassen - [email protected]
 * @param <X> the class of the elements in the left input sequence.
 * @param <Y> the class of the elements in the right input sequence.
 * @param <R> The result class.
 */
public abstract class AbstractStrictDTWAlgorithm<X, Y, R> implements AlignmentAlgorithm<X, Y, R> {

    private final Class<R> resultClass;
    private Comparator<X, Y> comparator;
    private double[][] lastDTWMatrix;

    /**
     * Creates a new algorithm instance. The comparator is passed through
     * ComparatorValidator.validate before it is stored.
     *
     * @param resultClass the class of the results this algorithm produces.
     * @param comparator the comparator that provides the local costs d(x_i, y_j).
     */
    public AbstractStrictDTWAlgorithm(@NonNull Class<R> resultClass, @NonNull Comparator<X, Y> comparator) {
        this.resultClass = resultClass;
        ComparatorValidator.validate(this, comparator);
        this.comparator = comparator;
    }

    /**
     * @return the class of the results this algorithm produces.
     */
    @Override
    public Class<R> getResultClass() {
        return resultClass;
    }

    /**
     * @return the comparator that is currently used to obtain the local costs.
     */
    @Override
    public Comparator<X, Y> getComparator() {
        return comparator;
    }

    /**
     * Replaces the comparator. The new comparator is passed through
     * ComparatorValidator.validate before it is stored, such that the old comparator stays in
     * place if the validation fails with an exception.
     *
     * @param comparator the new comparator.
     */
    @Override
    public void setComparator(@NonNull Comparator<X, Y> comparator) {
        ComparatorValidator.validate(this, comparator);
        this.comparator = comparator;
    }

    /**
     * DTW does not skip elements, it only replaces or prolongs. Therefore exactly the three
     * replacement-like operations are required.
     *
     * @param type an operation type.
     *
     * @return true for REPLACEMENT, DELETIONREPLACEMENT and INSERTIONREPLACEMENT, false for any
     * other operation type.
     */
    @Override
    public boolean requires(@NonNull OperationType type) {
        switch (type) {
            case REPLACEMENT:
            case DELETIONREPLACEMENT:
            case INSERTIONREPLACEMENT:
                return true;
            default:
                return false;
        }
    }

    /**
     * This returns the dynamic programming matrix that was calculated in the last call of
     * calculateAlignment.
     *
     * The returned array is the internally stored instance, not a copy. It is null if
     * calculateAlignment has not completed successfully on this instance yet.
     *
     * @return the dynamic programming matrix that was calculated in the last call of
     * calculateAlignment.
     */
    public double[][] getLastDTWMatrix() {
        return lastDTWMatrix;
    }

    /**
     * Calculates the DTW dynamic programming matrix for both input sequences, stores it as the
     * last DTW matrix and hands it to {@link #transformToResult} to build the actual result.
     *
     * @param a the first (left) input sequence; must not be empty.
     * @param b the second (right) input sequence; must not be empty.
     *
     * @return the result that transformToResult creates from the dynamic programming matrix.
     *
     * @throws IllegalArgumentException if one of the input sequences is empty.
     */
    @Override
    public R calculateAlignment(@NonNull List<X> a, @NonNull List<Y> b) {
        if (a.isEmpty()) {
            throw new IllegalArgumentException("The first given sequence is empty!");
        }
        if (b.isEmpty()) {
            throw new IllegalArgumentException("The second given sequence is empty!");
        }
        final int m = a.size();
        final int n = b.size();
        // Take one snapshot of the comparator and its coherence flag, such that the decision
        // whether the cost matrices are shared and the loop filling them can not disagree.
        final Comparator<X, Y> cmp = getComparator();
        final boolean coherent = cmp.hasCoherentReplacementCost();
        /*
         * Pre-cache the replacement costs. If the comparator is coherent, all three operations
         * have the same cost and a single matrix is shared between them.
         */
        final double[][] repCosts = new double[m][n];
        final double[][] delRepCosts;
        final double[][] insRepCosts;
        if (coherent) {
            delRepCosts = repCosts;
            insRepCosts = repCosts;
        } else {
            delRepCosts = new double[m][n];
            insRepCosts = new double[m][n];
        }
        for (int i = 0; i < m; i++) {
            final X x = a.get(i);
            for (int j = 0; j < n; j++) {
                final Y y = b.get(j);
                repCosts[i][j] = cmp.compare(OperationType.REPLACEMENT, x, y);
                if (!coherent) {
                    delRepCosts[i][j] = cmp.compare(OperationType.DELETIONREPLACEMENT, x, y);
                    insRepCosts[i][j] = cmp.compare(OperationType.INSERTIONREPLACEMENT, x, y);
                }
            }
        }
        final double[][] dtwMatrix = new double[m][n];
        /*
         * initialize the alignment matrix. Note that I do not use the classic
         * trick to initialize the first row and column with infinity. First
         * this blows up the matrix by a linear summand, second it is an
         * unnecessary workaround from my perspective, because you can exploit
         * some nice properties of the second row and column (which I treat as
         * the first one) to initialize it directly.
         */
        //initialize the first entry, which is just the comparison of the first
        //entries of both sequences. Note that the sequences are not allowed to be empty.
        dtwMatrix[0][0] = repCosts[0][0];
        //initialize first column. Here we can only elongate the first sequence
        //until the second one ends.
        for (int i = 1; i < m; i++) {
            dtwMatrix[i][0] = dtwMatrix[i - 1][0] + delRepCosts[i][0];
        }
        //initialize the first row. Here we can only elongate the second sequence
        //until the first one ends.
        for (int j = 1; j < n; j++) {
            dtwMatrix[0][j] = dtwMatrix[0][j - 1] + insRepCosts[0][j];
        }
        /*
         * use the actual DTW algorithm: each entry is the cheapest of the diagonal predecessor
         * (REPLACEMENT), the predecessor in the previous row (DELETIONREPLACEMENT) and the
         * predecessor in the previous column (INSERTIONREPLACEMENT).
         */
        for (int i = 1; i < m; i++) {
            for (int j = 1; j < n; j++) {
                final double viaReplacement = dtwMatrix[i - 1][j - 1] + repCosts[i][j];
                final double viaDeletionReplacement = dtwMatrix[i - 1][j] + delRepCosts[i][j];
                final double viaInsertionReplacement = dtwMatrix[i][j - 1] + insRepCosts[i][j];
                dtwMatrix[i][j] = Math.min(viaReplacement,
                        Math.min(viaDeletionReplacement, viaInsertionReplacement));
            }
        }
        lastDTWMatrix = dtwMatrix;
        return transformToResult(dtwMatrix, repCosts, delRepCosts, insRepCosts, a, b);
    }

    /**
     * This method has to be implemented by sub classes to transform a calculated dynamic
     * programming matrix to a valid result of that implementation. This also has to implement the
     * backtracing if necessary.
     *
     * Note that delRepCosts and insRepCosts are the very same array instance as repCosts if the
     * comparator reports a coherent replacement cost, so implementations must not modify one of
     * them expecting the others to stay untouched.
     *
     * @param dtwMatrix a dynamic programming matrix calculated with respect to both input
     * sequences.
     * @param repCosts the matrix of pairwise REPLACEMENT costs for each pairwise combination of
     * elements in the input sequences.
     * @param delRepCosts the matrix of pairwise DELETIONREPLACEMENT costs for each pairwise
     * combination of elements in the input sequences.
     * @param insRepCosts the matrix of pairwise INSERTIONREPLACEMENT costs for each pairwise
     * combination of elements in the input sequences.
     * @param a the first input sequence.
     * @param b the second input sequence.
     *
     * @return a valid result for this algorithm implementation.
     */
    public abstract R transformToResult(@NonNull double[][] dtwMatrix,
            @NonNull double[][] repCosts, @NonNull double[][] delRepCosts, @NonNull double[][] insRepCosts,
            @NonNull final List<X> a, @NonNull final List<Y> b);

    /**
     * Normalizes the given raw distance by the worst case that could occur in an alignment of the
     * two sequences: In the worst case, we replace all elements in a with elements in b and
     * delete-replace/insert-replace the remaining elements in the longer sequence. The cost of
     * those operations can be 1 at worst, if the Comparator is properly normalized. Thus, assuming
     * sequence lengths m and n respectively, we divide the raw distance by max{m, n} for
     * normalization.
     *
     * @param <X> the class of elements in the first input sequence.
     * @param <Y> the class of elements in the second input sequence.
     * @param d the raw alignment distance between sequences a and b in the range [0,infinity)
     * @param a the left-hand input sequence.
     * @param b the right-hand input sequence.
     *
     * @return the normalized alignment distance between sequences a and b in the range [0,1].
     *
     * @throws IllegalArgumentException if both sequences are empty but d is not 0.
     */
    public static <X, Y> double normalizeDissimilarity(double d, @NonNull final List<X> a, @NonNull final List<Y> b) {
        final int normalization = Math.max(a.size(), b.size());
        if (normalization == 0) {
            // Two empty sequences can only have distance 0; anything else hints at a bug upstream.
            if (d != 0) {
                throw new IllegalArgumentException("Unexpected internal state: "
                        + "Two aligned empty sequences lead to a dissimilarity "
                        + "other than 0.");
            }
            return 0;
        } else {
            return d / normalization;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy