All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.tcs.alignment.AbstractStrictDTWAlgorithm Maven / Gradle / Ivy

Go to download

This module contains standard implementations of AlignmentAlgorithms. In contrast to the adp module these implementations are hand-tailored for some specific algorithms and thus achieve somewhat faster runtime (a constant factor of maybe 30-50 percent).

The newest version!
/* 
 * TCS Alignment Toolbox Version 3
 * 
 * Copyright (C) 2016
 * Benjamin Paaßen
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.OperationType;
import java.util.List;
import lombok.NonNull;

/**
 * This is an abstract super class implementing the dynamic time warping (DTW) dissimilarity.
 *
 * See also: https://en.wikipedia.org/wiki/Dynamic_time_warping
 *
 * Given two sequences (x_1, ... , x_M) ∈ X* and (y_1, ... , y_N) ∈ Y^* as well as a
 * Comparator d, this distance D is defined via four (recursive) equations:
 *
 * 
    *
  1. D((x_1), (y_1)) := d(x_1, y_1)
  2. *
  3. D((x_1, ..., x_i) , (y_1)) := D((x_i, ... , x_i-1) , (y_1)) + d(x_i, y_1)
  4. *
  5. D((x_1), (y_1, ... , y_j)) := D((x_1) , (y_1, ... , y_j-1)) + d(x_1, y_j)
  6. *
  7. D((x_1, ..., x_i), (y_1, ... , y_j)) := min{ * D((x_i, ... , x_i-1) , (y_1, ... , y_j)) + d(x_i, y_j) , * D((x_1, ..., x_i) , (y_1, ... , y_j-1)) + d(x_i, y_j) , * D((x_i, ... , x_i-1) , (y_1, ... , y_j-1)) + d(x_i, y_j) * }
  8. *
* * Equations (2) and (3) as well as the first two minimum contributions in equation (4) correspond * to prolonging one of the input sequences. This construction also implies that the * triangular inequality does not hold for the DTW distance. * * Note that d(x_i, y_j) contributes to any element in the minimum set in equation (4) such that, in * principle, we can compute it outside of the minimum. However, this does only hold if the * comparator d is consistent across the three operations REPLACEMENT, DELETIONREPLACEMENT and * INSERTIONREPLACEMENT. * * Further note that in the worst case d(x_i, y_j) may return 1 for any input (as per definition of * a Comparator). In that case the minimum DTW distance is given by replacing all elements in the * left input sequence with all elements in the right input sequence and replacing the rest with * gaps. Thus, the worst-case alignment distance is max{M, N}. In order to obtain an alignment * distance between 0 and 1 we can just return D as specified above, divided by max{M, N}. * * @author Benjamin Paassen - [email protected] * @param the class of the elements in the left input sequence. * @param the class of the elements in the right input sequence. * @param The result class. 
*/ public abstract class AbstractStrictDTWAlgorithm implements AlignmentAlgorithm { private final Class resultClass; private Comparator comparator; private double[][] lastDTWMatrix; public AbstractStrictDTWAlgorithm(@NonNull Class resultClass, @NonNull Comparator comparator) { this.resultClass = resultClass; ComparatorValidator.validate(this, comparator); this.comparator = comparator; } @Override public Class getResultClass() { return resultClass; } @Override public Comparator getComparator() { return comparator; } @Override public void setComparator(@NonNull Comparator comparator) { ComparatorValidator.validate(this, comparator); this.comparator = comparator; } @Override public boolean requires(@NonNull OperationType type) { switch (type) { case REPLACEMENT: case DELETIONREPLACEMENT: case INSERTIONREPLACEMENT: return true; default: return false; } } /** * This returns the dynamic programming matrix that was calculated in the last call of * calculateAlignment. * * @return the dynamic programming matrix that was calculated in the last call of * calculateAlignment. */ public double[][] getLastDTWMatrix() { return lastDTWMatrix; } @Override public R calculateAlignment(@NonNull List a, @NonNull List b) { if (a.isEmpty()) { throw new IllegalArgumentException("The first given sequence is empty!"); } if (b.isEmpty()) { throw new IllegalArgumentException("The second given sequence is emtpy!"); } final int m = a.size(); final int n = b.size(); /* * Pre-cache the replacement costs. 
*/ final double[][] repCosts = new double[m][n]; final double[][] delRepCosts; final double[][] insRepCosts; if (getComparator().hasCoherentReplacementCost()) { delRepCosts = repCosts; insRepCosts = repCosts; } else { delRepCosts = new double[m][n]; insRepCosts = new double[m][n]; } { X x; Y y; for (int i = 0; i < m; i++) { x = a.get(i); for (int j = 0; j < n; j++) { y = b.get(j); repCosts[i][j] = getComparator().compare(OperationType.REPLACEMENT, x, y); if (!getComparator().hasCoherentReplacementCost()) { delRepCosts[i][j] = getComparator().compare(OperationType.DELETIONREPLACEMENT, x, y); insRepCosts[i][j] = getComparator().compare(OperationType.INSERTIONREPLACEMENT, x, y); } } } } final double[][] dtwMatrix = new double[m][n]; /* * initialize the alignment matrix. Note that I do not use the classic * trick to initialize the first row and column with infinity. First * this blows up the matrix by a linear summand, second it is an * unnecessary workaround from my perspective, because you can exploit * some nice properties of the second row and column (which I treat as * the first one) to initialize it directly. */ //initialize the first entry, which is just the comparison of the first //entries of both sequences. Note that the sequences are not allowed to be empty. dtwMatrix[0][0] = repCosts[0][0]; //initialize first column. Here we can only elongate the first sequence //until the second one ends. for (int i = 1; i < m; i++) { dtwMatrix[i][0] = dtwMatrix[i - 1][0] + delRepCosts[i][0]; } //initialize the first row. Here we can only elongate the second sequence //until the first one ends. for (int j = 1; j < n; j++) { dtwMatrix[0][j] = dtwMatrix[0][j - 1] + insRepCosts[0][j]; } /* * use the actual DTW algorithm */ for (int i = 1; i < m; i++) { for (int j = 1; j < n; j++) { //calculate the new DTW matrix entry. 
dtwMatrix[i][j] = Math.min( dtwMatrix[i - 1][j - 1] + repCosts[i][j], Math.min( dtwMatrix[i - 1][j] + delRepCosts[i][j], dtwMatrix[i][j - 1] + insRepCosts[i][j] ) ); } } lastDTWMatrix = dtwMatrix; return transformToResult(dtwMatrix, repCosts, delRepCosts, insRepCosts, a, b); } /** * This method has to be implemented by sub classes to transform a calculated dynamic * programming matrix to a valid result of that implementation. This also has to implement the * backtracing if necessary. * * @param dtwMatrix a dynamic programming matrix calculated with respect to both input * sequences. * @param repCosts the matrix of pairwise REPLACEMENT costs for each pairwise combination of * elements in the input sequences. * @param delRepCosts the matrix of pairwise DELETIONREPLACEMENT costs for each pairwise * combination of elements in the input sequences. * @param insRepCosts the matrix of pairwise INSERTIONREPLACEMENT costs for each pairwise * combination of elements in the input sequences. * @param a the first input sequence. * @param b the second input sequence. * * @return a valid result for this algorithm implementation. */ public abstract R transformToResult(@NonNull double[][] dtwMatrix, @NonNull double[][] repCosts, @NonNull double[][] delRepCosts, @NonNull double[][] insRepCosts, @NonNull final List a, @NonNull final List b); /** * Normalizes the given raw distance by the worst case that could occur in an alignment of the * two sequences: In the worst case, we replace all elements in a with elements in b and * delete-replace/insert-replace the remaining elements in the longer sequence. The cost of * those operations can be 1 at worst, if the Comparator is properly normalized. Thus, assuming * sequence lengths m and n respectively, we divide the raw distance by max{m, n} for * normalization. * * @param the class of elements in the first input sequence. * @param the class of elements in the second input sequence. 
* @param d the raw alignment distance between sequences a and b in the range [0,infinity) * @param a the left-hand input sequence. * @param b the right-hand input sequence. * * @return the normalized alignment distance between sequences a and b in the range [0,1]. */ public static double normalizeDissimilarity(double d, @NonNull final List a, @NonNull final List b) { final int normalization = Math.max(a.size(), b.size()); if (normalization == 0) { if (d != 0) { throw new IllegalArgumentException("Unexpected internal state: " + "Two aligned empty sequnces lead to a dissimilarity " + "other than 0."); } return 0; } else { return d / normalization; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy