// de.citec.tcs.alignment.AbstractStrictDTWAlgorithm Maven / Gradle / Ivy
//
// Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import de.citec.tcs.alignment.sequence.Value;
import java.util.ArrayList;

/**
 * This serves as an abstract super class for strict Dynamic Time Warping
 * implementations. For soft ones please refer to the
 * DynamicTimeWarpingAlgorithm class. Note that this algorithm calculates a
 * distance, while the soft DynamicTimeWarping algorithm calculates a
 * similarity.
 *
 * This implements the forward part of DTW, meaning the calculation of the
 * dynamic programming matrix. The backtracing has to be implemented by
 * subclasses (if it is required at all).
 *
 * Each call of calculateAlignment overwrites the matrix that is returned by
 * getLastDTWMatrix, so an instance carries state between calls.
 *
 * @param <R> the result type that transformToResult produces from the
 * dynamic programming matrix.
 *
 * @author Benjamin Paassen - [email protected]
 */
public abstract class AbstractStrictDTWAlgorithm<R> implements AlignmentAlgorithm<R> {

	private final Class<R> resultClass;
	private final AlignmentSpecification specification;
	private double[][] lastDTWMatrix;
	private double weightThreshold = 0;

	/**
	 * Stores the result class and the alignment specification for later use.
	 *
	 * @param resultClass the class of the results this algorithm produces.
	 * @param specification the specification that provides the comparators
	 * and their weighting.
	 */
	public AbstractStrictDTWAlgorithm(Class<R> resultClass, AlignmentSpecification specification) {
		this.resultClass = resultClass;
		this.specification = specification;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public Class<R> getResultClass() {
		return resultClass;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public AlignmentSpecification getSpecification() {
		return specification;
	}

	/**
	 * This returns the dynamic programming matrix that was calculated
	 * in the last call of calculateAlignment. The matrix is returned as is
	 * (no copy is made) and is null if calculateAlignment has not completed
	 * successfully yet.
	 *
	 * @return the dynamic programming matrix that was calculated
	 * in the last call of calculateAlignment.
	 */
	public double[][] getLastDTWMatrix() {
		return lastDTWMatrix;
	}

	/**
	 * Set a weight threshold (between 0 and 1) that determines which keywords
	 * should be ignored during calculation because their weight is negligible.
	 * Only keywords with a weight strictly above the threshold are used.
	 *
	 * The default value is 0.
	 *
	 * @param weightThreshold a weight threshold (between 0 and 1)
	 * @throws IllegalArgumentException if the given threshold is NaN or lies
	 * outside of the interval [0, 1].
	 */
	public void setWeightThreshold(double weightThreshold) {
		// The negated range check also rejects NaN. NaN passes both "< 0" and
		// "> 1" and would afterwards exclude every comparator, because no
		// weight compares as greater than NaN.
		if (!(weightThreshold >= 0 && weightThreshold <= 1)) {
			throw new IllegalArgumentException("A weight threshold has to be between 0 and 1!");
		}
		this.weightThreshold = weightThreshold;
	}

	/**
	 *
	 * @return The current weight threshold (0 per default).
	 */
	public double getWeightThreshold() {
		return weightThreshold;
	}

	/**
	 * {@inheritDoc }
	 *
	 * The local cost of a matrix cell (i, j) is the weighted sum of all
	 * comparator results for the i-th node of a and the j-th node of b, using
	 * only the comparators with a weight above the weight threshold. The
	 * calculated matrix is stored (see getLastDTWMatrix) and then handed to
	 * transformToResult.
	 *
	 * @throws IllegalArgumentException if one of the sequences is empty, if
	 * the first sequence does not have the node specification of the
	 * alignment specification or if the node specifications of both
	 * sequences differ.
	 */
	@Override
	public R calculateAlignment(Sequence a, Sequence b) {

		//check validity
		if (a.getNodes().isEmpty()) {
			throw new IllegalArgumentException("The first given sequence is empty!");
		}
		if (b.getNodes().isEmpty()) {
			throw new IllegalArgumentException("The second given sequence is emtpy!");
		}
		if (a.getNodeSpecification() != specification.getNodeSpecification()
				&& !a.getNodeSpecification().equals(specification.getNodeSpecification())) {
			throw new IllegalArgumentException(
					"The first input sequence has an unexpected node specification!");
		}
		if (a.getNodeSpecification() != b.getNodeSpecification()
				&& !a.getNodeSpecification().equals(b.getNodeSpecification())) {
			throw new IllegalArgumentException(
					"The node specifications of both input sequences to not match!");
		}

		//identify the subset of comparators that have an above threshold weighting.
		final ArrayList<Integer> relevantIndices = new ArrayList<>();
		for (int k = 0; k < specification.size(); k++) {
			if (specification.getWeighting()[k] > weightThreshold) {
				relevantIndices.add(k);
			}
		}

		final Comparator[] comparators = new Comparator[relevantIndices.size()];
		final double[] weights = new double[relevantIndices.size()];
		final int[] originalIndices = new int[relevantIndices.size()];
		for (int k = 0; k < comparators.length; k++) {
			//unbox the index once instead of once per lookup.
			final int index = relevantIndices.get(k);
			comparators[k] = specification.getComparator(index);
			weights[k] = specification.getWeighting()[index];
			originalIndices[k] = specification.getOriginalIndex(index);
		}

		final int m = a.getNodes().size();
		final int n = b.getNodes().size();

		double local = 0;
		Node aNode, bNode;

		final double[][] dtwMatrix = new double[m][n];
		/*
		 * initialize the alignment matrix. Note that I do not use the classic
		 * trick to initialize the first row and column with infinity. First
		 * this blows up the matrix by a linear summand, second it is an
		 * unnecessary workaround from my perspective, because you can exploit
		 * some nice properties of the second row and column (which I treat as
		 * the first one) to initialize it directly.
		 */
		{

			//initialize the first entry, which is just the comparison of the first
			//entries of both vectors. Note that the sequences are not allowed to be empty.
			final Value[] aValues = new Value[comparators.length];
			final Value[] bValues = new Value[comparators.length];
			aNode = a.getNodes().get(0);
			bNode = b.getNodes().get(0);
			for (int k = 0; k < comparators.length; k++) {
				aValues[k] = aNode.getValue(originalIndices[k]);
				bValues[k] = bNode.getValue(originalIndices[k]);
				local += weights[k] * comparators[k].compare(
						aValues[k], bValues[k]);
			}
			dtwMatrix[0][0] = local;

			//initialize first column. Here we can only elongate the first sequence
			//until the second one ends. The only possible predecessor is the
			//cell directly above.
			for (int i = 1; i < m; i++) {
				local = 0;
				aNode = a.getNodes().get(i);
				for (int k = 0; k < comparators.length; k++) {
					local += weights[k] * comparators[k].compare(
							aNode.getValue(originalIndices[k]),
							bValues[k]);
				}
				dtwMatrix[i][0] = dtwMatrix[i - 1][0] + local;
			}
			//initialize the first row. Here we can only elongate the second sequence
			//until the first one ends. The only possible predecessor is the
			//cell directly to the left.
			for (int j = 1; j < n; j++) {
				local = 0;
				bNode = b.getNodes().get(j);
				for (int k = 0; k < comparators.length; k++) {
					local += weights[k] * comparators[k].compare(
							aValues[k],
							bNode.getValue(originalIndices[k]));
				}
				dtwMatrix[0][j] = dtwMatrix[0][j - 1] + local;
			}
		}
		/*
		 * use the actual DTW algorithm
		 */
		//buffer the values of the first sequence.
		final Value[] aValues = new Value[comparators.length];
		for (int i = 1; i < m; i++) {
			aNode = a.getNodes().get(i);
			for (int k = 0; k < comparators.length; k++) {
				aValues[k] = aNode.getValue(originalIndices[k]);
			}
			for (int j = 1; j < n; j++) {
				//calculate the local cost. The weights have to be applied here
				//exactly as in the initialization of the first row and column,
				//otherwise the inner cells would be on a different scale than
				//the border cells.
				local = 0;
				bNode = b.getNodes().get(j);
				for (int k = 0; k < comparators.length; k++) {
					local += weights[k] * comparators[k].compare(
							aValues[k],
							bNode.getValue(originalIndices[k]));
				}
				//calculate the DTW matrix entry: local cost plus the cheapest
				//of the three possible predecessors.
				dtwMatrix[i][j] = local + Math.min(
						dtwMatrix[i - 1][j - 1],
						Math.min(dtwMatrix[i - 1][j],
								dtwMatrix[i][j - 1])
				);
			}
		}
		lastDTWMatrix = dtwMatrix;
		return transformToResult(dtwMatrix, a, b);
	}

	/**
	 * This method has to be implemented by sub classes to transform
	 * a calculated dynamic programming matrix to a valid result of
	 * that implementation. This also has to implement the backtracing if
	 * necessary.
	 *
	 * @param dtwMatrix a dynamic programming matrix calculated with respect to
	 * both input sequences. It has one row per node of a and one column per
	 * node of b.
	 * @param a the first input sequence.
	 * @param b the second input sequence.
	 * @return a valid result for this algorithm implementation.
	 */
	public abstract R transformToResult(double[][] dtwMatrix,
			final Sequence a, final Sequence b);

}