// de.citec.tcs.alignment.AlignmentSpecification Maven / Gradle / Ivy
//
// Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.GapComparator;
import de.citec.tcs.alignment.comparators.SkipComparator;
import de.citec.tcs.alignment.sequence.IndexingScheme;
import de.citec.tcs.alignment.sequence.KeywordSpecification;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.NodeSpecification;
import java.util.Arrays;

/**
 * The alignment specification defines which keywords of the input sequences
 * shall be used and which comparator is to be used to calculate the local
 * distance between two values for that keyword.
 *
 * More formally speaking: Let K be the set of keywords specified by the
 * NodeSpecification that is the basis for the aligned sequences. Then the
 * AlignmentSpecification specifies a subset K' of K of keywords that are
 * actually relevant for the alignment and it specifies for each k in K' a
 * comparator c_k that maps values from the space specified in the
 * NodeSpecification for the keyword to the interval [0,1].
 *
 * Furthermore it specifies a weighting for the keywords in K'. Please refer to
 * the respective setter method for more information.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class AlignmentSpecification extends IndexingScheme implements Cloneable {

	private final NodeSpecification nodeSpecification;
	private final int[] originalIndices;
	private final Comparator[] comparators;
	private double[] weighting;

	/**
	 * Constructs a copy of the given other AlignmentSpecification. This
	 * will create a copy of the internal comparators array and the weighting
	 * array as well. The originalIndices array and the nodeSpecification will
	 * not be copied, though. Just the reference is copied. Any changes
	 * to the underlying NodeSpecification will affect this copy as well.
	 *
	 * This constructor is also used by the clone method.
	 *
	 * @param other another AlignmentSpecification that shall be copied.
	 */
	public AlignmentSpecification(AlignmentSpecification other) {
		super(other);
		this.nodeSpecification = other.nodeSpecification;
		this.originalIndices = other.originalIndices;
		this.comparators = new Comparator[other.comparators.length];
		System.arraycopy(other.comparators, 0, this.comparators, 0, this.comparators.length);
		this.weighting = new double[other.weighting.length];
		System.arraycopy(other.weighting, 0, this.weighting, 0, this.weighting.length);
	}

	/**
	 *
	 * @param nodeSpecification The node specification that is the basis of all
	 * sequences for this alignment.
	 * @param comparators the comparators that shall be used for the keywords.
	 * Please note that these have to match the types of the keywords.
	 */
	public AlignmentSpecification(NodeSpecification nodeSpecification,
			Comparator[] comparators) {
		this(nodeSpecification, nodeSpecification.getKeywords(), comparators);
	}

	/**
	 *
	 * @param nodeSpecification The node specification that is the basis of all
	 * sequences for this alignment.
	 * @param keywords The keywords that shall be used in this alignment. This
	 * has to be a subset of the keywords in the node specification.
	 * @param comparators the comparators that shall be used for the keywords.
	 * Please note that these have to match the types of the keywords.
	 */
	public AlignmentSpecification(NodeSpecification nodeSpecification,
			String[] keywords, Comparator[] comparators) {
		super(keywords);
		this.nodeSpecification = nodeSpecification;
		final int K = keywords.length;
		if (K != comparators.length) {
			throw new RuntimeException(
					"The number of keywords and the number of comparators given do not match!");
		}
		this.comparators = comparators;
		this.originalIndices = new int[K];
		for (int k = 0; k < K; k++) {
			if (!nodeSpecification.hasKeyword(keywords[k])) {
				throw new RuntimeException("The keyword " + keywords[k]
						+ " is not contained in the given NodeSpecification!");
			}
			final KeywordSpecification keywordSpec = nodeSpecification.getKeywordSpecification(
					keywords[k]);
			if (keywordSpec.getType() != comparators[k].getType()) {
				throw new RuntimeException("The comparator given for keyword " + keywords[k]
						+ " has the wrong type!");
			}
			this.originalIndices[k] = nodeSpecification.getKeywordIndex(keywords[k]);
		}
		this.weighting = new double[K];
		for (int k = 0; k < K; k++) {
			this.weighting[k] = 1. / (double) K;
		}
	}

	/**
	 *
	 * @param nodeSpecification The node specification that is the basis of all
	 * sequences for this alignment.
	 * @param keywords The keywords that shall be used in this alignment. This
	 * has to be a subset of the keywords in the node specification.
	 * @param comparators the comparators that shall be used for the keywords.
	 * Please note that these have to match the types of the keywords.
	 * @param weighting the weights that highlight the importance of certain
	 * keywords/features. These have to be double values between 0 and 1 that
	 * add up to 1.
	 */
	public AlignmentSpecification(NodeSpecification nodeSpecification,
			String[] keywords, Comparator[] comparators, double[] weighting) {
		super(keywords);
		this.nodeSpecification = nodeSpecification;
		if (keywords.length != comparators.length) {
			throw new RuntimeException(
					"The number of keywords and the number of comparators given do not match!");
		}
		if (keywords.length != weighting.length) {
			throw new RuntimeException(
					"The number of keywords and the number of weights given do not match!");
		}
		this.comparators = comparators;
		this.weighting = weighting;
		this.originalIndices = new int[keywords.length];
		for (int k = 0; k < keywords.length; k++) {
			if (!nodeSpecification.hasKeyword(keywords[k])) {
				throw new RuntimeException("The keyword " + keywords[k]
						+ " is not contained in the given NodeSpecification!");
			}
			final KeywordSpecification keywordSpec = nodeSpecification.getKeywordSpecification(
					keywords[k]);
			if (keywordSpec.getType() != comparators[k].getType()) {
				throw new RuntimeException("The comparator given for keyword " + keywords[k]
						+ " has the wrong type!");
			}
			this.originalIndices[k] = nodeSpecification.getKeywordIndex(keywords[k]);
		}
		if (!isValidWeighting(weighting)) {
			throw new RuntimeException("The given weighting is invalid!");
		}
	}

	/**
	 *
	 * @return The NodeSpecification that defines the possible keywords and
	 * types used within this Alignment.
	 */
	public NodeSpecification getNodeSpecification() {
		return nodeSpecification;
	}

	/**
	 * Sets the comparator for a given keyword.
	 *
	 * @param keyword a keyword that is used in this Alignment.
	 * @param comparator a fitting Comparator that defines a local normalized
	 * distance on the values for the given keyword.
	 */
	public void setComparator(final String keyword, final Comparator comparator) {
		setComparator(getKeywordIndex(keyword), comparator);
	}

	/**
	 * Sets the comparator for a given keyword.
	 *
	 * @param index a keyword index that is used in this Alignment.
	 * @param comparator a fitting Comparator that defines a local normalized
	 * distance on the values for the given keyword.
	 */
	public void setComparator(final int index, final Comparator comparator) {
		if (nodeSpecification.getKeywordSpecification(getOriginalIndex(index)).getType() != comparator.getType()) {
			throw new RuntimeException("The given comparator has not the correct type!");
		}
		comparators[index] = comparator;
	}

	/**
	 * Returns the Comparator that is currently set for the given keyword.
	 *
	 * @param keyword a keyword that is used in this Alignment.
	 *
	 * @return the Comparator that is currently set for the given keyword.
	 */
	public Comparator getComparator(final String keyword) {
		return getComparator(getKeywordIndex(keyword));
	}

	/**
	 * Returns the Comparator that is currently set for the given keyword.
	 *
	 * @param index a keyword index that is used in this Alignment.
	 *
	 * @return the Comparator that is currently set for the given keyword.
	 */
	public Comparator getComparator(int index) {
		return comparators[index];
	}

	/**
	 *
	 * This is semantically equivalent to
	 * getNodeSpecification().getKeywordIndex(getKeyword(index)) but this is
	 * preprocessed here for better performance.
	 *
	 * @param index a keyword index in this AlignmentSpecification.
	 *
	 * @return The index of this keyword according to the indexing scheme of the
	 * node specification.
	 */
	public int getOriginalIndex(int index) {
		return originalIndices[index];
	}

	/**
	 * This sets the current weighting of keywords. The input array has to have
	 * the same number of entries as there are keywords in this alignment and it
	 * has to be a valid weighting according to the isValidWeighting method.
	 *
	 * This defines in a way the relevance of certain keywords. The local cost
	 * of an alignment Operation is given as the product of this weighting
	 * vector w and the vector of costs that the comparators give.
	 *
	 * Thus a weight of 0 means that a keyword is disregarded, while a high
	 * weight (close to 1) means, that the Operation cost is dominated by this
	 * Comparator.
	 *
	 * @param weighting a double vector fulfilling the criteria given above.
	 */
	public void setWeighting(double[] weighting) {
		if (weighting.length != this.weighting.length) {
			throw new RuntimeException("The given weighting has the wrong length!");
		}
		if (!isValidWeighting(weighting)) {
			throw new RuntimeException("The given weighting is invalid!");
		}
		this.weighting = weighting;
	}

	/**
	 * Returns the current keyword weighting. Please refer to the setWeighting
	 * method for more information.
	 *
	 * @return the current keyword weighting.
	 */
	public double[] getWeighting() {
		return weighting;
	}

	/**
	 * This calculates the non-weighted costs for replacing the single values in
	 * node a by the values in node b.
	 *
	 * @param a The node from the left sequence.
	 * @param b The node from the right sequence.
	 *
	 * @return the non-weighted costs for replacing the single values in node a
	 * by the values in node b.
	 */
	public double[] calculateReplacementCosts(final Node a, final Node b) {
		final double[] costs = new double[size()];
		for (int k = 0; k < costs.length; k++) {
			costs[k] = comparators[k].compare(
					a.getValue(originalIndices[k]),
					b.getValue(originalIndices[k]));
		}
		return costs;
	}

	/**
	 * This calculates the non-weighted costs for deleting the single values in
	 * node a.
	 *
	 * @param a The node from the left sequence.
	 *
	 * @return the non-weighted costs for deleting the single values in node a.
	 */
	public double[] calculateDeletionCosts(final Node a) {
		final double[] costs = new double[size()];
		for (int k = 0; k < costs.length; k++) {
			final GapComparator comp = (GapComparator) comparators[k];
			costs[k] = comp.delete(a.getValue(originalIndices[k]));
		}
		return costs;
	}

	/**
	 * This calculates the non-weighted costs for inserting the single values of
	 * node b.
	 *
	 * @param b The node from the right sequence.
	 *
	 * @return the non-weighted costs for inserting the single values of node b.
	 */
	public double[] calculateInsertionCosts(final Node b) {
		double[] costs = new double[size()];
		for (int k = 0; k < costs.length; k++) {
			final GapComparator comp = (GapComparator) comparators[k];
			costs[k] = comp.insert(b.getValue(originalIndices[k]));
		}
		return costs;
	}

	/**
	 * This calculates the non-weighted costs for skip-deleting the single
	 * values in node a.
	 *
	 * @param a The node from the left sequence.
	 *
	 * @return the non-weighted costs for skip-deleting the single values in
	 * node a.
	 */
	public double[] calculateSkipDeletionCosts(final Node a) {
		final double[] costs = new double[size()];
		for (int k = 0; k < costs.length; k++) {
			final SkipComparator comp = (SkipComparator) comparators[k];
			costs[k] = comp.skipDelete(a.getValue(originalIndices[k]));
		}
		return costs;
	}

	/**
	 * This calculates the non-weighted costs for skip-inserting the single
	 * values of node b.
	 *
	 * @param b The node from the right sequence.
	 *
	 * @return the non-weighted costs for skip-inserting the single values of
	 * node b.
	 */
	public double[] calculateSkipInsertionCosts(final Node b) {
		double[] costs = new double[size()];
		for (int k = 0; k < costs.length; k++) {
			final SkipComparator comp = (SkipComparator) comparators[k];
			costs[k] = comp.skipInsert(b.getValue(originalIndices[k]));
		}
		return costs;
	}

	/**
	 * This checks whether the given weighting is a valid one. A valid weighting
	 * fulfils the following conditions:
	 * Its entries are in the interval [0,1].
	 * The entries add up to 1
	 *
	 * This makes a weighting in essence to a probability distribution.
	 *
	 * @param weighting a double array that might be a weighting.
	 *
	 * @return true if it is a weighting according to the criteria given above
	 * and false otherwise.
	 */
	public static boolean isValidWeighting(final double[] weighting) {
		double sum = 0;
		for (final double weight : weighting) {
			if (weight < 0 || weight > 1) {
				return false;
			}
			sum += weight;
		}
		if (Math.abs(sum - 1) > 1E-3) {
			return false;
		}
		return true;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public int hashCode() {
		int hash = 3;
		hash = 47 * hash + (this.nodeSpecification != null ? this.nodeSpecification.hashCode() : 0);
		hash = 47 * hash + Arrays.hashCode(this.originalIndices);
		hash = 47 * hash + Arrays.deepHashCode(this.comparators);
		hash = 47 * hash + Arrays.hashCode(this.weighting);
		return hash;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public boolean equals(Object obj) {
		if (obj == null) {
			return false;
		}
		if (getClass() != obj.getClass()) {
			return false;
		}
		final AlignmentSpecification other = (AlignmentSpecification) obj;
		if (this.nodeSpecification != other.nodeSpecification
				&& (this.nodeSpecification == null
				|| !this.nodeSpecification.equals(other.nodeSpecification))) {
			return false;
		}
		if (!Arrays.equals(this.originalIndices, other.originalIndices)) {
			return false;
		}
		if (!Arrays.deepEquals(this.comparators, other.comparators)) {
			return false;
		}
		if (!Arrays.equals(this.weighting, other.weighting)) {
			return false;
		}
		return true;
	}

	@Override
	public String toString() {
		if (size() == 0) {
			return "";
		}
		final StringBuilder builder = new StringBuilder();
		for (int k = 0; k < size(); k++) {
			builder.append(getKeyword(k));
			builder.append(" (weight=");
			builder.append(Double.toString(weighting[k]));
			builder.append(") : ");
			builder.append(comparators[k].toString());
			builder.append("\n");
		}
		builder.delete(builder.length() - 1, builder.length());
		return builder.toString();
	}

	@Override
	protected Object clone() throws CloneNotSupportedException {
		return new AlignmentSpecification(this);
	}
}