// de.citec.tcs.alignment.AlignmentSpecification Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.comparators.Comparator;
import de.citec.tcs.alignment.comparators.GapComparator;
import de.citec.tcs.alignment.comparators.SkipComparator;
import de.citec.tcs.alignment.sequence.IndexingScheme;
import de.citec.tcs.alignment.sequence.KeywordSpecification;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.NodeSpecification;
import java.util.Arrays;
/**
* The alignment specification defines which keywords of the input sequences
* shall be used and which comparator is to be used to calculate the local
* distance between two values for that keyword.
*
* More formally speaking: Let K be the set of keywords specified by the
* NodeSpecification that is the basis for the aligned sequences. Then the
* AlignmentSpecification specifies a subset K' of K of keywords that are
* actually relevant for the alignment and it specifies for each k in K' a
* comparator c_k that maps values from the space specified in the
* NodeSpecification for the keyword to the interval [0,1].
*
* Furthermore it specifies a weighting for the keywords in K'. Please refer to
* the respective setter method for more information.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
*/
public class AlignmentSpecification extends IndexingScheme implements Cloneable {

    /**
     * Maximum absolute deviation from 1 that the sum of a weighting may have
     * to still be accepted by {@link #isValidWeighting(double[])}.
     */
    private static final double WEIGHT_SUM_TOLERANCE = 1E-3;

    private final NodeSpecification nodeSpecification;
    /**
     * For each keyword index k of this specification, the index of the same
     * keyword in the underlying NodeSpecification.
     */
    private final int[] originalIndices;
    private final Comparator[] comparators;
    private double[] weighting;

    /**
     * Constructs a copy of the given other AlignmentSpecification. This
     * will create a copy of the internal comparators array and the weighting
     * array as well. The originalIndices array and the nodeSpecification will
     * not be copied, though. Just the reference is copied. Any changes
     * to the underlying NodeSpecification will affect this copy as well.
     *
     * This constructor is also used by the clone method.
     *
     * @param other another AlignmentSpecification that shall be copied.
     */
    public AlignmentSpecification(AlignmentSpecification other) {
        super(other);
        this.nodeSpecification = other.nodeSpecification;
        // never written after construction, so sharing the reference is safe
        this.originalIndices = other.originalIndices;
        this.comparators = other.comparators.clone();
        this.weighting = other.weighting.clone();
    }

    /**
     * Creates a specification that uses all keywords of the given
     * NodeSpecification with a uniform weighting.
     *
     * @param nodeSpecification The node specification that is the basis of all
     * sequences for this alignment.
     * @param comparators the comparators that shall be used for the keywords.
     * Please note that these have to match the types of the keywords.
     *
     * @throws IllegalArgumentException if the number of comparators does not
     * match the number of keywords or a comparator has the wrong type.
     */
    public AlignmentSpecification(NodeSpecification nodeSpecification,
            Comparator[] comparators) {
        this(nodeSpecification, nodeSpecification.getKeywords(), comparators);
    }

    /**
     * Creates a specification for the given keywords with a uniform weighting,
     * i.e. each of the K keywords gets the weight 1/K.
     *
     * @param nodeSpecification The node specification that is the basis of all
     * sequences for this alignment.
     * @param keywords The keywords that shall be used in this alignment. This
     * has to be a subset of the keywords in the node specification.
     * @param comparators the comparators that shall be used for the keywords.
     * Please note that these have to match the types of the keywords. The
     * array is copied; later changes to it do not affect this object.
     *
     * @throws IllegalArgumentException if the array lengths do not match, a
     * keyword is unknown to the node specification or a comparator has the
     * wrong type.
     */
    public AlignmentSpecification(NodeSpecification nodeSpecification,
            String[] keywords, Comparator[] comparators) {
        super(keywords);
        this.nodeSpecification = nodeSpecification;
        final int K = keywords.length;
        if (K != comparators.length) {
            throw new IllegalArgumentException(
                    "The number of keywords and the number of comparators given do not match!");
        }
        this.originalIndices = resolveOriginalIndices(nodeSpecification, keywords, comparators);
        // defensive copy: setComparator must not write into the caller's array
        this.comparators = comparators.clone();
        this.weighting = new double[K];
        Arrays.fill(this.weighting, 1. / (double) K);
    }

    /**
     * Creates a specification for the given keywords with an explicit
     * weighting.
     *
     * @param nodeSpecification The node specification that is the basis of all
     * sequences for this alignment.
     * @param keywords The keywords that shall be used in this alignment. This
     * has to be a subset of the keywords in the node specification.
     * @param comparators the comparators that shall be used for the keywords.
     * Please note that these have to match the types of the keywords. The
     * array is copied; later changes to it do not affect this object.
     * @param weighting the weights that highlight the importance of certain
     * keywords/features. These have to be double values between 0 and 1 that
     * add up to 1. The array is copied; later changes to it do not affect
     * this object.
     *
     * @throws IllegalArgumentException if the array lengths do not match, a
     * keyword is unknown to the node specification, a comparator has the
     * wrong type or the weighting is not valid according to
     * {@link #isValidWeighting(double[])}.
     */
    public AlignmentSpecification(NodeSpecification nodeSpecification,
            String[] keywords, Comparator[] comparators, double[] weighting) {
        super(keywords);
        this.nodeSpecification = nodeSpecification;
        if (keywords.length != comparators.length) {
            throw new IllegalArgumentException(
                    "The number of keywords and the number of comparators given do not match!");
        }
        if (keywords.length != weighting.length) {
            throw new IllegalArgumentException(
                    "The number of keywords and the number of weights given do not match!");
        }
        this.originalIndices = resolveOriginalIndices(nodeSpecification, keywords, comparators);
        if (!isValidWeighting(weighting)) {
            throw new IllegalArgumentException("The given weighting is invalid!");
        }
        // defensive copies: keep the validated state independent of the caller's arrays
        this.comparators = comparators.clone();
        this.weighting = weighting.clone();
    }

    /**
     * Checks that every keyword exists in the node specification and that the
     * comparator at the same position has the type of that keyword, and looks
     * up the index of each keyword in the node specification.
     *
     * The caller must already have ensured that both arrays have the same
     * length.
     *
     * @param nodeSpecification the node specification to check against.
     * @param keywords the keywords of the new alignment specification.
     * @param comparators the comparators, parallel to keywords.
     *
     * @return for each keyword its index in the node specification.
     *
     * @throws IllegalArgumentException if a keyword is unknown or a comparator
     * has the wrong type.
     */
    private static int[] resolveOriginalIndices(final NodeSpecification nodeSpecification,
            final String[] keywords, final Comparator[] comparators) {
        final int[] indices = new int[keywords.length];
        for (int k = 0; k < keywords.length; k++) {
            if (!nodeSpecification.hasKeyword(keywords[k])) {
                throw new IllegalArgumentException("The keyword " + keywords[k]
                        + " is not contained in the given NodeSpecification!");
            }
            final KeywordSpecification keywordSpec = nodeSpecification.getKeywordSpecification(
                    keywords[k]);
            if (keywordSpec.getType() != comparators[k].getType()) {
                throw new IllegalArgumentException("The comparator given for keyword " + keywords[k]
                        + " has the wrong type!");
            }
            indices[k] = nodeSpecification.getKeywordIndex(keywords[k]);
        }
        return indices;
    }

    /**
     *
     * @return The NodeSpecification that defines the possible keywords and
     * types used within this Alignment.
     */
    public NodeSpecification getNodeSpecification() {
        return nodeSpecification;
    }

    /**
     * Sets the comparator for a given keyword.
     *
     * @param keyword a keyword that is used in this Alignment.
     * @param comparator a fitting Comparator that defines a local normalized
     * distance on the values for the given keyword.
     *
     * @throws IllegalArgumentException if the type of the comparator does not
     * match the type of the keyword.
     */
    public void setComparator(final String keyword, final Comparator comparator) {
        setComparator(getKeywordIndex(keyword), comparator);
    }

    /**
     * Sets the comparator for a given keyword.
     *
     * @param index a keyword index that is used in this Alignment.
     * @param comparator a fitting Comparator that defines a local normalized
     * distance on the values for the given keyword.
     *
     * @throws IllegalArgumentException if the type of the comparator does not
     * match the type of the keyword.
     */
    public void setComparator(final int index, final Comparator comparator) {
        final KeywordSpecification keywordSpec
                = nodeSpecification.getKeywordSpecification(getOriginalIndex(index));
        if (keywordSpec.getType() != comparator.getType()) {
            throw new IllegalArgumentException("The given comparator has not the correct type!");
        }
        comparators[index] = comparator;
    }

    /**
     * Returns the Comparator that is currently set for the given keyword.
     *
     * @param keyword a keyword that is used in this Alignment.
     *
     * @return the Comparator that is currently set for the given keyword.
     */
    public Comparator getComparator(final String keyword) {
        return getComparator(getKeywordIndex(keyword));
    }

    /**
     * Returns the Comparator that is currently set for the given keyword.
     *
     * @param index a keyword index that is used in this Alignment.
     *
     * @return the Comparator that is currently set for the given keyword.
     */
    public Comparator getComparator(int index) {
        return comparators[index];
    }

    /**
     *
     * This is semantically equivalent to
     * getNodeSpecification().getKeywordIndex(getKeyword(index)) but this is
     * preprocessed here for better performance.
     *
     * @param index a keyword index in this AlignmentSpecification.
     *
     * @return The index of this keyword according to the indexing scheme of the
     * node specification.
     */
    public int getOriginalIndex(int index) {
        return originalIndices[index];
    }

    /**
     * This sets the current weighting of keywords. The input array has to have
     * the same number of entries as there are keywords in this alignment and it
     * has to be a valid weighting according to the isValidWeighting method.
     *
     * This defines in a way the relevance of certain keywords. The local cost
     * of an alignment Operation is given as the product of this weighting
     * vector w and the vector of costs that the comparators give.
     *
     * Thus a weight of 0 means that a keyword is disregarded, while a high
     * weight (close to 1) means, that the Operation cost is dominated by this
     * Comparator.
     *
     * The given array is copied, such that later changes to it can not
     * invalidate the weighting stored in this object.
     *
     * @param weighting a double vector fulfilling the criteria given above.
     *
     * @throws IllegalArgumentException if the weighting has the wrong length
     * or is not valid.
     */
    public void setWeighting(double[] weighting) {
        if (weighting.length != this.weighting.length) {
            throw new IllegalArgumentException("The given weighting has the wrong length!");
        }
        if (!isValidWeighting(weighting)) {
            throw new IllegalArgumentException("The given weighting is invalid!");
        }
        this.weighting = weighting.clone();
    }

    /**
     * Returns the current keyword weighting. Please refer to the setWeighting
     * method for more information.
     *
     * Please note that the internal array itself is returned, not a copy.
     * Changes made to the returned array are not validated.
     *
     * @return the current keyword weighting.
     */
    public double[] getWeighting() {
        return weighting;
    }

    /**
     * This calculates the non-weighted costs for replacing the single values in
     * node a by the values in node b.
     *
     * @param a The node from the left sequence.
     * @param b The node from the right sequence.
     *
     * @return the non-weighted costs for replacing the single values in node a
     * by the values in node b.
     */
    public double[] calculateReplacementCosts(final Node a, final Node b) {
        final double[] costs = new double[size()];
        for (int k = 0; k < costs.length; k++) {
            final int original = originalIndices[k];
            costs[k] = comparators[k].compare(a.getValue(original), b.getValue(original));
        }
        return costs;
    }

    /**
     * This calculates the non-weighted costs for deleting the single values in
     * node a.
     *
     * @param a The node from the left sequence.
     *
     * @return the non-weighted costs for deleting the single values in node a.
     *
     * @throws ClassCastException if one of the comparators is not a
     * GapComparator.
     */
    public double[] calculateDeletionCosts(final Node a) {
        final double[] costs = new double[size()];
        for (int k = 0; k < costs.length; k++) {
            final GapComparator comp = (GapComparator) comparators[k];
            costs[k] = comp.delete(a.getValue(originalIndices[k]));
        }
        return costs;
    }

    /**
     * This calculates the non-weighted costs for inserting the single values of
     * node b.
     *
     * @param b The node from the right sequence.
     *
     * @return the non-weighted costs for inserting the single values of node b.
     *
     * @throws ClassCastException if one of the comparators is not a
     * GapComparator.
     */
    public double[] calculateInsertionCosts(final Node b) {
        final double[] costs = new double[size()];
        for (int k = 0; k < costs.length; k++) {
            final GapComparator comp = (GapComparator) comparators[k];
            costs[k] = comp.insert(b.getValue(originalIndices[k]));
        }
        return costs;
    }

    /**
     * This calculates the non-weighted costs for skip-deleting the single
     * values in node a.
     *
     * @param a The node from the left sequence.
     *
     * @return the non-weighted costs for skip-deleting the single values in
     * node a.
     *
     * @throws ClassCastException if one of the comparators is not a
     * SkipComparator.
     */
    public double[] calculateSkipDeletionCosts(final Node a) {
        final double[] costs = new double[size()];
        for (int k = 0; k < costs.length; k++) {
            final SkipComparator comp = (SkipComparator) comparators[k];
            costs[k] = comp.skipDelete(a.getValue(originalIndices[k]));
        }
        return costs;
    }

    /**
     * This calculates the non-weighted costs for skip-inserting the single
     * values of node b.
     *
     * @param b The node from the right sequence.
     *
     * @return the non-weighted costs for skip-inserting the single values of
     * node b.
     *
     * @throws ClassCastException if one of the comparators is not a
     * SkipComparator.
     */
    public double[] calculateSkipInsertionCosts(final Node b) {
        final double[] costs = new double[size()];
        for (int k = 0; k < costs.length; k++) {
            final SkipComparator comp = (SkipComparator) comparators[k];
            costs[k] = comp.skipInsert(b.getValue(originalIndices[k]));
        }
        return costs;
    }

    /**
     * This checks whether the given weighting is a valid one. A valid weighting
     * fulfils the following conditions:
     * - Its entries are in the interval [0,1].
     * - The entries add up to 1 (up to a tolerance of 1E-3).
     *
     * This makes a weighting in essence to a probability distribution.
     *
     * NaN entries are rejected, and so is a null array. An empty array is
     * rejected as well because its entries add up to 0.
     *
     * @param weighting a double array that might be a weighting.
     *
     * @return true if it is a weighting according to the criteria given above
     * and false otherwise.
     */
    public static boolean isValidWeighting(final double[] weighting) {
        if (weighting == null) {
            return false;
        }
        double sum = 0;
        for (final double weight : weighting) {
            // Written as a negated range check on purpose: every comparison
            // with NaN is false, so "weight < 0 || weight > 1" would let NaN
            // pass, and the NaN sum would pass the tolerance check below too.
            if (!(weight >= 0 && weight <= 1)) {
                return false;
            }
            sum += weight;
        }
        return Math.abs(sum - 1) <= WEIGHT_SUM_TOLERANCE;
    }

    /**
     * {@inheritDoc }
     */
    @Override
    public int hashCode() {
        int hash = 3;
        hash = 47 * hash + (this.nodeSpecification != null ? this.nodeSpecification.hashCode() : 0);
        hash = 47 * hash + Arrays.hashCode(this.originalIndices);
        hash = 47 * hash + Arrays.deepHashCode(this.comparators);
        hash = 47 * hash + Arrays.hashCode(this.weighting);
        return hash;
    }

    /**
     * {@inheritDoc }
     */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (obj == null || getClass() != obj.getClass()) {
            return false;
        }
        final AlignmentSpecification other = (AlignmentSpecification) obj;
        if (this.nodeSpecification != other.nodeSpecification
                && (this.nodeSpecification == null
                || !this.nodeSpecification.equals(other.nodeSpecification))) {
            return false;
        }
        return Arrays.equals(this.originalIndices, other.originalIndices)
                && Arrays.deepEquals(this.comparators, other.comparators)
                && Arrays.equals(this.weighting, other.weighting);
    }

    /**
     * Lists one line per keyword in the form
     * "keyword (weight=w) : comparator", separated by line breaks. An empty
     * specification yields the empty string.
     *
     * @return a human readable description of this specification.
     */
    @Override
    public String toString() {
        final StringBuilder builder = new StringBuilder();
        for (int k = 0; k < size(); k++) {
            // separator in front of every line but the first, so that no
            // trailing line break has to be removed afterwards
            if (k > 0) {
                builder.append("\n");
            }
            builder.append(getKeyword(k));
            builder.append(" (weight=");
            builder.append(Double.toString(weighting[k]));
            builder.append(") : ");
            builder.append(comparators[k].toString());
        }
        return builder.toString();
    }

    /**
     * Creates a copy of this object using the copy constructor.
     *
     * @return a new AlignmentSpecification that is a copy of this one.
     *
     * @throws CloneNotSupportedException declared for compatibility with
     * Object.clone; this implementation itself does not throw it.
     */
    @Override
    protected Object clone() throws CloneNotSupportedException {
        return new AlignmentSpecification(this);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy