
// de.citec.tcs.alignment.AbstractGapAlignmentAlgorithm Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.comparators.GapComparator;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import de.citec.tcs.alignment.sequence.Value;
import java.lang.reflect.Array;
import java.util.ArrayList;
/**
 * This is an abstract super class for GapAlignmentAlgorithms that provides a
 * generic implementation of the dynamic programming needed to efficiently
 * calculate the alignment.
 * <p>
 * This alignment supports only one matrix recurrence and thus implicitly
 * supports Levenshtein-esque simple edit distances that are calculated only
 * using the DP-matrix cells in the immediate neighbourhood as well as the local
 * deletion, insertion and replacement costs.
 * <p>
 * The according DP-algorithm is known by many names, e.g.:
 * <ul>
 * <li>Needleman-Wunsch-Algorithm (which is slightly more general)</li>
 * <li>Wagner-Fischer-Algorithm</li>
 * </ul>
 * <p>
 * See also: http://en.wikipedia.org/wiki/Edit_distance
 *
 * @author Benjamin Paassen - [email protected]
 * @param <X> The class for entries of the Dynamic Programming Matrix.
 * @param <R> The result class.
 */
public abstract class AbstractGapAlignmentAlgorithm<X, R> implements GapAlignmentAlgorithm<R> {

    private final AlignmentSpecification alignmentSpecification;
    private final Class<X> entryClass;
    private final Class<R> resultClass;
    private X[][] lastAlignmentMatrix;
    private double weightThreshold = 0;

    /**
     * Stores the specification and the runtime classes that are needed to
     * create the dynamic programming matrix and to report the result type.
     *
     * @param alignmentSpecification the specification providing keywords,
     * comparators and weights for the alignment.
     * @param entryClass the class of the matrix entries.
     * @param resultClass the class of the alignment result.
     */
    public AbstractGapAlignmentAlgorithm(AlignmentSpecification alignmentSpecification,
            Class<X> entryClass,
            Class<R> resultClass) {
        this.alignmentSpecification = alignmentSpecification;
        this.entryClass = entryClass;
        this.resultClass = resultClass;
    }

    /**
     * Returns the class of matrix entries.
     *
     * @return the class of matrix entries.
     */
    public Class<X> getEntryClass() {
        return entryClass;
    }

    /**
     * {@inheritDoc }
     */
    @Override
    public Class<R> getResultClass() {
        return resultClass;
    }

    /**
     * The last matrix that was calculated using this algorithm.
     *
     * @return last matrix that was calculated using this algorithm, or null
     * if no alignment has been calculated successfully yet.
     */
    public X[][] getLastAlignmentMatrix() {
        return lastAlignmentMatrix;
    }

    /**
     * Set a weight threshold (between 0 and 1) that determines which keywords
     * should be ignored during calculation because their weight is negligible.
     * Only keywords with a weight strictly above the threshold are used.
     *
     * The default value is 0.
     *
     * @param weightThreshold a weight threshold (between 0 and 1)
     *
     * @throws IllegalArgumentException if the given threshold is NaN or lies
     * outside of the interval [0, 1].
     */
    public void setWeightThreshold(double weightThreshold) {
        // The positive formulation also rejects NaN, for which both
        // "< 0" and "> 1" would evaluate to false.
        if (!(weightThreshold >= 0 && weightThreshold <= 1)) {
            throw new IllegalArgumentException("A weight threshold has to be between 0 and 1!");
        }
        this.weightThreshold = weightThreshold;
    }

    /**
     *
     * @return The current weight threshold (0 per default).
     */
    public double getWeightThreshold() {
        return weightThreshold;
    }

    /**
     * {@inheritDoc }
     */
    public AlignmentSpecification getSpecification() {
        return alignmentSpecification;
    }

    /**
     * {@inheritDoc }
     *
     * @throws IllegalArgumentException if the node specification of the first
     * sequence does not match the alignment specification or the node
     * specification of the second sequence.
     * @throws UnsupportedOperationException if any comparator of the
     * alignment specification is not a GapComparator.
     */
    @Override
    @SuppressWarnings("rawtypes")
    public R calculateAlignment(final Sequence a, final Sequence b) {
        //check validity
        if (a.getNodeSpecification() != alignmentSpecification.getNodeSpecification()
                && !a.getNodeSpecification().equals(alignmentSpecification.getNodeSpecification())) {
            throw new IllegalArgumentException(
                    "The first input sequence has an unexpected node specification!");
        }
        if (a.getNodeSpecification() != b.getNodeSpecification()
                && !a.getNodeSpecification().equals(b.getNodeSpecification())) {
            throw new IllegalArgumentException(
                    "The node specifications of both input sequences to not match!");
        }
        //check validity of comparators (all of them, not only the relevant ones).
        final int numKeywords = alignmentSpecification.size();
        for (int k = 0; k < numKeywords; k++) {
            if (!(alignmentSpecification.getComparator(k) instanceof GapComparator)) {
                throw new UnsupportedOperationException("The comparator for keyword "
                        + alignmentSpecification.getKeyword(k) + " does not support gaps!");
            }
        }
        //identify the subset of comparators that have an above threshold weighting.
        final double threshold = weightThreshold;
        final double[] weighting = alignmentSpecification.getWeighting();
        int numRelevant = 0;
        for (int k = 0; k < numKeywords; k++) {
            if (weighting[k] > threshold) {
                numRelevant++;
            }
        }
        final GapComparator[] comparators = new GapComparator[numRelevant];
        final double[] weights = new double[numRelevant];
        final int[] originalIndices = new int[numRelevant];
        int r = 0;
        for (int k = 0; k < numKeywords; k++) {
            if (weighting[k] > threshold) {
                comparators[r] = (GapComparator) alignmentSpecification.getComparator(k);
                weights[r] = weighting[k];
                originalIndices[r] = alignmentSpecification.getOriginalIndex(k);
                r++;
            }
        }

        final int m = a.getNodes().size();
        final int n = b.getNodes().size();

        //initialize the alignment matrix.
        final X[][] alignMat = createGenericMatrix(m + 1, n + 1, entryClass);
        //initialize first entry.
        alignMat[0][0] = createInitial();

        //initialize first column, which means the deletion of the entire sequence a.
        //The node values and local deletion costs only depend on i, so they
        //are buffered here and re-used in the main recurrence.
        final Value[][] aValues = new Value[m][];
        final double[] delCosts = new double[m];
        for (int i = 1; i <= m; i++) {
            aValues[i - 1] = readValues(a.getNodes().get(i - 1), originalIndices);
            delCosts[i - 1] = localDeletionCost(comparators, weights, aValues[i - 1]);
            alignMat[i][0] = createDelInitial(alignMat[i - 1][0], i, delCosts[i - 1]);
        }
        //initialize the first row, which means the insertion of the entire sequence b.
        //The node values and local insertion costs only depend on j.
        final Value[][] bValues = new Value[n][];
        final double[] insCosts = new double[n];
        for (int j = 1; j <= n; j++) {
            bValues[j - 1] = readValues(b.getNodes().get(j - 1), originalIndices);
            insCosts[j - 1] = localInsertionCost(comparators, weights, bValues[j - 1]);
            alignMat[0][j] = createInsInitial(alignMat[0][j - 1], j, insCosts[j - 1]);
        }
        //now start the alignment.
        for (int i = 1; i <= m; i++) {
            for (int j = 1; j <= n; j++) {
                final double repLocal = localReplacementCost(comparators, weights,
                        aValues[i - 1], bValues[j - 1]);
                //create the new entry.
                alignMat[i][j] = createNewEntry(
                        alignMat[i - 1][j], alignMat[i][j - 1], alignMat[i - 1][j - 1],
                        i, j,
                        delCosts[i - 1], insCosts[j - 1], repLocal);
            }
        }
        final R result = transformToResult(alignMat, a, b);
        //only store the matrix if the result could be created successfully.
        lastAlignmentMatrix = alignMat;
        return result;
    }

    /**
     * Reads the values of the given node for all relevant keywords.
     */
    private static Value[] readValues(final Node node, final int[] originalIndices) {
        final Value[] values = new Value[originalIndices.length];
        for (int k = 0; k < originalIndices.length; k++) {
            values[k] = node.getValue(originalIndices[k]);
        }
        return values;
    }

    /**
     * Calculates the weighted sum over all comparator deletion costs.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static double localDeletionCost(final GapComparator[] comparators,
            final double[] weights, final Value[] values) {
        double cost = 0;
        for (int k = 0; k < comparators.length; k++) {
            cost += weights[k] * comparators[k].delete(values[k]);
        }
        return cost;
    }

    /**
     * Calculates the weighted sum over all comparator insertion costs.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static double localInsertionCost(final GapComparator[] comparators,
            final double[] weights, final Value[] values) {
        double cost = 0;
        for (int k = 0; k < comparators.length; k++) {
            cost += weights[k] * comparators[k].insert(values[k]);
        }
        return cost;
    }

    /**
     * Calculates the weighted sum over all comparator replacement costs.
     */
    @SuppressWarnings({"rawtypes", "unchecked"})
    private static double localReplacementCost(final GapComparator[] comparators,
            final double[] weights, final Value[] left, final Value[] right) {
        double cost = 0;
        for (int k = 0; k < comparators.length; k++) {
            cost += weights[k] * comparators[k].compare(left[k], right[k]);
        }
        return cost;
    }

    /**
     * This method should not be called from outside!
     *
     * The subclass specifies the entry (0,0) of the alignment matrix with this
     * method.
     *
     * @return the entry (0,0) of the alignment matrix.
     */
    public abstract X createInitial();

    /**
     * This method should not be called from outside!
     *
     * The subclass specifies the entry (i,0) of the alignment matrix with
     * this method given the entry (i-1,0).
     *
     * @param delOld the matrix entry (i-1,0)
     * @param i the row index of the entry that is created (1 or larger).
     * @param delLocal the local deletion cost, defined as the weighted sum over
     * all comparator deletion costs.
     *
     * @return the entry (i,0) of the alignment matrix.
     */
    public abstract X createDelInitial(X delOld, int i, double delLocal);

    /**
     * This method should not be called from outside!
     *
     * The subclass specifies the entry (0,j) of the alignment matrix with
     * this method given the entry (0,j-1).
     *
     * @param insOld the matrix entry (0,j-1)
     * @param j the column index of the entry that is created (1 or larger).
     * @param insLocal the local insertion cost, defined as the weighted sum
     * over all comparator insertion costs.
     *
     * @return the entry (0,j) of the alignment matrix.
     */
    public abstract X createInsInitial(X insOld, int j, double insLocal);

    /**
     * This method should not be called from outside!
     *
     * The subclass specifies the entry (i,j) of the alignment matrix with
     * this method given the entries (i-1,j) (deletion), (i,j-1) (insertion) and
     * (i-1,j-1) (replacement).
     *
     * @param delOld the matrix entry (i-1,j)
     * @param insOld the matrix entry (i,j-1)
     * @param repOld the matrix entry (i-1,j-1)
     * @param i the row index of the entry that is created (1 or larger).
     * @param j the column index of the entry that is created (1 or larger).
     * @param delLocal the local deletion cost, defined as the weighted sum over
     * all comparator deletion costs.
     * @param insLocal the local insertion cost, defined as the weighted sum
     * over all comparator insertion costs.
     * @param repLocal the local replacement cost, defined as the weighted sum
     * over all comparator replacement costs.
     *
     * @return the entry (i,j) of the alignment matrix.
     */
    public abstract X createNewEntry(X delOld, X insOld, X repOld,
            int i, int j,
            double delLocal, double insLocal, double repLocal);

    /**
     * This method should not be called from outside!
     *
     * The subclass uses this method to transform the alignment matrix and the
     * input sequences to the actual alignment result.
     *
     * @param alignmentMatrix the alignment matrix.
     * @param a the first sequence.
     * @param b the second sequence.
     *
     * @return the actual alignment result.
     */
    public abstract R transformToResult(X[][] alignmentMatrix, final Sequence a,
            final Sequence b);

    /**
     * This circumvents Javas rule against generic array creation by using
     * reflection.
     *
     * @param <X> the type of the new array.
     * @param size the size of the new array.
     * @param arrClass the class of the new array elements.
     *
     * @return an array of the specified (generic) type and the specified size.
     */
    @SuppressWarnings("unchecked")
    public static <X> X[] createGenericArray(final int size,
            final Class<X> arrClass) {
        // The cast is safe because the array is created with arrClass as
        // component type.
        return (X[]) Array.newInstance(arrClass, size);
    }

    /**
     * This circumvents Javas rule against generic array creation by using
     * reflection.
     *
     * @param <X> the type of the new array.
     * @param m the number of rows of the result matrix.
     * @param n the number of columns of the result matrix.
     * @param entryClass the class of the matrix entries.
     *
     * @return a matrix of the specified (generic) type and the specified size.
     */
    @SuppressWarnings("unchecked")
    public static <X> X[][] createGenericMatrix(final int m, final int n,
            final Class<X> entryClass) {
        // The multi-dimensional variant of Array.newInstance allocates all m
        // rows of length n at once; the cast is safe because entryClass is
        // used as component type.
        return (X[][]) Array.newInstance(entryClass, m, n);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy