All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.tcs.alignment.AbstractGapAlignmentAlgorithm Maven / Gradle / Ivy

/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.comparators.GapComparator;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import de.citec.tcs.alignment.sequence.Value;
import java.lang.reflect.Array;
import java.util.ArrayList;

/**
 * This is an abstract super class for GapAlignmentAlgorithms that provides a
 * generic implementation of the dynamic programming needed to efficiently
 * calculate the alignment.
 *
 * This alignment supports only one matrix recurrence and thus implicitly
 * supports Levenshtein-esque simple edit distances that are calculated only
 * using the DP-matrix cells in the immediate neighbourhood as well as the local
 * deletion, insertion and replacement costs.
 *
 * The according DP-algorithm is known by many names, e.g.:
 * 
  • Needleman-Wunsch-Algorithm (which is slightly more general)
  • *
  • Wagner-Fischer-Algorithm
* * See also: http://en.wikipedia.org/wiki/Edit_distance * * @author Benjamin Paassen - [email protected] * @param The class for entries of the Dynamic Programming Matrix. * @param The result class. */ public abstract class AbstractGapAlignmentAlgorithm implements GapAlignmentAlgorithm { private final AlignmentSpecification alignmentSpecification; private final Class entryClass; private final Class resultClass; private X[][] lastAlignmentMatrix; private double weightThreshold = 0; public AbstractGapAlignmentAlgorithm(AlignmentSpecification alignmentSpecification, Class entryClass, Class resultClass) { this.alignmentSpecification = alignmentSpecification; this.entryClass = entryClass; this.resultClass = resultClass; } /** * Returns the class of matrix entries. * * @return the class of matrix entries. */ public Class getEntryClass() { return entryClass; } /** * {@inheritDoc } */ @Override public Class getResultClass() { return resultClass; } /** * The last matrix that was calculated using this algorithm. * * @return last matrix that was calculated using this algorithm. */ public X[][] getLastAlignmentMatrix() { return lastAlignmentMatrix; } /** * Set a weight threshold (between 0 and 1) that determines which keywords * should be ignored during calculation because their weight is negligible. * * The default value is 0. * * @param weightThreshold a weight threshold (between 0 and 1) */ public void setWeightThreshold(double weightThreshold) { if (weightThreshold < 0 || weightThreshold > 1) { throw new RuntimeException("A weight threshold has to be between 0 and 1!"); } this.weightThreshold = weightThreshold; } /** * * @return The current weight threshold (0 per default). 
*/ public double getWeightThreshold() { return weightThreshold; } /** * {@inheritDoc } */ public AlignmentSpecification getSpecification() { return alignmentSpecification; } /** * {@inheritDoc } */ @Override public R calculateAlignment(final Sequence a, final Sequence b) { //check validity if (a.getNodeSpecification() != alignmentSpecification.getNodeSpecification() && !a.getNodeSpecification().equals(alignmentSpecification.getNodeSpecification())) { throw new IllegalArgumentException( "The first input sequence has an unexpected node specification!"); } if (a.getNodeSpecification() != b.getNodeSpecification() && !a.getNodeSpecification().equals(b.getNodeSpecification())) { throw new IllegalArgumentException( "The node specifications of both input sequences to not match!"); } //check validity of comparators. for (int k = 0; k < alignmentSpecification.size(); k++) { if (!(alignmentSpecification.getComparator(k) instanceof GapComparator)) { throw new UnsupportedOperationException("The comparator for keyword " + alignmentSpecification.getKeyword(k) + " does not support gaps!"); } } //identify the subset of comparators that have an above threshold weighting. 
final ArrayList relevantIndices = new ArrayList(); for (int k = 0; k < alignmentSpecification.size(); k++) { if (alignmentSpecification.getWeighting()[k] > weightThreshold) { relevantIndices.add(k); } } final GapComparator[] comparators = new GapComparator[relevantIndices.size()]; final double[] weights = new double[relevantIndices.size()]; final int[] originalIndices = new int[relevantIndices.size()]; for (int k = 0; k < comparators.length; k++) { comparators[k] = (GapComparator) alignmentSpecification.getComparator(relevantIndices.get(k)); weights[k] = alignmentSpecification.getWeighting()[relevantIndices.get(k)]; originalIndices[k] = alignmentSpecification.getOriginalIndex(relevantIndices.get(k)); } final int m = a.getNodes().size(); final int n = b.getNodes().size(); double delLocal, insLocal, repLocal; final Value[] aValues = new Value[comparators.length]; final Value[] bValues = new Value[comparators.length]; Node aNode, bNode; //initialize the alignment matrix. final X[][] alignMat = createGenericMatrix(m + 1, n + 1, entryClass); //initialize first entry. alignMat[0][0] = createInitial(); //initialize first column, which means the deletion of the entire sequence a. for (int i = 1; i <= m; i++) { final X delOld = alignMat[i - 1][0]; delLocal = 0; aNode = a.getNodes().get(i - 1); for (int k = 0; k < comparators.length; k++) { aValues[k] = aNode.getValue(originalIndices[k]); delLocal += weights[k] * comparators[k].delete(aValues[k]); } alignMat[i][0] = createDelInitial(delOld, i, delLocal); } //initialize the first row, which means the insertion of the entire sequence b. for (int j = 1; j <= n; j++) { final X insOld = alignMat[0][j - 1]; insLocal = 0; bNode = b.getNodes().get(j - 1); for (int k = 0; k < comparators.length; k++) { bValues[k] = bNode.getValue(originalIndices[k]); insLocal += weights[k] * comparators[k].delete(bValues[k]); } alignMat[0][j] = createInsInitial(insOld, j, insLocal); } //now start the alignment. 
for (int i = 1; i <= m; i++) { for (int j = 1; j <= n; j++) { //buffer values first. aNode = a.getNodes().get(i - 1); bNode = b.getNodes().get(j - 1); for (int k = 0; k < comparators.length; k++) { aValues[k] = aNode.getValue(originalIndices[k]); bValues[k] = bNode.getValue(originalIndices[k]); } final X delOld = alignMat[i - 1][j]; delLocal = 0; for (int k = 0; k < comparators.length; k++) { delLocal += weights[k] * comparators[k].delete(aValues[k]); } final X insOld = alignMat[i][j - 1]; insLocal = 0; for (int k = 0; k < comparators.length; k++) { insLocal += weights[k] * comparators[k].delete(bValues[k]); } final X repOld = alignMat[i - 1][j - 1]; repLocal = 0; for (int k = 0; k < comparators.length; k++) { repLocal += weights[k] * comparators[k].compare(aValues[k], bValues[k]); } //create the new entry. alignMat[i][j] = createNewEntry(delOld, insOld, repOld, i, j, delLocal, insLocal, repLocal); } } final R result = transformToResult(alignMat, a, b); lastAlignmentMatrix = alignMat; return result; } /** * This method should not be called from outside! * * The subclass specifies the entry (0,0) of the alignment matrix with this * method. * * @return the entry (0,0) of the alignment matrix. */ public abstract X createInitial(); /** * This method should not be called from outside! * * The subclass specifies the entry (i+1,0) of the alignment matrix with * this method given the entry (i,0). * * @param delOld the matrix entry (i,0) * @param i i+1 * @param delLocal the local deletion cost, defined as the weighted sum over * all comparator deletion costs. * * @return the entry (i+1,0) of the alignment matrix. */ public abstract X createDelInitial(X delOld, int i, double delLocal); /** * This method should not be called from outside! * * The subclass specifies the entry (0,j+1) of the alignment matrix with * this method given the entry (0,j). 
* * @param insOld the matrix entry (0,j+1) * @param j j+1 * @param insLocal the local insertion cost, defined as the weighted sum * over all comparator insertion costs. * * @return the entry (0,j+1) of the alignment matrix. */ public abstract X createInsInitial(X insOld, int j, double insLocal); /** * This method should not be called from outside! * * The subclass specifies the entry (i+1,j+1) of the alignment matrix with * this method given the entries (i,j+1) (deletion), (i+1,j) (insertion) and * (i,j) (replacement). * * @param delOld the matrix entry (i,j+1) * @param insOld the matrix entry (i+1,j) * @param repOld the matrix entry (i,j) * @param i i+1 * @param j j+1 * @param delLocal the local deletion cost, defined as the weighted sum over * all comparator deletion costs. * @param insLocal the local insertion cost, defined as the weighted sum * over all comparator insertion costs. * @param repLocal the local replacement cost, defined as the weighted sum * over all comparator replacement costs. * * @return the entry (i,j) of the alignment matrix. */ public abstract X createNewEntry(X delOld, X insOld, X repOld, int i, int j, double delLocal, double insLocal, double repLocal); /** * This method should not be called from outside! * * The subclass uses this method to transform the alignment matrix and the * input sequences to the actual alignment result. * * @param alignmentMatrix the alignment matrix. * @param a the first sequence. * @param b th second sequence. * * @return the actual alignment result. */ public abstract R transformToResult(X[][] alignmentMatrix, final Sequence a, final Sequence b); /** * This circumvents Javas rule against generic array cration by using * reflection. * * @param the type of the new array. * @param size the size of the new array. * @param arrClass the class of the new array elements. * * @return an array of the specified (generic) type and the specified size. 
*/ public static X[] createGenericArray(final int size, final Class arrClass) { return (X[]) Array.newInstance(arrClass, size); } /** * This circumvents Javas rule against generic array cration by using * reflection. * * @param the type of the new array. * @param m the number of rows of the result matrix. * @param n the number of columns of the result matrix. * @param entryClass the class of the generic class. * * @return a matrix of the specified (generic) type and the specified size. */ public static X[][] createGenericMatrix(final int m, final int n, final Class entryClass) { final Class arrClass = (Class) Array. newInstance(entryClass, 0).getClass(); final X[][] mat = (X[][]) Array.newInstance(arrClass, m); for (int i = 0; i < m; i++) { mat[i] = (X[]) Array.newInstance(entryClass, n); } return mat; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy