
de.citec.tcs.alignment.StrictKPathAlgorithm Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.StrictAllOptimalAlgorithm.FullMatrixEntry;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Random;
import java.util.Stack;
import java.util.TreeMap;
/**
* This is an implementation of the Needleman-Wunsch-Algorithm for sequence
* alignment. It returns the k best paths. The scoring scheme is equivalent to
* the StrictAlignmentScoreAlgorithm.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
*/
public class StrictKPathAlgorithm
extends AbstractGapAlignmentAlgorithm {
public static final int DEFAULTPATHLIMIT = 100;
private int pathLimit = DEFAULTPATHLIMIT;
/**
* This sets up an AlignmentAlgorithm instance according to the given
* specification.
*
* @param alignmentSpecification an AlignmentSpecification.
*/
public StrictKPathAlgorithm(AlignmentSpecification alignmentSpecification) {
super(alignmentSpecification, FullMatrixEntry.class, PathMap.class);
}
/**
* Please refer to the corresponding setter method for more calculation.
*
* @return a maximum number of paths that shall be calculated.
*/
public int getPathLimit() {
return pathLimit;
}
/**
*
* This sets the k that gives the StrictKPathAlgorithm its name: The number
* of returned AlignmentPaths is limited to the k best alignments. The total
* number of possible AlignmentPaths is combinatorial in the input lengths.
* Thus we need to limit it to the k best. As soon as the limit is reached
* we decide randomly between equally good paths which one is taken.
*
* The default for this is DEFAULTPATHLIMIT.
*
* @param pathLimit a maximum number of paths that shall be calculated.
*/
public void setPathLimit(int pathLimit) {
if (pathLimit < 1) {
throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
}
this.pathLimit = pathLimit;
}
/**
* {@inheritDoc }
*/
@Override
public FullMatrixEntry createInitial() {
return new FullMatrixEntry(0, 0, 0, 0, 0, 0);
}
/**
* {@inheritDoc }
*/
@Override
public FullMatrixEntry createDelInitial(FullMatrixEntry delOld, int i, double delLocal) {
return new FullMatrixEntry(delOld.getScore() + delLocal, delLocal, 0, 0, i, 0);
}
/**
* {@inheritDoc }
*/
@Override
public FullMatrixEntry createInsInitial(FullMatrixEntry insOld, int j, double insLocal) {
return new FullMatrixEntry(insOld.getScore() + insLocal, 0, insLocal, 0, 0, j);
}
/**
* {@inheritDoc }
*/
@Override
public FullMatrixEntry createNewEntry(FullMatrixEntry delOld,
FullMatrixEntry insOld,
FullMatrixEntry repOld,
int i, int j,
double delLocal, double insLocal, double repLocal) {
final double delTotal = delOld.getScore() + delLocal;
final double insTotal = insOld.getScore() + insLocal;
final double repTotal = repOld.getScore() + repLocal;
if (delTotal < insTotal) {
if (delTotal < repTotal) {
return new FullMatrixEntry(delTotal, delLocal, insLocal, repLocal, i, j);
} else {
return new FullMatrixEntry(repTotal, delLocal, insLocal, repLocal, i, j);
}
} else {
if (insTotal < repTotal) {
return new FullMatrixEntry(insTotal, delLocal, insLocal, repLocal, i, j);
} else {
return new FullMatrixEntry(repTotal, delLocal, insLocal, repLocal, i, j);
}
}
}
/**
* {@inheritDoc }
*/
@Override
public PathMap transformToResult(FullMatrixEntry[][] alignmentMatrix,
Sequence a, Sequence b) {
final int M = a.getNodes().size();
final int N = b.getNodes().size();
final double normalization = (double) Math.max(1, M + N);
final AlignmentSpecification spec = getSpecification();
final PathMap pathMap = new PathMap();
final Random rand = new Random();
final FullMatrixEntry last = alignmentMatrix[M][N];
//Now backtrace to reconstruct the paths.
final IndexedStochasticPriorityQueue queue
= new IndexedStochasticPriorityQueue<>();
queue.put(last.getScore(), new TmpPath(M, N, last.getScore()));
TmpPath current;
int pathCounter = 1;
while (!queue.isEmpty()) {
current = queue.pollFirst();
//we are finished if we are at the start.
if (current.i == 0 && current.j == 0) {
//in that case we transform the temporary path to an actual one.
final AlignmentPath path = new AlignmentPath(spec,
a, b, current.getScore() / normalization);
while (!current.opStack.empty()) {
final TmpOp tmpOp = current.opStack.pop();
final Operation op = new Operation(tmpOp.left, tmpOp.right,
tmpOp.type, path);
op.setComparatorDistances(tmpOp.local);
path.getOperations().add(op);
}
pathMap.put(path);
continue;
}
//otherwise we still have work to do.
final Node left;
if (current.i > 0) {
left = a.getNodes().get(current.i - 1);
} else {
left = null;
}
final Node right;
if (current.j > 0) {
right = b.getNodes().get(current.j - 1);
} else {
right = null;
}
double[] local;
//If we are in the first column we can only delete.
if (current.j == 0) {
local = getSpecification().calculateDeletionCosts(left);
final TmpOp del = new TmpOp(left, null, local, spec,
OperationType.DELETION);
current.opStack.push(del);
current.i--;
queue.put(current.getScore(), current);
continue;
}
//if we are in the first row we can only insert.
if (current.i == 0) {
local = getSpecification().calculateInsertionCosts(right);
final TmpOp ins = new TmpOp(null, right, local, spec,
OperationType.INSERTION);
current.opStack.push(ins);
current.j--;
queue.put(current.getScore(), current);
continue;
}
//if we are inside the matrix we consider all operations
final FullMatrixEntry currentCell = alignmentMatrix[current.i][current.j];
final FullMatrixEntry delOld = alignmentMatrix[current.i - 1][current.j];
final FullMatrixEntry insOld = alignmentMatrix[current.i][current.j - 1];
final FullMatrixEntry repOld = alignmentMatrix[current.i - 1][current.j - 1];
final double scoreToEnd = current.actualScoreToEnd;
//Determine the score a path may have at worst to be considered still.
boolean useDeletion = false;
boolean useInsertion = false;
boolean useReplacement = false;
//if we have enough space to explore all possibilites, do that
if (pathCounter < pathLimit - 2) {
useDeletion = true;
useInsertion = true;
useReplacement = true;
} else {
//Otherwise we determine, which path we want to continue.
final double delScore = delOld.getScore()
+ currentCell.getDelLocal()
+ current.actualScoreToEnd;
final double insScore = insOld.getScore()
+ currentCell.getInsLocal()
+ current.actualScoreToEnd;
final double repScore = repOld.getScore()
+ currentCell.getRepLocal()
+ current.actualScoreToEnd;
/*
* determine the rank of each operation by sorting them
* according
* to their score. We shuffle the list to ensure
* random choices of operations.
*/
final ArrayList ranking = new ArrayList<>();
ranking.add(new SortEntry(OperationType.DELETION, delScore));
ranking.add(new SortEntry(OperationType.INSERTION, insScore));
ranking.add(new SortEntry(OperationType.REPLACEMENT, repScore));
Collections.shuffle(ranking);
Collections.sort(ranking);
final int lastOp = pathLimit - pathCounter + 1;
for (int o = 0; o < lastOp; o++) {
switch (ranking.get(o).opType) {
case DELETION:
useDeletion = true;
break;
case INSERTION:
useInsertion = true;
break;
case REPLACEMENT:
useReplacement = true;
break;
}
}
double worstScore;
if (!queue.isEmpty()) {
worstScore = queue.lastKey();
} else {
//If the queue is empty, our shuffling before determined
// already which path gets continued.
worstScore = -1;
}
/*
* We use all later operations only if they are better than our
* current worst score in the queue.
*/
for (int o = lastOp; o < 3; o++) {
final SortEntry opEntry = ranking.get(o);
if (opEntry.score < worstScore) {
switch (opEntry.opType) {
case DELETION:
useDeletion = true;
break;
case INSERTION:
useInsertion = true;
break;
case REPLACEMENT:
useReplacement = true;
break;
}
pathCounter--;
queue.pollLast();
//if we poll something we need to update the worst score.
if (!queue.isEmpty()) {
worstScore = queue.lastKey();
} else {
worstScore = -1;
}
} else if (opEntry.score == worstScore) {
/*
* if the score is equal to the worst one, we roll a die
* to determine which path gets continued.
*/
final ArrayList worstPaths = queue.get(opEntry.score);
final int kickIdx = rand.nextInt(worstPaths.size() + 1);
if (kickIdx < worstPaths.size()) {
pathCounter--;
switch (opEntry.opType) {
case DELETION:
useDeletion = true;
break;
case INSERTION:
useInsertion = true;
break;
case REPLACEMENT:
useReplacement = true;
break;
}
if (worstPaths.size() > 1) {
worstPaths.remove(kickIdx);
} else {
queue.pollLastEntry();
//if we poll something we need to update the worst score.
if (!queue.isEmpty()) {
worstScore = queue.lastKey();
} else {
worstScore = -1;
}
}
}
} else {
//if we are larger than the worst score, we can stop
//searching.
break;
}
}
}
//continue the backtracing with a deletion
if (useDeletion) {
local = getSpecification().calculateDeletionCosts(left);
final TmpOp del = new TmpOp(left, null, local, spec,
OperationType.DELETION);
current.continuePath(delOld.getScore(), del);
final TmpPath delPath = current;
queue.put(delPath.getScore(), delPath);
}
//continue the backtracing with an insertion
if (useInsertion) {
local = getSpecification().calculateInsertionCosts(right);
final TmpOp ins = new TmpOp(null, right, local, spec,
OperationType.INSERTION);
final TmpPath inspPath;
if (!useDeletion) {
current.continuePath(insOld.getScore(), ins);
inspPath = current;
} else {
//if we already continued the current path, we have to create
//a new TmpPath object.
inspPath = new TmpPath(insOld.getScore(), scoreToEnd,
ins, current, OperationType.DELETION);
pathCounter++;
}
queue.put(inspPath.getScore(), inspPath);
}
//continue the backtracing with a replacement
if (useReplacement) {
local = getSpecification().calculateReplacementCosts(left, right);
final TmpOp rep = new TmpOp(left, right, local, spec,
OperationType.REPLACEMENT);
final TmpPath repPath;
if (!useDeletion && !useInsertion) {
current.continuePath(repOld.getScore(), rep);
repPath = current;
} else {
//if we already continued the current path, we have to create
//a new TmpPath object.
if (useDeletion) {
repPath = new TmpPath(repOld.getScore(), scoreToEnd,
rep, current, OperationType.DELETION);
} else {
repPath = new TmpPath(repOld.getScore(), scoreToEnd,
rep, current, OperationType.INSERTION);
}
pathCounter++;
}
queue.put(repPath.getScore(), repPath);
}
}
return pathMap;
}
private static class TmpPath {
private Stack opStack = new Stack<>();
private int i;
private int j;
private double optimalScoreToBegin;
private double actualScoreToEnd;
/**
* This is for initialization.
*
* @param M
* @param N
* @param optimalScore
*/
public TmpPath(int M, int N, double optimalScore) {
this.i = M;
this.j = N;
this.optimalScoreToBegin = optimalScore;
}
/**
* This is for continuation.
*
* @param i
* @param j
* @param optimalScoreToBegin
* @param actualScoreToEnd
* @param local
* @param toEnd
* @param before
*/
public TmpPath(double optimalScoreToBegin,
double actualScoreToEnd,
TmpOp local,
TmpPath toEnd, OperationType before) {
int iOffset = 0;
int jOffset = 0;
switch (before) {
case DELETION:
case SKIPDELETION:
iOffset++;
break;
case INSERTION:
case SKIPINSERTION:
jOffset++;
break;
case REPLACEMENT:
iOffset++;
jOffset++;
break;
}
switch (local.type) {
case DELETION:
case SKIPDELETION:
iOffset--;
break;
case INSERTION:
case SKIPINSERTION:
jOffset--;
break;
case REPLACEMENT:
iOffset--;
jOffset--;
break;
}
this.i = toEnd.i + iOffset;
this.j = toEnd.j + jOffset;
this.optimalScoreToBegin = optimalScoreToBegin;
this.actualScoreToEnd = actualScoreToEnd + local.getWeightedLocalCost();
for (final TmpOp op : toEnd.opStack) {
opStack.push(op);
}
opStack.pop();
opStack.push(local);
}
public void continuePath(double newOptimalScoreToBegin, TmpOp newOp) {
switch (newOp.type) {
case DELETION:
case SKIPDELETION:
this.i--;
break;
case INSERTION:
case SKIPINSERTION:
this.j--;
break;
case REPLACEMENT:
this.i--;
this.j--;
break;
}
this.optimalScoreToBegin = newOptimalScoreToBegin;
this.opStack.add(newOp);
this.actualScoreToEnd = actualScoreToEnd
+ newOp.getWeightedLocalCost();
}
public double getScore() {
return optimalScoreToBegin + actualScoreToEnd;
}
}
private static class TmpOp {
private final Node left;
private final Node right;
private final double[] local;
private final AlignmentSpecification spec;
private final OperationType type;
public TmpOp(Node left, Node right, double[] local,
AlignmentSpecification spec, OperationType type) {
this.left = left;
this.right = right;
this.local = local;
this.spec = spec;
this.type = type;
}
public double getWeightedLocalCost() {
final double[] weighting = spec.getWeighting();
double out = 0;
for (int k = 0; k < spec.size(); k++) {
out += weighting[k] * local[k];
}
return out;
}
}
private static class SortEntry implements Comparable {
private final OperationType opType;
private final double score;
public SortEntry(OperationType opType, double score) {
this.opType = opType;
this.score = score;
}
@Override
public int compareTo(SortEntry t) {
return Double.compare(score, t.score);
}
}
/**
* This is a helper class, which actually extends the TreeMap to implement a
* PriorityQueue that is able to poll the minimum as well as the maximum in
* constanst time.
*
* @author Benjamin Paassen - [email protected]
* @param The Key Class
* @param The Value Class
*/
public static class IndexedStochasticPriorityQueue, V>
extends TreeMap> {
private final Random rand = new Random();
public IndexedStochasticPriorityQueue() {
}
public void put(final K key, final V value) {
ArrayList list = super.get(key);
if (list == null) {
list = new ArrayList<>();
super.put(key, list);
}
list.add(value);
}
/**
* Polls the value associated with the minimum key. If more than one
* element
* is associated with the minimum key, it returns a random element from
* that
* list.
*
* @return the value associated with the minimum key.
*/
public V pollFirst() {
if (isEmpty()) {
return null;
}
final ArrayList list = super.firstEntry().getValue();
if (list.size() > 1) {
final int pollIdx = rand.nextInt(list.size());
return list.remove(pollIdx);
} else {
return super.pollFirstEntry().getValue().get(0);
}
}
/**
* Polls the value associated with the maximum key. If more than one
* element
* is associated with the minimum key, it returns a random element from
* that
* list.
*
* @return the value associated with the maximum key.
*/
public V pollLast() {
if (isEmpty()) {
return null;
}
final ArrayList list = super.lastEntry().getValue();
if (list.size() > 1) {
final int pollIdx = rand.nextInt(list.size());
return list.remove(pollIdx);
} else {
return super.pollLastEntry().getValue().get(0);
}
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy