
de.citec.tcs.alignment.SoftAlignmentSamplingAlgorithm Maven / Gradle / Ivy
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see .
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.SoftPathModel.SoftMatrixEntry;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.Random;
import java.util.Stack;
/**
* This calculates the soft (global) alignment of two sequences similarly to the
* Needleman-Wunsch-Algorithm. The whole trick of this
* SoftAlignmentScoreAlgorithm-Algorithm is to replace the strict minimum in the
* cost function by a softmin approximation. Thereby we give the alignment a
* "probabilistic touch": Even suboptimal alignments are considered by the
* softmin approach, but less than the optimal alignment. More information about
* softmin can be found in the Softmin class.
*
* This algorithm returns a PathList containing a sampling from the probability
* distribution defined implicitly by the softmin function: We choose an
* operation according to its softmin probability.
*
* @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
*/
public class SoftAlignmentSamplingAlgorithm
extends AbstractGapAlignmentAlgorithm {
public static final double DEFAULTBETA = 1;
private double beta = DEFAULTBETA;
public static final int DEFAULTPATHLIMIT = 100;
private int pathLimit = 100;
private double approxThreshold = Softmin.DEFAULTAPPROXTHRESHOLD;
/**
* This sets up an AlignmentAlgorithm instance according to the given
* specification.
*
* @param alignmentSpecification an AlignmentSpecification.
*/
public SoftAlignmentSamplingAlgorithm(AlignmentSpecification alignmentSpecification) {
super(alignmentSpecification, SoftMatrixEntry.class, PathList.class);
}
/**
*
* @param beta The parameter defining the "softness" of the alignment. For
* beta towards infinity this alignment becomes closer to the strict
* alignment. For beta = 0 all possible alignments are equally considered
* and softmin returns the average. Please note that a low beta value might
* lead to a very rough approximation and that for higher sequence lengths
* beta has to be higher, too.
*/
public void setBeta(double beta) {
this.beta = beta;
}
/**
*
* @return The parameter defining the "softness" of the alignment. For beta
* towards infinity this alignment becomes closer to the strict alignment.
* For beta = 0 all possible alignments are equally considered and softmin
* returns the average. Please note that a low beta value might lead to a
* very rough approximation and that for higher sequence lengths beta has to
* be higher, too.
*/
public double getBeta() {
return beta;
}
/**
* Returns the the number of paths that shall be sampled.
*
* @return the number of paths that shall be sampled.
*/
public int getPathLimit() {
return pathLimit;
}
/**
*
* This sets the number of paths that shall be sampled.
*
* The default for this is DEFAULTPATHLIMIT.
*
* @param pathLimit the number of paths that shall be sampled.
*/
public void setPathLimit(int pathLimit) {
if (pathLimit < 1) {
throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
}
this.pathLimit = pathLimit;
}
/**
* Sets the threshold for which exponents within the softmin function will
* be disregarded.
*
* This approximation is valid because we are bound to have one softmin
* weight that is equal to 1 (the actual minimum). If another weight gets
* very small it will be dominated by the actual minimum. Thus the weight
* can be approximated by 0.
*
* To save some Math.exp-calculations the threshold is given as exponent
* rather than weight. The transformation between both is easy though:
*
* weight_threshold = Math.exp(-approxThreshold)
*
* which means
*
* approxThreshold = -log(weight_threshold)
*
* @param approxThreshold the threshold for which exponents within the
* softmin function will be disregarded.
*/
public void setApproxThreshold(double approxThreshold) {
this.approxThreshold = approxThreshold;
}
/**
* Returns the threshold for which exponents within the softmin function
* will be disregarded.
*
* This approximation is valid because we are bound to have one softmin
* weight that is equal to 1 (the actual minimum). If another weight gets
* very small it will be dominated by the actual minimum. Thus the weight
* can be approximated by 0.
*
* To save some Math.exp-calculations the threshold is given as exponent
* rather than weight. The transformation between both is easy though:
*
* weight_threshold = Math.exp(-approxThreshold)
*
* which means
*
* approxThreshold = -log(weight_threshold)
*
* @return the threshold for which exponents within the softmin function
* will be disregarded.
*/
public double getApproxThreshold() {
return approxThreshold;
}
/**
* {@inheritDoc }
*/
@Override
public SoftMatrixEntry createInitial() {
return new SoftMatrixEntry();
}
/**
* {@inheritDoc }
*/
@Override
public SoftMatrixEntry createDelInitial(SoftMatrixEntry delOld, int i, double delLocal) {
final SoftMatrixEntry delEntry = new SoftMatrixEntry();
delEntry.setDelLocal(delLocal);
delEntry.setDelProb(1);
delEntry.setSoftmin(delOld.getSoftmin() + delLocal);
return delEntry;
}
/**
* {@inheritDoc }
*/
@Override
public SoftMatrixEntry createInsInitial(SoftMatrixEntry insOld, int j, double insLocal) {
final SoftMatrixEntry insEntry = new SoftMatrixEntry();
insEntry.setInsLocal(insLocal);
insEntry.setInsProb(1);
insEntry.setSoftmin(insOld.getSoftmin() + insLocal);
return insEntry;
}
/**
* {@inheritDoc }
*/
@Override
public SoftMatrixEntry createNewEntry(
SoftMatrixEntry delOld,
SoftMatrixEntry insOld,
SoftMatrixEntry repOld,
int i, int j,
double delLocal, double insLocal, double repLocal) {
/*
* We calculate softmin here and store details. Please refer to the
* softmin implementation in SoftAlignmentScoreAlgorithm for details.
*/
final double[] args = new double[3];
args[0] = delOld.getSoftmin() + delLocal;
args[1] = insOld.getSoftmin() + insLocal;
args[2] = repOld.getSoftmin() + repLocal;
double min = args[0];
int minIdx = 0;
for (int o = 1; o < args.length; o++) {
if (args[o] < min) {
min = args[o];
minIdx = o;
}
}
final double[] weights = new double[3];
double normalization = 0;
double weightedSum = 0;
double exponent;
for (int o = 0; o < args.length; o++) {
if (o == minIdx) {
//if this is the minimum then the result is clear.
weights[o] = 1;
normalization += 1;
weightedSum += args[o];
} else {
exponent = beta * (args[o] - min);
/*
* as we are garantueed to have one weight = 1 as baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent < approxThreshold) {
weights[o] = Math.exp(-exponent);
normalization += weights[o];
weightedSum += weights[o] * args[o];
}
}
}
final SoftMatrixEntry newEntry = new SoftMatrixEntry();
newEntry.setDelLocal(delLocal);
newEntry.setDelProb(weights[0] / normalization);
newEntry.setInsLocal(insLocal);
newEntry.setInsProb(weights[1] / normalization);
newEntry.setRepLocal(repLocal);
newEntry.setRepProb(weights[2] / normalization);
newEntry.setSoftmin(weightedSum / normalization);
return newEntry;
}
/**
* {@inheritDoc }
*/
@Override
public PathList transformToResult(
SoftMatrixEntry[][] alignmentMatrix,
Sequence a, Sequence b) {
final int M = a.getNodes().size();
final int N = b.getNodes().size();
final double normalization = (double) (M + N);
final AlignmentSpecification spec = getSpecification();
final PathList out = new PathList();
final Random rand = new Random();
final double[] weighting = spec.getWeighting();
//now sample k paths.
for (int l = 0; l < pathLimit; l++) {
int i = M;
int j = N;
final Stack opStack = new Stack<>();
double score = 0;
while (i > 0 || j > 0) {
final SoftMatrixEntry current = alignmentMatrix[i][j];
Node left = null;
Node right = null;
final double r = rand.nextDouble();
final OperationType type;
/*
* Choose the operation randomly but according to the
* probabilities calculated before.
*/
final double delProb = current.getDelProb();
final double insProb = current.getInsProb();
final double[] compCosts;
if (r < delProb) {
type = OperationType.DELETION;
left = a.getNodes().get(i - 1);
if (current.getDelCosts() == null) {
compCosts = spec.calculateDeletionCosts(left);
current.setDelCosts(compCosts);
} else {
compCosts = current.getDelCosts();
}
i--;
} else if (r > delProb && r < delProb + insProb) {
type = OperationType.INSERTION;
right = b.getNodes().get(j - 1);
if (current.getInsCosts() == null) {
compCosts = spec.calculateInsertionCosts(right);
current.setInsCosts(compCosts);
} else {
compCosts = current.getInsCosts();
}
j--;
} else {
type = OperationType.REPLACEMENT;
left = a.getNodes().get(i - 1);
right = b.getNodes().get(j - 1);
if (current.getRepCosts() == null) {
compCosts = spec.calculateReplacementCosts(left, right);
current.setRepCosts(compCosts);
} else {
compCosts = current.getRepCosts();
}
i--;
j--;
}
final TmpOp newOp = new TmpOp(left, right, compCosts, spec, type);
score += newOp.getWeightedLocalCost();
opStack.push(newOp);
}
/*
* If we have a path, transform it to a proper object.
*/
final AlignmentPath path = new AlignmentPath(spec,
a, b, score / normalization);
while (!opStack.empty()) {
final TmpOp tmpOp = opStack.pop();
final Operation op = new Operation(tmpOp.left, tmpOp.right,
tmpOp.type, path);
op.setComparatorDistances(tmpOp.compDists);
path.getOperations().add(op);
}
out.add(path);
}
return out;
}
private static class TmpOp {
private final Node left;
private final Node right;
private final double[] compDists;
private final AlignmentSpecification spec;
private final OperationType type;
public TmpOp(Node left, Node right, double[] local,
AlignmentSpecification spec, OperationType type) {
this.left = left;
this.right = right;
this.compDists = local;
this.spec = spec;
this.type = type;
}
public double getWeightedLocalCost() {
final double[] weighting = spec.getWeighting();
double out = 0;
for (int k = 0; k < spec.size(); k++) {
out += weighting[k] * compDists[k];
}
return out;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy