
// de.citec.tcs.alignment.SoftAlignmentFullAlgorithm (Maven / Gradle / Ivy)
/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
import de.citec.tcs.alignment.sequence.Sequence;
/**
 * This calculates the soft (global) alignment of two sequences similarly to the
 * Needleman-Wunsch-Algorithm. The whole trick of this algorithm is to replace
 * the strict minimum in the cost function by a softmin approximation. Thereby
 * we give the alignment a "probabilistic touch": Even suboptimal alignments are
 * considered by the softmin approach, but less than the optimal alignment. More
 * information about softmin can be found in the Softmin class.
 *
 * The result of this algorithm is the full dynamic programming matrix that
 * allows for calculating the soft derivative.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
// NOTE(review): the superclass is referenced without type arguments here while
// the overrides below use SoftPathModel.SoftMatrixEntry / SoftPathModel; the
// generic arguments may have been lost in extraction - confirm against the
// declaration of AbstractGapAlignmentAlgorithm.
public class SoftAlignmentFullAlgorithm
        extends AbstractGapAlignmentAlgorithm {

    /**
     * The value of beta that is used unless {@link #setBeta(double)} is called.
     */
    public static final double DEFAULTBETA = 1;
    private double beta = DEFAULTBETA;
    private double approxThreshold = Softmin.DEFAULTAPPROXTHRESHOLD;

    /**
     * This sets up an AlignmentAlgorithm instance according to the given
     * specification.
     *
     * @param alignmentSpecification an AlignmentSpecification.
     */
    public SoftAlignmentFullAlgorithm(AlignmentSpecification alignmentSpecification) {
        super(alignmentSpecification,
                SoftPathModel.SoftMatrixEntry.class,
                SoftPathModel.class);
    }

    /**
     * Sets the threshold for which exponents within the softmin function will
     * be disregarded.
     *
     * This approximation is valid because we are bound to have one softmin
     * weight that is equal to 1 (the actual minimum). If another weight gets
     * very small it will be dominated by the actual minimum. Thus the weight
     * can be approximated by 0.
     *
     * To save some Math.exp-calculations the threshold is given as exponent
     * rather than weight. The transformation between both is easy though:
     *
     * weight_threshold = Math.exp(-approxThreshold)
     *
     * which means
     *
     * approxThreshold = -log(weight_threshold)
     *
     * @param approxThreshold the threshold for which exponents within the
     * softmin function will be disregarded.
     */
    public void setApproxThreshold(double approxThreshold) {
        this.approxThreshold = approxThreshold;
    }

    /**
     * Returns the threshold for which exponents within the softmin function
     * will be disregarded.
     *
     * This approximation is valid because we are bound to have one softmin
     * weight that is equal to 1 (the actual minimum). If another weight gets
     * very small it will be dominated by the actual minimum. Thus the weight
     * can be approximated by 0.
     *
     * To save some Math.exp-calculations the threshold is given as exponent
     * rather than weight. The transformation between both is easy though:
     *
     * weight_threshold = Math.exp(-approxThreshold)
     *
     * which means
     *
     * approxThreshold = -log(weight_threshold)
     *
     * @return the threshold for which exponents within the softmin function
     * will be disregarded.
     */
    public double getApproxThreshold() {
        return approxThreshold;
    }

    /**
     * Sets the parameter defining the "softness" of the alignment.
     *
     * @param beta The parameter defining the "softness" of the alignment. For
     * beta towards infinity this alignment becomes closer to the strict
     * alignment. For beta = 0 all possible alignments are equally considered
     * and softmin returns the average. Please note that a low beta value might
     * lead to a very rough approximation and that for higher sequence lengths
     * beta has to be higher, too.
     */
    public void setBeta(double beta) {
        this.beta = beta;
    }

    /**
     * Returns the parameter defining the "softness" of the alignment.
     *
     * @return The parameter defining the "softness" of the alignment. For beta
     * towards infinity this alignment becomes closer to the strict alignment.
     * For beta = 0 all possible alignments are equally considered and softmin
     * returns the average. Please note that a low beta value might lead to a
     * very rough approximation and that for higher sequence lengths beta has to
     * be higher, too.
     */
    public double getBeta() {
        return beta;
    }

    /**
     * {@inheritDoc }
     *
     * Here this is a freshly constructed SoftMatrixEntry without any further
     * modification.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createInitial() {
        return new SoftPathModel.SoftMatrixEntry();
    }

    /**
     * {@inheritDoc }
     *
     * The created entry stores the local deletion cost, a deletion
     * probability of 1 and, as softmin value, the softmin of the previous
     * entry plus the local deletion cost.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createDelInitial(SoftPathModel.SoftMatrixEntry delOld, int i, double delLocal) {
        final SoftPathModel.SoftMatrixEntry delEntry = new SoftPathModel.SoftMatrixEntry();
        delEntry.setDelLocal(delLocal);
        // deletion is the only operation considered here, so its weight is 1.
        delEntry.setDelProb(1);
        delEntry.setSoftmin(delOld.getSoftmin() + delLocal);
        return delEntry;
    }

    /**
     * {@inheritDoc }
     *
     * The created entry stores the local insertion cost, an insertion
     * probability of 1 and, as softmin value, the softmin of the previous
     * entry plus the local insertion cost.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createInsInitial(SoftPathModel.SoftMatrixEntry insOld, int j, double insLocal) {
        final SoftPathModel.SoftMatrixEntry insEntry = new SoftPathModel.SoftMatrixEntry();
        insEntry.setInsLocal(insLocal);
        // insertion is the only operation considered here, so its weight is 1.
        insEntry.setInsProb(1);
        insEntry.setSoftmin(insOld.getSoftmin() + insLocal);
        return insEntry;
    }

    /**
     * {@inheritDoc }
     *
     * The created entry stores the three local costs, the normalized softmin
     * weights of deletion, insertion and replacement, and the softmin value
     * itself, which is the weighted average of the three accumulated costs.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createNewEntry(
            SoftPathModel.SoftMatrixEntry delOld,
            SoftPathModel.SoftMatrixEntry insOld,
            SoftPathModel.SoftMatrixEntry repOld,
            int i, int j,
            double delLocal, double insLocal, double repLocal) {
        /*
         * We calculate softmin here and store details. Please refer to the
         * softmin implementation in SoftAlignmentScoreAlgorithm for details.
         */
        // accumulated costs in the order: deletion, insertion, replacement.
        final double[] args = new double[3];
        args[0] = delOld.getSoftmin() + delLocal;
        args[1] = insOld.getSoftmin() + insLocal;
        args[2] = repOld.getSoftmin() + repLocal;
        // find the strict minimum (the first one in case of ties).
        double min = args[0];
        int minIdx = 0;
        for (int o = 1; o < args.length; o++) {
            if (args[o] < min) {
                min = args[o];
                minIdx = o;
            }
        }
        // weights of disregarded options remain at the array default of 0.
        final double[] weights = new double[3];
        double normalization = 0;
        double weightedSum = 0;
        for (int o = 0; o < args.length; o++) {
            if (o == minIdx) {
                //if this is the minimum then the result is clear.
                weights[o] = 1;
                normalization += 1;
                weightedSum += args[o];
            } else {
                // shifting by the minimum keeps the exponent non-negative.
                final double exponent = beta * (args[o] - min);
                /*
                 * as we are guaranteed to have one weight = 1 as baseline we
                 * can approximate very small weights with zero as they will be
                 * dominated by the minimum. This should smooth out some
                 * numerical issues.
                 */
                if (exponent < approxThreshold) {
                    weights[o] = Math.exp(-exponent);
                    normalization += weights[o];
                    weightedSum += weights[o] * args[o];
                }
            }
        }
        final SoftPathModel.SoftMatrixEntry newEntry = new SoftPathModel.SoftMatrixEntry();
        newEntry.setDelLocal(delLocal);
        newEntry.setDelProb(weights[0] / normalization);
        newEntry.setInsLocal(insLocal);
        newEntry.setInsProb(weights[1] / normalization);
        newEntry.setRepLocal(repLocal);
        newEntry.setRepProb(weights[2] / normalization);
        newEntry.setSoftmin(weightedSum / normalization);
        return newEntry;
    }

    /**
     * {@inheritDoc }
     *
     * The softmin value of the last matrix entry is normalized by the summed
     * length of both sequences. If both sequences are empty, the normalized
     * score is defined as 0 instead of dividing by zero.
     */
    @Override
    public SoftPathModel transformToResult(SoftPathModel.SoftMatrixEntry[][] alignmentMatrix, Sequence a, Sequence b) {
        final int m = a.getNodes().size();
        final int n = b.getNodes().size();
        final double rawScore = alignmentMatrix[m][n].getSoftmin();
        /*
         * In each step of the alignment we had a distance of 1 at most. Thus we
         * can normalize the score to the range [0,1] if we just divide the raw
         * score by the worst case, which would be to delete a entirely and
         * insert b entirely => worst case score = m+n.
         */
        final int worstCase = m + n;
        /*
         * For two empty sequences the worst case is 0 and the division would
         * be a floating point division by zero (NaN for a raw score of 0), so
         * we report a distance of 0 in that case.
         */
        final double finalScore = worstCase == 0 ? 0d : rawScore / ((double) worstCase);
        return new SoftPathModel(getSpecification(), a, b, finalScore, beta, alignmentMatrix);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy