de.citec.tcs.alignment.SoftAlignmentFullAlgorithm Maven / Gradle / Ivy

Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.sequence.Sequence;

/**
 * This calculates the soft (global) alignment of two sequences similarly to the
 * Needleman-Wunsch-Algorithm. The whole trick of this algorithm is to replace
 * the strict minimum in the cost function by a softmin approximation. Thereby
 * we give the alignment a "probabilistic touch": Even suboptimal alignments are
 * considered by the softmin approach, but less than the optimal alignment. More
 * information about softmin can be found in the Softmin class.
 *
 * The result of this algorithm is the full dynamic programming matrix that
 * allows for calculating the soft derivative.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class SoftAlignmentFullAlgorithm
        extends AbstractGapAlignmentAlgorithm {

    /**
     * The default value for the softness parameter beta.
     */
    public static final double DEFAULTBETA = 1;
    private double beta = DEFAULTBETA;
    private double approxThreshold = Softmin.DEFAULTAPPROXTHRESHOLD;

    /**
     * This sets up an AlignmentAlgorithm instance according to the given
     * specification.
     *
     * @param alignmentSpecification an AlignmentSpecification.
     */
    public SoftAlignmentFullAlgorithm(AlignmentSpecification alignmentSpecification) {
        super(alignmentSpecification,
                SoftPathModel.SoftMatrixEntry.class,
                SoftPathModel.class);
    }

    /**
     * Sets the threshold for which exponents within the softmin function will
     * be disregarded.
     *
     * This approximation is valid because we are bound to have one softmin
     * weight that is equal to 1 (the actual minimum). If another weight gets
     * very small it will be dominated by the actual minimum. Thus the weight
     * can be approximated by 0.
     *
     * To save some Math.exp-calculations the threshold is given as exponent
     * rather than weight. The transformation between both is easy though:
     *
     * weight_threshold = Math.exp(-approxThreshold)
     *
     * which means
     *
     * approxThreshold = -log(weight_threshold)
     *
     * @param approxThreshold the threshold for which exponents within the
     * softmin function will be disregarded.
     */
    public void setApproxThreshold(double approxThreshold) {
        this.approxThreshold = approxThreshold;
    }

    /**
     * Returns the threshold for which exponents within the softmin function
     * will be disregarded.
     *
     * This approximation is valid because we are bound to have one softmin
     * weight that is equal to 1 (the actual minimum). If another weight gets
     * very small it will be dominated by the actual minimum. Thus the weight
     * can be approximated by 0.
     *
     * To save some Math.exp-calculations the threshold is given as exponent
     * rather than weight. The transformation between both is easy though:
     *
     * weight_threshold = Math.exp(-approxThreshold)
     *
     * which means
     *
     * approxThreshold = -log(weight_threshold)
     *
     * @return the threshold for which exponents within the softmin function
     * will be disregarded.
     */
    public double getApproxThreshold() {
        return approxThreshold;
    }

    /**
     * Sets the parameter defining the "softness" of the alignment.
     *
     * @param beta The parameter defining the "softness" of the alignment. For
     * beta towards infinity this alignment becomes closer to the strict
     * alignment. For beta = 0 all possible alignments are equally considered
     * and softmin returns the average. Please note that a low beta value might
     * lead to a very rough approximation and that for higher sequence lengths
     * beta has to be higher, too.
     */
    public void setBeta(double beta) {
        this.beta = beta;
    }

    /**
     * Returns the parameter defining the "softness" of the alignment.
     *
     * @return The parameter defining the "softness" of the alignment. For beta
     * towards infinity this alignment becomes closer to the strict alignment.
     * For beta = 0 all possible alignments are equally considered and softmin
     * returns the average. Please note that a low beta value might lead to a
     * very rough approximation and that for higher sequence lengths beta has to
     * be higher, too.
     */
    public double getBeta() {
        return beta;
    }

    /**
     * {@inheritDoc }
     *
     * This implementation returns a freshly constructed, otherwise untouched
     * SoftMatrixEntry.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createInitial() {
        return new SoftPathModel.SoftMatrixEntry();
    }

    /**
     * {@inheritDoc }
     *
     * This implementation stores the local deletion cost, sets the deletion
     * probability to 1 and accumulates the local deletion cost on top of the
     * softmin value of the given predecessor entry.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createDelInitial(SoftPathModel.SoftMatrixEntry delOld, int i, double delLocal) {
        final SoftPathModel.SoftMatrixEntry delEntry = new SoftPathModel.SoftMatrixEntry();
        delEntry.setDelLocal(delLocal);
        delEntry.setDelProb(1);
        delEntry.setSoftmin(delOld.getSoftmin() + delLocal);
        return delEntry;
    }

    /**
     * {@inheritDoc }
     *
     * This implementation stores the local insertion cost, sets the insertion
     * probability to 1 and accumulates the local insertion cost on top of the
     * softmin value of the given predecessor entry.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createInsInitial(SoftPathModel.SoftMatrixEntry insOld, int j, double insLocal) {
        final SoftPathModel.SoftMatrixEntry insEntry = new SoftPathModel.SoftMatrixEntry();
        insEntry.setInsLocal(insLocal);
        insEntry.setInsProb(1);
        insEntry.setSoftmin(insOld.getSoftmin() + insLocal);
        return insEntry;
    }

    /**
     * {@inheritDoc }
     *
     * This implementation calculates the softmin over the three candidate
     * costs (deletion, insertion, replacement) and stores the local costs, the
     * normalized softmin weights of the three operations and the resulting
     * softmin value in the new entry.
     */
    @Override
    public SoftPathModel.SoftMatrixEntry createNewEntry(
            SoftPathModel.SoftMatrixEntry delOld,
            SoftPathModel.SoftMatrixEntry insOld,
            SoftPathModel.SoftMatrixEntry repOld,
            int i, int j,
            double delLocal, double insLocal, double repLocal) {

        /*
         * We calculate softmin here and store details. Please refer to the
         * softmin implementation in SoftAlignmentScoreAlgorithm for details.
         *
         * Index 0 is deletion, index 1 is insertion, index 2 is replacement.
         */
        final double[] args = new double[3];
        args[0] = delOld.getSoftmin() + delLocal;
        args[1] = insOld.getSoftmin() + insLocal;
        args[2] = repOld.getSoftmin() + repLocal;

        // find the strict minimum; on ties the first index wins.
        double min = args[0];
        int minIdx = 0;
        for (int o = 1; o < args.length; o++) {
            if (args[o] < min) {
                min = args[o];
                minIdx = o;
            }
        }

        // weights that are never assigned below stay at the array default 0.
        final double[] weights = new double[3];
        double normalization = 0;
        double weightedSum = 0;
        double exponent;
        for (int o = 0; o < args.length; o++) {
            if (o == minIdx) {
                //if this is the minimum then the result is clear.
                weights[o] = 1;
                normalization += 1;
                weightedSum += args[o];
            } else {
                // costs are shifted by the minimum before exponentiation.
                exponent = beta * (args[o] - min);
                /*
                 * as we are guaranteed to have one weight = 1 as baseline we
                 * can approximate very small weights with zero as they will be
                 * dominated by the minimum. This should smooth out some
                 * numerical issues.
                 */
                if (exponent < approxThreshold) {
                    weights[o] = Math.exp(-exponent);
                    normalization += weights[o];
                    weightedSum += weights[o] * args[o];
                }
            }
        }

        // normalization is at least 1 here because the minimum contributes 1.
        final SoftPathModel.SoftMatrixEntry newEntry = new SoftPathModel.SoftMatrixEntry();
        newEntry.setDelLocal(delLocal);
        newEntry.setDelProb(weights[0] / normalization);
        newEntry.setInsLocal(insLocal);
        newEntry.setInsProb(weights[1] / normalization);
        newEntry.setRepLocal(repLocal);
        newEntry.setRepProb(weights[2] / normalization);
        newEntry.setSoftmin(weightedSum / normalization);

        return newEntry;
    }

    /**
     * {@inheritDoc }
     *
     * This implementation reads the softmin value of the last matrix cell and
     * normalizes it by the summed length of both sequences. If both sequences
     * are empty the normalized score is defined as 0 instead of dividing by
     * zero.
     */
    @Override
    public SoftPathModel transformToResult(SoftPathModel.SoftMatrixEntry[][] alignmentMatrix, Sequence a, Sequence b) {
        final int m = a.getNodes().size();
        final int n = b.getNodes().size();
        final double rawScore = alignmentMatrix[m][n].getSoftmin();
        /*
         * In each step of the alignment we had a distance of 1 at most. Thus we
         * can normalize the score to the range [0,1] if we just divide the raw
         * score by the worst case, which would be to delete a entirely and
         * insert b entirely => worst case score = m+n.
         */
        final int worstCase = m + n;
        final double finalScore;
        if (worstCase == 0) {
            /*
             * Two empty sequences are trivially identical. Dividing by the
             * worst case of 0 would turn the score into NaN, so we report a
             * distance of 0 directly.
             */
            finalScore = 0;
        } else {
            finalScore = rawScore / ((double) worstCase);
        }
        return new SoftPathModel(getSpecification(), a, b, finalScore, beta, alignmentMatrix);
    }

}