// de.citec.tcs.alignment.SoftAlignmentFullAlgorithm Maven / Gradle / Ivy
//
// Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.sequence.Sequence;

/**
 * Computes a soft (global) alignment of two sequences in the manner of the
 * Needleman-Wunsch algorithm. Instead of taking the strict minimum over the
 * three possible predecessors of a cell (deletion, insertion, replacement),
 * this algorithm combines them with a softmin: every option receives the
 * weight exp(-beta * (cost - minimum cost)), so suboptimal alignments still
 * contribute, but less than the optimal one. More information about softmin
 * can be found in the Softmin class.
 *
 * The result of this algorithm is the full dynamic programming matrix, which
 * allows for calculating the soft derivative.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class SoftAlignmentFullAlgorithm
		extends AbstractGapAlignmentAlgorithm {

	// NOTE(review): the superclass is referenced without type arguments here;
	// confirm against the original source whether generic parameters were
	// lost when this file was extracted.

	/**
	 * The value of beta that is used unless {@link #setBeta(double)} is
	 * called.
	 */
	public static final double DEFAULTBETA = 1;
	/**
	 * The "softness" parameter of the softmin function.
	 */
	private double beta = DEFAULTBETA;
	/**
	 * Softmin exponents that are not smaller than this value are dropped.
	 */
	private double approxThreshold = Softmin.DEFAULTAPPROXTHRESHOLD;

	/**
	 * Creates an algorithm instance for the given specification. Matrix
	 * entries are of type SoftPathModel.SoftMatrixEntry and the result is a
	 * SoftPathModel.
	 *
	 * @param alignmentSpecification an AlignmentSpecification.
	 */
	public SoftAlignmentFullAlgorithm(AlignmentSpecification alignmentSpecification) {
		super(alignmentSpecification,
				SoftPathModel.SoftMatrixEntry.class,
				SoftPathModel.class);
	}

	/**
	 * Sets the cut-off for softmin exponents. A non-minimal option whose
	 * exponent beta * (cost - minimum cost) is not smaller than this value
	 * gets the weight 0.
	 *
	 * The approximation is justified because the minimal option always has
	 * the weight 1. A weight that is tiny in comparison is dominated by it
	 * and can be treated as 0.
	 *
	 * The cut-off is expressed as an exponent rather than as a weight to
	 * avoid needless Math.exp calls. Both are related via
	 *
	 * weight_threshold = Math.exp(-approxThreshold)
	 *
	 * or equivalently
	 *
	 * approxThreshold = -log(weight_threshold)
	 *
	 * @param approxThreshold the exponent from which on softmin terms are
	 * disregarded.
	 */
	public void setApproxThreshold(double approxThreshold) {
		this.approxThreshold = approxThreshold;
	}

	/**
	 * Returns the cut-off for softmin exponents. A non-minimal option whose
	 * exponent beta * (cost - minimum cost) is not smaller than this value
	 * gets the weight 0.
	 *
	 * The cut-off is expressed as an exponent rather than as a weight:
	 *
	 * weight_threshold = Math.exp(-approxThreshold)
	 *
	 * or equivalently
	 *
	 * approxThreshold = -log(weight_threshold)
	 *
	 * @return the exponent from which on softmin terms are disregarded.
	 */
	public double getApproxThreshold() {
		return approxThreshold;
	}

	/**
	 * Sets the "softness" of the alignment.
	 *
	 * @param beta the softness parameter. The larger beta, the closer the
	 * result is to the strict alignment. For beta = 0 all options are weighted
	 * equally and softmin returns their average. Please note that a low beta
	 * value might lead to a very rough approximation and that longer
	 * sequences require a higher beta.
	 */
	public void setBeta(double beta) {
		this.beta = beta;
	}

	/**
	 * Returns the "softness" of the alignment.
	 *
	 * @return the softness parameter. The larger beta, the closer the result
	 * is to the strict alignment. For beta = 0 all options are weighted
	 * equally and softmin returns their average. Please note that a low beta
	 * value might lead to a very rough approximation and that longer
	 * sequences require a higher beta.
	 */
	public double getBeta() {
		return beta;
	}

	/**
	 * {@inheritDoc }
	 *
	 * This implementation returns a freshly constructed entry without
	 * modifying it.
	 */
	@Override
	public SoftPathModel.SoftMatrixEntry createInitial() {
		return new SoftPathModel.SoftMatrixEntry();
	}

	/**
	 * {@inheritDoc }
	 *
	 * Deletion is the only option here, so it gets probability 1 and its
	 * local cost is added to the softmin value of the predecessor. The index
	 * i is not used.
	 */
	@Override
	public SoftPathModel.SoftMatrixEntry createDelInitial(SoftPathModel.SoftMatrixEntry delOld, int i, double delLocal) {
		final double accumulated = delOld.getSoftmin() + delLocal;
		final SoftPathModel.SoftMatrixEntry entry = new SoftPathModel.SoftMatrixEntry();
		entry.setDelLocal(delLocal);
		entry.setDelProb(1);
		entry.setSoftmin(accumulated);
		return entry;
	}

	/**
	 * {@inheritDoc }
	 *
	 * Insertion is the only option here, so it gets probability 1 and its
	 * local cost is added to the softmin value of the predecessor. The index
	 * j is not used.
	 */
	@Override
	public SoftPathModel.SoftMatrixEntry createInsInitial(SoftPathModel.SoftMatrixEntry insOld, int j, double insLocal) {
		final double accumulated = insOld.getSoftmin() + insLocal;
		final SoftPathModel.SoftMatrixEntry entry = new SoftPathModel.SoftMatrixEntry();
		entry.setInsLocal(insLocal);
		entry.setInsProb(1);
		entry.setSoftmin(accumulated);
		return entry;
	}

	/**
	 * {@inheritDoc }
	 *
	 * The three candidate costs (deletion, insertion, replacement; each the
	 * predecessor's softmin value plus the local cost) are combined by
	 * softmin. The new entry stores the local costs, the normalized softmin
	 * weights as probabilities and the weighted average of the candidate
	 * costs as softmin value. The indices i and j are not used.
	 */
	@Override
	public SoftPathModel.SoftMatrixEntry createNewEntry(
			SoftPathModel.SoftMatrixEntry delOld,
			SoftPathModel.SoftMatrixEntry insOld,
			SoftPathModel.SoftMatrixEntry repOld,
			int i, int j,
			double delLocal, double insLocal, double repLocal) {

		// candidate costs in the fixed order deletion, insertion, replacement
		final double[] candidates = {
			delOld.getSoftmin() + delLocal,
			insOld.getSoftmin() + insLocal,
			repOld.getSoftmin() + repLocal
		};

		// locate the cheapest candidate; on ties the first one wins
		int cheapest = 0;
		for (int k = 1; k < candidates.length; k++) {
			if (candidates[k] < candidates[cheapest]) {
				cheapest = k;
			}
		}
		final double lowestCost = candidates[cheapest];

		final double[] softWeights = new double[candidates.length];
		double weightTotal = 0;
		double costTotal = 0;
		for (int k = 0; k < candidates.length; k++) {
			if (k == cheapest) {
				// the minimum always serves as baseline with weight 1
				softWeights[k] = 1;
			} else {
				final double scaledGap = beta * (candidates[k] - lowestCost);
				/*
				 * As one weight is guaranteed to be 1, very small weights are
				 * dominated by it and are approximated by zero, which should
				 * smooth out some numerical issues. A dropped candidate must
				 * not touch either sum. The condition is deliberately phrased
				 * as a negated "<" so that a NaN exponent is dropped as well.
				 */
				if (!(scaledGap < approxThreshold)) {
					continue;
				}
				softWeights[k] = Math.exp(-scaledGap);
			}
			weightTotal += softWeights[k];
			costTotal += softWeights[k] * candidates[k];
		}

		final SoftPathModel.SoftMatrixEntry entry = new SoftPathModel.SoftMatrixEntry();
		entry.setDelLocal(delLocal);
		entry.setDelProb(softWeights[0] / weightTotal);
		entry.setInsLocal(insLocal);
		entry.setInsProb(softWeights[1] / weightTotal);
		entry.setRepLocal(repLocal);
		entry.setRepProb(softWeights[2] / weightTotal);
		entry.setSoftmin(costTotal / weightTotal);
		return entry;
	}

	/**
	 * {@inheritDoc }
	 *
	 * The raw score is the softmin value of the last matrix cell, indexed by
	 * the number of nodes of a and b. It is divided by the sum of both
	 * lengths before it is stored in the resulting SoftPathModel together
	 * with beta and the full matrix.
	 */
	@Override
	public SoftPathModel transformToResult(SoftPathModel.SoftMatrixEntry[][] alignmentMatrix, Sequence a, Sequence b) {
		final int lengthA = a.getNodes().size();
		final int lengthB = b.getNodes().size();
		final double rawScore = alignmentMatrix[lengthA][lengthB].getSoftmin();
		/*
		 * Each alignment step costs 1 at most, so the worst case is deleting
		 * a entirely and inserting b entirely, which costs m+n. Dividing by
		 * that worst case maps the score to the range [0,1]. A raw score of
		 * zero is passed through without dividing.
		 */
		final double finalScore = rawScore == 0
				? 0
				: rawScore / ((double) (lengthA + lengthB));
		return new SoftPathModel(getSpecification(), a, b, finalScore, beta, alignmentMatrix);
	}

}