de.citec.tcs.alignment.SoftAlignmentSamplingAlgorithm Maven / Gradle / Ivy

Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see .
 */
package de.citec.tcs.alignment;

import de.citec.tcs.alignment.SoftPathModel.SoftMatrixEntry;
import de.citec.tcs.alignment.comparators.OperationType;
import de.citec.tcs.alignment.sequence.Node;
import de.citec.tcs.alignment.sequence.Sequence;
import java.util.Random;
import java.util.Stack;

/**
 * This calculates the soft (global) alignment of two sequences similarly to the
 * Needleman-Wunsch-Algorithm. The whole trick of this
 * SoftAlignmentScoreAlgorithm-Algorithm is to replace the strict minimum in the
 * cost function by a softmin approximation. Thereby we give the alignment a
 * "probabilistic touch": Even suboptimal alignments are considered by the
 * softmin approach, but less than the optimal alignment. More information about
 * softmin can be found in the Softmin class.
 *
 * This algorithm returns a PathList containing a sampling from the probability
 * distribution defined implicitly by the softmin function: We choose an
 * operation according to its softmin probability.
 *
 * @author Benjamin Paassen - bpaassen(at)techfak.uni-bielefeld.de
 */
public class SoftAlignmentSamplingAlgorithm
		extends AbstractGapAlignmentAlgorithm {

	public static final double DEFAULTBETA = 1;
	private double beta = DEFAULTBETA;
	public static final int DEFAULTPATHLIMIT = 100;
	private int pathLimit = 100;
	private double approxThreshold = Softmin.DEFAULTAPPROXTHRESHOLD;

	/**
	 * This sets up an AlignmentAlgorithm instance according to the given
	 * specification.
	 *
	 * @param alignmentSpecification an AlignmentSpecification.
	 */
	public SoftAlignmentSamplingAlgorithm(AlignmentSpecification alignmentSpecification) {
		super(alignmentSpecification, SoftMatrixEntry.class, PathList.class);
	}

	/**
	 *
	 * @param beta The parameter defining the "softness" of the alignment. For
	 * beta towards infinity this alignment becomes closer to the strict
	 * alignment. For beta = 0 all possible alignments are equally considered
	 * and softmin returns the average. Please note that a low beta value might
	 * lead to a very rough approximation and that for higher sequence lengths
	 * beta has to be higher, too.
	 */
	public void setBeta(double beta) {
		this.beta = beta;
	}

	/**
	 *
	 * @return The parameter defining the "softness" of the alignment. For beta
	 * towards infinity this alignment becomes closer to the strict alignment.
	 * For beta = 0 all possible alignments are equally considered and softmin
	 * returns the average. Please note that a low beta value might lead to a
	 * very rough approximation and that for higher sequence lengths beta has to
	 * be higher, too.
	 */
	public double getBeta() {
		return beta;
	}

	/**
	 * Returns the the number of paths that shall be sampled.
	 *
	 * @return the number of paths that shall be sampled.
	 */
	public int getPathLimit() {
		return pathLimit;
	}

	/**
	 *
	 * This sets the number of paths that shall be sampled.
	 *
	 * The default for this is DEFAULTPATHLIMIT.
	 *
	 * @param pathLimit the number of paths that shall be sampled.
	 */
	public void setPathLimit(int pathLimit) {
		if (pathLimit < 1) {
			throw new IllegalArgumentException("You have to allow at least one path to be calculated!");
		}
		this.pathLimit = pathLimit;
	}

	/**
	 * Sets the threshold for which exponents within the softmin function will
	 * be disregarded.
	 *
	 * This approximation is valid because we are bound to have one softmin
	 * weight that is equal to 1 (the actual minimum). If another weight gets
	 * very small it will be dominated by the actual minimum. Thus the weight
	 * can be approximated by 0.
	 *
	 * To save some Math.exp-calculations the threshold is given as exponent
	 * rather than weight. The transformation between both is easy though:
	 *
	 * weight_threshold = Math.exp(-approxThreshold)
	 *
	 * which means
	 *
	 * approxThreshold = -log(weight_threshold)
	 *
	 * @param approxThreshold the threshold for which exponents within the
	 * softmin function will be disregarded.
	 */
	public void setApproxThreshold(double approxThreshold) {
		this.approxThreshold = approxThreshold;
	}

	/**
	 * Returns the threshold for which exponents within the softmin function
	 * will be disregarded.
	 *
	 * This approximation is valid because we are bound to have one softmin
	 * weight that is equal to 1 (the actual minimum). If another weight gets
	 * very small it will be dominated by the actual minimum. Thus the weight
	 * can be approximated by 0.
	 *
	 * To save some Math.exp-calculations the threshold is given as exponent
	 * rather than weight. The transformation between both is easy though:
	 *
	 * weight_threshold = Math.exp(-approxThreshold)
	 *
	 * which means
	 *
	 * approxThreshold = -log(weight_threshold)
	 *
	 * @return the threshold for which exponents within the softmin function
	 * will be disregarded.
	 */
	public double getApproxThreshold() {
		return approxThreshold;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public SoftMatrixEntry createInitial() {
		return new SoftMatrixEntry();
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public SoftMatrixEntry createDelInitial(SoftMatrixEntry delOld, int i, double delLocal) {
		final SoftMatrixEntry delEntry = new SoftMatrixEntry();
		delEntry.setDelLocal(delLocal);
		delEntry.setDelProb(1);
		delEntry.setSoftmin(delOld.getSoftmin() + delLocal);
		return delEntry;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public SoftMatrixEntry createInsInitial(SoftMatrixEntry insOld, int j, double insLocal) {
		final SoftMatrixEntry insEntry = new SoftMatrixEntry();
		insEntry.setInsLocal(insLocal);
		insEntry.setInsProb(1);
		insEntry.setSoftmin(insOld.getSoftmin() + insLocal);
		return insEntry;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public SoftMatrixEntry createNewEntry(
			SoftMatrixEntry delOld,
			SoftMatrixEntry insOld,
			SoftMatrixEntry repOld,
			int i, int j,
			double delLocal, double insLocal, double repLocal) {

		/*
		 * We calculate softmin here and store details. Please refer to the
		 * softmin implementation in SoftAlignmentScoreAlgorithm for details.
		 */
		final double[] args = new double[3];
		args[0] = delOld.getSoftmin() + delLocal;
		args[1] = insOld.getSoftmin() + insLocal;
		args[2] = repOld.getSoftmin() + repLocal;

		double min = args[0];
		int minIdx = 0;
		for (int o = 1; o < args.length; o++) {
			if (args[o] < min) {
				min = args[o];
				minIdx = o;
			}
		}

		final double[] weights = new double[3];
		double normalization = 0;
		double weightedSum = 0;
		double exponent;
		for (int o = 0; o < args.length; o++) {
			if (o == minIdx) {
				//if this is the minimum then the result is clear.
				weights[o] = 1;
				normalization += 1;
				weightedSum += args[o];
			} else {
				exponent = beta * (args[o] - min);
				/*
				 * as we are garantueed to have one weight = 1 as baseline we
				 * can approximate very small weights with zero as they will be
				 * dominated by the minimum. This should smooth out some
				 * numerical issues.
				 */
				if (exponent < approxThreshold) {
					weights[o] = Math.exp(-exponent);
					normalization += weights[o];
					weightedSum += weights[o] * args[o];
				}
			}
		}

		final SoftMatrixEntry newEntry = new SoftMatrixEntry();
		newEntry.setDelLocal(delLocal);
		newEntry.setDelProb(weights[0] / normalization);
		newEntry.setInsLocal(insLocal);
		newEntry.setInsProb(weights[1] / normalization);
		newEntry.setRepLocal(repLocal);
		newEntry.setRepProb(weights[2] / normalization);
		newEntry.setSoftmin(weightedSum / normalization);

		return newEntry;
	}

	/**
	 * {@inheritDoc }
	 */
	@Override
	public PathList transformToResult(
			SoftMatrixEntry[][] alignmentMatrix,
			Sequence a, Sequence b) {
		final int M = a.getNodes().size();
		final int N = b.getNodes().size();
		final double normalization = (double) (M + N);
		final AlignmentSpecification spec = getSpecification();

		final PathList out = new PathList();

		final Random rand = new Random();

		final double[] weighting = spec.getWeighting();

		//now sample k paths.
		for (int l = 0; l < pathLimit; l++) {
			int i = M;
			int j = N;
			final Stack opStack = new Stack<>();
			double score = 0;
			while (i > 0 || j > 0) {
				final SoftMatrixEntry current = alignmentMatrix[i][j];
				Node left = null;
				Node right = null;
				final double r = rand.nextDouble();
				final OperationType type;
				/*
				 * Choose the operation randomly but according to the
				 * probabilities calculated before.
				 */
				final double delProb = current.getDelProb();
				final double insProb = current.getInsProb();
				final double[] compCosts;
				if (r < delProb) {
					type = OperationType.DELETION;
					left = a.getNodes().get(i - 1);
					if (current.getDelCosts() == null) {
						compCosts = spec.calculateDeletionCosts(left);
						current.setDelCosts(compCosts);
					} else {
						compCosts = current.getDelCosts();
					}
					i--;
				} else if (r > delProb && r < delProb + insProb) {
					type = OperationType.INSERTION;
					right = b.getNodes().get(j - 1);
					if (current.getInsCosts() == null) {
						compCosts = spec.calculateInsertionCosts(right);
						current.setInsCosts(compCosts);
					} else {
						compCosts = current.getInsCosts();
					}
					j--;
				} else {
					type = OperationType.REPLACEMENT;
					left = a.getNodes().get(i - 1);
					right = b.getNodes().get(j - 1);
					if (current.getRepCosts() == null) {
						compCosts = spec.calculateReplacementCosts(left, right);
						current.setRepCosts(compCosts);
					} else {
						compCosts = current.getRepCosts();
					}
					i--;
					j--;
				}
				final TmpOp newOp = new TmpOp(left, right, compCosts, spec, type);
				score += newOp.getWeightedLocalCost();
				opStack.push(newOp);
			}
			/*
			 * If we have a path, transform it to a proper object.
			 */
			final AlignmentPath path = new AlignmentPath(spec,
					a, b, score / normalization);
			while (!opStack.empty()) {
				final TmpOp tmpOp = opStack.pop();
				final Operation op = new Operation(tmpOp.left, tmpOp.right,
						tmpOp.type, path);
				op.setComparatorDistances(tmpOp.compDists);
				path.getOperations().add(op);
			}
			out.add(path);
		}
		return out;
	}

	private static class TmpOp {

		private final Node left;
		private final Node right;
		private final double[] compDists;
		private final AlignmentSpecification spec;
		private final OperationType type;

		public TmpOp(Node left, Node right, double[] local,
				AlignmentSpecification spec, OperationType type) {
			this.left = left;
			this.right = right;
			this.compDists = local;
			this.spec = spec;
			this.type = type;
		}

		public double getWeightedLocalCost() {
			final double[] weighting = spec.getWeighting();
			double out = 0;
			for (int k = 0; k < spec.size(); k++) {
				out += weighting[k] * compDists[k];
			}
			return out;
		}
	}

}