/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
/**
* This implements utility functions for the softmin function.
*
* The softmin function approximates the strict minimum by a differentiable
* function. It is defined as follows:
*
* Given real-valued input variables x_1, ... , x_n we define:
*
* softmin weights: e_i := exp(-beta * x_i)
* normalization: Z := Sum_i e_i
* softmin probability: p_i := e_i / Z
*
* and finally:
*
* softmin(x_1, ... , x_n) := Sum_i p_i * x_i
*
* The beta parameter steers the "strictness" of the softmin approximation. The
* parameter lies in the interval [0, infinity). For beta = 0 we obtain:
*
* e_i = 1, Z = n, p_i = 1/n and
*
* softmin(x_1, ... , x_n) = 1/n * Sum_i x_i
*
* which is the average of all input variables. For large beta softmin converges
* towards min.
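*
* For example, for beta = 1 and inputs x_1 = 0, x_2 = 1 we obtain
* e_1 = 1, e_2 = exp(-1) (about 0.368), Z (about 1.368),
* p_1 (about 0.731), p_2 (about 0.269), and therefore
* softmin(0, 1) (about 0.269), which lies between the strict minimum 0
* and the average 0.5.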
*
* There is an additional trick to make softmin numerically more precise and
* more efficient: Note that the p_i remain unchanged if we add a constant
* term to all variables x_i. If we use -min(x_1, ... , x_n) as the constant
* term we get:
*
* e*_i := exp(-beta * (x_i - min(x_1, ... , x_n)))
* Z* := Sum_i e*_i
* p_i = e_i / Z = e*_i / Z*
*
* which is easier to calculate for two reasons:
*
* First, we are guaranteed that e*_i = 1 for at least one i. Therefore Z* >= 1.
* This minimizes errors due to double precision rounding.
*
* Second, we can approximate p_i = 0 if e*_i is very small. This is the case
* if beta * (x_i - min(x_1, ... , x_n)) exceeds a certain threshold. This
* threshold can be set by the user and defaults to DEFAULTAPPROXTHRESHOLD.
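*
* A minimal usage sketch (the numbers are approximate):
*
* double m = Softmin.softmin(1.0, new double[]{0.0, 1.0});
* // m is about 0.269
* double[] p = Softmin.calculateSoftminProbabilities(1.0, new double[]{0.0, 1.0});
* // p is about {0.731, 0.269}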
*
* @author Benjamin Paassen - [email protected]
*/
public class Softmin {
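/**
* The default value for the beta parameter.
*/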
public static final double DEFAULTBETA = 1;
/**
* This means that softmin arguments with a weight smaller than 10^-3 will
* be disregarded. This is a valid approximation because we are guaranteed
* to have at least one weight = 1, which will dominate all very small
* weights.
*/
public static final double DEFAULTAPPROXTHRESHOLD = 3 * Math.log(10);
/**
* This calculates softmin for the given input and the given value
* of beta. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return a soft approximation of the minimum of all input arguments.
*/
public static double softmin(double beta, double[] variables) {
return softmin(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* This calculates softmin for the given input and the given value
* of beta. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return a soft approximation of the minimum of all input arguments.
*/
public static double softmin(double beta, double approxThreshold,
double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin is not defined for empty input!");
}
/*
* first get the minimum and subtract it from all inputs to prevent
* numeric trouble. This is allowed because softmin returns the same
* value if we subtract a constant.
*/
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//then calculate the actual softmin.
double normalization = 0;
double weightedSum = 0;
double weight;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
//if this is the minimum then the result is clear.
normalization += 1;
weightedSum += variables[i];
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
normalization += 1;
weightedSum += variables[i];
} else if (exponent < approxThreshold) {
weight = Math.exp(-exponent);
normalization += weight;
weightedSum += weight * variables[i];
}
}
}
return weightedSum / normalization;
}
/**
* Calculates the softmin probabilities p_i for the given beta and
* the given variables. More information can be found in the class header.
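*
* For example, for beta = 1 and variables (0, 1) this returns approximately
* (0.731, 0.269).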
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return the softmin probabilities p_i for the given beta and
* the given variables.
*/
public static double[] calculateSoftminProbabilities(double beta,
double[] variables) {
return calculateSoftminProbabilities(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* Calculates the softmin probabilities p_i for the given beta and
* the given variables. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return the softmin probabilities p_i for the given beta and
* the given variables.
*/
public static double[] calculateSoftminProbabilities(double beta,
double approxThreshold, double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin is not defined for empty input!");
}
//first calculate the strict minimum.
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//calculate softmin weights.
final double[] probs = new double[variables.length];
double normalization = 0;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
probs[i] = 1;
normalization += 1;
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
probs[i] = 1;
normalization += 1;
} else if (exponent < approxThreshold) {
probs[i] = Math.exp(-exponent);
normalization += probs[i];
}
}
}
//calculate softmin probabilities.
for (int i = 0; i < variables.length; i++) {
if (probs[i] > 0) {
probs[i] /= normalization;
}
}
return probs;
}
/**
* Calculates the softmin'(x_i) function for all variables x_i in the
* given input.
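*
* The derivative with respect to x_i is given by
*
* softmin'(x_i) = p_i * (1 - beta * (x_i - softmin(x_1, ... , x_n)))
*
* where p_i denotes the softmin probability of x_i.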
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return softmin'(x_i) for all x_i.
*/
public static double[] calculateSoftminDerivatives(double beta,
double[] variables) {
return calculateSoftminDerivatives(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* Calculates the softmin'(x_i) function for all variables x_i in the
* given input.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return softmin'(x_i) for all x_i.
*/
public static double[] calculateSoftminDerivatives(double beta,
double approxThreshold, double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin' is not defined for empty input!");
}
final double[] out = new double[variables.length];
//first calculate the strict minimum.
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//calculate softmin.
final double[] weights = new double[variables.length];
double normalization = 0;
double softmin = 0;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
weights[i] = 1;
normalization += 1;
softmin += variables[i];
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
weights[i] = 1;
normalization += 1;
softmin += variables[i];
} else if (exponent < approxThreshold) {
weights[i] = Math.exp(-exponent);
normalization += weights[i];
softmin += weights[i] * variables[i];
}
}
}
softmin /= normalization;
//now calculate softmin'
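//softmin'(x_i) = weights[i] / normalization * (1 - beta * (variables[i] - softmin))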
for (int i = 0; i < variables.length; i++) {
out[i] = ((1 - beta * (variables[i] - softmin)) * weights[i]) / normalization;
}
return out;
}
}