/*
* TCS Alignment Toolbox
*
* Copyright (C) 2013-2015
* Benjamin Paaßen, Georg Zentgraf
* AG Theoretical Computer Science
* Centre of Excellence Cognitive Interaction Technology (CITEC)
* University of Bielefeld
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.tcs.alignment;
/**
* This implements utility functions for the softmin function.
*
* The softmin function approximates the strict minimum by a differentiable
* function. It is defined as follows:
*
* Given real-valued input variables x_1, ... , x_n we define:
*
* softmin weights: e_i := exp(-beta * x_i)
* normalization: Z := Sum_i e_i
* softmin probability: p_i := e_i / Z
*
* and finally:
*
* softmin(x_1, ... , x_n) := Sum_i p_i * x_i
*
* The beta parameter steers the "strictness" of the softmin approximation. The
* parameter lies in the interval [0, infinity). For beta = 0 we obtain:
*
* e_i = 1, Z = n, p_i = 1/n and
*
* softmin(x_1, ... , x_n) = 1/n * Sum_i x_i
*
* which is the average of all input variables. For large beta softmin converges
* towards min.
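*
* For example, for beta = 1 and inputs x_1 = 0, x_2 = 1 we obtain
* e_1 = 1, e_2 = exp(-1) (about 0.368), Z (about 1.368),
* p_1 (about 0.731), p_2 (about 0.269), and therefore
* softmin(0, 1) (about 0.269), which lies between the strict minimum 0
* and the average 0.5.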
*
* There is an additional trick to make softmin numerically more precise and
* more efficient: Note that the p_i remain unchanged if we add a constant
* term to all variables x_i. If we use -min(x_1, ... , x_n) as the constant
* term we get:
*
* e*_i := exp(-beta * (x_i - min(x_1, ... , x_n)))
* Z* := Sum_i e*_i
* p_i = e_i / Z = e*_i / Z*
*
* which is easier to calculate for two reasons:
*
* First, we are guaranteed that e*_i = 1 for at least one i. Therefore Z* >= 1.
* This minimizes errors due to double precision rounding.
*
* Second, we can approximate p_i = 0 if e*_i is very small. This is the case
* if beta * (x_i - min(x_1, ... , x_n)) exceeds a certain threshold. This
* threshold can be set by the user and defaults to DEFAULTAPPROXTHRESHOLD.
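*
* A minimal usage sketch (the numbers are approximate):
*
* double m = Softmin.softmin(1.0, new double[]{0.0, 1.0});
* // m is about 0.269
* double[] p = Softmin.calculateSoftminProbabilities(1.0, new double[]{0.0, 1.0});
* // p is about {0.731, 0.269}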
*
* @author Benjamin Paassen - [email protected]
*/
public class Softmin {
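/**
* The default value for the beta parameter.
*/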
public static final double DEFAULTBETA = 1;
/**
* This means that softmin arguments with a weight smaller than 10^-3 will
* be disregarded. This is a valid approximation because we are guaranteed
* to have at least one weight = 1, which will dominate all very small
* weights.
*/
public static final double DEFAULTAPPROXTHRESHOLD = 3 * Math.log(10);
/**
* This calculates softmin for the given input and the given value
* of beta. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return a soft approximation of the minimum of all input arguments.
*/
public static double softmin(double beta, double[] variables) {
return softmin(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* This calculates softmin for the given input and the given value
* of beta. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return a soft approximation of the minimum of all input arguments.
*/
public static double softmin(double beta, double approxThreshold,
double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin is not defined for empty input!");
}
/*
* first get the minimum and subtract it from all inputs to prevent
* numeric trouble. This is allowed because softmin returns the same
* value if we subtract a constant.
*/
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//then calculate the actual softmin.
double normalization = 0;
double weightedSum = 0;
double weight;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
//if this is the minimum then the result is clear.
normalization += 1;
weightedSum += variables[i];
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
normalization += 1;
weightedSum += variables[i];
} else if (exponent < approxThreshold) {
weight = Math.exp(-exponent);
normalization += weight;
weightedSum += weight * variables[i];
}
}
}
return weightedSum / normalization;
}
/**
* Calculates the softmin probabilities p_i for the given beta and
* the given variables. More information can be found in the class header.
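*
* For example, for beta = 1 and variables (0, 1) this returns approximately
* (0.731, 0.269).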
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return the softmin probabilities p_i for the given beta and
* the given variables.
*/
public static double[] calculateSoftminProbabilities(double beta,
double[] variables) {
return calculateSoftminProbabilities(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* Calculates the softmin probabilities p_i for the given beta and
* the given variables. More information can be found in the class header.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return the softmin probabilities p_i for the given beta and
* the given variables.
*/
public static double[] calculateSoftminProbabilities(double beta,
double approxThreshold, double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin is not defined for empty input!");
}
//first calculate the strict minimum.
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//calculate softmin weights.
final double[] probs = new double[variables.length];
double normalization = 0;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
probs[i] = 1;
normalization += 1;
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
probs[i] = 1;
normalization += 1;
} else if (exponent < approxThreshold) {
probs[i] = Math.exp(-exponent);
normalization += probs[i];
}
}
}
//calculate softmin probabilities.
for (int i = 0; i < variables.length; i++) {
if (probs[i] > 0) {
probs[i] /= normalization;
}
}
return probs;
}
/**
* Calculates the softmin'(x_i) function for all variables x_i in the
* given input.
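*
* The derivative with respect to x_i is given by
*
* softmin'(x_i) = p_i * (1 - beta * (x_i - softmin(x_1, ... , x_n)))
*
* where p_i denotes the softmin probability of x_i.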
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param variables the variables x_i.
*
* @return softmin'(x_i) for all x_i.
*/
public static double[] calculateSoftminDerivatives(double beta,
double[] variables) {
return calculateSoftminDerivatives(beta, DEFAULTAPPROXTHRESHOLD, variables);
}
/**
* Calculates the softmin'(x_i) function for all variables x_i in the
* given input.
*
* @param beta Softmin converges towards min as beta approaches infinity. For
* beta = 0 softmin returns the average of the given input arguments. Thus beta
* regulates the "strictness" of the softmin approximation.
* @param approxThreshold a threshold above which a softmin weight is
* approximated with zero. The threshold refers to the exponent of the
* softmin weight. The exact definition is: If
*
* beta * (x_i - min(x_1, ... , x_n)) > approxThreshold
*
* then the weight
*
* exp(-beta * (x_i - min(x_1, ... , x_n)))
*
* is approximated as 0.
*
* @param variables the variables x_i.
*
* @return softmin'(x_i) for all x_i.
*/
public static double[] calculateSoftminDerivatives(double beta,
double approxThreshold, double[] variables) {
if (variables.length == 0) {
throw new IllegalArgumentException("Softmin' is not defined for empty input!");
}
final double[] out = new double[variables.length];
//first calculate the strict minimum.
double min = variables[0];
int minIdx = 0;
for (int i = 1; i < variables.length; i++) {
if (variables[i] < min) {
min = variables[i];
minIdx = i;
}
}
//calculate softmin.
final double[] weights = new double[variables.length];
double normalization = 0;
double softmin = 0;
double exponent;
for (int i = 0; i < variables.length; i++) {
if (i == minIdx) {
weights[i] = 1;
normalization += 1;
softmin += variables[i];
} else {
exponent = beta * (variables[i] - min);
/*
* as we are guaranteed to have one weight = 1 as a baseline we
* can approximate very small weights with zero as they will be
* dominated by the minimum. This should smooth out some
* numerical issues.
*/
if (exponent == 0) {
weights[i] = 1;
normalization += 1;
softmin += variables[i];
} else if (exponent < approxThreshold) {
weights[i] = Math.exp(-exponent);
normalization += weights[i];
softmin += weights[i] * variables[i];
}
}
}
softmin /= normalization;
//now calculate softmin'
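//softmin'(x_i) = weights[i] / normalization * (1 - beta * (variables[i] - softmin))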
for (int i = 0; i < variables.length; i++) {
out[i] = ((1 - beta * (variables[i] - softmin)) * weights[i]) / normalization;
}
return out;
}
}