// de.citec.tcs.alignment.ScoreBasedWeighting (Maven / Gradle / Ivy)
//
// Go to download
/* 
 * TCS Alignment Toolbox
 * 
 * Copyright (C) 2013-2015
 * Benjamin Paaßen, Georg Zentgraf
 * AG Theoretical Computer Science
 * Centre of Excellence Cognitive Interaction Technology (CITEC)
 * University of Bielefeld
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.tcs.alignment;

import java.util.Collection;
import java.util.HashMap;

/**
 * This is a helper class to enable users to weight a collection of scores based
 * on different schemes specified below.
 *
 * @author Benjamin Paassen - [email protected]
 */
public enum ScoreBasedWeighting {

    /**
     * This is a linear weighting by the paths' scores. The scores of an
     * AlignmentPath are normalized between 0 and 1. Therefore we can weight
     * a path by 1-score (because 1 is the worst possible score and in that
     * case its contribution to the derivative should be neglected).
     */
    LINEAR,
    /**
     * This is a softmin weighting of the paths' scores. Each AlignmentPath
     * x_i has a contribution of
     *
     * exp(-score(x_i) * beta)
     *
     * This is derived from the Boltzmann Free Energy Minimization. Beta is
     * set to 4.6 to ensure that a worst possible path with score 1
     * contributes at best 1% to the overall derivative if another path with
     * an optimal score of 0 is present.
     */
    SOFTMIN,
    /**
     * In this case the weight of each path is determined to be the value of
     * the Gaussian probability density function with zero mean and standard
     * deviation sigma at its score. As a formula:
     *
     * exp(-score(x_i)^2 / (2 * sigma^2))
     *
     * sigma is set to 0.33 to ensure that a worst possible path with score
     * 1 contributes at best 1% to the overall derivative if another path
     * with an optimal score of 0 is present.
     */
    GAUSSIAN;

    /** The beta of the softmin weighting; exp(-4.6) is roughly 0.01. */
    private static final double SOFTMIN_BETA = 4.6;
    /** The factor 1 / (2 * sigma^2) of the Gaussian weighting for sigma = 0.33. */
    private static final double GAUSSIAN_NORMALIZATION = 1 / (2 * 0.33 * 0.33);

    /**
     * This calculates the normalized weights for the given scores. The scores
     * are copied to an array in iteration order of the given collection and
     * then handed to {@link #calculateWeighting(double[])}.
     *
     * @param scores a collection of scores. It must not contain null
     * elements because each element is unboxed.
     *
     * @return the normalized weights for the given scores, in the iteration
     * order of the input collection.
     */
    public double[] calculateWeighting(Collection<Double> scores) {
        final double[] scoreArr = new double[scores.size()];
        int i = 0;
        for (final Double score : scores) {
            //advance the index for every score; otherwise all scores would
            //end up in the first array entry.
            scoreArr[i++] = score;
        }
        return calculateWeighting(scoreArr);
    }

    /**
     * This calculates the normalized weights (adding up to 1) for the given
     * scores. For scores between 0 and 1 each weight is between 0 and 1 as
     * well.
     *
     * If the unnormalized weights add up to exactly 0 (e.g. LINEAR weighting
     * with all scores being 1) no path can be preferred over another, so
     * uniform weights are returned instead of dividing by zero.
     *
     * @param scores an array of scores.
     *
     * @return the normalized weights (adding up to 1) for the given scores.
     * An empty input yields an empty array.
     *
     * @throws UnsupportedOperationException if this weighting scheme has no
     * implementation.
     */
    public double[] calculateWeighting(double[] scores) {
        final double[] weights = new double[scores.length];
        double normalization = 0;
        //cache the weights of scores we have already seen.
        final HashMap<Double, Double> cache = new HashMap<>();
        for (int i = 0; i < scores.length; i++) {
            //if we have a cached value for this score, use it.
            final Double cachedVal = cache.get(scores[i]);
            if (cachedVal != null) {
                weights[i] = cachedVal;
            } else {
                //otherwise calculate it.
                weights[i] = rawWeight(scores[i]);
                cache.put(scores[i], weights[i]);
            }
            normalization += weights[i];
        }
        if (normalization == 0) {
            //dividing by zero would yield NaN weights; fall back to a
            //uniform weighting.
            final double uniform = 1.0 / weights.length;
            for (int i = 0; i < weights.length; i++) {
                weights[i] = uniform;
            }
            return weights;
        }
        //normalize the weights
        for (int i = 0; i < weights.length; i++) {
            weights[i] /= normalization;
        }
        return weights;
    }

    /**
     * This calculates the unnormalized weight of a single score according to
     * this weighting scheme.
     */
    private double rawWeight(double score) {
        switch (this) {
            case LINEAR:
                return 1 - score;
            case SOFTMIN:
                return Math.exp(-SOFTMIN_BETA * score);
            case GAUSSIAN:
                return Math.exp(-(score * score) * GAUSSIAN_NORMALIZATION);
            default:
                throw new UnsupportedOperationException(
                        "The Weighting " + this + " is not supported!");
        }
    }
}