All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.nlpub.watset.eval.Pairwise Maven / Gradle / Ivy

Go to download

An open source implementation of the Watset algorithm for fuzzy graph clustering.

There is a newer version: 2.8.2
Show newest version
/*
 * Copyright 2018 Dmitry Ustalov
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */

package org.nlpub.watset.eval;

import org.jgrapht.alg.util.Pair;

import java.util.Collection;
import java.util.HashSet;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static java.util.Objects.requireNonNull;

/**
 * Pairwise precision, recall, and F-score for cluster evaluation.
 *
 * @param  the type of cluster elements
 * @see Evaluation of clustering
 * @see Manandhar et al. (SemEval 2010)
 * @see Ustalov et al. (COLI 45:3)
 */
public class Pairwise {
    /**
     * Transform a collection of clusters to a collection of pairs
     * generated using 2-combinations of the cluster elements.
     *
     * @param clusters the collection of clusters
     * @param       the type of cluster elements
     * @return a collection of pairs
     */
    public static  Set> transform(Collection> clusters) {
        return clusters.parallelStream().flatMap(Pairwise::combination).collect(Collectors.toSet());
    }

    /**
     * Return a stream of pairs generated as 2-combinations of the cluster elements.
     *
     * @param cluster the cluster
     * @param      the type of cluster elements
     * @return a stream of 2-combinations
     */
    public static  Stream> combination(Collection cluster) {
        return cluster.stream().
                flatMap(first -> cluster.stream().map(second -> pairOf(first, second))).
                filter(pair -> !pair.getFirst().equals(pair.getSecond()));
    }

    /**
     * Create a pair of elements ordered by hashCode.
     *
     * @param first  the first object
     * @param second the second object
     * @param     the type of objects
     * @return a pair
     */
    public static  Pair pairOf(V first, V second) {
        return (first.hashCode() <= second.hashCode()) ? Pair.of(first, second) : Pair.of(second, first);
    }

    /**
     * Compute a pairwise precision, recall, and F-score.
     *
     * @param clusterPairs the cluster pairs to evaluate
     * @param classPairs   the gold standard pairs to evaluate
     * @return precision and recalled wrapped in an instance of {@link PrecisionRecall}
     */
    public PrecisionRecall evaluate(Set> clusterPairs, Set> classPairs) {
        final var union = new HashSet<>(clusterPairs);
        union.addAll(classPairs);

        final var preds = new boolean[union.size()];
        final var trues = new boolean[union.size()];

        var i = 0;

        for (final var pair : union) {
            preds[i] = clusterPairs.contains(pair);
            trues[i] = classPairs.contains(pair);
            i++;
        }

        int tp = 0, fp = 0, fn = 0;

        for (i = 0; i < union.size(); i++) {
            if (preds[i] && trues[i]) tp++;
            if (preds[i] && !trues[i]) fp++;
            if (!preds[i] && trues[i]) fn++;
        }

        double tp_fp = tp + fp, tp_fn = tp + fn;

        return new PrecisionRecall(tp_fp == 0d ? 0 : tp / tp_fp, tp_fn == 0d ? 0 : tp / tp_fn);
    }

    /**
     * Transform the clusters to pairs and compute a pairwise precision, recall, and F-score.
     *
     * @param clusters the collection of the clusters to evaluate
     * @param classes  the collection of the gold standard clusters
     * @return precision and recalled wrapped in an instance of {@link PrecisionRecall}
     */
    public PrecisionRecall evaluate(Collection> clusters, Collection> classes) {
        final var clusterPairs = transform(requireNonNull(clusters));
        final var classPairs = transform(requireNonNull(classes));
        return evaluate(clusterPairs, classPairs);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy