All downloads are free. The search and download functionality uses the official Maven repository.

smile.math.distance.JaccardDistance Maven / Gradle / Ivy

There is a newer version: 2.6.0
Show newest version
/******************************************************************************
 *                   Confidential Proprietary                                 *
 *         (c) Copyright Haifeng Li 2011, All Rights Reserved                 *
 ******************************************************************************/

package smile.math.distance;

import java.util.Set;
import java.util.HashSet;

/**
 * The Jaccard index, also known as the Jaccard similarity coefficient, is a
 * statistic used for comparing the similarity and diversity of sample sets.
 *
 * The Jaccard coefficient measures similarity between sample sets, and is
 * defined as the size of the intersection divided by the size of the union
 * of the sample sets.
 *
 * The Jaccard distance, which measures dissimilarity between sample sets,
 * is complementary to the Jaccard coefficient and is obtained by subtracting
 * the Jaccard coefficient from 1, or, equivalently, by dividing the difference
 * of the sizes of the union and the intersection of two sets by the size of
 * the union.
 *
 * @author Haifeng Li
 */
public class JaccardDistance<T> implements Distance<T[]> {
    // NOTE(review): the type parameters are restored on the assumption that
    // Distance is declared as Distance<X> with a method double d(X, X);
    // confirm against the interface, which is not visible from this file.

    /**
     * Constructor.
     */
    public JaccardDistance() {
    }

    /**
     * Returns the human-readable name of this distance.
     *
     * @return the string {@code "Jaccard distance"}.
     */
    @Override
    public String toString() {
        return "Jaccard distance";
    }

    /**
     * Returns the Jaccard distance between two arrays, each interpreted as a
     * set: repeated elements within one array are counted once.
     *
     * @param a the first sample.
     * @param b the second sample.
     * @return {@code 1 - |A intersect B| / |A union B|}, or {@code 0.0} when
     *         both arrays are empty.
     * @throws NullPointerException if {@code a} or {@code b} is null.
     */
    @Override
    public double d(T[] a, T[] b) {
        Set<T> first = new HashSet<T>();
        for (T element : a) {
            first.add(element);
        }

        Set<T> second = new HashSet<T>();
        for (T element : b) {
            second.add(element);
        }

        // Single implementation of the formula lives in the set overload.
        return d(first, second);
    }

    /**
     * Returns the Jaccard distance between sets. Neither argument is modified;
     * the computation works on copies.
     *
     * @param <E> the element type of the sets.
     * @param a the first set.
     * @param b the second set.
     * @return {@code 1 - |a intersect b| / |a union b|}, or {@code 0.0} when
     *         both sets are empty.
     * @throws NullPointerException if {@code a} or {@code b} is null.
     */
    public static <E> double d(Set<E> a, Set<E> b) {
        Set<E> union = new HashSet<E>(a);
        union.addAll(b);

        // Two empty sets are identical, so their distance is zero. Without
        // this guard the division below would be 0.0 / 0 and yield NaN.
        if (union.isEmpty()) {
            return 0.0;
        }

        Set<E> intersection = new HashSet<E>(a);
        intersection.retainAll(b);

        // The cast binds to intersection.size(), forcing floating-point division.
        return 1.0 - (double) intersection.size() / union.size();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy