All Downloads are FREE. Search and download functionalities are using the official Maven repository.

apoc.generate.utils.WeightedReservoirSampler Maven / Gradle / Ivy

package apoc.generate.utils;

import java.util.List;
import java.util.ListIterator;
import java.util.Random;
import java.util.Set;

/**
 * Utility to allow for weighted reservoir sampling using A-ES (Algorithm of Efraimidis and Spirakis) algorithm from:
 *
 * Weighted Random Sampling over Data Streams
 * by Pavlos S. Efraimidis
 * (Democritus University of Thrace)
 */
public class WeightedReservoirSampler {
    private final Random random;

    /**
     * Create a new sampler with a certain reservoir size.
     */
    public WeightedReservoirSampler() {
        random = new Random();
    }


    /**
     * Returns an integer at random, weighted according to its index
     * @param weights weights to sample from
     * @return index chosen according to the weight supplied
     */
    public int randomIndexChoice(List weights) {
        int result = 0, index;
        double maxKey = 0.0;
        double u, key;
        int weight;

        for (ListIterator it = weights.listIterator(); it.hasNext(); ) {
            index = it.nextIndex();
            weight = it.next();
            u = random.nextDouble();
            key = Math.pow(u, (1.0/weight)); // Protect from zero division?

            if (key > maxKey) {
                maxKey = key;
                result = index;
            }
        }

        return result;
    }

    /**
     * Returns an integer at random, weighted according to its index,
     * omitting given indices
     * @param weights weights to sample from
     * @param omitIndices indices to omit from sampling
     * @return index chosen according to the weight supplied
     */
    public int randomIndexChoice(List weights, Set omitIndices) {
        int result = 0, index;
        double maxKey = 0.0;
        double u, key;
        int weight;

        for (ListIterator it = weights.listIterator(); it.hasNext(); ) {
            index = it.nextIndex();
            weight = it.next();

            if (omitIndices.contains(index))
                continue;

            u = random.nextDouble();
            key = Math.pow(u, (1.0 / weight));

            if (key > maxKey) {
                maxKey = key;
                result = index;
            }
        }

        return result;
    }


    /**
     * Returns an integer at random, weighted according to its index,
     * omitting a single index.
     * This is very specific override used in the Simple graph
     * algorithm.
     * @param weights list of weights to sample from
     * @param omit index to omit from sampling
     * @return index chosen according to the weight supplied
     */
    public int randomIndexChoice(List weights, int omit) {
        int result = 0, index;
        double maxKey = 0.0;
        double u, key;
        int weight;

        for (ListIterator it = weights.listIterator(); it.hasNext(); ) {
            index = it.nextIndex();
            weight = it.next();

            if (index == omit)
                continue;

            u = random.nextDouble();
            key = Math.pow(u, (1.0/weight));

            if (key > maxKey) {
                maxKey = key;
                result = index;
            }
        }

        return result;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy