All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.plumelib.util.RandomSelector Maven / Gradle / Ivy

There is a newer version: 1.10.0
Show newest version
package org.plumelib.util;

import java.util.ArrayList;
import java.util.List;
import java.util.Random;

/**
 * RandomSelector selects k elements uniformly at random from an arbitrary iterator, using O(k)
 * space. A naive algorithm would use O(n) space. For example, selecting 1 element from a FileStream
 * containing 1000 elements will take O(1) space. The class takes as input the number k during
 * initialization and then can accept() any number of Objects in the future. At any point in time,
 * getValues() will either return k randomly selected elements from the elements previously
 * accepted or, if accept() was called fewer than k times, will return all elements previously
 * accepted.
 *
 * <p>The random selection is independent between every constructed instance of RandomSelector
 * objects, but for the same instance, multiple calls to getValues() are not independent. Making two
 * consecutive calls to getValues() without an accept() in between will return two new Lists
 * containing the same elements.
 *
 * <p>A second mode allows for a fixed probability of randomly keeping each item as opposed to a
 * fixed number of samples.
 *
 * <p><b>SPECFIELDS:</b> <br>
 * values : Set : The values chosen based on the Objects observed <br>
 * observed : int : The number of Objects observed <br>
 * numElts : int : The number of elements to choose ('k' above) <br>
 * keepProbability: double : The percentage of elements to keep <br>
 * selector_mode : {FIXED,PERCENT} : either fixed amount of samples or fixed percent.
 *
 * <p>Example use:
 *
 * <pre>{@code
 * // randomly selects 100 lines of text from a file
 * List<String> selectedLines = null;
 * try {
 *    BufferedReader br = new BufferedReader(new FileReader("myfile.txt"));
 *    RandomSelector<String> selector = new RandomSelector<>(100);
 *    while (br.ready()) {
 *      selector.accept(br.readLine());
 *    }
 *    selectedLines = selector.getValues();
 *  }
 *  catch (IOException e2) { e2.printStackTrace(); }
 * }</pre>
 *
 * @param <T> the type of elements being selected over
 */
public class RandomSelector<T> {

  // Rep Invariant: values != null &&
  //                ((coinTossMode == true && numElts == -1 && observed == -1) ||
  //                 (coinTossMode == false && keepProbability == -1.0
  //                  && values.size() <= max(numElts, 0)))

  // Abstraction Function:
  // 1. for all elements, 'val' of AF(values),
  //    this.values.indexOf (val) != -1
  // 2. AF(observed) = this.observed
  // 3. AF(numElts) = this.numElts
  // 4. AF(keepProbability) = this.keepProbability
  // 5. AF(selector_mode) = fixed percentage if coinTossMode == true
  //                        fixed amount if coinTossMode == false

  /** If true, numElts and observed are -1. If false, keepProbability = -1. */
  private final boolean coinTossMode;

  /** The percentage of elements to keep, or -1.0 in fixed sample mode. */
  private final double keepProbability;

  /** The number of objects to choose, or -1 in coin toss mode. */
  private final int numElts;

  /** The number of objects observed, or -1 in coin toss mode. */
  private int observed;

  /** The Random instance to use (for reproducibility). */
  private final Random generator;

  /** The values chosen. */
  private final List<T> values = new ArrayList<>();

  /**
   * Creates a new RandomSelector in fixed sample mode, using a fresh {@link Random}.
   *
   * @param numElts the number of elements intended to be selected from the input elements
   */
  public RandomSelector(int numElts) {
    this(numElts, new Random());
  }

  /**
   * Creates a new RandomSelector in fixed sample mode.
   *
   * @param numElts the number of elements intended to be selected from the input elements
   * @param r the source of randomness; pass a seeded instance for reproducible selection
   */
  public RandomSelector(int numElts, Random r) {
    this.coinTossMode = false;
    this.keepProbability = -1.0;
    this.numElts = numElts;
    this.observed = 0;
    this.generator = r;
  }

  /**
   * Creates a new RandomSelector in coin toss (fixed probability) mode.
   *
   * @param keepProbability the probability that each element is selected from the oncoming
   *     Iteration
   * @param r the source of randomness; pass a seeded instance for reproducible selection
   */
  public RandomSelector(double keepProbability, Random r) {
    this.coinTossMode = true;
    this.keepProbability = keepProbability;
    this.numElts = -1;
    this.observed = -1;
    this.generator = r;
  }

  /**
   * When in fixed sample mode, increments the number of observed elements i by 1, then with
   * probability k / i, the Object 'next' will be added to the currently selected values 'values'
   * where k is equal to 'numElts'. If 'values' already holds numElts elements, then 'next'
   * replaces one of the existing elements, chosen at random.
   *
   * <p>When in probability mode, adds next to 'values' with probability equal to
   * 'keepProbability'.
   *
   * @param next value to be added to this selector
   */
  public void accept(T next) {
    // In coin toss mode, keep each element independently with probability keepProbability.
    if (coinTossMode) {
      if (generator.nextDouble() < keepProbability) {
        values.add(next);
      }
      return;
    }

    // In fixed sample mode, the i-th element has a k/i chance of being accepted, where k is
    // numElts. nextDouble() is always < 1.0, so the first k elements are always accepted.
    observed++;
    if (generator.nextDouble() < ((double) numElts / observed)) {
      if (values.size() < numElts) {
        values.add(next);
      } else {
        // The reservoir is full: overwrite a uniformly chosen slot.
        // values should be MinLen(1) here, meaning that values.size() is positive.
        @SuppressWarnings("lowerbound:argument") // no list support
        int rem = generator.nextInt(values.size());
        values.set(rem, next);
      }
    }
    // do nothing if the probability condition is not met
  }

  /**
   * Returns values, modifies none.
   *
   * @return a new list containing the currently selected values
   */
  public List<T> getValues() {
    // Return a copy to avoid concurrent modification errors and rep exposure.
    return new ArrayList<>(values);
  }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy