All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.stat.Sampling Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2010-2020 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of
 * the License, or (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 ******************************************************************************/

package smile.stat;

import java.util.Arrays;
import smile.math.MathEx;
import smile.util.IntSet;

/**
 * Random sampling. Sampling is the selection of a subset of individuals
 * from within a statistical population to estimate characteristics of
 * the whole population.
 *
 * <p>All methods return arrays of sample indices rather than the samples
 * themselves, so they can be used to index into any parallel data arrays.
 *
 * @author Haifeng Li
 */
public interface Sampling {
    /**
     * Random sampling. All samples have an equal probability of being selected.
     *
     * <p>When {@code subsample} is exactly 1.0, {@code n} indices are drawn
     * with replacement (a bootstrap sample). Otherwise
     * {@code round(subsample * n)} indices are taken from the front of a
     * random permutation, i.e. drawn without replacement.
     *
     * @param n the size of samples.
     * @param subsample sampling rate in {@code [0, 1]}. Draw samples with
     *                  replacement if it is 1.0.
     * @return the indices of the selected samples.
     * @throws IllegalArgumentException if {@code n} is negative, or if
     *         {@code subsample} is NaN or outside {@code [0, 1]}.
     */
    static int[] random(int n, double subsample) {
        if (n < 0) {
            throw new IllegalArgumentException("Invalid number of samples: " + n);
        }
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(subsample >= 0.0 && subsample <= 1.0)) {
            throw new IllegalArgumentException("Invalid sampling rate: " + subsample);
        }

        if (subsample == 1.0) {
            // draw with replacement.
            int[] samples = new int[n];
            for (int i = 0; i < n; i++) {
                samples[i] = MathEx.randomInt(n);
            }
            return samples;
        }

        // draw without replacement: keep the first `size` entries of a
        // random permutation. Since subsample <= 1, size never exceeds n,
        // so Arrays.copyOf truncates and never zero-pads.
        int size = (int) Math.round(subsample * n);
        int[] permutation = MathEx.permutate(n);
        return Arrays.copyOf(permutation, size);
    }

    /**
     * Stratified sampling. When the population embraces a number of
     * distinct categories, the frame can be organized by these categories
     * into separate strata. Each stratum is then sampled as an independent
     * sub-population, out of which individual elements can be randomly selected.
     *
     * <p>When {@code subsample} is exactly 1.0, each stratum is resampled
     * with replacement to its original size. Otherwise
     * {@code round(subsample * size)} distinct members are drawn from each
     * stratum. The returned indices are grouped by stratum, in ascending
     * order of the category label.
     *
     * <p>The spelling of the method name is kept as-is so that existing
     * callers continue to compile.
     *
     * @param category the strata labels.
     * @param subsample sampling rate in {@code [0, 1]}. Draw samples with
     *                  replacement if it is 1.0.
     * @return the indices (into {@code category}) of the selected samples.
     * @throws IllegalArgumentException if {@code subsample} is NaN or
     *         outside {@code [0, 1]}.
     */
    static int[] strateified(int[] category, double subsample) {
        // Written as a negated conjunction so that NaN is rejected as well.
        if (!(subsample >= 0.0 && subsample <= 1.0)) {
            throw new IllegalArgumentException("Invalid sampling rate: " + subsample);
        }

        int n = category.length;
        if (n == 0) {
            // No strata at all; the label checks below need at least one label.
            return new int[0];
        }

        int[] unique = MathEx.unique(category);
        int m = unique.length;

        Arrays.sort(unique);
        IntSet encoder = new IntSet(unique);

        // Re-encode the labels to the dense range 0..m-1 unless they already
        // are (sorted distinct labels starting at 0 and ending at m-1).
        int[] y = category;
        if (unique[0] != 0 || unique[m-1] != m-1) {
            y = new int[n];
            for (int i = 0; i < n; i++) {
                y[i] = encoder.indexOf(category[i]);
            }
        }

        // # of samples in each strata
        int[] ni = new int[m];
        for (int label : y) {
            ni[label]++;
        }

        // strata[k] holds the indices of the samples that belong to stratum k.
        int[][] strata = new int[m][];
        for (int i = 0; i < m; i++) {
            strata[i] = new int[ni[i]];
        }

        int[] pos = new int[m];
        for (int i = 0; i < n; i++) {
            int j = y[i];
            strata[j][pos[j]++] = i;
        }

        if (subsample == 1.0) {
            // draw with replacement.
            int[] samples = new int[n];
            int l = 0;
            for (int i = 0; i < m; i++) {
                int[] stratum = strata[i];
                int size = ni[i];
                for (int j = 0; j < size; j++) {
                    samples[l++] = stratum[MathEx.randomInt(size)];
                }
            }
            return samples;
        }

        // draw without replacement.
        // Per-stratum sample sizes, computed once so that the total and the
        // per-stratum loops below cannot disagree.
        int[] sub = new int[m];
        int size = 0;
        for (int i = 0; i < m; i++) {
            sub[i] = (int) Math.round(subsample * ni[i]);
            size += sub[i];
        }

        int[] samples = new int[size];
        int l = 0;
        for (int i = 0; i < m; i++) {
            int[] stratum = strata[i];
            int[] permutation = MathEx.permutate(ni[i]);
            for (int j = 0; j < sub[i]; j++) {
                samples[l++] = stratum[permutation[j]];
            }
        }
        return samples;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy