All Downloads are FREE. Search and download functionalities are using the official Maven repository.

smile.plot.swing.Histogram Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
 *
 * Smile is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * Smile is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Smile.  If not, see <https://www.gnu.org/licenses/>.
 */

package smile.plot.swing;

import java.awt.Color;

/**
 * A histogram is a graphical display of tabulated frequencies, shown as bars.
 * It shows what proportion of cases fall into each of several categories:
 * it is a form of data binning. The categories are usually specified as
 * non-overlapping intervals of some variable. The categories (bars) must
 * be adjacent. The intervals (or bands, or bins) are generally of the same
 * size, and are most easily interpreted if they are.
 * 
 * @author Haifeng Li
 */
public class Histogram {
    /** The bar color used by the overloads that do not take a color. */
    private static final Color DEFAULT_COLOR = Color.BLUE;

    /**
     * Creates a histogram plot of integer data.
     * The number of bins will be determined by square-root rule
     * and the y-axis will be in the probability scale.
     * @param data a sample set.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(int[] data) {
        return of(data, smile.math.Histogram.bins(data.length), true);
    }

    /**
     * Creates a histogram plot of integer data with blue bars.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(int[] data, int k, boolean prob) {
        return of(data, k, prob, DEFAULT_COLOR);
    }

    /**
     * Creates a histogram plot of integer data.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @param color the color of the bars.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(int[] data, int k, boolean prob, Color color) {
        double[][] hist = smile.math.Histogram.of(data, k);
        // The number of bins may be extended to cover all data,
        // so the bin count is taken from the result rather than from k.
        return plot(hist, hist[0].length, data.length, prob, color);
    }

    /**
     * Creates a histogram plot of integer data with blue bars.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the bar plot of the histogram.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public static BarPlot of(int[] data, double[] breaks, boolean prob) {
        return of(data, breaks, prob, DEFAULT_COLOR);
    }

    /**
     * Creates a histogram plot of integer data.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @param color the color of the bars.
     * @return the bar plot of the histogram.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public static BarPlot of(int[] data, double[] breaks, boolean prob, Color color) {
        int k = checkBins(breaks);
        double[][] hist = smile.math.Histogram.of(data, breaks);
        return plot(hist, k, data.length, prob, color);
    }

    /**
     * Creates a histogram plot of real-valued data.
     * The number of bins will be determined by square-root rule
     * and the y-axis will be in the probability scale.
     * @param data a sample set.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(double[] data) {
        return of(data, smile.math.Histogram.bins(data.length), true);
    }

    /**
     * Creates a histogram plot of real-valued data with blue bars.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(double[] data, int k, boolean prob) {
        return of(data, k, prob, DEFAULT_COLOR);
    }

    /**
     * Creates a histogram plot of real-valued data.
     * @param data a sample set.
     * @param k the number of bins.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @param color the color of the bars.
     * @return the bar plot of the histogram.
     */
    public static BarPlot of(double[] data, int k, boolean prob, Color color) {
        double[][] hist = smile.math.Histogram.of(data, k);
        // The number of bins may be extended to cover all data,
        // so the bin count is taken from the result rather than from k.
        return plot(hist, hist[0].length, data.length, prob, color);
    }

    /**
     * Creates a histogram plot of real-valued data with blue bars.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @return the bar plot of the histogram.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public static BarPlot of(double[] data, double[] breaks, boolean prob) {
        return of(data, breaks, prob, DEFAULT_COLOR);
    }

    /**
     * Creates a histogram plot of real-valued data.
     * @param data a sample set.
     * @param breaks an array of size k+1 giving the breakpoints between
     * histogram cells. Must be in ascending order.
     * @param prob if true, the y-axis will be in the probability scale.
     * Otherwise, y-axis will be in the frequency scale.
     * @param color the color of the bars.
     * @return the bar plot of the histogram.
     * @throws IllegalArgumentException if breaks defines fewer than two bins.
     */
    public static BarPlot of(double[] data, double[] breaks, boolean prob, Color color) {
        int k = checkBins(breaks);
        double[][] hist = smile.math.Histogram.of(data, breaks);
        return plot(hist, k, data.length, prob, color);
    }

    /**
     * Validates the breakpoints and returns the number of bins they define.
     * @param breaks the breakpoints between histogram cells.
     * @return the number of bins, i.e. {@code breaks.length - 1}.
     * @throws IllegalArgumentException if there are fewer than two bins.
     */
    private static int checkBins(double[] breaks) {
        int k = breaks.length - 1;
        if (k <= 1) {
            throw new IllegalArgumentException("Invalid number of bins: " + k);
        }
        return k;
    }

    /**
     * Converts a computed histogram into a bar plot.
     * Each bar is placed at the midpoint of rows 0 and 1 of the histogram
     * (used here as the two bounds of a bin) and its height is taken from
     * row 2 (used here as the count of the bin).
     * @param hist the histogram as returned by {@code smile.math.Histogram.of}.
     * @param k the number of bins to plot.
     * @param n the sample size, used to scale counts to probabilities.
     * @param prob if true, bar heights are divided by the sample size.
     * @param color the color of the bars.
     * @return the bar plot of the histogram.
     */
    private static BarPlot plot(double[][] hist, int k, int n, boolean prob, Color color) {
        // Skip the scaling for an empty sample: dividing by zero would
        // turn every bar height into NaN.
        boolean normalize = prob && n > 0;

        double[][] freq = new double[k][2];
        for (int i = 0; i < k; i++) {
            freq[i][0] = (hist[0][i] + hist[1][i]) / 2.0;
            freq[i][1] = normalize ? hist[2][i] / n : hist[2][i];
        }

        return new BarPlot(new Bar(freq, width(freq), color));
    }

    /**
     * Calculates the width of bins as the smallest distance between
     * the centers of two consecutive bins.
     * @param freq the bars, with the bin center in column 0.
     * @return the smallest center-to-center distance, or
     * {@code Double.MAX_VALUE} if there are fewer than two bins.
     */
    private static double width(double[][] freq) {
        double width = Double.MAX_VALUE;
        for (int i = 1; i < freq.length; i++) {
            double w = Math.abs(freq[i][0] - freq[i - 1][0]);
            if (width > w) {
                width = w;
            }
        }
        return width;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy