![JAR search and dependency download from the Maven repository](/logo.png)
smile.plot.swing.BoxPlot Maven / Gradle / Ivy
/*
* Copyright (c) 2010-2021 Haifeng Li. All rights reserved.
*
* Smile is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Smile is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Smile. If not, see <https://www.gnu.org/licenses/>.
*/
package smile.plot.swing;
import java.util.Arrays;
import java.util.Optional;
import smile.math.MathEx;
/**
* A boxplot is a convenient way of graphically depicting groups of numerical
* data through their five-number summaries: the smallest observation
* (sample minimum), lower quartile (Q1), median (Q2), upper quartile (Q3),
* and largest observation (sample maximum). A boxplot may also indicate
* which observations, if any, might be considered outliers.
*
* Boxplots can be useful to display differences between populations without
* making any assumptions of the underlying statistical distribution: they are
* non-parametric. The spacings between the different parts of the box help
* indicate the degree of dispersion (spread) and skewness in the data, and
* identify outliers.
*
* For a data set, we construct a boxplot in the following manner:
*
* <ul>
* <li>Calculate the first quartile q1, the median q2 and the third
* quartile q3.</li>
* <li>Calculate the interquartile range (IQR) by subtracting the first
* quartile from the third quartile (q3 - q1).</li>
* <li>Construct a box above the number line bounded on the bottom by the first
* quartile (q1) and on the top by the third quartile (q3).</li>
* <li>Indicate where the median lies inside of the box with the presence of
* a line dividing the box at the median value.</li>
* <li>Any data observation which lies more than 1.5*IQR lower than the first
* quartile or 1.5*IQR higher than the third quartile is considered an outlier.
* Indicate where the smallest value that is not an outlier is by connecting it
* to the box with a horizontal line or "whisker". Optionally, also mark the
* position of this value more clearly using a small vertical line. Likewise,
* connect the largest value that is not an outlier to the box by a "whisker"
* (and optionally mark it with another small vertical line).</li>
* <li>Indicate outliers by dots.</li>
* </ul>
*
*
* @author Haifeng Li
*/
public class BoxPlot extends Plot {
/**
* Tooltip format string.
*/
private static String format = "Median %g Q1 %g Q3 %g
";
/**
* The input data. Each row is a variable.
*/
private double[][] data;
/**
* The label of each variable.
*/
private String[] labels;
/**
* The quantiles of data.
*/
private double[][] quantiles;
/**
* Constructor.
* @param data the input dataset of which each row is a set of samples
* and will have a corresponding box plot.
*/
public BoxPlot(double[][] data, String[] labels) {
if (labels != null && labels.length != data.length) {
throw new IllegalArgumentException("Data size and label size don't match.");
}
this.data = data;
this.labels = labels;
// Calculate quantiles.
quantiles = new double[data.length][8];
for (int i = 0; i < data.length; i++) {
int n = data[i].length;
Arrays.sort(data[i]);
quantiles[i][1] = data[i][n / 4];
quantiles[i][2] = data[i][n / 2];
quantiles[i][3] = data[i][3 * n / 4];
quantiles[i][5] = quantiles[i][3] - quantiles[i][1]; // interquartile range
quantiles[i][6] = quantiles[i][1] - 1.5 * quantiles[i][5];
quantiles[i][7] = quantiles[i][3] + 1.5 * quantiles[i][5];
quantiles[i][0] = quantiles[i][6] < data[i][0] ? data[i][0] : quantiles[i][6];
quantiles[i][4] = quantiles[i][7] > data[i][data[i].length - 1] ? data[i][data[i].length - 1] : quantiles[i][7];
}
}
@Override
public Optional tooltip(double[] coord) {
String tooltip = null;
for (int i = 0; i < data.length; i++) {
if (coord[0] < i + 0.8 && coord[0] > i + 0.2 && coord[1] < quantiles[i][3] && coord[1] > quantiles[i][1]) {
tooltip = String.format(format, quantiles[i][2], quantiles[i][1], quantiles[i][3]);
break;
}
}
return Optional.ofNullable(tooltip);
}
@Override
public double[] getLowerBound() {
double[] bound = {0, MathEx.min(data)};
return bound;
}
@Override
public double[] getUpperBound() {
double[] bound = {data.length, MathEx.max(data)};
return bound;
}
@Override
public void paint(Graphics g) {
g.setColor(color);
double[] start = new double[2];
double[] end = new double[2];
for (int i = 0; i < data.length; i++) {
start[0] = i + 0.4;
start[1] = quantiles[i][0];
end[0] = i + 0.6;
end[1] = quantiles[i][0];
g.drawLine(start, end);
start[0] = i + 0.4;
start[1] = quantiles[i][4];
end[0] = i + 0.6;
end[1] = quantiles[i][4];
g.drawLine(start, end);
start[0] = i + 0.2;
start[1] = quantiles[i][2];
end[0] = i + 0.8;
end[1] = quantiles[i][2];
g.drawLine(start, end);
start[0] = i + 0.5;
start[1] = quantiles[i][0];
end[0] = i + 0.5;
end[1] = quantiles[i][1];
g.drawLine(start, end);
start[0] = i + 0.5;
start[1] = quantiles[i][4];
end[0] = i + 0.5;
end[1] = quantiles[i][3];
g.drawLine(start, end);
start[0] = i + 0.2;
start[1] = quantiles[i][3];
end[0] = i + 0.8;
end[1] = quantiles[i][1];
g.drawRect(start, end);
start[0] = i + 0.5;
for (int j = 0; j < data[i].length; j++) {
if (data[i][j] < quantiles[i][6] || data[i][j] > quantiles[i][7]) {
start[1] = data[i][j];
g.drawPoint('o', start);
}
}
}
}
@Override
public Canvas canvas() {
double[] lowerBound = getLowerBound();
double[] upperBound = getUpperBound();
Canvas canvas = new Canvas(lowerBound, upperBound);
canvas.add(this);
canvas.getAxis(0).setGridVisible(false);
if (labels != null) {
int k = labels.length;
double[] locations = new double[k];
for (int i = 0; i < k; i++) {
locations[i] = i + 0.5;
}
canvas.getAxis(0).setTicks(labels, locations);
if (k > 10) {
canvas.getAxis(0).setRotation(-Math.PI / 2);
}
} else {
canvas.getAxis(0).setTickVisible(false);
}
return canvas;
}
/**
* Create a plot canvas with multiple box plots of given data.
* @param data a data matrix of which each row will create a box plot.
*/
public static BoxPlot of(double[]... data) {
return new BoxPlot(data, null);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy