All downloads are free. The search and download functionality uses the official Maven repository.

cern.jet.stat.tdouble.quantile.DoubleEquiDepthHistogram Maven / Gradle / Ivy

Go to download

Parallel Colt is a multithreaded version of Colt - a library for high performance scientific computing in Java. It contains efficient algorithms for data analysis, linear algebra, multi-dimensional arrays, Fourier transforms, statistics and histogramming.

The newest version!
/*
Copyright (C) 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
is hereby granted without fee, provided that the above copyright notice appear in all copies and 
that both that copyright notice and this permission notice appear in supporting documentation. 
CERN makes no representations about the suitability of this software for any purpose. 
It is provided "as is" without expressed or implied warranty.
 */
package cern.jet.stat.tdouble.quantile;

/**
 * Read-only equi-depth histogram for selectivity estimation. Assume you have
 * collected statistics over a data set, among them a one-dimensional equi-depth
 * histogram (quantiles). Then an applications or DBMS might want to estimate
 * the selectivity of some range query [from,to], i.e. the
 * percentage of data set elements contained in the query range. This class does
 * not collect equi-depth histograms but only space efficiently stores already
 * produced histograms and provides operations for selectivity estimation. Uses
 * linear interpolation.
 * 

* This class stores a list l of double values for which * holds: *

  • Let v be a list of values (sorted ascending) an equi-depth * histogram has been computed over.
  • *
  • Let s=l.length.
  • *
  • Let p=(0, 1/s-1), 2/s-1,..., s-1/s-1=1.0) be a list of the * s percentages.
  • *
  • Then for each * i=0..s-1: l[i] = e : v.contains(e) && v[0],..., v[p[i]*v.length] <= e * .
  • *
  • (In particular: l[0]=min(v)=v[0] and * l[s-1]=max(v)=v[s-1].)
  • * * @author [email protected] * @version 1.0, 09/24/99 */ public class DoubleEquiDepthHistogram extends cern.colt.PersistentObject { /** * */ private static final long serialVersionUID = 1L; protected double[] binBoundaries; /** * Constructs an equi-depth histogram with the given quantile elements. * Quantile elements must be sorted ascending and have the form specified in * the class documentation. */ public DoubleEquiDepthHistogram(double[] quantileElements) { this.binBoundaries = quantileElements; } /** * Returns the bin index of the given element. In other words, returns a * handle to the range the element falls into. * * @param element * the element to search for. * @throws java.lang.IllegalArgumentException * if the element is not contained in any bin. */ public int binOfElement(double element) { int index = java.util.Arrays.binarySearch(binBoundaries, element); if (index >= 0) { // element found. if (index == binBoundaries.length - 1) index--; // last bin is a closed interval. } else { // element not found. index -= -1; // index = -index-1; now index is the insertion // point. if (index == 0 || index == binBoundaries.length) { throw new IllegalArgumentException("Element=" + element + " not contained in any bin."); } index--; } return index; } /** * Returns the number of bins. In other words, returns the number of * subdomains partitioning the entire value domain. */ public int bins() { return binBoundaries.length - 1; } /** * Returns the end of the range associated with the given bin. * * @throws ArrayIndexOutOfBoundsException * if binIndex < 0 || binIndex >= bins(). */ public double endOfBin(int binIndex) { return binBoundaries[binIndex + 1]; } /** * Returns the percentage of elements in the range (from,to]. Does linear * interpolation. * * @param from * the start point (exclusive). * @param to * the end point (inclusive). * @return a number in the closed interval [0.0,1.0]. 
*/ public double percentFromTo(double from, double to) { return phi(to) - phi(from); } /** * Returns how many percent of the elements contained in the receiver are * <= element. Does linear interpolation. * * @param element * the element to search for. * @return a number in the closed interval [0.0,1.0]. */ public double phi(double element) { int size = binBoundaries.length; if (element <= binBoundaries[0]) return 0.0; if (element >= binBoundaries[size - 1]) return 1.0; double binWidth = 1.0 / (size - 1); int index = java.util.Arrays.binarySearch(binBoundaries, element); // int index = new DoubleArrayList(binBoundaries).binarySearch(element); if (index >= 0) { // found return binWidth * index; } // do linear interpolation int insertionPoint = -index - 1; double from = binBoundaries[insertionPoint - 1]; double to = binBoundaries[insertionPoint] - from; double p = (element - from) / to; return binWidth * (p + (insertionPoint - 1)); } /** * @deprecated Deprecated. Returns the number of bin boundaries. */ @Deprecated public int size() { return binBoundaries.length; } /** * Returns the start of the range associated with the given bin. * * @throws ArrayIndexOutOfBoundsException * if binIndex < 0 || binIndex >= bins(). */ public double startOfBin(int binIndex) { return binBoundaries[binIndex]; } /** * Not yet commented. */ public static void test(double element) { double[] quantileElements = { 50.0, 100.0, 200.0, 300.0, 1400.0, 1500.0, 1600.0, 1700.0, 1800.0, 1900.0, 2000.0 }; DoubleEquiDepthHistogram histo = new DoubleEquiDepthHistogram(quantileElements); System.out.println("elem=" + element + ", phi=" + histo.phi(element)); } }




    © 2015 - 2025 Weber Informatics LLC | Privacy Policy