![JAR search and dependency download from the Maven repository](/logo.png)
edu.berkeley.nlp.util.Histogram Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of berkeleyparser Show documentation
Show all versions of berkeleyparser Show documentation
The Berkeley parser analyzes the grammatical structure of natural language using probabilistic context-free grammars (PCFGs).
The newest version!
package edu.berkeley.nlp.util;
import java.io.BufferedWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.io.StringWriter;
import java.util.ArrayList;
import java.util.List;
/**
 * A simple histogram class. It accumulates raw {@code double} values and can
 * render them as per-bin counts together with underflow and overflow totals.
 *
 * <p>Values are only stored when added; they are sorted into bins each time
 * the histogram is rendered through {@link #write(PrintWriter)} or
 * {@link #toString()}. Unless buckets were configured explicitly with one of
 * the {@code setBuckets}/{@code setLogBuckets} methods, equal-width buckets
 * spanning the current data range are recomputed on every render.
 *
 * @author Simon George
 * @version 1.0 31 Aug 2001
 *
 * Extended by John DeNero
 */
public class Histogram {
    // Kept from the original source; this class does not declare Serializable.
    private static final long serialVersionUID = 1L;
    private static final int DEFAULT_NUM_BINS = 10;

    /** Bin count used for automatically derived buckets; shared by all instances. */
    private static int currentNumBins = DEFAULT_NUM_BINS;

    /** True only while explicitly configured buckets are in effect. */
    private boolean binsHaveBeenSet;

    /** Every value added so far, in insertion order. */
    private final List<Double> data;

    // State below is rebuilt by setBuckets(...) and fillHistogram().
    private int[] binCounts;
    private String title;
    private double minValue;
    private double maxValue;
    private int numBins;
    private double[] binLowerBounds, binUpperBounds;
    private int entryCount;
    // Kept as double so the printed "Over:"/"Under:" lines keep their
    // existing format (for example "3.0").
    private double overflowCount;
    private double underflowCount;

    /** Creates an empty histogram titled {@code "Histogram"}. */
    public Histogram() {
        this("Histogram");
    }

    /**
     * Creates an empty histogram.
     *
     * @param title
     *            first line printed when the histogram is rendered
     */
    public Histogram(String title) {
        this.title = title;
        data = new ArrayList<Double>();
    }

    /**
     * Builds a histogram containing one value per key of {@code counter}: the
     * count stored for that key.
     *
     * @param counter
     *            source of the counts
     * @return a new histogram holding every count found in {@code counter}
     */
    public static <T> Histogram histogramOfCounts(Counter<T> counter) {
        Histogram h = new Histogram();
        for (T key : counter.keySet()) {
            h.add(counter.getCount(key));
        }
        return h;
    }

    /**
     * Builds a histogram in which every key of {@code counter} is added once
     * per unit of its count (a fractional count is rounded up, because the
     * key is added while the loop index is still below the count).
     *
     * @param counter
     *            maps each value to the number of times it should be added
     * @return a new histogram of the repeated key values
     */
    public static Histogram histogramOfValues(Counter<Double> counter) {
        Histogram h = new Histogram();
        for (Double value : counter.keySet()) {
            double count = counter.getCount(value);
            for (int i = 0; i < count; i++) {
                h.add(value);
            }
        }
        return h;
    }

    /**
     * Records one value. Binning is deferred until the histogram is rendered.
     *
     * @param value
     *            the value to record
     */
    public void add(double value) {
        data.add(value);
    }

    /**
     * Enter data into the histogram. Works out which bin the value belongs to
     * and increments that bin, or the underflow/overflow total when the value
     * lies outside the configured range.
     *
     * @param x
     *            is the value to add in to the histogram
     */
    private void fill(double x) {
        BinInfo bin = findBin(x);
        if (bin.isUnderflow) {
            underflowCount++;
        }
        if (bin.isOverflow) {
            overflowCount++;
        }
        if (bin.isInRange) {
            binCounts[bin.index]++;
        }
        // Every call counts as an entry, including underflow and overflow.
        entryCount++;
    }

    /** Result of locating a value relative to the configured bins. */
    private static final class BinInfo {
        public int index;
        public boolean isUnderflow;
        public boolean isOverflow;
        public boolean isInRange;
    }

    /**
     * Private internal utility method to figure out which bin of the histogram
     * a number falls in.
     *
     * @param x
     *            the value to locate
     * @return info on which bin x falls in.
     */
    private BinInfo findBin(double x) {
        BinInfo bin = new BinInfo();
        bin.isInRange = false;
        bin.isUnderflow = false;
        bin.isOverflow = false;
        if (x < minValue) {
            bin.isUnderflow = true;
        } else if (x > maxValue) {
            bin.isOverflow = true;
        } else {
            bin.isInRange = true;
            // The first bin whose upper bound exceeds x wins; lower bounds
            // are not consulted here.
            for (int i = 0; i < numBins; i++) {
                if (x < binUpperBounds[i]) {
                    bin.index = i;
                    break;
                }
            }
            // The last bin is closed on the right, so the maximum belongs to it.
            if (x == maxValue) {
                bin.index = numBins - 1;
            }
        }
        return bin;
    }

    /**
     * Writes the histogram as simple, human-readable, tab-separated text: the
     * title, the bin count, the range, the number of entries, any
     * overflow/underflow totals, and one line per bin.
     *
     * <p>The writer is closed once the histogram has been written.
     *
     * @param outfile
     *            destination for the text; closed by this method
     */
    public void write(PrintWriter outfile) {
        setBuckets();
        fillHistogram();
        writeToPrintWriter(outfile);
    }

    /**
     * Prints the already-binned histogram to {@code outfile} and closes it.
     *
     * @param outfile
     *            destination for the text; closed by this method
     */
    private void writeToPrintWriter(PrintWriter outfile) {
        outfile.println(title);
        outfile.println("Bins:\t" + numBins);
        outfile.println("Min:\t" + minValue);
        outfile.println("Max:\t" + maxValue);
        outfile.println("Entries:\t" + entryCount);
        if (overflowCount > 0) {
            outfile.println("Over:\t" + overflowCount);
        }
        if (underflowCount > 0) {
            outfile.println("Under:\t" + underflowCount);
        }
        for (int i = 0; i < numBins; i++) {
            String l = String.format("%.2f", binLowerBounds[i]);
            String u = String.format("%.2f", binUpperBounds[i]);
            outfile.print("[" + l + ", " + u);
            // Only the last bin includes its upper bound.
            if (numBins - 1 != i) {
                outfile.print(")");
            } else {
                outfile.print("]");
            }
            outfile.println(":\t" + binCounts[i]);
        }
        outfile.close();
    }

    /**
     * Renders the histogram in the same text format as
     * {@link #write(PrintWriter)}.
     *
     * @return the rendered histogram
     */
    @Override
    public String toString() {
        setBuckets();
        fillHistogram();
        StringWriter s = new StringWriter();
        PrintWriter pw = new PrintWriter(new BufferedWriter(s));
        // Closing the writer inside writeToPrintWriter flushes the buffer into s.
        writeToPrintWriter(pw);
        return s.getBuffer().toString();
    }

    /** Resets all counts and re-bins every stored value against the current buckets. */
    private void fillHistogram() {
        entryCount = 0;
        overflowCount = 0;
        underflowCount = 0;
        binCounts = new int[numBins];
        for (double d : data) {
            fill(d);
        }
    }

    /** Lower end of the automatic range; 0 when no data has been added. */
    private double rangeMin() {
        return data.isEmpty() ? 0.0 : getMin();
    }

    /** Upper end of the automatic range; 0 when no data has been added. */
    private double rangeMax() {
        return data.isEmpty() ? 0.0 : getMax();
    }

    /** Derives automatic buckets using the shared default bin count. */
    private void setBuckets() {
        setBuckets(currentNumBins);
    }

    /**
     * Derives equal-width buckets over the current data range, unless buckets
     * were configured explicitly.
     *
     * @param numBins
     *            number of buckets to create
     */
    private void setBuckets(int numBins) {
        if (!binsHaveBeenSet) {
            // An empty data set has min = +Infinity and max = -Infinity, which
            // would turn every bound into NaN; rangeMin/rangeMax fall back to
            // an empty range at zero instead.
            setBuckets(numBins, rangeMin(), rangeMax());
            // The call above flags the buckets as explicitly set. Clear the
            // flag again so automatic buckets keep following the data on
            // later renders.
            binsHaveBeenSet = false;
        }
    }

    /**
     * Configures {@code numBins} equal-width buckets covering
     * {@code [min, max]}. Values outside that range are reported as underflow
     * or overflow.
     *
     * @param numBins
     *            number of buckets; must be positive
     * @param min
     *            lower bound of the first bucket
     * @param max
     *            upper bound (inclusive) of the last bucket
     * @throws IllegalArgumentException
     *             if {@code numBins} is not positive
     */
    public void setBuckets(int numBins, double min, double max) {
        if (numBins <= 0) {
            throw new IllegalArgumentException("numBins must be positive: " + numBins);
        }
        double[] lowers = new double[numBins];
        double step = (max - min) / (numBins);
        for (int i = 0; i < numBins; i++) {
            lowers[i] = min + i * step;
        }
        setBuckets(lowers, min, max);
    }

    /**
     * Configures buckets from their lower bounds, with no upper limit on the
     * last bucket.
     *
     * @param lowers
     *            lower bound of each bucket; the first one is also the minimum
     */
    private void setBuckets(double[] lowers) {
        setBuckets(lowers, lowers[0], Double.POSITIVE_INFINITY);
    }

    /**
     * Configures buckets from explicit lower bounds. Each bucket ends where
     * the next one starts, and the last bucket ends at {@code max}.
     *
     * @param binLowerBounds
     *            lower bound of each bucket; must not be empty. The array is
     *            copied.
     * @param min
     *            smallest in-range value; expected to equal
     *            {@code binLowerBounds[0]} (checked with an {@code assert})
     * @param max
     *            largest in-range value
     * @throws IllegalArgumentException
     *             if {@code binLowerBounds} is empty
     */
    public void setBuckets(double[] binLowerBounds, double min, double max) {
        if (binLowerBounds.length == 0) {
            throw new IllegalArgumentException("binLowerBounds must not be empty");
        }
        assert (min == binLowerBounds[0]);
        numBins = binLowerBounds.length;
        // Copy so later changes to the caller's array cannot desynchronise the
        // lower bounds from the upper bounds computed below.
        this.binLowerBounds = binLowerBounds.clone();
        minValue = min;
        maxValue = max;
        updateBinUpperBounds();
        binsHaveBeenSet = true;
    }

    /** Recomputes each bucket's upper bound from the next lower bound and the maximum. */
    private void updateBinUpperBounds() {
        binUpperBounds = new double[numBins];
        for (int i = 0; i < numBins - 1; i++) {
            binUpperBounds[i] = binLowerBounds[i + 1];
        }
        binUpperBounds[numBins - 1] = maxValue;
    }

    /**
     * @return the largest value added so far, or
     *         {@link Double#NEGATIVE_INFINITY} if nothing has been added
     */
    public double getMax() {
        double max = Double.NEGATIVE_INFINITY;
        for (double d : data) {
            max = Math.max(max, d);
        }
        return max;
    }

    /**
     * @return the smallest value added so far, or
     *         {@link Double#POSITIVE_INFINITY} if nothing has been added
     */
    public double getMin() {
        double min = Double.POSITIVE_INFINITY;
        for (double d : data) {
            min = Math.min(min, d);
        }
        return min;
    }

    /** Small demonstration: prints one data set with automatic, explicit and logarithmic buckets. */
    public static void main(String[] args) {
        Histogram h = new Histogram();
        for (double i = 1; i < 43400; i *= 1.2) {
            h.add(i);
        }
        System.out.println(h);
        double[] lowers = new double[3];
        lowers[0] = 0;
        lowers[1] = 100;
        lowers[2] = 1000;
        h.setBuckets(lowers);
        System.out.println(h);
        h.setLogBuckets(10);
        System.out.println(h);
    }

    /**
     * Configures logarithmically spaced buckets over the current data range
     * (an empty histogram uses the range [0, 0]).
     *
     * @param numBuckets
     *            number of buckets; must be positive
     * @throws IllegalArgumentException
     *             if {@code numBuckets} is not positive
     */
    public void setLogBuckets(int numBuckets) {
        setLogBuckets(numBuckets, rangeMin(), rangeMax());
    }

    /**
     * Configures logarithmically spaced buckets covering {@code [min, max]}:
     * bucket {@code i} starts at {@code (min - 1) + step^i}, where
     * {@code step} is the {@code numBuckets}-th root of {@code max - min + 1}.
     *
     * @param numBuckets
     *            number of buckets; must be positive
     * @param min
     *            lower bound of the first bucket
     * @param max
     *            upper bound (inclusive) of the last bucket
     * @throws IllegalArgumentException
     *             if {@code numBuckets} is not positive
     */
    public void setLogBuckets(int numBuckets, double min, double max) {
        if (numBuckets <= 0) {
            throw new IllegalArgumentException("numBuckets must be positive: " + numBuckets);
        }
        double step = Math.pow(max - min + 1, 1.0 / numBuckets);
        double[] lowers = new double[numBuckets];
        // (min - 1) + 1 is not exactly min for most fractional values, which
        // would break the min == lowers[0] invariant; pin the first bound.
        lowers[0] = min;
        for (int i = 1; i < numBuckets; i++) {
            lowers[i] = (min - 1) + Math.pow(step, i);
        }
        setBuckets(lowers, min, max);
    }

    /**
     * Replaces the title printed on the first line of the rendered histogram.
     *
     * @param t
     *            the new title
     */
    public void setTitle(String t) {
        title = t;
    }

    /**
     * @return the bin count used for automatically derived buckets
     */
    public static int getNumBins() {
        return currentNumBins;
    }

    /**
     * Sets the bin count used for automatically derived buckets. The value is
     * static, so it applies to every histogram.
     *
     * @param currentNumBins
     *            the new default number of bins
     */
    public static void setNumBins(int currentNumBins) {
        Histogram.currentNumBins = currentNumBins;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy