// de.citec.scie.pdf.Histogramm Maven / Gradle / Ivy
/*
* SCIE -- Spinal Cord Injury Information Extraction
* Copyright (C) 2013, 2014
* Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package de.citec.scie.pdf;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
/**
 * A convenience implementation for histograms.
 *
 * <p>The bins must be created by the caller; this class only counts how often
 * each datapoint/bin was added. Counts are kept in a {@link HashMap}, so
 * datapoints/bins that are equal according to their {@code equals} method
 * (and {@code hashCode}) are mapped to the same bin.
 *
 * @author Benjamin Paassen
 * @param <H> the space the histogram is built over. This may be arbitrary
 * objects.
 */
public class Histogramm<H> {

    /** Maps each datapoint/bin to the number of times it has been counted. */
    private final Map<H, Integer> backingMap = new HashMap<>();

    /**
     * Creates an empty histogram.
     */
    public Histogramm() {
    }

    /**
     * Adds all counts from another histogram to this one. Bins that exist in
     * both histograms have their counts summed; bins that only exist in the
     * other histogram are created here. The other histogram is not modified.
     *
     * @param otherHisto another histogram.
     */
    public void addAll(final Histogramm<? extends H> otherHisto) {
        for (final Entry<? extends H, Integer> otherEntry
                : otherHisto.backingMap.entrySet()) {
            int newValue = otherEntry.getValue();
            final Integer thisValue = backingMap.get(otherEntry.getKey());
            if (thisValue != null) {
                newValue += thisValue;
            }
            // Overwriting an existing key is not a structural modification,
            // so this is safe even if otherHisto is this very histogram.
            backingMap.put(otherEntry.getKey(), newValue);
        }
    }

    /**
     * Counts one occurrence of the given datapoint/bin, creating the bin with
     * a count of one if it did not exist before.
     *
     * @param datapoint the datapoint to count.
     * @return the count stored for that datapoint before this call, or null
     * if no data existed before.
     */
    public Integer addDataPoint(final H datapoint) {
        final int currentNum = getNumber(datapoint);
        return backingMap.put(datapoint, currentNum + 1);
    }

    /**
     * Returns the current count for a given datapoint/bin.
     *
     * @param datapoint a datapoint.
     * @return the current count for that datapoint, or 0 if it was never
     * counted.
     */
    public int getNumber(final H datapoint) {
        final Integer currentNum = backingMap.get(datapoint);
        if (currentNum == null) {
            return 0;
        }
        return currentNum;
    }

    /**
     * Returns the average key of this histogram, weighted by the counts. This
     * only works if all keys are {@link Number}s. The result is:
     *
     * <pre>
     * avg := Sum_{(h,n) in Histogramm} h*n / Sum_{(h,n) in Histogramm} n
     * </pre>
     *
     * Please note that every key is converted to double. Therefore numeric
     * precision is not guaranteed for BigIntegers and similar classes.
     *
     * @return the count-weighted average key, or 0 if this histogram is
     * empty.
     * @throws RuntimeException if any key in this histogram is not a
     * {@link Number}.
     */
    public double getAverage() {
        if (backingMap.isEmpty()) {
            return 0;
        }
        double weightedSum = 0;
        // Accumulated as long so that the total number of occurrences cannot
        // overflow even when the individual int counts are large.
        long normalization = 0;
        for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
            final H key = mapEntry.getKey();
            // Every key is checked (not only the first one), so a histogram
            // with mixed key types fails with the documented exception
            // instead of a bare ClassCastException.
            if (!(key instanceof Number)) {
                throw new RuntimeException(
                        "Attempted to calculate average for non-numeric class!");
            }
            final double doubleKey = ((Number) key).doubleValue();
            final int occs = mapEntry.getValue();
            weightedSum += occs * doubleKey;
            normalization += occs;
        }
        return weightedSum / normalization;
    }

    /**
     * Returns the backing map. Usually you won't need this. The returned map
     * is the live internal map, not a copy: changes made to it directly
     * affect this histogram.
     *
     * @return the backing map from datapoints/bins to their counts.
     */
    public Map<H, Integer> getBackingMap() {
        return backingMap;
    }

    /**
     * Returns the element that was counted the most. If several elements
     * share the highest count, which one of them is returned depends on the
     * iteration order of the backing map.
     *
     * @return the element that was counted the most, or null if this
     * histogram contains no element with a positive count.
     */
    public H getMaxElement() {
        int currentMax = 0;
        H currentMaxElem = null;
        for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
            final int count = mapEntry.getValue();
            if (count > currentMax) {
                currentMaxElem = mapEntry.getKey();
                currentMax = count;
            }
        }
        return currentMaxElem;
    }
}