All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.pdf.Histogramm Maven / Gradle / Ivy

/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package de.citec.scie.pdf;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * A convenience implementation for histograms.
 *
 * @author Benjamin Paassen
 * @param <H> the space the histogram is built over. This may be arbitrary
 * objects. Note that the bins must be created by yourself. This class just
 * counts. Also note that this uses a HashMap in the backend, so datapoints/bins
 * that are equal according to their equals method will be mapped to the same
 * bin.
 */
public class Histogramm<H> {

	/** Maps each datapoint/bin to the number of times it was counted. */
	private final Map<H, Integer> backingMap = new HashMap<>();

	/**
	 * Creates an empty histogramm.
	 */
	public Histogramm() {
	}

	/**
	 * Adds all values from another histogramm. Counts of bins that exist in
	 * both histogramms are summed up; bins that only exist in the other
	 * histogramm are copied with their count.
	 *
	 * @param otherHisto another histogramm.
	 */
	public void addAll(final Histogramm<? extends H> otherHisto) {
		for (final Entry<? extends H, Integer> otherEntry
				: otherHisto.backingMap.entrySet()) {
			int newValue = otherEntry.getValue();
			final Integer thisValue = backingMap.get(otherEntry.getKey());
			if (thisValue != null) {
				newValue += thisValue;
			}
			backingMap.put(otherEntry.getKey(), newValue);
		}
	}

	/**
	 * Counts one more occurrence of the given datapoint/bin. If the bin does
	 * not exist yet it is created with a count of one.
	 *
	 * @param datapoint a new datapoint.
	 * @return the old value stored for that datapoint or null if no data
	 * existed before.
	 */
	public Integer addDataPoint(final H datapoint) {
		final int currentNum = getNumber(datapoint);
		return backingMap.put(datapoint, currentNum + 1);
	}

	/**
	 * Returns the current count for a given datapoint/bin.
	 *
	 * @param datapoint a datapoint.
	 * @return the current count for that datapoint, or 0 if it was never
	 * counted.
	 */
	public int getNumber(final H datapoint) {
		final Integer currentNum = backingMap.get(datapoint);
		if (currentNum == null) {
			return 0;
		}
		return currentNum;
	}

	/**
	 * This only works if the keys of this histogramm are numbers. It returns
	 * the average key in this histogramm according to the histogramm, meaning:
	 *
	 * avg := Sum_{(h,n) in Histogramm} h*n / Sum_{(h,n) in Histogramm} n
	 *
	 * Please note that this does a cast to double. Therefore numeric precision
	 * is not guaranteed for BigIntegers and similar classes.
	 *
	 * @return the average key in this histogramm according to the histogramm
	 * (see the formula above), or 0 if the histogramm is empty.
	 * @throws RuntimeException if any key of this histogramm is not a
	 * {@link Number}.
	 */
	public double getAverage() {
		if (backingMap.isEmpty()) {
			return 0;
		}
		double weightedSum = 0;
		// long, so that the total count cannot overflow even if the
		// individual int counts add up to more than Integer.MAX_VALUE.
		long normalization = 0;
		for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
			final H key = mapEntry.getKey();
			// Check every key (not just the first one), so that mixed key
			// types fail with a clear message instead of a ClassCastException.
			if (!(key instanceof Number)) {
				throw new RuntimeException(
						"Attempted to calculate average for non-numeric class!");
			}
			final double doubleKey = ((Number) key).doubleValue();
			final int occs = mapEntry.getValue();
			weightedSum += occs * doubleKey;
			normalization += occs;
		}
		return weightedSum / normalization;
	}

	/**
	 * Returns the backing map. Usually you won't need this. Note that this is
	 * the live map and not a copy: changes to the returned map change this
	 * histogramm.
	 *
	 * @return the backing map.
	 */
	public Map<H, Integer> getBackingMap() {
		return backingMap;
	}

	/**
	 * Returns the element that was counted the most. If several elements
	 * share the highest count, the one that is encountered first while
	 * iterating the backing map is returned.
	 *
	 * @return the element that was counted the most, or null if no element
	 * has a count greater than zero (e.g. if the histogramm is empty).
	 */
	public H getMaxElement() {
		int currentMax = 0;
		H currentMaxElem = null;
		for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
			final int occs = mapEntry.getValue();
			if (occs > currentMax) {
				currentMaxElem = mapEntry.getKey();
				currentMax = occs;
			}
		}
		return currentMaxElem;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy