All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.citec.scie.util.Histogramm Maven / Gradle / Ivy

Go to download

Contains the SCIE main application and the CLI interface. This project integrates the named entity recognition (NER), the PDF import and the classification and interfaces with the UIMA framework. The command line interface can be used to produce a set of UIMA XCAS files.

There is a newer version: 2.0.1
Show newest version
/*
 * SCIE -- Spinal Cord Injury Information Extraction
 * Copyright (C) 2013, 2014
 * Raphael Dickfelder, Jan Göpfert, Benjamin Paaßen, Andreas Stöckel
 * 
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 * 
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

package de.citec.scie.util;

import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;

/**
 * A convenience implementation for histograms: it counts how often each data
 * point has been added.
 *
 * Counts are stored in a plain {@link HashMap} and no synchronization is
 * performed by this class.
 *
 * @param <H> the type of the data points that are counted; it is used as the
 * key type of the backing hash map.
 *
 * @author Benjamin Paassen - [email protected]
 */
public class Histogramm<H> {

	/** Maps each data point to the number of times it has been counted. */
	private final Map<H, Integer> backingMap = new HashMap<>();

	/**
	 * Creates an empty histogram.
	 */
	public Histogramm() {
	}

	/**
	 * Adds all counts of the given histogram to this histogram. The given
	 * histogram itself is not modified.
	 *
	 * @param otherHisto the histogram whose counts are added to this one.
	 */
	public void addAll(final Histogramm<H> otherHisto) {
		for (final Entry<H, Integer> otherEntry : otherHisto.backingMap.entrySet()) {
			final H key = otherEntry.getKey();
			// getNumber yields 0 for keys that are not present yet.
			backingMap.put(key, getNumber(key) + otherEntry.getValue());
		}
	}

	/**
	 * Counts the given data point once.
	 *
	 * @param datapoint the data point whose count is increased by one.
	 */
	public void addDataPoint(final H datapoint) {
		backingMap.put(datapoint, getNumber(datapoint) + 1);
	}

	/**
	 * Returns how often the given data point has been counted.
	 *
	 * @param datapoint the data point to look up.
	 * @return the count stored for the data point, or 0 if there is none.
	 */
	public int getNumber(final H datapoint) {
		final Integer currentNum = backingMap.get(datapoint);
		return currentNum == null ? 0 : currentNum;
	}

	/**
	 * This only works if the keys of this histogram are numbers. It returns
	 * the average key weighted by the stored counts, meaning:
	 *
	 * avg := Sum_{(h,n) in Histogramm} h*n / Sum_{(h,n) in Histogramm} n
	 *
	 * Please note that this converts every key to double. Therefore numeric
	 * precision is not guaranteed for BigIntegers and similar classes.
	 *
	 * @return the weighted average key as defined above, or 0 if this
	 * histogram is empty.
	 * @throws RuntimeException if any key of this histogram is not a
	 * {@link Number}.
	 */
	public double getAverage() {
		if (backingMap.isEmpty()) {
			return 0;
		}
		double weightedSum = 0;
		// long, so that the sum of many int counts cannot overflow.
		long normalization = 0;
		for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
			final H key = mapEntry.getKey();
			// Check every key (not only the first one) so that a mixed or
			// null key fails with the documented exception.
			if (!(key instanceof Number)) {
				throw new RuntimeException(
						"Attempted to calculate average for non-numeric class!");
			}
			final int occs = mapEntry.getValue();
			weightedSum += occs * ((Number) key).doubleValue();
			normalization += occs;
		}
		return weightedSum / normalization;
	}

	/**
	 * Returns the map that backs this histogram. This is the internal map
	 * itself and not a copy, so changes to it affect this histogram.
	 *
	 * @return the map from data points to their counts.
	 */
	public Map<H, Integer> getBackingMap() {
		return backingMap;
	}

	/**
	 * Returns the element that was counted the most.
	 *
	 * @return the element that was counted the most, or null if no element
	 * has a count greater than 0 (for example if this histogram is empty).
	 * If several elements share the highest count, the one that comes first
	 * in the iteration order of the backing map is returned.
	 */
	public H getMaxElement() {
		int currentMax = 0;
		H currentMaxElem = null;
		for (final Entry<H, Integer> mapEntry : backingMap.entrySet()) {
			final int occs = mapEntry.getValue();
			if (occs > currentMax) {
				currentMaxElem = mapEntry.getKey();
				currentMax = occs;
			}
		}
		return currentMaxElem;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy