gov.sandia.cognition.learning.function.summarizer.MostFrequentSummarizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cognitive-foundry Show documentation
Show all versions of cognitive-foundry Show documentation
A single jar with all the Cognitive Foundry components.
/*
* File: MostFrequentSummarizer.java
* Authors: Justin Basilico
* Company: Sandia National Laboratories
* Project: Cognitive Foundry
*
* Copyright April 07, 2009, Sandia Corporation.
* Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
* license for use of this work by or on behalf of the U.S. Government. Export
* of this program may require a license from the United States Government.
* See CopyrightHistory.txt for complete details.
*
*/
package gov.sandia.cognition.learning.function.summarizer;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import gov.sandia.cognition.util.Summarizer;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;
/**
* Summarizes a set of values by returning the most frequent value. If there
* is a tie, the first encountered value is returned.
*
* @param Type of Data to summarize.
* @author Justin Basilico
* @since 3.0
*/
public class MostFrequentSummarizer
extends AbstractCloneableSerializable
implements Summarizer
{
/**
* Creates a new {@code MostFrequentSummarizer}.
*/
public MostFrequentSummarizer()
{
super();
}
/**
* Summarizes the given data by returning the most frequent value. If there
* are multiple values at the same (maximum) frequency, then the first one
* is used.
*
* @param data
* The data to summarize.
* @return
* The most frequent value.
*/
public DataType summarize(
final Collection extends DataType> data)
{
// TODO: There is probably a more efficient data structure for doing this. Maybe
// a heap combined with a hash map.
// Create the map of counts.
final LinkedHashMap counts =
new LinkedHashMap();
for (DataType item : data)
{
Integer count = counts.get(item);
if (count == null)
{
count = 0;
}
counts.put(item, count + 1);
}
// Find the one with the best count.
int bestCount = 0;
DataType best = null;
for (Map.Entry entry : counts.entrySet())
{
final int count = entry.getValue();
if (count > bestCount)
{
bestCount = count;
best = entry.getKey();
}
}
// Return the best value found.
return best;
}
}