All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.learning.function.summarizer.MostFrequentSummarizer Maven / Gradle / Ivy

/*
 * File:                MostFrequentSummarizer.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright April 07, 2009, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 * See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.learning.function.summarizer;

import gov.sandia.cognition.util.AbstractCloneableSerializable;
import gov.sandia.cognition.util.Summarizer;
import java.util.Collection;
import java.util.LinkedHashMap;
import java.util.Map;

/**
 * Summarizes a set of values by returning the most frequent value. If there
 * is a tie, the first encountered value is returned.
 * 
 * @param   <DataType>
 *      Type of data to summarize. Values are used as hash-map keys, so
 *      frequency is counted according to their {@code equals} and
 *      {@code hashCode} implementations.
 * @author  Justin Basilico
 * @since   3.0
 */
public class MostFrequentSummarizer<DataType>
    extends AbstractCloneableSerializable
    implements Summarizer<DataType, DataType>
{

    /**
     * Creates a new {@code MostFrequentSummarizer}.
     */
    public MostFrequentSummarizer()
    {
        super();
    }

    /**
     * Summarizes the given data by returning the most frequent value. If there
     * are multiple values at the same (maximum) frequency, then the first one
     * encountered while iterating over the data is used.
     * 
     * @param   data
     *      The data to summarize.
     * @return
     *      The most frequent value, or {@code null} if the given collection
     *      is empty.
     */
    public DataType summarize(
        final Collection<? extends DataType> data)
    {
        // TODO: There is probably a more efficient data structure for doing
        // this. Maybe a heap combined with a hash map.

        // Create the map of counts. A LinkedHashMap iterates in insertion
        // order, which is what lets ties resolve to the first encountered
        // value below.
        final LinkedHashMap<DataType, Integer> counts =
            new LinkedHashMap<DataType, Integer>();
        for (DataType item : data)
        {
            Integer count = counts.get(item);
            if (count == null)
            {
                // First time this value has been seen.
                count = 0;
            }

            counts.put(item, count + 1);
        }

        // Find the one with the best count. The strict greater-than keeps the
        // earliest entry when several values share the maximum count.
        int bestCount = 0;
        DataType best = null;
        for (Map.Entry<DataType, Integer> entry : counts.entrySet())
        {
            final int count = entry.getValue();
            if (count > bestCount)
            {
                bestCount = count;
                best = entry.getKey();
            }
        }

        // Return the best value found.
        return best;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy