All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.learning.algorithm.baseline.MostFrequentLearner Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                MostFrequentLearner.java
 * Authors:             Justin Basilico
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright April 17, 2008, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive 
 * license for use of this work by or on behalf of the U.S. Government. Export 
 * of this program may require a license from the United States Government. 
 * See CopyrightHistory.txt for complete details.
 * 
 */

package gov.sandia.cognition.learning.algorithm.baseline;

import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.learning.algorithm.SupervisedBatchLearner;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.function.ConstantEvaluator;
import gov.sandia.cognition.statistics.distribution.DefaultDataDistribution;
import gov.sandia.cognition.util.AbstractCloneableSerializable;
import java.util.Collection;

/**
 * The {@code MostFrequentLearner} class implements a baseline learner that
 * computes the most frequent output value.
 * 
 * @param    The type of the output.
 * @author  Justin Basilico
 * @since   2.1
 */
@CodeReview(
    reviewer="Kevin R. Dixon",
    date="2008-07-22",
    changesNeeded=false,
    comments={
        "Fixed a few typos in javadoc.",
        "Removed implements Serializeable, as BatchLearner already does that.",
        "I don't particularly like this class... I just don't think it's useful.",
        "However, the code looks fine."
    }
)
public class MostFrequentLearner
    extends AbstractCloneableSerializable
    implements SupervisedBatchLearner>
{
    /**
     * Creates a new {@code MostFrequentLearner}.
     */
    public MostFrequentLearner()
    {
        super();
    }

    /**
     * Creates a constant evaluator that for the most frequent output value of
     * the given dataset.
     * 
     * @param   data The dataset of input-output pairs to use.
     * @return  A constant evaluator for the most frequent output value.
     */
    @Override
    public ConstantEvaluator learn(
        final Collection> data )
    {
        // We are going to count up how many times each value occurs.
        final DefaultDataDistribution counts =
            new DefaultDataDistribution();
        
        for (InputOutputPair example : data)
        {
            counts.increment(example.getOutput());
        }
        
        // Create the resulting evaluator.
        final ConstantEvaluator result = 
            new ConstantEvaluator();
        if (counts.getTotal() > 0)
        {
            result.setValue(counts.getMaxValueKey());
        }
        // else - There is no most frequent output value.
        return result;
    }
    
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy