All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.learning.algorithm.delta.BurrowsDeltaCategorizer Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                BurrowsDeltaCategorizer.java
 * Authors:             Alex Killian
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 * 
 * Copyright May 23, 2016, Sandia Corporation.
 * Under the terms of Contract DE-AC04-94AL85000, there is a non-exclusive
 * license for use of this work by or on behalf of the U.S. Government. 
 * Export of this program may require a license from the United States
 * Government. See CopyrightHistory.txt for complete details.
 * 
 */
package gov.sandia.cognition.learning.algorithm.delta;

import gov.sandia.cognition.annotation.PublicationReference;
import gov.sandia.cognition.annotation.PublicationReferences;
import gov.sandia.cognition.annotation.PublicationType;
import gov.sandia.cognition.learning.data.DefaultWeightedValueDiscriminant;
import gov.sandia.cognition.learning.data.InputOutputPair;
import gov.sandia.cognition.learning.data.ValueDiscriminantPair;
import gov.sandia.cognition.math.UnivariateStatisticsUtil;
import gov.sandia.cognition.math.matrix.Vector;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

/**
 * The regular Burrows' Delta algorithm implementation.
 * 
 * @author alkilli
 * @param 
 */
@PublicationReferences(
    references={
        @PublicationReference(
            author={
                "John Burrows"
            },
            title="‘Delta’: a Measure of Stylistic Difference and a Guide to Likely Authorship",
            type=PublicationType.Journal,
            year=2002,
            pages={267,287}
        )
    }

)
public class BurrowsDeltaCategorizer
    extends AbstractDeltaCategorizer
{
   
    /**
     * Constructor that takes a learner and featureStddev.
     * 
     * @param learner
     * @param featureStddev 
     */
    protected BurrowsDeltaCategorizer(
        Learner learner,
        ArrayList featureStddev) 
    {
        super(learner, featureStddev);
    }

    /**
     * This method implements the evaluation aspect of BurrowsDelta.
     * That is, given an unknownVector, this method should return
     * a discriminant value paired with the corresponding most likely category.
     * The discriminant value is the score.
     * 
     * @param unknownVector
     * @return 
     */
    @Override
    public ValueDiscriminantPair evaluateWithDiscriminant(
        Vector unknownVector)
    {
        
        double minDelta = Double.MAX_VALUE;
        CategoryType minCat = null;
        
        DefaultWeightedValueDiscriminant result = 
            new DefaultWeightedValueDiscriminant();

        for (InputOutputPair pair : 
            learner.trainingSet) 
        {
            Vector knownVector = pair.getInput();
            double delta = 0.0;
            for (int featureIndex = 0; 
                featureIndex < knownVector.getDimensionality();
                featureIndex++) 
            {
                
                delta += Math.abs((unknownVector.get(featureIndex) - 
                        knownVector.get(featureIndex)) /
                        (double) this.featureStddev.get(featureIndex));
            }
            
            if (delta < minDelta ) 
            {
                minDelta = delta;
                minCat = pair.getOutput();
            }
        }
        result.setValue(minCat);
        result.setWeight(minDelta);

        return result;
    }
    
    /**
     * Evaluates an unknown input, but does not return the discriminant value.
     * Only returns the category that the unknown input most likely corresponds 
     * to.
     * 
     * @param unknownInput
     * @return 
     */
    @Override
    public CategoryType evaluate(
        final Vector unknownInput)
    {
        return this.evaluateWithDiscriminant(unknownInput).getValue();
    }
    
    /**
     * Learner for a BurrowsDeltaCategorizer.
     * @param  Type of the categories of the categorizer.
     */
    public static class Learner
        extends AbstractLearner
    {
        
        /**
         * Default constructor.
         */
        public Learner()
        {
        }

        @Override
        public BurrowsDeltaCategorizer learn(
            final Collection> trainingSet)
        {
            // Get training set
            this.trainingSet = trainingSet;
            
            // Map for storing stddev for each feature for fast lookup
            ArrayList featureStddev = new ArrayList();

            // Get size of feature vectors (all feature vectors have the same size)
            int vectorSize = 0;
            for (InputOutputPair pair : trainingSet) 
            {
                    Vector vector = pair.getInput();
                    vectorSize = vector.getDimensionality();
                    break;
            }

            // Learn stats about the relative frequencies of each feature in the feature vector
            for (int featureIndex = 0; featureIndex < vectorSize; featureIndex++)
            {
                List observations = new ArrayList();
                for (InputOutputPair pair : trainingSet) 
                {
                    Vector vector = pair.getInput();
                    observations.add(vector.get(featureIndex));
                }
                Double stddev = UnivariateStatisticsUtil.computeStandardDeviation(observations);
                stddev = (stddev.equals(0.0)) ? 1E10 : stddev;
                featureStddev.add(stddev);
            }

            // Reaturn evaluator
            BurrowsDeltaCategorizer bdc = 
                new BurrowsDeltaCategorizer(this, featureStddev);
            return bdc;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy