All downloads are free. The search and download functionality uses the official Maven repository.

org.cleartk.classifier.viterbi.ViterbiClassifier Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/** 
 * Copyright (c) 2007-2008, Regents of the University of Colorado 
 * All rights reserved.
 * 
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 * 
 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 
 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 
 * Neither the name of the University of Colorado at Boulder nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. 
 * 
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE. 
 */
package org.cleartk.classifier.viterbi;

import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.uima.UimaContext;
import org.apache.uima.resource.ResourceInitializationException;
import org.cleartk.classifier.Classifier;
import org.cleartk.classifier.CleartkProcessingException;
import org.cleartk.classifier.Feature;
import org.cleartk.classifier.ScoredOutcome;
import org.cleartk.classifier.SequenceClassifier;
import org.cleartk.util.CleartkInitializationException;
import org.cleartk.util.ReflectionUtil;
import org.cleartk.util.ReflectionUtil.TypeArgumentDelegator;
import org.uimafit.component.initialize.ConfigurationParameterInitializer;
import org.uimafit.descriptor.ConfigurationParameter;
import org.uimafit.factory.ConfigurationParameterFactory;
import org.uimafit.factory.initializable.Initializable;

/**
 * 
* Copyright (c) 2007-2008, Regents of the University of Colorado
* All rights reserved. */ public class ViterbiClassifier implements SequenceClassifier, Initializable, TypeArgumentDelegator { protected Classifier delegatedClassifier; protected OutcomeFeatureExtractor[] outcomeFeatureExtractors; public static final String PARAM_STACK_SIZE = ConfigurationParameterFactory.createConfigurationParameterName( ViterbiClassifier.class, "stackSize"); @ConfigurationParameter( description = "specifies the maximum number of candidate paths to " + "keep track of. In general, this number should be higher than the number " + "of possible classifications at any given point in the sequence. This " + "guarantees that highest-possible scoring sequence will be returned. If, " + "however, the number of possible classifications is quite high and/or you " + "are concerned about throughput performance, then you may want to reduce the number " + "of candidate paths to maintain. If Classifier.score is not implemented for the given delegated classifier, then " + "the value of this parameter must be 1. ", defaultValue = "1") protected int stackSize; public static final String PARAM_ADD_SCORES = ConfigurationParameterFactory.createConfigurationParameterName( ViterbiClassifier.class, "addScores"); @ConfigurationParameter( description = "specifies whether the scores of candidate sequence classifications should be " + "calculated by summing classfication scores for each member of the sequence or by multiplying them. A value of " + "true means that the scores will be summed. A value of false means that the scores will be multiplied. 
", defaultValue = "false") protected boolean addScores = false; public ViterbiClassifier( Classifier delegatedClassifier, OutcomeFeatureExtractor[] outcomeFeatureExtractors) { this.delegatedClassifier = delegatedClassifier; this.outcomeFeatureExtractors = outcomeFeatureExtractors; } public void initialize(UimaContext context) throws ResourceInitializationException { ConfigurationParameterInitializer.initialize(this, context); if (stackSize < 1) { throw CleartkInitializationException.parameterLessThan(PARAM_STACK_SIZE, 1, stackSize); } } public List classify(List> features) throws CleartkProcessingException { if (stackSize == 1) { List outcomes = new ArrayList(); List returnValues = new ArrayList(); for (List instanceFeatures : features) { for (OutcomeFeatureExtractor outcomeFeatureExtractor : outcomeFeatureExtractors) { instanceFeatures.addAll(outcomeFeatureExtractor.extractFeatures(outcomes)); } OUTCOME_TYPE outcome = delegatedClassifier.classify(instanceFeatures); outcomes.add(outcome); returnValues.add(outcome); } return returnValues; } else { try { return viterbi(features); } catch (UnsupportedOperationException uoe) { throw CleartkProcessingException.unsupportedOperationSetParameter( delegatedClassifier, "score", PARAM_STACK_SIZE, 1); } } } /** * This implementation of Viterbi requires at most stackSize * sequenceLength calls to the * classifier. If this proves to be too expensive, then consider using a smaller stack size. * * @param features * a sequence-worth of features. Each List in features should corresond to all * of the features for a given element in a sequence to be classified. * @return a list of outcomes (classifications) - one classification for each member of the * sequence. 
* @see #PARAM_STACK_SIZE * @see OutcomeFeatureExtractor */ public List viterbi(List> features) throws CleartkProcessingException { List>> nbestSequences = new ArrayList>>(); if (features == null || features.size() == 0) { return Collections.emptyList(); } List> scoredOutcomes = delegatedClassifier.score( features.get(0), stackSize); for (ScoredOutcome scoredOutcome : scoredOutcomes) { double score = scoredOutcome.getScore(); List sequence = new ArrayList(); sequence.add(scoredOutcome.getOutcome()); nbestSequences.add(new ScoredOutcome>(sequence, score)); } Map l = new HashMap(); Map> m = new HashMap>(); for (int i = 1; i < features.size(); i++) { List instanceFeatures = features.get(i); l.clear(); m.clear(); for (ScoredOutcome> scoredSequence : nbestSequences) { // add features from previous outcomes from each scoredSequence // in returnValues int outcomeFeaturesCount = 0; List previousOutcomes = new ArrayList(scoredSequence.getOutcome()); for (OutcomeFeatureExtractor outcomeFeatureExtractor : outcomeFeatureExtractors) { List outcomeFeatures = outcomeFeatureExtractor.extractFeatures(previousOutcomes); instanceFeatures.addAll(outcomeFeatures); outcomeFeaturesCount += outcomeFeatures.size(); } // score the instance features using the features added by the // outcomeFeatureExtractors scoredOutcomes = delegatedClassifier.score(instanceFeatures, stackSize); // remove the added features from previous outcomes for this // scoredSequence instanceFeatures = instanceFeatures.subList(0, instanceFeatures.size() - outcomeFeaturesCount); for (ScoredOutcome scoredOutcome : scoredOutcomes) { if (!l.containsKey(scoredOutcome.getOutcome())) { double score = scoredSequence.getScore(); if (addScores) { score = score + scoredOutcome.getScore(); } else { score = score * scoredOutcome.getScore(); } l.put(scoredOutcome.getOutcome(), score); m.put( scoredOutcome.getOutcome(), new ArrayList(scoredSequence.getOutcome())); } else { double newScore = scoredSequence.getScore(); if (addScores) { 
newScore = newScore + scoredOutcome.getScore(); } else { newScore = newScore * scoredOutcome.getScore(); } double bestScore = l.get(scoredOutcome.getOutcome()); if (newScore > bestScore) { l.put(scoredOutcome.getOutcome(), newScore); m.put( scoredOutcome.getOutcome(), new ArrayList(scoredSequence.getOutcome())); } } } } nbestSequences.clear(); for (OUTCOME_TYPE outcome : l.keySet()) { List outcomeSequence = m.get(outcome); outcomeSequence.add(outcome); double score = l.get(outcome); ScoredOutcome> returnValue = new ScoredOutcome>( outcomeSequence, score); nbestSequences.add(returnValue); } Collections.sort(nbestSequences); } Collections.sort(nbestSequences); if (nbestSequences.size() > 0) { return nbestSequences.get(0).getOutcome(); } return null; } public List>> score(List> features, int maxResults) throws CleartkProcessingException { // TODO Auto-generated method stub return null; } public Map getTypeArguments(Class genericType) { if (genericType.equals(SequenceClassifier.class)) { genericType = Classifier.class; } return ReflectionUtil.getTypeArguments(genericType, this.delegatedClassifier); } }