All downloads are free. Search and download functionality uses the official Maven repository.

cc.mallet.classify.ClassifierTrainer Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
/* Copyright (C) 2002 Univ. of Massachusetts Amherst, Computer Science Dept.
   This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
   http://www.cs.umass.edu/~mccallum/mallet
   This software is provided under the terms of the Common Public License,
   version 1.0, as published by http://www.opensource.org.  For further
   information, see the file `LICENSE' included with this distribution. */





package cc.mallet.classify;

import cc.mallet.classify.Classifier;
import cc.mallet.optimize.Optimizer;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;
import cc.mallet.types.Labeler;

/** Each ClassifierTrainer trains one Classifier based on various interfaces for consuming training data.
 * If you want an object that can train be asked to train on multiple different training sets and 
 * yield different classifiers, you probably want a ClassifierTrainer.Factory. */
public abstract class ClassifierTrainer
{
	protected InstanceList validationSet;
	protected boolean finishedTraining = false;
	
	public boolean isFinishedTraining() { return finishedTraining; } // Careful to set this properly in subclasses!  Consider removing -akm 1/08
	public abstract C getClassifier();
	public abstract C train (InstanceList trainingSet);  
	public void setValidationInstances (InstanceList validationSet) { this.validationSet = validationSet; }
	public InstanceList getValidationInstances () { return this.validationSet; }
	
	/* No, it is fine if these can be set in the constructor only.  
	 * Individual ClassifierTrainer subclasses could provide this interface if desired. 
	public C setInitialClassifier (C initialClassifier) { return null; }
	public C getInitialClassifier () { return null; } 
	*/
	
		
	public interface ByOptimization {
		public C train (InstanceList trainingSet, int numIterations);
		public Optimizer getOptimizer ();	
		public abstract int getIteration();
	}
	
	/** For active learning, in which this trainer will select certain instances and 
	 * request that the Labeler instance label them. 
	 * @param trainingAndUnlabeledSet the instances on which to train; some may be labeled; unlabeled ones may have their label requested from the labeler.
	 * @param labeler  
	 * @param numLabelRequests the number of times to call labeler.label(). */
	public interface ByActiveLearning {
		public C train (InstanceList trainingAndUnlabeledSet, Labeler labeler, int numLabelRequests);
	}

	/** For various kinds of online learning by batches, where training instances are presented,
	 * consumed for learning immediately.  The same instances may be presented more than once to 
	 * this interface.  For example, StochasticGradient, etc conforms to this interface. */
	public interface ByIncrements {
		public C trainIncremental (InstanceList trainingInstancesToAdd);
	}
	
	/** For online learning that can operate on one instance at a time.  For example, Perceptron. */
	public interface ByInstanceIncrements extends ByIncrements {
		public C trainIncremental (Instance instanceToAdd);
	}

	/** Instances of a Factory know how to create new ClassifierTrainers to apply to new Classifiers. */
	public static abstract class Factory>
	{
		// This is recommended (but cannot be enforced in Java) that subclasses implement
		// public static Classifier train (InstanceList trainingSet)
		// public static Classifier train (InstanceList trainingSet, InstanceList validationSet)
		// public static Classifier train (InstanceList trainingSet, InstanceList validationSet, Classifier initialClassifier)
		// which call 
		
		public abstract CT newClassifierTrainer (Classifier initialClassifier);
		public CT newClassifierTrainer () { return newClassifierTrainer (null); }
				
		public String toString() {
			return this.getClass().getName();
		}

	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy