All downloads are free. The search and download functionality uses the official Maven repository.

cc.mallet.fst.TransducerTrainer Maven / Gradle / Ivy

Go to download

MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.

The newest version!
package cc.mallet.fst;

import java.util.ArrayList;
import java.util.Collection;

import cc.mallet.optimize.Optimizer;
import cc.mallet.types.Instance;
import cc.mallet.types.InstanceList;

/**
 * An abstract class to train and evaluate a transducer model.
 *
 * <p>Besides declaring the training entry points, this class keeps a list of
 * {@link TransducerEvaluator}s. Subclasses call {@link #runEvaluators()} at the
 * points during training where evaluation should happen.
 */
public abstract class TransducerTrainer {
	// The list of evaluators to be run every once in a while.
	// The element type must be declared: iterating a raw ArrayList yields Object,
	// which makes the typed for-each loop in runEvaluators() fail to compile.
	ArrayList<TransducerEvaluator> evaluators = new ArrayList<TransducerEvaluator>();
	
	/** @return the transducer associated with this trainer. */
	public abstract Transducer getTransducer();

	/** @return the current iteration count, as defined by the subclass. */
	public abstract int getIteration();

	/** @return whether the subclass considers training to be finished. */
	public abstract boolean isFinishedTraining();
	
	/**
	 * Train on the given set with no practical limit on the number of iterations.
	 * Equivalent to {@code train(trainingSet, Integer.MAX_VALUE)}.
	 *
	 * @param trainingSet the instances to train on
	 * @return whatever {@link #train(InstanceList, int)} returns
	 */
	public boolean train (InstanceList trainingSet) {
		return train (trainingSet, Integer.MAX_VALUE);
	}
	
	/** Train the transducer associated with this TransducerTrainer.  
	 * You should be able to call this method with different trainingSet objects.
	 * Whether this causes the TransducerTrainer to combine both trainingSets or
	 * to view the second as a new alternative is at the discretion of the particular
	 * TransducerTrainer subclass involved.
	 *
	 * @param trainingSet the instances to train on
	 * @param numIterations the maximum number of iterations to run
	 * @return a subclass-defined status flag; NOTE(review): presumably whether
	 *         training converged/finished -- confirm against the subclasses
	 */
	public abstract boolean train (InstanceList trainingSet, int numIterations);

	// TODO Consider adding or removing these
	//public abstract boolean train ();
	//public abstract boolean train (int numIterations);
	
	// Management of evaluators

	/**
	 * Append an evaluator to the list run by {@link #runEvaluators()}.
	 *
	 * @param te the evaluator to add
	 * @return this trainer, so calls can be chained
	 */
	public TransducerTrainer addEvaluator (TransducerEvaluator te) {
		evaluators.add(te);
		return this;
	}

	/**
	 * Append every evaluator in the given collection, in the collection's iteration order.
	 *
	 * @param tes the evaluators to add
	 * @return this trainer, so calls can be chained
	 */
	public TransducerTrainer addEvaluators (Collection<? extends TransducerEvaluator> tes) {
		evaluators.addAll(tes);
		return this;
	}

	/**
	 * Remove the first occurrence of the given evaluator, if it is present.
	 *
	 * @param te the evaluator to remove
	 * @return this trainer, so calls can be chained
	 */
	public TransducerTrainer removeEvaluator (TransducerEvaluator te) {
		evaluators.remove(te);
		return this;
	}

	/** This method should be called by subclasses whenever evaluators should be run.
	 * Do not worry too much about them being run too often, because the evaluators
	 * themselves can control/limit when they actually do their work with TransducerEvaluator.precondition(). */
	protected void runEvaluators () {
		for (TransducerEvaluator te : evaluators) {
			te.evaluate(this);
		}
	}
	
	/** Implemented by trainers that expose the {@link Optimizer} they use. */
	public interface ByOptimization {
		/** @return the optimizer used by this trainer. */
		public Optimizer getOptimizer ();
		// Remove the above, and only have public Optimizer getOptimizer (InstanceList trainingSet); 
	}
	
	// Implied above; can always make a per-instance training method use a batch instance list
	//public interface ByBatch {}
	
	// TODO Consider making this an interface also, like ByOptimization
	/** A trainer that can additionally be trained on an incremental set of instances. */
	public static abstract class ByIncrements extends TransducerTrainer {
		/**
		 * Train on an additional set of instances.
		 *
		 * @param incrementalTrainingSet the instances to train on
		 * @return a subclass-defined status flag (meaning not specified here)
		 */
		public abstract boolean trainIncremental (InstanceList incrementalTrainingSet);
	}
		
	/** An incremental trainer that can additionally be trained on one instance at a time. */
	public static abstract class ByInstanceIncrements extends ByIncrements {
		/**
		 * Train on a single additional instance.
		 *
		 * @param trainingInstance the instance to train on
		 * @return a subclass-defined status flag (meaning not specified here)
		 */
		public abstract boolean trainIncremental (Instance trainingInstance);

	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy