
cc.mallet.grmm.learning.GenericAcrfTui Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jcore-mallet-2.0.9 Show documentation
Show all versions of jcore-mallet-2.0.9 Show documentation
MALLET is a Java-based package for statistical natural language processing, document classification, clustering, topic modeling, information extraction, and other machine learning applications to text.
The newest version!
/* Copyright (C) 2003 Univ. of Massachusetts Amherst, Computer Science Dept.
This file is part of "MALLET" (MAchine Learning for LanguagE Toolkit).
http://www.cs.umass.edu/~mccallum/mallet
This software is provided under the terms of the Common Public License,
version 1.0, as published by http://www.opensource.org. For further
information, see the file `LICENSE' included with this distribution. */
package cc.mallet.grmm.learning;
/**
*
* Created: Aug 23, 2005
*
* @author trainSource = new LineGroupIterator (new FileReader (trainFile.value), Pattern.compile ("^\\s*$"), true);
Iterator testSource;
if (testFile.wasInvoked ()) {
testSource = new LineGroupIterator (new FileReader (testFile.value), Pattern.compile ("^\\s*$"), true);
} else {
testSource = null;
}
InstanceList training = new InstanceList (pipe);
training.addThruPipe (trainSource);
InstanceList testing = new InstanceList (pipe);
testing.addThruPipe (testSource);
ACRF.Template[] tmpls = parseModelFile (modelFile.value);
ACRFEvaluator eval = createEvaluator (evalOption.value);
Inferencer inf = createInferencer (inferencerOption.value);
Inferencer maxInf = createInferencer (maxInferencerOption.value);
ACRF acrf = new ACRF (pipe, tmpls);
acrf.setInferencer (inf);
acrf.setViterbiInferencer (maxInf);
ACRFTrainer trainer = createTrainer();
System.err.println ("ACRF Trainer = "+trainer);
trainer.train (acrf, training, null, testing, eval, 9999);
timing.tick ("Training");
FileUtils.writeGzippedObject (new File ("acrf.ser.gz"), acrf);
timing.tick ("Serializing");
System.err.println ("Total time (ms) = " + timing.elapsedTime ());
}
/**
 * Fetches the interpreter from {@link CommandOption#getInterpreter()} and evaluates a
 * fixed list of import statements on it, in order.
 *
 * @return the interpreter, after all import statements have been evaluated
 * @throws RuntimeException wrapping the {@code EvalError} raised by the first import
 *         statement that fails to evaluate
 */
private static BshInterpreter setupInterpreter ()
{
  // Evaluated one by one, top to bottom; the first failure aborts the setup.
  final String[] importStatements = {
    "import cc.mallet.base.extract.*",
    "import cc.mallet.grmm.inference.*",
    "import cc.mallet.grmm.learning.*",
    "import cc.mallet.grmm.learning.templates.*",
  };

  BshInterpreter bsh = CommandOption.getInterpreter ();
  for (int i = 0; i < importStatements.length; i++) {
    try {
      bsh.eval (importStatements[i]);
    } catch (EvalError err) {
      // Callers of this method do not declare EvalError, so rethrow unchecked.
      throw new RuntimeException (err);
    }
  }
  return bsh;
}
/**
 * Builds an evaluator from a textual specification.
 * <p>
 * A spec containing an opening parenthesis is handed unchanged to the interpreter
 * and the result is cast to {@code ACRFEvaluator}.  Any other spec is split on runs
 * of whitespace and the resulting tokens are passed to the token-based overload.
 *
 * @param spec the evaluator specification; must not be null
 * @return the evaluator described by {@code spec}
 * @throws EvalError if the interpreter fails while evaluating a code-style spec
 */
public static ACRFEvaluator createEvaluator (String spec) throws EvalError
{
  boolean looksLikeCode = spec.indexOf ('(') != -1;
  if (looksLikeCode) {
    // Treat it as Java code and evaluate it verbatim.
    return (ACRFEvaluator) interpreter.eval (spec);
  }

  // Keyword form: queue up the whitespace-separated tokens for the parser.
  String[] words = spec.split ("\\s+");
  LinkedList queue = new LinkedList ();
  for (int i = 0; i < words.length; i++) {
    queue.add (words[i]);
  }
  return createEvaluator (queue);
}
/**
 * Consumes tokens from the front of {@code toks} and builds the evaluator they describe.
 * <p>
 * The first token selects the evaluator type (compared case-insensitively):
 * <ul>
 *   <li>{@code SEGMENT} -- the next token is parsed as an integer slice; all remaining
 *       tokens are consumed as (start tag, continue tag) pairs and used to build a
 *       {@code MultiSegmentationEvaluatorACRF}.</li>
 *   <li>{@code LOG} -- yields a {@code DefaultAcrfTrainer.LogEvaluator}; no further
 *       tokens are consumed.</li>
 *   <li>{@code SERIAL} -- repeatedly parses evaluators from the remaining tokens until
 *       none are left, and wraps them in an {@code AcrfSerialEvaluator}.</li>
 * </ul>
 *
 * @param toks queue of String tokens; it is modified (tokens are removed as they are read)
 * @return the evaluator described by the consumed tokens
 * @throws RuntimeException if the type token is not recognised, or if a SEGMENT spec
 *         is followed by an odd number of tag tokens
 */
private static ACRFEvaluator createEvaluator (LinkedList toks)
{
  String kind = (String) toks.removeFirst ();

  if (kind.equalsIgnoreCase ("LOG")) {
    return new DefaultAcrfTrainer.LogEvaluator ();
  }

  if (kind.equalsIgnoreCase ("SERIAL")) {
    List children = new ArrayList ();
    // Each recursive call eats the tokens belonging to one child evaluator.
    while (toks.size () > 0) {
      ACRFEvaluator child = createEvaluator (toks);
      children.add (child);
    }
    return new AcrfSerialEvaluator (children);
  }

  if (!kind.equalsIgnoreCase ("SEGMENT")) {
    throw new RuntimeException ("Error in --eval "+evalOption.value+": illegal evaluator "+kind);
  }

  // SEGMENT <slice> <start1> <continue1> <start2> <continue2> ...
  int slice = Integer.parseInt ((String) toks.removeFirst ());

  int remaining = toks.size ();
  if (remaining % 2 != 0) {
    throw new RuntimeException ("Error in --eval "+evalOption.value+": Every start tag must have a continue.");
  }

  int pairCount = remaining / 2;
  String[] starts = new String [pairCount];
  String[] continues = new String [pairCount];
  int idx = 0;
  while (idx < pairCount) {
    starts[idx] = (String) toks.removeFirst ();
    continues[idx] = (String) toks.removeFirst ();
    idx++;
  }

  return new MultiSegmentationEvaluatorACRF (starts, continues, slice);
}
/**
 * Builds an inferencer by evaluating {@code spec} with the interpreter.
 * <p>
 * A spec containing an opening parenthesis is evaluated as given; otherwise it is
 * wrapped as {@code new <spec>()} before being evaluated.
 *
 * @param spec the code or name to evaluate; must not be null
 * @return the object produced by the interpreter, as an {@code Inferencer}
 * @throws EvalError if the interpreter fails to evaluate the expression
 * @throws RuntimeException if the evaluated object is not an {@code Inferencer}
 */
private static Inferencer createInferencer (String spec) throws EvalError
{
  // No parenthesis: wrap the spec in a no-arg constructor call.
  String expression = (spec.indexOf ('(') < 0) ? "new "+spec+"()" : spec;

  Object result = interpreter.eval (expression);
  if (!(result instanceof Inferencer)) {
    throw new RuntimeException ("Don't know what to do with inferencer "+result);
  }
  return (Inferencer) result;
}
/**
 * Processes command-line arguments against the options of {@code childClass}.
 * <p>
 * Creates an initially empty {@code CommandOption.List}, adds {@code childClass} to it,
 * processes {@code args}, and then logs the options to the logger named {@code ""}.
 *
 * @param childClass the class that is added to the option list
 * @param args the command-line arguments to process
 */
public static void doProcessOptions (Class childClass, String[] args)
{
  CommandOption[] noOptions = new CommandOption[0];
  CommandOption.List optionList = new CommandOption.List ("", noOptions);
  optionList.add (childClass);

  optionList.process (args);

  Logger rootLogger = Logger.getLogger ("");
  optionList.logOptions (rootLogger);
}
/**
 * Reads a model file and turns each of its lines into an {@code ACRF.Template}.
 * <p>
 * Every line of the file is evaluated with the interpreter, and each result must be
 * an {@code ACRF.Template}.  The templates are returned in file order.  The reader
 * is always closed before this method returns, whether normally or by exception.
 *
 * @param mdlFile the model file to read
 * @return the templates produced by the file's lines, in order; empty if the file
 *         has no lines
 * @throws IOException if the file cannot be opened, read, or closed
 * @throws EvalError if the interpreter fails while evaluating a line
 * @throws RuntimeException if a line evaluates to something that is not a template;
 *         the message names the file and the 1-based line number
 */
private static ACRF.Template[] parseModelFile (File mdlFile) throws IOException, EvalError
{
  BufferedReader in = new BufferedReader (new FileReader (mdlFile));
  try {
    List tmpls = new ArrayList ();
    int lineNo = 0;  // 1-based number of the line currently being evaluated
    String line;
    while ((line = in.readLine ()) != null) {
      lineNo++;
      Object tmpl = interpreter.eval (line);
      if (!(tmpl instanceof ACRF.Template)) {
        // Report the offending line number (previously the reader's toString() was printed).
        throw new RuntimeException ("Error in "+mdlFile+" line "+lineNo+":\n Object "+tmpl+" not a template");
      }
      tmpls.add (tmpl);
    }
    return (ACRF.Template[]) tmpls.toArray (new ACRF.Template [0]);
  } finally {
    // Release the file handle on both the success and the failure path.
    in.close ();
  }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy