com.aliasi.test.unit.classify.DynamicLMClassifierTest Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of aliasi-lingpipe Show documentation
Show all versions of aliasi-lingpipe Show documentation
This is the original LingPipe distribution:
http://alias-i.com/lingpipe/web/download.html
No changes were made to the source code.
package com.aliasi.test.unit.classify;
import static junit.framework.Assert.*;
import java.io.File;
import java.io.IOException;
import org.junit.Ignore;
import org.junit.Test;
import com.aliasi.classify.DynamicLMClassifier;
import com.aliasi.classify.LMClassifier;
import com.aliasi.stats.MultivariateEstimator;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Files;
import com.aliasi.util.Strings;
/**
 * Unit tests for {@link DynamicLMClassifier}: category-set validation at
 * construction time, training, classification, and compilation of a trained
 * classifier into an {@link LMClassifier}.
 */
public class DynamicLMClassifierTest {

    /**
     * Creates a tokenized classifier from a category array that contains the
     * entry {@code "2"} twice and expects an {@link IllegalArgumentException}.
     *
     * <p>NOTE(review): despite the method name, the category set here is not
     * large; the only unusual property of the input is the duplicate category,
     * which is presumably what triggers the exception -- confirm against
     * {@code DynamicLMClassifier.createTokenized}.
     */
    @Test(expected = IllegalArgumentException.class)
    public void testLargeCategorySet() throws IOException, ClassNotFoundException {
        TokenizerFactory factory = new RegExTokenizerFactory("\\S+");
        String[] categories = { "1", "2", "2" };
        // Only the thrown exception matters, so the return value is discarded.
        DynamicLMClassifier.createTokenized(categories, factory, 2);
    }

    /**
     * Trains only the {@code "Foo"} category of a two-category boundary
     * n-gram classifier with the contents of a fixture file, compiles the
     * classifier, and checks that compilation yields a non-null result.
     *
     * <p>NOTE(review): the test is disabled with {@code @Ignore} and no reason
     * is recorded; possibly the relative fixture path below is not available
     * in every build layout -- confirm before re-enabling.
     */
    @Ignore
    @Test
    public void testCompileOneCategory() throws IOException, ClassNotFoundException {
        File dataFile = new File("src/com/aliasi/test/unit/classify/testFile1.txt");
        String data = Files.readFromFile(dataFile, Strings.UTF8);
        String[] categories = { "Foo", "Bar" };
        DynamicLMClassifier classifier = DynamicLMClassifier.createNGramBoundary(categories, 32);
        classifier.train("Foo", data, 1);
        LMClassifier compiledClassifier = (LMClassifier) AbstractExternalizable.compile(classifier);
        assertNotNull(compiledClassifier);
    }

    /**
     * Trains one sentence per category on a process n-gram classifier, checks
     * the category distribution counts, and verifies that both the dynamic
     * classifier and its compiled form pick the expected best category.
     */
    @Test
    public void testOne() throws IOException, ClassNotFoundException {
        String[] categories = { "Foo", "Bar" };
        DynamicLMClassifier classifier = DynamicLMClassifier.createNGramProcess(categories, 2);
        classifier.train("Foo", "The rain in Spain falls mainly on the ground.", 1);
        classifier.train("Bar", "The rain in Madrid is made of water.", 1);

        // should do this more elegantly with right type on classifier
        MultivariateEstimator est = (MultivariateEstimator) classifier.categoryDistribution();
        // The expected counts (2 for "Foo", 4 in total) exceed the single
        // training event given to each category; presumably the estimator
        // starts with one count per category -- TODO confirm.
        assertEquals(2, est.getCount(est.outcome("Foo")));
        assertEquals(4, est.trainingSampleCount());

        assertBestCategories(classifier);

        // Disabled check kept from the original test, not currently exercised:
        //   classifier.resetLanguageModel("Foo",2,256);
        //   assertEquals(est.getCount(est.outcome("Foo")),0);
        //   assertEquals(est.trainingSampleCount(),1);

        // The compiled classifier must agree with the dynamic one.
        LMClassifier compiledClassifier = (LMClassifier) AbstractExternalizable.compile(classifier);
        assertBestCategories(compiledClassifier);
    }

    /** Asserts that the two probe phrases are assigned to "Foo" and "Bar" respectively. */
    private static void assertBestCategories(LMClassifier classifier) {
        assertEquals("Foo", classifier.classify("falls mainly").bestCategory());
        assertEquals("Bar", classifier.classify("Madrid is made of water").bestCategory());
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy