All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.test.unit.classify.DynamicLMClassifierTest Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html. No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
package com.aliasi.test.unit.classify;

import static junit.framework.Assert.*;

import java.io.File;
import java.io.IOException;

import org.junit.Ignore;
import org.junit.Test;

import com.aliasi.classify.DynamicLMClassifier;
import com.aliasi.classify.LMClassifier;
import com.aliasi.stats.MultivariateEstimator;
import com.aliasi.tokenizer.RegExTokenizerFactory;
import com.aliasi.tokenizer.TokenizerFactory;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Files;
import com.aliasi.util.Strings;

/**
 * Unit tests for {@link DynamicLMClassifier}: factory argument checking, training,
 * classification, and compilation into an {@link LMClassifier}.
 */
public class DynamicLMClassifierTest {

    /**
     * Calls {@code createTokenized} with a category array that lists {@code "2"} twice and
     * expects an {@link IllegalArgumentException}.
     *
     * <p>NOTE(review): presumably the duplicate category is what triggers the rejection; the
     * method name suggests a different intent, so confirm against the factory's documentation.
     */
    @Test(expected = IllegalArgumentException.class)
    public void testLargeCategorySet() throws IOException, ClassNotFoundException {
        TokenizerFactory factory = new RegExTokenizerFactory("\\S+");
        String[] categories = { "1", "2", "2" };

        // Only the thrown exception matters, so the returned classifier is not kept.
        DynamicLMClassifier.createTokenized(categories, factory, 2);
    }

    /**
     * Trains only the {@code "Foo"} category of a two-category classifier from a text file and
     * checks that compiling the classifier yields a non-null result.
     *
     * <p>Currently ignored. The data file is resolved relative to the working directory.
     */
    @Ignore
    @Test
    public void testCompileOneCategory() throws IOException, ClassNotFoundException {
        File dataFile = new File("src/com/aliasi/test/unit/classify/testFile1.txt");
        String data = Files.readFromFile(dataFile, Strings.UTF8);

        String[] categories = { "Foo", "Bar" };
        DynamicLMClassifier classifier = DynamicLMClassifier.createNGramBoundary(categories, 32);

        // "Bar" is intentionally left untrained.
        classifier.train("Foo", data, 1);

        LMClassifier compiledClassifier = (LMClassifier) AbstractExternalizable.compile(classifier);
        assertNotNull(compiledClassifier);
    }

    /**
     * Trains each of two categories with one sentence, then verifies the category counts and that
     * both the dynamic classifier and its compiled form pick the expected best category.
     */
    @Test
    public void testOne() throws IOException, ClassNotFoundException {
        String[] categories = { "Foo", "Bar" };
        DynamicLMClassifier classifier = DynamicLMClassifier.createNGramProcess(categories, 2);

        String fooText = "The rain in Spain falls mainly on the ground.";
        classifier.train("Foo", fooText, 1);

        String barText = "The rain in Madrid is made of water.";
        classifier.train("Bar", barText, 1);

        // should do this more elegantly with right type on classifier
        MultivariateEstimator est = (MultivariateEstimator) classifier.categoryDistribution();
        // NOTE(review): each category was trained once, yet the expected counts are 2 per category
        // and 4 overall -- presumably every category starts with a count of 1; confirm.
        assertEquals(2, est.getCount(est.outcome("Foo")));
        assertEquals(4, est.trainingSampleCount());

        assertEquals("Foo", classifier.classify("falls mainly").bestCategory());
        assertEquals("Bar", classifier.classify("Madrid is made of water").bestCategory());

        // Disabled check, kept for reference: after classifier.resetLanguageModel("Foo", 2, 256)
        // the "Foo" count was expected to be 0 and trainingSampleCount() to be 1.

        LMClassifier compiledClassifier = (LMClassifier) AbstractExternalizable.compile(classifier);

        assertEquals("Foo", compiledClassifier.classify("falls mainly").bestCategory());
        assertEquals("Bar", compiledClassifier.classify("Madrid is made of water").bestCategory());
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy