All downloads are free. Search and download functionality uses the official Maven repository.

com.aliasi.test.unit.spell.AutoCompleterTest Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
package com.aliasi.test.unit.spell;

import com.aliasi.spell.AutoCompleter;
import com.aliasi.spell.FixedWeightEditDistance;
import com.aliasi.spell.WeightedEditDistance;

import com.aliasi.util.BoundedPriorityQueue;
import com.aliasi.util.Scored;
import com.aliasi.util.ScoredObject;

import org.junit.Test;

import static junit.framework.Assert.assertEquals;
import static junit.framework.Assert.assertNotNull;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Random;
import java.util.SortedSet;

public class AutoCompleterTest {

    static final WeightedEditDistance EDIT_DISTANCE
        = new FixedWeightEditDistance(0.0, -10.0, -10.0, -10.0, Double.NEGATIVE_INFINITY);
    static final int MAX_QUEUE_SIZE = 1000;
    static final double MIN_SCORE = -40.0;

    @Test
    public void testOne() {
        Random random = new Random();
        long seed = random.nextLong();
        random = new Random(seed);
        String[] phrases = new String[] {
            "a",
            "abe",
            "able",
            "ace",
            "aces",
            "acing",
            "ad",
            "add",
            "be",
            "ben",
            "c"
        };
        Map phraseCounts = new HashMap();
        for (String phrase : phrases)
            phraseCounts.put(phrase,random.nextDouble());

        int maxResultsPerPrefix = 3;
        AutoCompleter completer
            = new AutoCompleter(phraseCounts,EDIT_DISTANCE,maxResultsPerPrefix,MAX_QUEUE_SIZE,MIN_SCORE);
        assertNotNull(completer);

    }

    @Test
    public void testBruteForce() {
        Random random = new Random();
        // long seed = random.nextLong();
        long seed = -3652569214004964184L;
        random = new Random(seed);
        int numPhrases = 5 + random.nextInt(500);
        Map phraseCounter = new HashMap();
        for (int i = 0; i < numPhrases; ++i) {
            float randomScore = random.nextFloat();
            while (true) {
                String phrase = randomPhrase(random,12); // dict 1-12
                if (phraseCounter.containsKey(phrase))
                    continue;
                phraseCounter.put(phrase,randomScore);
                break;
            }
        }
        String[] queries = new String[100];
        for (int i = 0; i < queries.length; ++i)
            queries[i] = randomPhrase(random,16);  // query 1-16 (to get bigger ones)

        assertBruteForce(phraseCounter, EDIT_DISTANCE,
                         4, 5000000, queries);
    }

    static String randomPhrase(Random random, int size) {
        char[] cs = new char[1 + random.nextInt(size)]; // 1 to size long
        for (int i = 0; i < cs.length; ++i)
            cs[i] = (char) (65 + random.nextInt(25));
        return new String(cs);
    }

    void assertBruteForce(Map phraseCounter,
                          WeightedEditDistance editDistance,
                          int maxResults, int maxQueue,
                          String[] queries) {
        String[] phrases = new String[phraseCounter.size()];
        float[] counts = new float[phraseCounter.size()];
        int k = 0;
        for (Map.Entry entry : phraseCounter.entrySet()) {
            phrases[k] = entry.getKey();
            counts[k] = entry.getValue().floatValue();
            ++k;
        }

        double totalCount = 0.0;
        for (int i = 0; i < counts.length; ++i)
            totalCount += counts[i];


        double[] logProbs = new double[counts.length];
        for (int i = 0; i < counts.length; ++i)
            logProbs[i] = com.aliasi.util.Math.log2(counts[i]/totalCount);

        AutoCompleter completer
            = new AutoCompleter(phraseCounter, editDistance,
                                maxResults, maxQueue, MIN_SCORE);

        for (String query : queries) {
            SortedSet> expectedResults = bruteForce(query,phrases,logProbs,editDistance,maxResults);
            SortedSet> results = completer.complete(query);

            assertEquals(expectedResults.size(), results.size());
            Iterator> expectedIt = expectedResults.iterator();
            Iterator> resultsIt = results.iterator();
            for (int i = 0; i < expectedResults.size(); ++i) {
                ScoredObject expectedSo = expectedIt.next();
                ScoredObject resultSo = resultsIt.next();
                assertEquals(expectedSo.getObject(), resultSo.getObject());
            }
        }
    }

    static SortedSet> bruteForce(String query,
                                                      String[] phrases,
                                                      double[] logProbs,
                                                      WeightedEditDistance editDistance,
                                                      int maxResults) {
        Map resultMap = new HashMap();

        for (int k = 0; k < phrases.length; ++k) {
            for (int i = 0; i <= phrases[k].length(); ++i) {
                String prefix = phrases[k].substring(0,i);
                double proximity = editDistance.proximity(query,prefix);
                double score = logProbs[k] + proximity;
                if (score >= MIN_SCORE
                    && (!resultMap.containsKey(phrases[k])
                        || resultMap.get(phrases[k]) < score))
                    resultMap.put(phrases[k],score);
            }
        }
        BoundedPriorityQueue> results
            = new BoundedPriorityQueue>(ScoredObject.comparator(),maxResults);
        for (Map.Entry entry : resultMap.entrySet())
            results.offer(new ScoredObject(entry.getKey(), entry.getValue()));

        return results;
    }


}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy