All downloads are free. The search and download functionality uses the official Maven repository.

com.aliasi.dict.TrieDictionary Maven / Gradle / Ivy

Go to download

This is the original LingPipe: http://alias-i.com/lingpipe/web/download.html. No changes were made to the source code.

There is a newer version: 4.1.2-JL1.0
Show newest version
/*
 * LingPipe v. 4.1.0
 * Copyright (C) 2003-2011 Alias-i
 *
 * This program is licensed under the Alias-i Royalty Free License
 * Version 1 WITHOUT ANY WARRANTY, without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the Alias-i
 * Royalty Free License Version 1 for more details.
 *
 * You should have received a copy of the Alias-i Royalty Free License
 * Version 1 along with this program; if not, visit
 * http://alias-i.com/lingpipe/licenses/lingpipe-license-1.txt or contact
 * Alias-i, Inc. at 181 North 11th Street, Suite 401, Brooklyn, NY 11211,
 * +1 (718) 290-9170.
 */

package com.aliasi.dict;

// import com.aliasi.util.Arrays;
import com.aliasi.util.AbstractExternalizable;
import com.aliasi.util.Compilable;
import com.aliasi.util.Iterators;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.Serializable;

import java.util.Iterator;
import java.util.LinkedList;

/**
 * A TrieDictionary stores a dictionary using a character
 * trie structure.  This requires a constant amount of space for each
 * entry and each prefix of an entry's string.  Lookups take an amount
 * of time proportional to the length of the string being looked up,
 * with each character requiring a lookup in a map.  The lookup is
 * done with binary search in this implementation in time proportional
 * to the log of the number of characters, for a total lookup time
 * of O(n log c) where n is the
 * number of characters in the string being looked up and c
 * is the number of charactes.
 *
 * 

Tries are a popular data structure; see the Wikipedia Trie topic for * examples and references. Tries are also used in the language model * classes {@link com.aliasi.lm.TrieCharSeqCounter} and {@link * com.aliasi.lm.TrieIntSeqCounter} and the compiled forms of all of * the language models. * *

Compilation and Serialization

* * The trie dictionary implements both the Java {@link Serializable} * and LingPipe {@link Compilable} interfaces to write the contents * of a trie dictionary to an object output. Both approaches produce * the same result and the dictionary read back in will be an instance * of TrieDictionary and equivalent to the dictionary that * was serialized or compiled. * * @author Bob Carpenter * @version 4.0.0 * @since LingPipe2.1 * @param the type of object stored in this dictionary */ public class TrieDictionary extends AbstractDictionary implements Serializable, Compilable { static final long serialVersionUID = -6772406715071883449L; Node mRootNode = new Node(); /** * Construct a trie-based dictionary. */ public TrieDictionary() { /* do ntohing */ } @Override DictionaryEntry[] phraseEntries(String phrase) { Node node = mRootNode; for (int i = 0; i < phrase.length(); ++i) { node = node.getDtr(phrase.charAt(i)); if (node == null) return Node.emptyEntries(); } return node.mEntries; } @Override public Iterator> phraseEntryIt(String phrase) { return Iterators.>array(phraseEntries(phrase)); } /** * Equal entries will be ignored. */ @Override public void addEntry(DictionaryEntry entry) { String phrase = entry.phrase(); Node node = mRootNode; for (int i = 0; i < phrase.length(); ++i) node = node.getOrAddDtr(phrase.charAt(i)); node.addEntry(entry); } /** * Returns an iterator over all of the dictionary entries * for this dictioniary. This is the implementation of the iterator * view of this dictionary as a collection (set of entries). * * @return An iterator over all of the dictionary entries for this * dictioniary. */ @Override public Iterator> iterator() { return new TrieIterator(mRootNode); } private Object writeReplace() { return new Externalizer(this); } /** * Compile the entries in this dictionary to the specified object output. * * @param out Object output to which to write the dictionary. * @throws IOException If there is an underlying I/O error during * the write. 
*/ @Override public void compileTo(ObjectOutput out) throws IOException { out.writeObject(new Externalizer(this)); } private static class Externalizer extends AbstractExternalizable { static final long serialVersionUID = -6351978792499636468L; private final TrieDictionary mDictionary; public Externalizer(TrieDictionary dict) { mDictionary = dict; } public Externalizer() { this(null); } @Override public Object read(ObjectInput in) throws IOException, ClassNotFoundException { TrieDictionary dict = new TrieDictionary(); int numEntries = in.readInt(); for (int i = 0; i < numEntries; ++i) { // required for readObject; safe with good serialization @SuppressWarnings("unchecked") DictionaryEntry entry = (DictionaryEntry) in.readObject(); dict.addEntry(entry); } return dict; } @Override public void writeExternal(ObjectOutput out) throws IOException { int count = mDictionary.size(); out.writeInt(count); for (DictionaryEntry entry : mDictionary) entry.compileTo(out); } } private static class TrieIterator extends Iterators.Buffered> { LinkedList> mQueue = new LinkedList>(); DictionaryEntry[] mEntries; int mNextEntry = -1; TrieIterator(Node root) { mQueue.add(root); } @Override protected DictionaryEntry bufferNext() { while (mEntries == null && !mQueue.isEmpty()) { Node node = mQueue.removeFirst(); addDtrs(node.mDtrNodes); if (node.mEntries.length > 0) { mEntries = node.mEntries; mNextEntry = 0; } } if (mEntries == null) return null; DictionaryEntry result = mEntries[mNextEntry++]; if (mNextEntry >= mEntries.length) mEntries = null; return result; } void addDtrs(Node[] dtrs) { for (int i = dtrs.length; --i >= 0; ) { if (dtrs[i] == null) System.out.println("ADDING=" + i); mQueue.addFirst(dtrs[i]); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy