com.swabunga.spell.engine.SpellDictionaryDichoDisk Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of jazzy Show documentation
Show all versions of jazzy Show documentation
This is a fork of the jazzy dictionary
The newest version!
/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.
This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
Lesser General Public License for more details.
You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package com.swabunga.spell.engine;
import java.io.ByteArrayOutputStream;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.util.LinkedList;
import java.util.List;
/**
 * Another implementation of <code>SpellDictionary</code> that doesn't cache
 * any words in memory. Avoids the huge footprint of
 * <code>SpellDictionaryHashMap</code> at the cost of relatively minor latency.
 * A future version of this class that implements some caching strategies
 * might be a good idea, if there's any demand for it.
 * <p>
 * This implementation requires a special dictionary file, with "code*word"
 * lines sorted by code (in the order defined by
 * <code>String.compareTo</code>, which is what the search compares with). It
 * uses a dichotomy (binary search) algorithm to search for words in the
 * dictionary.
 * <p>
 * The dictionary file is opened read-only by the constructor and every lookup
 * seeks on that single file handle. Nothing in this class closes the handle
 * or synchronizes access to it.
 *
 * @author Damien Guillaume
 * @version 0.1
 */
public class SpellDictionaryDichoDisk extends SpellDictionaryASpell {

    /** Initial size, in bytes, of the buffer that collects one encoded line. */
    private static final int LINE_BUFFER_SIZE = 256;

    /** Holds the dictionary file for reading */
    private final RandomAccessFile dictFile;

    /**
     * Dictionary and phonetic file encoding. When <code>null</code>, lines are
     * read with <code>RandomAccessFile.readLine()</code> instead of being
     * decoded with a named character set.
     */
    private final String encoding;

    /**
     * Dictionary convenience Constructor.
     *
     * @param wordList The file containing the words list for the dictionary
     * @throws java.io.FileNotFoundException indicates problems locating the
     * words list file on the system
     * @throws java.io.IOException indicates problems reading the words list
     * file
     */
    public SpellDictionaryDichoDisk(File wordList)
            throws FileNotFoundException, IOException {
        super((File) null);
        this.encoding = null;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Dictionary convenience Constructor.
     *
     * @param wordList The file containing the words list for the dictionary
     * @param encoding Uses the character set encoding specified
     * @throws java.io.FileNotFoundException indicates problems locating the
     * words list file on the system
     * @throws java.io.IOException indicates problems reading the words list
     * file
     */
    public SpellDictionaryDichoDisk(File wordList, String encoding)
            throws FileNotFoundException, IOException {
        super((File) null);
        this.encoding = encoding;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to build the
     * transformation table.
     *
     * @param wordList The file containing the words list for the dictionary
     * @param phonetic The file to use for phonetic transformation of the
     * wordlist.
     * @throws java.io.FileNotFoundException indicates problems locating the
     * file on the system
     * @throws java.io.IOException indicates problems reading the words list
     * file
     */
    public SpellDictionaryDichoDisk(File wordList, File phonetic)
            throws FileNotFoundException, IOException {
        super(phonetic);
        this.encoding = null;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to build the
     * transformation table.
     *
     * @param wordList The file containing the words list for the dictionary
     * @param phonetic The file to use for phonetic transformation of the
     * wordlist.
     * @param encoding Uses the character set encoding specified
     * @throws java.io.FileNotFoundException indicates problems locating the
     * file on the system
     * @throws java.io.IOException indicates problems reading the words list
     * file
     */
    public SpellDictionaryDichoDisk(File wordList, File phonetic,
            String encoding) throws FileNotFoundException, IOException {
        super(phonetic, encoding);
        this.encoding = encoding;
        this.dictFile = new RandomAccessFile(wordList, "r");
    }

    /**
     * Add a word permanently to the dictionary (and the dictionary file).
     * Not implemented: the call only prints an error message to
     * <code>System.err</code> and leaves the dictionary unchanged.
     *
     * @param word The word to add.
     */
    public void addWord(String word) {
        System.err
                .println("error: addWord is not implemented for SpellDictionaryDichoDisk");
    }

    /**
     * Search the dictionary file for the words corresponding to the code
     * within positions p1 - p2.
     * <p>
     * The method probes the first complete line that starts after the middle
     * of the range, compares its code with the requested one, and recurses
     * into the half (or both halves, on a match) that can still contain the
     * code. Once the probed line reaches the end of the range, the remaining
     * range is scanned line by line.
     *
     * @param code the phonetic code to look for
     * @param p1 file offset of the start of the range; expected to be the
     * start of a line
     * @param p2 file offset bounding the range; only lines starting before it
     * are considered
     * @return the words found for the code, in file order
     * @throws IOException if the file cannot be read or a probed line has no
     * '*' separator
     */
    private List dichoFind(String code, long p1, long p2)
            throws IOException {
        long pm = (p1 + p2) / 2;
        dictFile.seek(pm);
        // The midpoint generally falls inside a line: skip to the next line
        // start. The skipped line stays covered by the [p1, pm - 1] range.
        readDictLine();
        pm = dictFile.getFilePointer();
        String l = readDictLine();
        long pm2 = dictFile.getFilePointer();
        if (l == null || pm2 >= p2) {
            // Range too small to split any further (or end of file reached).
            return seqFind(code, p1, p2);
        }
        int istar = separatorIndex(l);
        String testcode = l.substring(0, istar);
        int comp = code.compareTo(testcode);
        if (comp < 0) {
            return dichoFind(code, p1, pm - 1);
        }
        if (comp > 0) {
            return dichoFind(code, pm2, p2);
        }
        // Same code: matching lines may continue on both sides of the probe.
        List l1 = dichoFind(code, p1, pm - 1);
        List l2 = dichoFind(code, pm2, p2);
        l1.add(l.substring(istar + 1));
        l1.addAll(l2);
        return l1;
    }

    /**
     * Sequentially scan the lines starting within positions p1 - p2 and
     * collect the words whose code equals the requested one.
     *
     * @param code the phonetic code to look for
     * @param p1 file offset where the scan starts
     * @param p2 file offset at which the scan stops; a line is read only if
     * it starts before this offset
     * @return the matching words, in file order (possibly empty)
     * @throws IOException if the file cannot be read or a line has no '*'
     * separator
     */
    private List seqFind(String code, long p1, long p2)
            throws IOException {
        List list = new LinkedList();
        dictFile.seek(p1);
        while (dictFile.getFilePointer() < p2) {
            String l = readDictLine();
            if (l == null) {
                // End of file reached before p2: nothing more to scan.
                break;
            }
            int istar = separatorIndex(l);
            if (l.substring(0, istar).equals(code)) {
                list.add(l.substring(istar + 1));
            }
        }
        return list;
    }

    /**
     * Locate the '*' that separates the code from the word in a dictionary
     * line.
     *
     * @param line a line of the dictionary file
     * @return the index of the first '*' in the line
     * @throws IOException if the line contains no '*'
     */
    private static int separatorIndex(String line) throws IOException {
        int istar = line.indexOf('*');
        if (istar == -1) {
            throw new IOException("bad format: no * !");
        }
        return istar;
    }

    /**
     * Read the next line of dictFile from the current file position, using
     * the configured encoding when there is one.
     *
     * @return the line without its terminator, or <code>null</code> if the
     * file pointer is already at the end of the file
     * @throws IOException if the file cannot be read
     */
    private String readDictLine() throws IOException {
        if (encoding == null) {
            return dictFile.readLine();
        }
        return dictReadLine();
    }

    /**
     * Read a line of dictFile with a specific encoding.
     * <p>
     * Bytes are collected up to, but not including, the line terminator and
     * then decoded with the configured encoding. A line ends at "\n", "\r",
     * "\r\n" or the end of the file, matching what
     * <code>RandomAccessFile.readLine()</code> accepts, so both read paths
     * agree on where lines start. Lines may be of any length.
     * <p>
     * Terminators are detected on raw bytes, so this only works for encodings
     * in which CR and LF are the single bytes 0x0D and 0x0A.
     *
     * @return the decoded line without its terminator, or <code>null</code>
     * if the end of the file was reached before any byte could be read
     * @throws IOException if the file cannot be read or the encoding is not
     * supported
     */
    private String dictReadLine() throws IOException {
        ByteArrayOutputStream line = new ByteArrayOutputStream(LINE_BUFFER_SIZE);
        boolean sawByte = false;
        int b;
        while ((b = dictFile.read()) != -1) {
            sawByte = true;
            if (b == '\n') {
                break;
            }
            if (b == '\r') {
                // Swallow the '\n' of a "\r\n" pair; otherwise step back so
                // the byte after a lone '\r' starts the next line.
                long afterCr = dictFile.getFilePointer();
                if (dictFile.read() != '\n') {
                    dictFile.seek(afterCr);
                }
                break;
            }
            line.write(b);
        }
        if (!sawByte) {
            return null;
        }
        return line.toString(encoding);
    }

    /**
     * Returns a list of strings (words) for the code.
     * <p>
     * If the dictionary file cannot be read or is badly formatted, the
     * problem is reported on <code>System.err</code> and an empty list is
     * returned.
     *
     * @param code The phonetic code common to the list of words
     * @return A list of words having the same phonetic code
     */
    public List getWords(String code) {
        List list;
        try {
            list = dichoFind(code, 0, dictFile.length() - 1);
        } catch (IOException ex) {
            System.err.println("IOException: " + ex.getMessage());
            list = new LinkedList();
        }
        return list;
    }
}