com.swabunga.spell.engine.SpellDictionaryHashMap Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jazzy Show documentation
This is a fork of the jazzy dictionary
The newest version!
/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*
 * put your module comment here
 * formatted with JxBeauty (c) [email protected]
 */

package com.swabunga.spell.engine;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * The SpellDictionaryHashMap holds the dictionary.
 * <p>
 * This class is intended to be thread safe. Derived classes should ensure that
 * this is preserved. NOTE(review): no synchronization is visible in this class
 * itself -- confirm where thread safety is actually enforced.
 * <p>
 * There are many open source dictionary files. For just a few see:
 * http://wordlist.sourceforge.net/
 * <p>
 * This dictionary class reads words one per line. Make sure that your word list
 * is formatted in this way (most are).
 * <p>
 * Note that you must create the dictionary with a word list file for the added
 * words to persist.
 */
public class SpellDictionaryHashMap extends SpellDictionaryASpell {
    /**
     * A field indicating the initial hash map capacity (16KB) for the main
     * dictionary hash map. Interested to see what the performance of a smaller
     * initial capacity is like.
     */
    private final static int INITIAL_CAPACITY = 16 * 1024;

    /**
     * The hashmap that contains the word dictionary. The map is hashed on the
     * doublemeta code. The map entry contains a LinkedList of words that have
     * the same double meta code.
     */
    protected Map> mainDictionary = new HashMap>(INITIAL_CAPACITY);

    /** Holds the dictionary file for appending */
    private File dictFile = null;

    /**
     * Dictionary Constructor.
     * 
     * @throws java.io.IOException indicates a problem with the file system
     */
    public SpellDictionaryHashMap() throws IOException {
        super((File) null);
    }

    /**
     * Dictionary Constructor.
     * 
     * @param wordList The file containing the words list for the dictionary
     * @throws java.io.IOException indicates problems reading the words list
     *             file
     */
    public SpellDictionaryHashMap(Reader wordList) throws IOException {
        super((File) null);
        createDictionary(new BufferedReader(wordList));
    }

    /**
     * Dictionary convenience Constructor.
     * 
     * @param wordList The file containing the words list for the dictionary
     * @throws java.io.FileNotFoundException indicates problems locating the
     *             words list file on the system
     * @throws java.io.IOException indicates problems reading the words list
     *             file
     */
    public SpellDictionaryHashMap(File wordList) throws FileNotFoundException,
            IOException {
        this(new FileReader(wordList));
        dictFile = wordList;
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to build the
     * transformation table.
     * 
     * @param wordList The file containing the words list for the dictionary
     * @param phonetic The file to use for phonetic transformation of the
     *            wordlist.
     * @throws java.io.FileNotFoundException indicates problems locating the
     *             file on the system
     * @throws java.io.IOException indicates problems reading the words list
     *             file
     */
    public SpellDictionaryHashMap(File wordList, File phonetic)
            throws FileNotFoundException, IOException {
        super(phonetic);
        dictFile = wordList;
        createDictionary(new BufferedReader(new FileReader(wordList)));
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to build the
     * transformation table. Encoding is used for phonetic file only; default
     * encoding is used for wordList
     * 
     * @param wordList The file containing the words list for the dictionary
     * @param phonetic The file to use for phonetic transformation of the
     *            wordlist.
     * @param phoneticEncoding Uses the character set encoding specified
     * @throws java.io.FileNotFoundException indicates problems locating the
     *             file on the system
     * @throws java.io.IOException indicates problems reading the words list or
     *             phonetic information
     */
    public SpellDictionaryHashMap(File wordList, File phonetic,
            String phoneticEncoding) throws FileNotFoundException, IOException {
        super(phonetic, phoneticEncoding);
        dictFile = wordList;
        createDictionary(new BufferedReader(new FileReader(wordList)));
    }

    /**
     * Dictionary constructor that uses an aspell phonetic file to build the
     * transformation table.
     * 
     * @param wordList The file containing the words list for the dictionary
     * @param phonetic The reader to use for phonetic transformation of the
     *            wordlist.
     * @throws java.io.IOException indicates problems reading the words list or
     *             phonetic information
     */
    public SpellDictionaryHashMap(Reader wordList, Reader phonetic)
            throws IOException {
        super(phonetic);
        dictFile = null;
        createDictionary(new BufferedReader(wordList));
    }

    /**
     * Add words from a file to existing dictionary hashmap. This function can
     * be called as many times as needed to build the internal word list.
     * Duplicates are not added.
     * 

     * Note that adding a dictionary does not affect the target dictionary file
     * for the addWord method. That is, addWord() continues to make additions to
     * the dictionary file specified in createDictionary()
     * 

     * 
     * @param wordList a File object that contains the words, on word per line.
     * @throws FileNotFoundException
     * @throws IOException
     */
    public void addDictionary(File wordList) throws FileNotFoundException,
            IOException {
        addDictionaryHelper(new BufferedReader(new FileReader(wordList)));
    }

    /**
     * Add words from a Reader to existing dictionary hashmap. This function can
     * be called as many times as needed to build the internal word list.
     * Duplicates are not added.
     * 

     * Note that adding a dictionary does not affect the target dictionary file
     * for the addWord method. That is, addWord() continues to make additions to
     * the dictionary file specified in createDictionary()
     * 

     * 
     * @param wordList a Reader object that contains the words, on word per
     *            line.
     * @throws IOException
     */
    public void addDictionary(Reader wordList) throws IOException {
        addDictionaryHelper(new BufferedReader(wordList));
    }

    /**
     * Add a word permanently to the dictionary (and the dictionary file).
     * 

     * This needs to be made thread safe (synchronized)
     * 
     */
    public void addWord(String word) {
        putWord(word);
        if (dictFile == null)
            return;
        try {
            FileWriter w = new FileWriter(dictFile.toString(), true);
            // Open with append.
            w.write(word);
            w.write("\n");
            w.close();
        } catch (IOException ex) {
            System.out.println("Error writing to dictionary file");
        }
    }

    /**
     * Constructs the dictionary from a word list file.
     * 
     * Each word in the reader should be on a separate line.
     * 

     * This is a very slow function. On my machine it takes quite a while to
     * load the data in. I suspect that we could speed this up quite allot.
     */
    protected void createDictionary(BufferedReader in) throws IOException {
        String line = "";
        while (line != null) {
            line = in.readLine();
            if (line != null && line.length() > 0) {
                line = new String(line.toCharArray());
                putWord(line);
            }
        }
    }

    /**
     * Adds to the existing dictionary from a word list file. If the word
     * already exists in the dictionary, a new entry is not added.
     * 

     * Each word in the reader should be on a separate line.
     * 
     * Note: for whatever reason that I haven't yet looked into, the phonetic
     * codes for a particular word map to a vector of words rather than a hash
     * table. This is a drag since in order to check for duplicates you have to
     * iterate through all the words that use the phonetic code. If the
     * vector-based implementation is important, it may be better to subclass
     * for the cases where duplicates are bad.
     */
    protected void addDictionaryHelper(BufferedReader in) throws IOException {

        String line = "";
        while (line != null) {
            line = in.readLine();
            if (line != null && line.length() > 0) {
                line = new String(line.toCharArray());
                putWordUnique(line);
            }
        }
    }

    /**
     * Allocates a word in the dictionary
     * 
     * @param word The word to add
     */
    protected void putWord(String word) {
        String code = getCode(word);
        List list = mainDictionary.get(code);
        if (list != null) {
            list.add(word);
        } else {
            list = new ArrayList();
            list.add(word);
            mainDictionary.put(code, list);
        }
    }

    /**
     * Allocates a word, if it is not already present in the dictionary. A word
     * with a different case is considered the same.
     * 
     * @param word The word to add
     */
    protected void putWordUnique(String word) {

        String code = getCode(word);
        List list = mainDictionary.get(code);

        if (list != null) {

            boolean isAlready = false;

            for (int i = 0; i < list.size(); i++) {

                if (word.equalsIgnoreCase((String) list.get(i))) {
                    isAlready = true;
                    break;
                }
            }

            if (!isAlready)
                list.add(word);

        } else {

            list = new ArrayList();
            list.add(word);
            mainDictionary.put(code, list);

        }
    }

    /**
     * Returns a list of strings (words) for the code.
     */
    public List getWords(String code) {
        // Check the main dictionary.
        List mainDictResult = mainDictionary.get(code);
        if (mainDictResult == null)
            return new ArrayList();
        return mainDictResult;
    }

    /**
     * Returns true if the word is correctly spelled against the current word
     * list.
     */
    public boolean isCorrect(String word) {
        List possible = getWords(getCode(word));
        if (possible.contains(word))
            return true;
        // JMH should we always try the lowercase version. If I dont then
        // capitalised
        // words are always returned as incorrect.
        else if (possible.contains(word.toLowerCase()))
            return true;
        return false;
    }
}