All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.swabunga.spell.engine.SpellDictionaryHashMap Maven / Gradle / Ivy

The newest version!
/*
Jazzy - a Java library for Spell Checking
Copyright (C) 2001 Mindaugas Idzelis
Full text of license can be found in LICENSE.txt

This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
License as published by the Free Software Foundation; either
version 2.1 of the License, or (at your option) any later version.

This library is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
License along with this library; if not, write to the Free Software
Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 */
/*
 * put your module comment here
 * formatted with JxBeauty (c) [email protected]
 */

package com.swabunga.spell.engine;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * The SpellDictionaryHashMap holds the dictionary
 * 

* This class is thread safe. Derived classes should ensure that this preserved. *

* There are many open source dictionary files. For just a few see: * http://wordlist.sourceforge.net/ *

* This dictionary class reads words one per line. Make sure that your word list * is formatted in this way (most are). *

* Note that you must create the dictionary with a word list for the added words * to persist. */ public class SpellDictionaryHashMap extends SpellDictionaryASpell { /** * A field indicating the initial hash map capacity (16KB) for the main * dictionary hash map. Interested to see what the performance of a smaller * initial capacity is like. */ private final static int INITIAL_CAPACITY = 16 * 1024; /** * The hashmap that contains the word dictionary. The map is hashed on the * doublemeta code. The map entry contains a LinkedList of words that have * the same double meta code. */ protected Map> mainDictionary = new HashMap>(INITIAL_CAPACITY); /** Holds the dictionary file for appending */ private File dictFile = null; /** * Dictionary Constructor. * * @throws java.io.IOException indicates a problem with the file system */ public SpellDictionaryHashMap() throws IOException { super((File) null); } /** * Dictionary Constructor. * * @param wordList The file containing the words list for the dictionary * @throws java.io.IOException indicates problems reading the words list * file */ public SpellDictionaryHashMap(Reader wordList) throws IOException { super((File) null); createDictionary(new BufferedReader(wordList)); } /** * Dictionary convenience Constructor. * * @param wordList The file containing the words list for the dictionary * @throws java.io.FileNotFoundException indicates problems locating the * words list file on the system * @throws java.io.IOException indicates problems reading the words list * file */ public SpellDictionaryHashMap(File wordList) throws FileNotFoundException, IOException { this(new FileReader(wordList)); dictFile = wordList; } /** * Dictionary constructor that uses an aspell phonetic file to build the * transformation table. * * @param wordList The file containing the words list for the dictionary * @param phonetic The file to use for phonetic transformation of the * wordlist. * @throws java.io.FileNotFoundException indicates problems locating the * file on the system * @throws java.io.IOException indicates problems reading the words list * file */ public SpellDictionaryHashMap(File wordList, File phonetic) throws FileNotFoundException, IOException { super(phonetic); dictFile = wordList; createDictionary(new BufferedReader(new FileReader(wordList))); } /** * Dictionary constructor that uses an aspell phonetic file to build the * transformation table. Encoding is used for phonetic file only; default * encoding is used for wordList * * @param wordList The file containing the words list for the dictionary * @param phonetic The file to use for phonetic transformation of the * wordlist. * @param phoneticEncoding Uses the character set encoding specified * @throws java.io.FileNotFoundException indicates problems locating the * file on the system * @throws java.io.IOException indicates problems reading the words list or * phonetic information */ public SpellDictionaryHashMap(File wordList, File phonetic, String phoneticEncoding) throws FileNotFoundException, IOException { super(phonetic, phoneticEncoding); dictFile = wordList; createDictionary(new BufferedReader(new FileReader(wordList))); } /** * Dictionary constructor that uses an aspell phonetic file to build the * transformation table. * * @param wordList The file containing the words list for the dictionary * @param phonetic The reader to use for phonetic transformation of the * wordlist. * @throws java.io.IOException indicates problems reading the words list or * phonetic information */ public SpellDictionaryHashMap(Reader wordList, Reader phonetic) throws IOException { super(phonetic); dictFile = null; createDictionary(new BufferedReader(wordList)); } /** * Add words from a file to existing dictionary hashmap. This function can * be called as many times as needed to build the internal word list. * Duplicates are not added. *

* Note that adding a dictionary does not affect the target dictionary file * for the addWord method. That is, addWord() continues to make additions to * the dictionary file specified in createDictionary() *

* * @param wordList a File object that contains the words, on word per line. * @throws FileNotFoundException * @throws IOException */ public void addDictionary(File wordList) throws FileNotFoundException, IOException { addDictionaryHelper(new BufferedReader(new FileReader(wordList))); } /** * Add words from a Reader to existing dictionary hashmap. This function can * be called as many times as needed to build the internal word list. * Duplicates are not added. *

* Note that adding a dictionary does not affect the target dictionary file * for the addWord method. That is, addWord() continues to make additions to * the dictionary file specified in createDictionary() *

* * @param wordList a Reader object that contains the words, on word per * line. * @throws IOException */ public void addDictionary(Reader wordList) throws IOException { addDictionaryHelper(new BufferedReader(wordList)); } /** * Add a word permanently to the dictionary (and the dictionary file). *

* This needs to be made thread safe (synchronized) *

*/ public void addWord(String word) { putWord(word); if (dictFile == null) return; try { FileWriter w = new FileWriter(dictFile.toString(), true); // Open with append. w.write(word); w.write("\n"); w.close(); } catch (IOException ex) { System.out.println("Error writing to dictionary file"); } } /** * Constructs the dictionary from a word list file. *

* Each word in the reader should be on a separate line. *

* This is a very slow function. On my machine it takes quite a while to * load the data in. I suspect that we could speed this up quite allot. */ protected void createDictionary(BufferedReader in) throws IOException { String line = ""; while (line != null) { line = in.readLine(); if (line != null && line.length() > 0) { line = new String(line.toCharArray()); putWord(line); } } } /** * Adds to the existing dictionary from a word list file. If the word * already exists in the dictionary, a new entry is not added. *

* Each word in the reader should be on a separate line. *

* Note: for whatever reason that I haven't yet looked into, the phonetic * codes for a particular word map to a vector of words rather than a hash * table. This is a drag since in order to check for duplicates you have to * iterate through all the words that use the phonetic code. If the * vector-based implementation is important, it may be better to subclass * for the cases where duplicates are bad. */ protected void addDictionaryHelper(BufferedReader in) throws IOException { String line = ""; while (line != null) { line = in.readLine(); if (line != null && line.length() > 0) { line = new String(line.toCharArray()); putWordUnique(line); } } } /** * Allocates a word in the dictionary * * @param word The word to add */ protected void putWord(String word) { String code = getCode(word); List list = mainDictionary.get(code); if (list != null) { list.add(word); } else { list = new ArrayList(); list.add(word); mainDictionary.put(code, list); } } /** * Allocates a word, if it is not already present in the dictionary. A word * with a different case is considered the same. * * @param word The word to add */ protected void putWordUnique(String word) { String code = getCode(word); List list = mainDictionary.get(code); if (list != null) { boolean isAlready = false; for (int i = 0; i < list.size(); i++) { if (word.equalsIgnoreCase((String) list.get(i))) { isAlready = true; break; } } if (!isAlready) list.add(word); } else { list = new ArrayList(); list.add(word); mainDictionary.put(code, list); } } /** * Returns a list of strings (words) for the code. */ public List getWords(String code) { // Check the main dictionary. List mainDictResult = mainDictionary.get(code); if (mainDictResult == null) return new ArrayList(); return mainDictResult; } /** * Returns true if the word is correctly spelled against the current word * list. */ public boolean isCorrect(String word) { List possible = getWords(getCode(word)); if (possible.contains(word)) return true; // JMH should we always try the lowercase version. If I dont then // capitalised // words are always returned as incorrect. else if (possible.contains(word.toLowerCase())) return true; return false; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy