All downloads are free. The search and download functionality uses the official Maven repository.

com.atlascopco.hunspell.Hunspell Maven / Gradle / Ivy

Go to download

This project is a binding for the Hunspell library for Java, using BridJ for the bindings.

There is a newer version: 1.0.4
Show newest version
package com.atlascopco.hunspell;

import java.io.Closeable;
import java.nio.charset.Charset;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import org.bridj.Pointer;
import org.bridj.Pointer.Releaser;
import org.bridj.Pointer.StringType;
import org.bridj.PointerIO;

import com.atlascopco.hunspell.HunspellLibrary.Hunhandle;

/**
 * This class implements an object-oriented interface to the C API for Hunspell.
 * 
 * @author Thomas Joiner
 * 
 */
public class Hunspell implements Closeable {
	
	private Pointer handle;
	private Exception closedAt;
	
	/**
	 * Instantiate a hunspell object with the given dictionary and affix file
	 * @param dictionaryPath the path to the dictionary
	 * @param affixPath the path to the affix file
	 */
	public Hunspell(String dictionaryPath, String affixPath) {
		Pointer affpath = Pointer.pointerToCString(affixPath);
		Pointer dpath = Pointer.pointerToCString(dictionaryPath);
		
		handle = HunspellLibrary.Hunspell_create(affpath, dpath);
		
		if ( this.handle == null ) {
			throw new RuntimeException("Unable to instantiate Hunspell handle.");
		}
	}
	
	/**
	 * 

* Instantiate a hunspell object with the given hunzipped dictionary and * affix files. *

* *

* This is, however more complicated than it looks. Note that the paths * aren't actually to the hunzipped dictionary and affix files, they are the * paths to what they would be named if they weren't hunzipped. In other * words, if you have the files {@code /path/to/dictionary.dic.hz} and * {@code /path/to/dictionary.aff.hz} you would call * {@code new Hunspell("/path/to/dictionary.dic", "/path/to/dictionary.aff", "password")} * . Note, however, that if the paths that you give actually exist, those * will be prioritized over the hunzipped versions and will be used instead. *

* * @param dictionaryPath the path to the dictionary * @param affixPath the path to the affix file * @param key the key used to encrypt the dictionary files */ public Hunspell(String dictionaryPath, String affixPath, String key) { Pointer affpath = Pointer.pointerToCString(affixPath); Pointer dpath = Pointer.pointerToCString(dictionaryPath); Pointer keyCString = Pointer.pointerToCString(key); handle = HunspellLibrary.Hunspell_create_key(affpath, dpath, keyCString); if ( this.handle == null ) { throw new RuntimeException("Unable to instantiate Hunspell handle."); } } /** * Spellcheck the given word. * @param word the word to check * @return true if it is spelled correctly * @see HunspellLibrary#Hunspell_spell(Pointer, Pointer) */ public boolean spell(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); int result = HunspellLibrary.Hunspell_spell(handle, wordCString); return result != 0; } /** * Same as {@link #spell(String)} * @param word the word to check. * @return true if it correct * @see #spell(String) */ public boolean isCorrect(String word) { return spell(word); } private Charset getDictionaryCharset() { return Charset.forName(getDictionaryEncoding()); } /** * Get the dictionary encoding for this object. * @return the encoding for the dictionary * @see HunspellLibrary#Hunspell_get_dic_encoding(Pointer) */ public String getDictionaryEncoding() { // check handle before attempting to operate on checkHandle(); Pointer dictionaryEncoding = HunspellLibrary.Hunspell_get_dic_encoding(handle); return dictionaryEncoding.getCString(); } /** * Suggest a list of corrections for the given word. 
* @param word the word to get suggestions for * @return the list of suggestions * @see HunspellLibrary#Hunspell_suggest(Pointer, Pointer, Pointer) */ public List suggest(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_suggest(handle, slst, wordCString); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Morphological analysis of the given word. * @param word the word to analyze * @return the analysis * @see HunspellLibrary#Hunspell_analyze(Pointer, Pointer, Pointer) */ public List analyze(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_analyze(handle, slst, wordCString); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Gets the stems of the word. 
* @param word the word * @return stems for the word * @see HunspellLibrary#Hunspell_stem(Pointer, Pointer, Pointer) */ public List stem(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_stem(handle, slst, wordCString); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Gets the stems of a word from the results of {@link #analyze(String)}. * @param analysis the results of {@link #analyze(String)} * @return the stem information * @see HunspellLibrary#Hunspell_stem2(Pointer, Pointer, Pointer, int) */ public List stem(List analysis) { // check handle before attempting to operate on checkHandle(); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); Pointer> analysisCStrings = toEncodedCStringList(analysis); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_stem2(handle, slst, analysisCStrings, analysis.size()); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Generate a form for the first word based on the second word. 
* @param word the word to generate the form for * @param basedOn the word to base the generation on * @return the generated form * @see HunspellLibrary#Hunspell_generate(Pointer, Pointer, Pointer, Pointer) */ public List generate(String word, String basedOn) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); checkWord("basedOn", basedOn); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); Pointer wordCString = toEncodedCString(word); Pointer word2CString = toEncodedCString(basedOn); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_generate(handle, slst, wordCString, word2CString); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Generate a form for the given word based on the analysis of a second word. * @param word the word for which to generate the form * @param basedOnAnalysis the analysis of the word that it is based on * @return the generated form(s) * @see HunspellLibrary#Hunspell_generate2(Pointer, Pointer, Pointer, Pointer, int) */ public List generate(String word, List basedOnAnalysis) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer>> slst = Pointer.allocatePointerPointer(Byte.class); Pointer wordCString = toEncodedCString(word); Pointer> analysisCStrings = toEncodedCStringList(basedOnAnalysis); int numResults = 0; List suggestions = Collections.emptyList(); try { numResults = HunspellLibrary.Hunspell_generate2(handle, slst, wordCString, analysisCStrings, basedOnAnalysis.size()); suggestions = encodedCStringListToStringList(slst, numResults); } finally { if ( slst != null ) { this.free_list(slst, numResults); } } return suggestions; } /** * Add an additional dictionary file (.dic file only, no affix file) * to the runtime dictionary. * @param dpath the Path to the dictionary file. 
* @see HunspellLibrary#Hunspell_add_dic(Pointer, Pointer) */ public void addDic(String dpath) { // check handle before attempting to operate on checkHandle(); Pointer dpathCString = Pointer.pointerToCString(dpath); int result = HunspellLibrary.Hunspell_add_dic(handle, dpathCString); if ( result != 0) { throw new RuntimeException("No available slot to add dictionary."); } } /** * Add a word to the runtime dictionary. * @param word the word to add * @see HunspellLibrary#Hunspell_add(Pointer, Pointer) */ public void add(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); int result = HunspellLibrary.Hunspell_add(handle, wordCString); if ( result != 0 ) { throw new RuntimeException("An error occurred when calling Hunspell_add: "+result); } } /** * Add the word to the runtime dictionary with the affix flags of the given * example word so that affixed versions will be recognized as well. * * @param word the word * @param exampleWord a word that shows an example of what affix rules apply * @see HunspellLibrary#Hunspell_add_with_affix(Pointer, Pointer, Pointer) */ public void addWithAffix(String word, String exampleWord) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); checkWord("exampleWord", exampleWord); Pointer wordCString = toEncodedCString(word); Pointer example = toEncodedCString(exampleWord); int result = HunspellLibrary.Hunspell_add_with_affix(handle, wordCString, example); if ( result != 0 ) { throw new RuntimeException("An error occurred when calling Hunspell_add_with_affix: "+result); } } /** * Remove a word from the runtime dictionary. 
* * @param word the word to remove * @see HunspellLibrary#Hunspell_remove(Pointer, Pointer) */ public void remove(String word) { // check handle before attempting to operate on checkHandle(); checkWord("word", word); Pointer wordCString = toEncodedCString(word); int result = HunspellLibrary.Hunspell_remove(handle, wordCString); if ( result != 0 ) { throw new RuntimeException("An error occurred when calling Hunspell_remove: "+result); } } /** * This method frees a list that Hunspell allocated. * @param slst the list that hunspell allocated * @param n the number of items in the list * @see HunspellLibrary#Hunspell_free_list(Pointer, Pointer, int) */ private void free_list(Pointer>> slst, int n) { HunspellLibrary.Hunspell_free_list(handle, slst, n); } /** * Ensures the given word is not too long for the library to handle it * @param parameterName the name of the parameter (for the error message) * @param value the value of the parameter */ private void checkWord(String parameterName, String value) { if ( value.length() > HunspellLibrary.MAXWORDUTF8LEN ) { throw new IllegalArgumentException("Word '"+parameterName+"' greater than max acceptable length ("+HunspellLibrary.MAXWORDUTF8LEN+"): "+value); } } /** * Checks the handle to make sure that it is still non-null. */ private void checkHandle() { if ( this.handle == null && this.closedAt != null ) { throw new IllegalStateException("This instance has already been closed.", closedAt); } else if ( this.handle == null ) { throw new IllegalStateException("Hunspell handle is null, but instance has not been closed."); } } /** * This method will handle the destruction of the Hunspell instance and * ensure that the memory is reclaimed. 
*/ @Override public void close() { // Don't attempt to close multiple times if ( this.closedAt != null ) { return; } // Just in case the user has been messing with what they shouldn't if ( this.handle != null ) { HunspellLibrary.Hunspell_destroy(handle); } else { return; } this.handle = null; this.closedAt = new Exception(); } @Override protected void finalize() throws Throwable { if (this.closedAt == null){ this.close(); System.err.println("Hunspell instance was not closed!"); } super.finalize(); } /** * Returns the bytes for a cstring (null terminated string) in the * encoding of the dictionary. * * @param str the string to encode * @return the encoded bytes */ private byte[] encodeCStringBytes(String str) { // get the encoded bytes of the string byte[] strBytes = str.getBytes(getDictionaryCharset()); // allocate a buffer with one more byte so we can create a null-terminated // cstring byte[] cStringBytes = new byte[strBytes.length+1]; // copy the encoded bytes into the cstring buffer System.arraycopy(strBytes, 0, cStringBytes, 0, strBytes.length); // ensure that the final byte is set to null cStringBytes[cStringBytes.length-1] = 0; return cStringBytes; } /** * Returns a BridJ pointer to the encoded cstring of the * provided string. * @param str the string to encode * @return the pointer */ private Pointer toEncodedCString(String str) { byte[] cStringBytes = encodeCStringBytes(str); // convert it for use with BridJ Pointer ptrBytes = Pointer.pointerToBytes(cStringBytes); return ptrBytes; } /** * Convert a list of strings to a list of cstrings in the * dictionary encoding. * @param strings the strings to encode * @return the pointer * @see Pointer#pointerToCStrings(String...) 
*/ @SuppressWarnings("unchecked") public Pointer> toEncodedCStringList(final List strings) { if (strings == null) return null; final int len = strings.size(); final Pointer[] pointers = (Pointer[])new Pointer[len]; Pointer> mem = Pointer.allocateArray(PointerIO.getPointerInstance(Byte.class), len, new Releaser() { //@Override public void release(Pointer p) { Pointer> mem = (Pointer>)p; Charset dictionaryCharset = getDictionaryCharset(); for (int i = 0; i < len; i++) { Pointer pp = mem.get(i); if (pp != null) strings.set(i, pp.getString(StringType.C, dictionaryCharset)); pp = pointers[i]; if (pp != null) pp.release(); } } }); for (int i = 0; i < len; i++) mem.set(i, pointers[i] = toEncodedCString(strings.get(i))); return mem; } /** * Turn a cstring encoded in the dictionary's charset to a Java string. * @param cString the cstring to decode * @return the string */ private String encodedCStringToJavaString(Pointer cString) { return cString.getStringAtOffset(0, StringType.C, getDictionaryCharset()); } /** * Turn a list of cstrings encoded in the dictionary's charset to a Java list of strings. * @param slst the results list * @param numResults the number of results in the list * @return the list of strings */ private List encodedCStringListToStringList(Pointer>> slst, int numResults) { List strings = new ArrayList(); for ( int i = 0; i < numResults; i++) { strings.add(encodedCStringToJavaString(slst.get().get(i))); } return strings; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy