All Downloads are FREE. Search and download functionalities are using the official Maven repository.

morfologik.stemming.Dictionary Maven / Gradle / Ivy

There is a newer version: 2.1.9
Show newest version
package morfologik.stemming;

import java.io.*;
import java.net.URL;
import java.util.*;

import morfologik.fsa.FSA;
import morfologik.util.FileUtils;
import morfologik.util.ResourceUtils;

/**
 * A dictionary combines {@link FSA} automaton and metadata describing the
 * internals of dictionary entries' coding ({@link DictionaryMetadata}.
 * 
 * 

* A dictionary consists of two files: *

    *
  • an actual compressed FSA file, *
  • a metadata file, describing the dictionary. *
* Use static methods in this class to read dictionaries and their metadata. */ public final class Dictionary { /** * Expected metadata file extension. */ public final static String METADATA_FILE_EXTENSION = "info"; /** * {@link FSA} automaton with the compiled dictionary data. */ public final FSA fsa; /** * Metadata associated with the dictionary. */ public final DictionaryMetadata metadata; /** * Default loaded dictionaries. */ public static final WeakHashMap defaultDictionaries = new WeakHashMap(); /** * It is strongly recommended to use static methods in this class for * reading dictionaries. * * @param fsa * An instantiated {@link FSA} instance. * * @param metadata * A map of attributes describing the compression format and * other settings not contained in the FSA automaton. For an * explanation of available attributes and their possible values, * see {@link DictionaryMetadata}. */ public Dictionary(FSA fsa, DictionaryMetadata metadata) { this.fsa = fsa; this.metadata = metadata; } /** * Attempts to load a dictionary using the path to the FSA file and the * expected metadata extension. */ public static Dictionary read(File fsaFile) throws IOException { final File featuresFile = new File(fsaFile.getParent(), getExpectedFeaturesName(fsaFile.getName())); FileUtils.assertExists(featuresFile, true, false); return readAndClose( new FileInputStream(fsaFile), new FileInputStream(featuresFile)); } /** *

* Attempts to load a dictionary using the URL to the FSA file and the * expected metadata extension. * *

* This method can be used to load resource-based dictionaries, but be aware * of JAR resource-locking issues that arise from resource URLs. */ public static Dictionary read(URL fsaURL) throws IOException { final String fsa = fsaURL.toExternalForm(); final String features = getExpectedFeaturesName(fsa); return readAndClose( ResourceUtils.openInputStream(fsa), ResourceUtils.openInputStream(features)); } /** * Attempts to load a dictionary from opened streams of FSA dictionary data * and associated metadata. */ public static Dictionary readAndClose(InputStream fsaData, InputStream featuresData) throws IOException { try { final Properties properties = new Properties(); properties.load(new InputStreamReader(featuresData, "UTF-8")); Map map = new HashMap(); for (Enumeration e = properties.propertyNames(); e.hasMoreElements();) { String key = (String) e.nextElement(); map.put(DictionaryAttribute.fromPropertyName(key), properties.getProperty(key)); } final DictionaryMetadata features = new DictionaryMetadata(map); final FSA fsa = FSA.read(fsaData); return new Dictionary(fsa, features); } finally { FileUtils.close(fsaData, featuresData); } } /** * Returns the expected name of the metadata file, based on the name of the * FSA dictionary file. The expected name is resolved by truncating any * suffix of name and appending * {@link #METADATA_FILE_EXTENSION}. */ public static String getExpectedFeaturesName(String name) { final int dotIndex = name.lastIndexOf('.'); final String featuresName; if (dotIndex >= 0) { featuresName = name.substring(0, dotIndex) + "." + METADATA_FILE_EXTENSION; } else { featuresName = name + "." + METADATA_FILE_EXTENSION; } return featuresName; } /** * Return a built-in dictionary for a given ISO language code. Dictionaries * are cached internally for potential reuse. * * @throws RuntimeException * Throws a {@link RuntimeException} if the dictionary is not * bundled with the library. */ public static Dictionary getForLanguage(String languageCode) { if (languageCode == null || "".equals(languageCode)) { throw new IllegalArgumentException( "Language code must not be empty."); } synchronized (defaultDictionaries) { Dictionary dict = defaultDictionaries.get(languageCode); if (dict != null) return dict; try { final String dictPath = "morfologik/dictionaries/" + languageCode + ".dict"; final String metaPath = Dictionary .getExpectedFeaturesName(dictPath); dict = Dictionary.readAndClose( ResourceUtils.openInputStream(dictPath), ResourceUtils.openInputStream(metaPath)); defaultDictionaries.put(languageCode, dict); return dict; } catch (IOException e) { throw new RuntimeException( "Default dictionary resource for language '" + languageCode + "not found.", e); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy