All downloads are free. Search and download functionality uses the official Maven repository.

com.thihy.es.analysis.paoding.dict.FileDictionariesLoader Maven / Gradle / Ivy

package com.thihy.es.analysis.paoding.dict;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.UnsupportedEncodingException;
import java.util.EnumMap;
import java.util.Locale;
import java.util.Map;
import java.util.Map.Entry;

import net.paoding.analysis.dictionary.Dictionary;
import net.paoding.analysis.knife.Dictionaries;

import org.elasticsearch.common.Classes;
import org.elasticsearch.common.collect.ImmutableMap;
import org.elasticsearch.common.collect.Maps;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.inject.assistedinject.Assisted;
import org.elasticsearch.common.logging.ESLogger;
import org.elasticsearch.common.logging.Loggers;
import org.elasticsearch.common.settings.Settings;

/**
 * Loads one dictionary per {@link DictionaryType} from files.
 *
 * <p>Each dictionary is looked up at {@code <path>/<fileName><suffix>}: first as a regular file
 * on the file system, and, if no such file exists, as a class path resource with the same name.
 *
 * <p>Settings read from the dictionary settings of the load context:
 * <ul>
 * <li>{@code fail_if_error} (falling back to {@code failIfError}, default {@code false}) - whether
 * an {@link IOException} raised while reading or closing a dictionary is rethrown instead of
 * only being logged;</li>
 * <li>{@code path} - directory holding the dictionary files; when absent the built-in resource
 * directory is used and the two settings below are ignored;</li>
 * <li>{@code suffix} - file suffix, default {@code ".dic"};</li>
 * <li>{@code words}, {@code stop}, {@code noise}, {@code unit}, {@code surname}, {@code comb} -
 * overrides for the base file name of the matching dictionary type.</li>
 * </ul>
 */
public class FileDictionariesLoader implements DictionariesLoader {
	private static final ESLogger LOG = Loggers.getLogger(FileDictionariesLoader.class);

	/** Directory used when no {@code path} setting is given. */
	private static final String DEFAULT_DICT_RESOURCE_DIR = "com/thihy/es/analysis/paoding/dict";

	private static final String DEFAULT_FILE_SUFFIX = ".dic";
	/**
	 * Default base file name per dictionary type. The same string is also used as the setting key
	 * that overrides the file name.
	 */
	private static final EnumMap<DictionaryType, String> FILE_NAME_CONFIGS;
	private static final boolean DEFAULT_FAIL_IF_ERROR = false;

	static {
		EnumMap<DictionaryType, String> tmpFileNameConfigs = new EnumMap<>(DictionaryType.class);
		tmpFileNameConfigs.put(DictionaryType.WORDS, "words");
		tmpFileNameConfigs.put(DictionaryType.STOP, "stop");
		tmpFileNameConfigs.put(DictionaryType.NOISE, "noise");
		tmpFileNameConfigs.put(DictionaryType.UNIT, "unit");
		tmpFileNameConfigs.put(DictionaryType.SURNAME, "surname");
		tmpFileNameConfigs.put(DictionaryType.COMB, "comb");
		FILE_NAME_CONFIGS = tmpFileNameConfigs;
	}

	private final DictionaryLoader dictionaryLoader;
	/** Configured dictionary directory, or {@code null} when the default resource directory is used. */
	private final String path;
	private final String fileSuffix;
	/** Base file name (without suffix) per dictionary type. */
	private final Map<DictionaryType, String> fileNames;
	private final boolean failIfError;

	/**
	 * Creates a loader configured from the dictionary settings of the given context.
	 *
	 * @param dictionaryLoader parser used to turn an opened reader into a {@link Dictionary}
	 * @param context load context whose dictionary settings configure this loader
	 */
	@Inject
	public FileDictionariesLoader(DictionaryLoader dictionaryLoader, @Assisted DictionariesLoadContext context) {
		super();
		Settings settings = context.getDictSettings();
		this.dictionaryLoader = dictionaryLoader;
		// The snake_case key wins; the camelCase key is only consulted as its default.
		this.failIfError = settings.getAsBoolean("fail_if_error", settings.getAsBoolean("failIfError", DEFAULT_FAIL_IF_ERROR));
		this.path = settings.get("path");
		if (this.path == null) {
			// Without an explicit path the defaults are used verbatim; suffix and file name
			// overrides are not read.
			this.fileSuffix = DEFAULT_FILE_SUFFIX;
			this.fileNames = ImmutableMap.copyOf(FILE_NAME_CONFIGS);
		} else {
			this.fileSuffix = settings.get("suffix", DEFAULT_FILE_SUFFIX);
			Map<DictionaryType, String> tmpFileNames = Maps.newHashMap();
			for (Entry<DictionaryType, String> fileNameConfigEntry : FILE_NAME_CONFIGS.entrySet()) {
				// The default file name doubles as the setting key and as the fallback value.
				String fileName = settings.get(fileNameConfigEntry.getValue(), fileNameConfigEntry.getValue());
				tmpFileNames.put(fileNameConfigEntry.getKey(), fileName);
			}
			fileNames = ImmutableMap.copyOf(tmpFileNames);
		}
	}

	/**
	 * Loads every dictionary type that can be found. Types for which no dictionary could be
	 * loaded are simply not set on the result.
	 *
	 * @return the dictionaries that were loaded
	 * @throws IOException if reading or closing a dictionary fails and {@code fail_if_error} is enabled
	 */
	@Override
	public Dictionaries load() throws IOException {
		MappedDictionaries dictionaries = new MappedDictionaries();
		for (DictionaryType dictionaryType : DictionaryType.values()) {
			String fileName = fileNames.get(dictionaryType);
			if (fileName == null) {
				// Types without a configured name fall back to the lower-cased enum name.
				fileName = dictionaryType.name().toLowerCase(Locale.ROOT);
			}
			Dictionary dictionary = loadDictionary(dictionaryType, fileName);
			if (dictionary != null) {
				dictionaries.setDictionary(dictionaryType, dictionary);
			}
		}
		return dictionaries;
	}

	/**
	 * Loads a single dictionary.
	 *
	 * @param dictionaryType type being loaded (used for logging)
	 * @param fileName base file name without directory and suffix
	 * @return the loaded dictionary, or {@code null} if no source was found or an error was only logged
	 * @throws IOException if reading or closing the source fails and {@code fail_if_error} is enabled
	 */
	private Dictionary loadDictionary(DictionaryType dictionaryType, String fileName) throws IOException {
		InputStream in = null;
		String localPath = this.path;
		if (localPath == null) {
			localPath = DEFAULT_DICT_RESOURCE_DIR;
		}

		String filePath;
		if (!localPath.endsWith("/")) {
			filePath = localPath + "/" + fileName + fileSuffix;
		} else {
			filePath = localPath + fileName + fileSuffix;
		}

		File file = new File(filePath);
		if (file.exists() && file.isFile()) {
			try {
				in = new FileInputStream(file);
			} catch (FileNotFoundException e) {
				// Only logged, never rethrown: "in" stays null and the dictionary is skipped below.
				logExceptionWhenLoadDictonary(dictionaryType, file, e);
			}
		} else {
			// Not a regular file on disk: try the same path as a class path resource.
			ClassLoader classLoader = this.getClass().getClassLoader();
			if (classLoader == null) {
				classLoader = Classes.getDefaultClassLoader();
			}
			in = classLoader.getResourceAsStream(filePath);
		}
		if (in == null) {
			LOG.debug("Skip to load [{}] dict from path [{}].", dictionaryType, file);
			return null;
		}
		try {
			LOG.debug("Begin to load [{}] dict from file [{}].", dictionaryType, file);
			BufferedReader reader = new BufferedReader(new InputStreamReader(in, "UTF-8"));
			return dictionaryLoader.loadDictionary(reader);
		} catch (UnsupportedEncodingException e) {
			// Always logged only, independent of failIfError.
			logExceptionWhenLoadDictonary(dictionaryType, file, e);
		} catch (IOException e) {
			logOrRethrowExceptionWhenLoadDictonary(dictionaryType, file, e);
		} finally {
			try {
				in.close();
			} catch (IOException e) {
				logOrRethrowExceptionWhenLoadDictonary(dictionaryType, file, e);
			} finally {
				LOG.debug("End to load [{}] dict from file [{}].", dictionaryType, file);
			}
		}
		return null;
	}

	/**
	 * Rethrows the exception when {@code failIfError} is set, otherwise logs it as a warning.
	 *
	 * @param dictionaryType type whose loading failed
	 * @param file path that was being read
	 * @param exception the failure
	 * @throws T the given exception, if {@code failIfError} is set
	 */
	private <T extends Exception> void logOrRethrowExceptionWhenLoadDictonary(DictionaryType dictionaryType, File file, T exception)
			throws T {
		if (failIfError) {
			throw exception;
		} else {
			logExceptionWhenLoadDictonary(dictionaryType, file, exception);
		}
	}

	/**
	 * Logs a dictionary load failure as a warning.
	 *
	 * @param dictionaryType type whose loading failed
	 * @param file path that was being read
	 * @param exception the failure
	 */
	private <T extends Exception> void logExceptionWhenLoadDictonary(DictionaryType dictionaryType, File file, T exception) {
		LOG.warn("Failed to read [{}] dictionary, the file path is [{}].", exception, dictionaryType, file);
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy