All downloads are free. Search and download functionality uses the official Maven repository.

org.ansj.library.StopLibrary Maven / Gradle / Ivy

The newest version!
package org.ansj.library;

import org.ansj.dic.PathToStream;
import org.ansj.domain.KV;
import org.ansj.recognition.impl.StopRecognition;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.IOUtil;
import org.nlpcn.commons.lang.util.StringUtil;
import org.nlpcn.commons.lang.util.logging.Log;
import org.nlpcn.commons.lang.util.logging.LogFactory;

import java.io.BufferedReader;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

public class StopLibrary {

	private static final Log LOG = LogFactory.getLog();

	public static final String DEFAULT = "stop";

	// 用户自定义词典
	private static final Map> STOP = new HashMap<>();

	static {
		for (Entry entry : MyStaticValue.ENV.entrySet()) {
			if (entry.getKey().startsWith(DEFAULT)) {
				put(entry.getKey(), entry.getValue());
			}
		}
		putIfAbsent(DEFAULT, "library/stop.dic");
	}

	/**
	 * 词性过滤
	 * 
	 * @param key
	 * @param filterNatures
	 */
	public static void insertStopNatures(String key, String... filterNatures) {
		StopRecognition fr = get(key);
		fr.insertStopNatures(filterNatures);
	}

	/**
	 * 正则过滤
	 * 
	 * @param key
	 * @param regexes
	 */
	public static void insertStopRegexes(String key, String... regexes) {
		StopRecognition fr = get(key);
		fr.insertStopRegexes(regexes);
	}

	/**
	 * 增加停用词
	 * 
	 * @param key
	 * @param regexes
	 */
	public static void insertStopWords(String key, String... stopWords) {
		StopRecognition fr = get(key);
		fr.insertStopWords(stopWords);
	}

	/**
	 * 增加停用词
	 * 
	 * @param key
	 * @param regexes
	 */
	public static void insertStopWords(String key, List stopWords) {
		StopRecognition fr = get(key);
		fr.insertStopWords(stopWords);
	}

	public static StopRecognition get() {
		return get(DEFAULT);
	}

	/**
	 * 根据模型名称获取crf模型
	 * 
	 * @param modelName
	 * @return
	 */
	public static StopRecognition get(String key) {
		KV kv = STOP.get(key);

		if (kv == null) {
			if (MyStaticValue.ENV.containsKey(key)) {
				putIfAbsent(key, MyStaticValue.ENV.get(key));
				return get(key);
			}
			LOG.warn("STOP " + key + " not found in config ");
			return null;
		}
		StopRecognition stopRecognition = kv.getV();
		if (stopRecognition == null) {
			stopRecognition = init(key, kv, false);
		}
		return stopRecognition;

	}

	/**
	 * 用户自定义词典加载
	 * 
	 * @param key
	 * @param path
	 * @return
	 */
	private synchronized static StopRecognition init(String key, KV kv, boolean reload) {
		StopRecognition stopRecognition = kv.getV();

		if (stopRecognition != null) {
			if (reload) {
				stopRecognition.clear();
			} else {
				return stopRecognition;
			}
		} else {
			stopRecognition = new StopRecognition();
		}

		try {
			LOG.debug("begin init FILTER !");
			long start = System.currentTimeMillis();
			String temp = null;
			String[] strs = null;
			try (BufferedReader br = IOUtil.getReader(PathToStream.stream(kv.getK()), "UTF-8")) {
				while ((temp = br.readLine()) != null) {
					if (StringUtil.isNotBlank(temp)) {
						strs = temp.split("\t");

						if (strs.length == 1) {
							stopRecognition.insertStopWords(strs[0]);
						} else {
							switch (strs[1]) {
							case "nature":
								stopRecognition.insertStopNatures(strs[0]);
								break;
							case "regex":
								stopRecognition.insertStopRegexes(strs[0]);
								break;
							default:
								stopRecognition.insertStopWords(strs[0]);
								break;
							}
						}

					}
				}
			}
			LOG.info("load stop use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
			kv.setV(stopRecognition);
			return stopRecognition;
		} catch (Exception e) {
			LOG.error("Init Stop library error :" + e.getMessage() + ", path: " + kv.getK());
			STOP.remove(key);
			return null;
		}
	}

	/**
	 * 动态添加词典
	 * 
	 * @param FILTERDefault
	 * @param FILTERDefault2
	 * @param FILTER2
	 */
	public static void put(String key, String path, StopRecognition stopRecognition) {
		STOP.put(key, KV.with(path, stopRecognition));
		MyStaticValue.ENV.put(key, path);
	}

	/**
	 * 动态添加词典
	 * 
	 * @param FILTERDefault
	 * @param FILTERDefault2
	 * @param FILTER2
	 */
	public static void putIfAbsent(String key, String path) {
		if (!STOP.containsKey(key)) {
			STOP.put(key, KV.with(path, (StopRecognition) null));
		}
	}

	/**
	 * 动态添加词典
	 * 
	 * @param FILTERDefault
	 * @param FILTERDefault2
	 * @param FILTER2
	 */
	public static void put(String key, String path) {
		put(key, path, null);
	}

	/**
	 * 动态添加词典
	 * 
	 * @param 
	 * @param 
	 * 
	 * @param FILTERDefault
	 * @param FILTERDefault2
	 * @param FILTER2
	 */
	public static synchronized StopRecognition putIfAbsent(String key, String path, StopRecognition stopRecognition) {
		KV kv = STOP.get(key);
		if (kv != null && kv.getV() != null) {
			return kv.getV();
		}
		put(key, path, stopRecognition);
		return stopRecognition;
	}

	public static KV remove(String key) {
		KV kv = STOP.get(key);
		if (kv != null && kv.getV() != null) {
			kv.getV().clear();
		}
		MyStaticValue.ENV.remove(key) ;
		return STOP.remove(key);
	}

	public static Set keys() {
		return STOP.keySet();
	}

	public static void reload(String key) {

		if (!MyStaticValue.ENV.containsKey(key)) { //如果变量中不存在直接删掉这个key不解释了
			remove(key);
		}

		putIfAbsent(key, MyStaticValue.ENV.get(key));

		KV kv = STOP.get(key);

		init(key, kv, true);
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy