All downloads are free. Search and download functionality uses the official Maven repository.

org.ansj.library.CrfLibrary Maven / Gradle / Ivy

The newest version!
package org.ansj.library;

import org.ansj.app.crf.Model;
import org.ansj.app.crf.SplitWord;
import org.ansj.app.crf.model.CRFModel;
import org.ansj.dic.PathToStream;
import org.ansj.domain.KV;
import org.ansj.util.MyStaticValue;
import org.nlpcn.commons.lang.util.logging.Log;

import java.io.InputStream;
import java.util.HashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

public class CrfLibrary {

	private static final Log LOG = MyStaticValue.getLog(CrfLibrary.class);

	// CRF模型
	private static final Map> CRF = new HashMap<>();

	public static final String DEFAULT = "crf";

	static {
		for (Entry entry : MyStaticValue.ENV.entrySet()) {
			if (entry.getKey().startsWith(DEFAULT)) {
				put(entry.getKey(), entry.getValue());
			}
		}
		putIfAbsent(DEFAULT, "jar://crf.model");
	}

	public static SplitWord get() {
		return get(DEFAULT);
	}

	/**
	 * 根据key获取crf分词器
	 * 
	 * @param key
	 * @return crf分词器
	 */
	public static SplitWord get(String key) {

		KV kv = CRF.get(key);

		if (kv == null) {
			if (MyStaticValue.ENV.containsKey(key)) {
				putIfAbsent(key, MyStaticValue.ENV.get(key));
				return get(key);
			}
			LOG.warn("crf " + key + " not found in config ");
			return null;
		}

		SplitWord sw = kv.getV();
		if (sw == null) {
			sw = initCRFModel(kv);
		}
		return sw;
	}

	/**
	 * 加载CRF模型
	 * 
	 * @param modelPath
	 * @return
	 */
	private static synchronized SplitWord initCRFModel(KV kv) {
		try {
			if (kv.getV() != null) {
				return kv.getV();
			}

			long start = System.currentTimeMillis();
			LOG.debug("begin init crf model!");
			try (InputStream is = PathToStream.stream(kv.getK())) {
				SplitWord crfSplitWord = new SplitWord(Model.load(CRFModel.class, is));
				kv.setV(crfSplitWord);
				LOG.info("load crf use time:" + (System.currentTimeMillis() - start) + " path is : " + kv.getK());
				return crfSplitWord;
			}
		} catch (Exception e) {
			LOG.error(kv + " load err " + e.getMessage());
			return null;
		}
	}

	/**
	 * 动态添加
	 * 
	 * @param dicDefault
	 * @param dicDefault2
	 * @param dic2
	 */
	public static void put(String key, String path) {

		put(key, path, null);
	}

	public static void put(String key, String path, SplitWord sw) {
		CRF.put(key, KV.with(path, sw));
		MyStaticValue.ENV.put(key, path);
	}

	/**
	 * 删除一个key
	 * 
	 * @param key
	 * @return
	 */
	public static KV remove(String key) {
		MyStaticValue.ENV.remove(key) ;
		return CRF.remove(key);
	}

	/**
	 * 刷新一个,将值设置为null
	 * 
	 * @param key
	 * @return
	 */
	public static void reload(String key) {
		KV kv = CRF.get(key);
		if (kv != null) {
			CRF.get(key).setV(null);
		}

		LOG.warn("make sure ,this reload not use same obj , it to instance a new model");
	}

	public static Set keys() {
		return CRF.keySet();
	}

	public static void putIfAbsent(String key, String path) {
		if (!CRF.containsKey(key)) {
			CRF.put(key, KV.with(path, (SplitWord) null));
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy