All downloads are free. The search and download features use the official Maven repository.

org.ansj.recognition.arrimpl.UserDefineRecognition Maven / Gradle / Ivy

There is a newer version: 5.1.6
Show newest version
package org.ansj.recognition.arrimpl;

import org.ansj.domain.Term;
import org.ansj.domain.TermNature;
import org.ansj.domain.TermNatures;
import org.ansj.library.UserDefineLibrary;
import org.ansj.recognition.TermArrRecognition;
import org.ansj.util.MyStaticValue;
import org.ansj.util.TermUtil;
import org.ansj.util.TermUtil.InsertTermType;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.logging.Log;

/**
 * 用户自定义词典.又称补充词典
 * 
 * @author ansj
 * 
 */
public class UserDefineRecognition implements TermArrRecognition {

	public static final Log logger = MyStaticValue.getLog();

	private Term[] terms = null;

	private Forest[] forests = { UserDefineLibrary.FOREST };

	private int offe = -1;
	private int endOffe = -1;
	private int tempFreq = 50;
	private String tempNature;

	private SmartForest branch = null;
	private SmartForest forest = null;

	private InsertTermType type = InsertTermType.SKIP;

	public UserDefineRecognition(InsertTermType type, Forest... forests) {
		this.type = type;
		if (forests != null && forests.length > 0) {
			this.forests = forests;
		}

	}

	public void recognition(Term[] terms) {
		this.terms = terms;
		for (Forest forest : forests) {
			if (forest == null) {
				continue;
			}
			reset();
			this.forest = forest;

			branch = forest;

			int length = terms.length - 1;

			boolean flag = true;
			for (int i = 0; i < length; i++) {
				if (terms[i] == null)
					continue;
				if (branch == forest) {
					flag = false;
				} else {
					flag = true;
				}

				branch = termStatus(branch, terms[i]);
				if (branch == null) {
					if (offe != -1) {
						i = offe;
					}
					reset();
				} else if (branch.getStatus() == 3) {
					endOffe = i;
					tempNature = branch.getParam()[0];
					tempFreq = getInt(branch.getParam()[1], 50);
					if (offe != -1 && offe < endOffe) {
						i = offe;
						makeNewTerm();
						reset();
					} else {
						reset();
					}
				} else if (branch.getStatus() == 2) {
					endOffe = i;
					if (offe == -1) {
						offe = i;
					} else {
						tempNature = branch.getParam()[0];
						tempFreq = getInt(branch.getParam()[1], 50);
						if (flag) {
							makeNewTerm();
						}
					}
				} else if (branch.getStatus() == 1) {
					if (offe == -1) {
						offe = i;
					}
				}
			}
			if (offe != -1 && offe < endOffe) {
				makeNewTerm();
			}
		}
	}

	private int getInt(String str, int def) {
		try {
			return Integer.parseInt(str);
		} catch (NumberFormatException e) {
			logger.warn(str + "不是一个数字", e);
			return def;
		}
	}

	private void makeNewTerm() {
		StringBuilder sb = new StringBuilder();
		for (int j = offe; j <= endOffe; j++) {
			if (terms[j] == null) {
				continue;
			} else {
				sb.append(terms[j].getName());
			}
		}
		TermNatures termNatures = new TermNatures(new TermNature(tempNature, tempFreq));
		Term term = new Term(sb.toString(), offe, termNatures);
		term.selfScore(-1 * tempFreq);
		TermUtil.insertTerm(terms, term, type);
	}

	/**
	 * 重置
	 */
	private void reset() {
		offe = -1;
		endOffe = -1;
		tempFreq = 50;
		tempNature = null;
		branch = forest;
	}

	/**
	 * 传入一个term 返回这个term的状态
	 * 
	 * @param branch
	 * @param term
	 * @return
	 */
	private SmartForest termStatus(SmartForest branch, Term term) {
		String name = term.getName();
		SmartForest sf = branch;
		for (int j = 0; j < name.length(); j++) {
			sf = sf.get(name.charAt(j));
			if (sf == null) {
				return null;
			}
		}
		return sf;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy