All downloads are free. Search and download functionality uses the official Maven repository.

org.ansj.recognition.arrimpl.UserDefineRecognition Maven / Gradle / Ivy

The newest version!
package org.ansj.recognition.arrimpl;

import org.ansj.domain.Term;
import org.ansj.domain.TermNature;
import org.ansj.domain.TermNatures;
import org.ansj.library.DicLibrary;
import org.ansj.recognition.TermArrRecognition;
import org.ansj.util.Graph;
import org.ansj.util.TermUtil;
import org.ansj.util.TermUtil.InsertTermType;
import org.nlpcn.commons.lang.tire.domain.Forest;
import org.nlpcn.commons.lang.tire.domain.SmartForest;
import org.nlpcn.commons.lang.util.logging.Log;
import org.nlpcn.commons.lang.util.logging.LogFactory;

/**
 * Recognizer backed by user-defined dictionaries (also called supplementary dictionaries).
 *
 * <p>For every configured {@link Forest}, the term array of a {@link Graph} is scanned from
 * left to right. Consecutive terms whose concatenated names form a dictionary entry are
 * merged into a new {@link Term}, which is inserted into the array using the configured
 * {@link InsertTermType}.
 *
 * <p>The scanning state (current match start/end, current trie node, pending nature and
 * frequency) lives in instance fields that {@link #recognition(Graph)} overwrites on every
 * call. NOTE(review): sharing one instance between threads therefore looks unsafe — confirm
 * against callers.
 *
 * @author ansj
 */
public class UserDefineRecognition implements TermArrRecognition {

	public static final Log logger = LogFactory.getLog(UserDefineRecognition.class);

	/** Frequency used when a dictionary entry does not carry a parsable number. */
	private static final int DEFAULT_FREQ = 50;

	/*
	 * Node status values returned by SmartForest#getStatus(). The names describe how this
	 * class reacts to each value; NOTE(review): confirm the wording against the nlp-lang
	 * documentation.
	 */
	/** The characters so far are only a prefix of some entry. */
	private static final int STATUS_PREFIX = 1;
	/** The characters so far form an entry, and longer entries may follow. */
	private static final int STATUS_WORD_CONTINUE = 2;
	/** The characters so far form an entry; matching restarts after it. */
	private static final int STATUS_WORD_END = 3;

	/** Term array of the graph currently being processed. */
	private Term[] terms = null;

	/** Dictionaries to consult; defaults to the library's default dictionary. */
	private Forest[] forests = {DicLibrary.get()};

	/** Index of the first term of the current candidate match, or -1 when none is open. */
	private int offe = -1;
	/** Index of the last term at which the current candidate formed a dictionary entry, or -1. */
	private int endOffe = -1;
	/** Frequency read from the most recently matched dictionary entry. */
	private int tempFreq = DEFAULT_FREQ;
	/** Nature (part-of-speech tag) read from the most recently matched dictionary entry. */
	private String tempNature;

	/** Trie node reached by the terms consumed so far. */
	private SmartForest<String[]> branch = null;
	/** Root of the dictionary currently being scanned. */
	private SmartForest<String[]> forest = null;

	/** How newly built terms are inserted into the term array. */
	private InsertTermType type = InsertTermType.SKIP;

	/**
	 * Creates a recognizer.
	 *
	 * @param type    insertion strategy handed to {@link TermUtil#insertTerm}
	 * @param forests dictionaries to use; when {@code null} or empty the default dictionary
	 *                obtained from {@link DicLibrary#get()} is kept
	 */
	public UserDefineRecognition(InsertTermType type, Forest... forests) {
		this.type = type;
		if (forests != null && forests.length > 0) {
			this.forests = forests;
		}
	}

	/**
	 * Scans {@code graph.terms} once per configured dictionary and inserts a merged term for
	 * every run of consecutive terms that matches a dictionary entry.
	 *
	 * @param graph the graph whose term array is read and updated in place
	 */
	@Override
	public void recognition(Graph graph) {
		this.terms = graph.terms;
		for (Forest dictionary : forests) {
			if (dictionary == null || dictionary.branches == null) {
				continue;
			}
			// Point the scan at this dictionary's root before clearing the state, so that
			// reset() leaves 'branch' at the new root.
			this.forest = dictionary;
			reset();

			// The last slot of the array is intentionally not visited.
			int length = terms.length - 1;

			for (int i = 0; i < length; i++) {
				if (terms[i] == null) {
					continue;
				}
				// True when a match was already in progress before this term was consumed.
				boolean flag = branch != forest;

				branch = termStatus(branch, terms[i]);
				if (branch == null) {
					// Dead end: rewind to the start of the failed candidate so the loop's
					// i++ resumes with the term right after it.
					if (offe != -1) {
						i = offe;
					}
					reset();
					continue;
				}

				int status = branch.getStatus();
				if (status == STATUS_WORD_END) {
					endOffe = i;
					loadParam(branch);
					if (offe != -1 && offe < endOffe) {
						// Multi-term match: emit it and rewind to its start.
						i = offe;
						makeNewTerm();
					}
					reset();
				} else if (status == STATUS_WORD_CONTINUE) {
					endOffe = i;
					if (offe == -1) {
						offe = i;
					} else {
						loadParam(branch);
						if (flag) {
							makeNewTerm();
						}
					}
				} else if (status == STATUS_PREFIX) {
					if (offe == -1) {
						offe = i;
					}
				}
			}
			// Flush a multi-term match that was still open when the scan ended.
			if (offe != -1 && offe < endOffe) {
				makeNewTerm();
			}
		}
	}

	/**
	 * Copies nature and frequency from a matched trie node into the scanning state.
	 * The node's parameter array is read as {@code [nature, frequency]}.
	 *
	 * @param node the matched node
	 */
	private void loadParam(SmartForest<String[]> node) {
		String[] param = node.getParam();
		tempNature = param[0];
		tempFreq = getInt(param[1], DEFAULT_FREQ);
	}

	/**
	 * Parses an integer, falling back to a default when the text is not a number.
	 *
	 * @param str text to parse
	 * @param def value returned when parsing fails
	 * @return the parsed value, or {@code def} (a warning is logged in that case)
	 */
	private int getInt(String str, int def) {
		try {
			return Integer.parseInt(str);
		} catch (NumberFormatException e) {
			logger.warn(str + "不是一个数字", e);
			return def;
		}
	}

	/**
	 * Builds a term from {@code terms[offe..endOffe]} using the pending nature and frequency,
	 * and inserts it into the term array.
	 */
	private void makeNewTerm() {
		StringBuilder name = new StringBuilder();
		for (int j = offe; j <= endOffe; j++) {
			if (terms[j] != null) {
				name.append(terms[j].getName());
			}
		}

		TermNatures termNatures = new TermNatures(new TermNature(tempNature, tempFreq));
		Term term = new Term(name.toString(), offe, termNatures);
		term.selfScore(-tempFreq);
		TermUtil.insertTerm(terms, term, type);

		// Merging of non-native graphs was added later, so the real name has to be filled in
		// as well. This deliberately runs after the insertion above.
		if (terms[offe].getRealNameIfnull() != null) {
			StringBuilder realName = new StringBuilder();
			for (int j = offe; j <= endOffe; j++) {
				if (terms[j] != null) {
					realName.append(terms[j].getRealName());
				}
			}
			term.setRealName(realName.toString());
		}
	}

	/**
	 * Clears the current candidate match and moves back to the root of the active dictionary.
	 */
	private void reset() {
		offe = -1;
		endOffe = -1;
		tempFreq = DEFAULT_FREQ;
		tempNature = null;
		branch = forest;
	}

	/**
	 * Advances through the trie by every character of a term's name.
	 *
	 * @param branch node to start from
	 * @param term   term whose name is consumed
	 * @return the node reached after the last character, or {@code null} as soon as a
	 *         character has no matching child
	 */
	private SmartForest<String[]> termStatus(SmartForest<String[]> branch, Term term) {
		String name = term.getName();
		SmartForest<String[]> node = branch;
		for (int j = 0; j < name.length(); j++) {
			node = node.get(name.charAt(j));
			if (node == null) {
				return null;
			}
		}
		return node;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy