All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.ansj.app.newWord.LearnTool Maven / Gradle / Ivy

There is a newer version: 5.1.6
Show newest version
package org.ansj.app.newWord;

import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map.Entry;

import love.cq.domain.SmartForest;
import love.cq.util.CollectionUtil;

import org.ansj.app.newWord.PatHashMap.Node;
import org.ansj.domain.NewWord;
import org.ansj.domain.TermNatures;
import org.ansj.recognition.AsianPersonRecognition;
import org.ansj.recognition.CompanyRecogntion;
import org.ansj.recognition.ForeignPersonRecognition;
import org.ansj.util.Graph;

/**
 * 新词发现,这是个线程安全的.所以可以多个对象公用一个
 * 
 * @author ansj
 * 
 */
public class LearnTool {

	/**
	 * 是否开启学习机
	 */
	public boolean isCompany = true;

	public boolean isNewWord = true;

	public boolean isAsianName = true;

	public boolean isForeignName = true;

	/**
	 * 告诉大家你学习了多少个词了
	 */
	public int count;

	/**
	 * 新词发现的结果集.可以序列化到硬盘.然后可以当做训练集来做.
	 */
	private final SmartForest sf = new SmartForest();

	/**
	 * 公司名称学习.
	 * 
	 * @param graph
	 */
	public void learn(Graph graph) {
		// 机构名识别
		if (isCompany) {
			findCompany(graph);
		}

		// 亚洲人名识别
		if (isAsianName) {
			findAsianPerson(graph);
		}

		// 外国人名识别
		if (isForeignName) {
			findForeignPerson(graph);
		}

		// 新词发现
		if (isNewWord) {
			newWordDetection(graph);
		}

	}

	private void findAsianPerson(Graph graph) {
		List newWords = new AsianPersonRecognition(graph.terms).getNewWords();
		addListToTerm(newWords);
	}

	private void findForeignPerson(Graph graph) {
		List newWords = new ForeignPersonRecognition(graph.terms).getNewWords();
		addListToTerm(newWords);
	}

	/**
	 * 公司名称查找
	 * 
	 * @param graph
	 */
	private void findCompany(Graph graph) {
		List newWords = new CompanyRecogntion(graph.terms).getNewWords();
		addListToTerm(newWords);
	}

	// 批量将新词加入到词典中
	private void addListToTerm(List newWords) {
		if (newWords.size() == 0)
			return;
		for (NewWord newWord : newWords) {
			addTerm(newWord);
		}
	}

	/**
	 * 新词发现
	 * 
	 * @param graph
	 */
	private void newWordDetection(Graph graph) {
		Collection newWords = null;
		try {
			newWords = new NewWordDetection().getNewWords(graph);
		} catch (IOException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
		if (newWords == null)
			return;
		NewWord newWord = null;
		for (Node node : newWords) {
			newWord = new NewWord(node.getName(), TermNatures.NW, node.getScore(), node.getFreq());
			addTerm(newWord);
		}
	}

	/**
	 * 增加一个新词到树中
	 * 
	 * @param newWord
	 */
	public void addTerm(NewWord newWord) {
		NewWord temp = null;
		SmartForest smartForest = null;
		if ((smartForest = sf.getBranch(newWord.getName())) != null && smartForest.getParam() != null) {
			temp = smartForest.getParam();
			temp.update(newWord.getScore(), newWord.getNature(), newWord.getAllFreq());
		} else {
			count++;
			// 设置名字为空,节省内存空间
			synchronized (sf) {
				sf.add(newWord.getName(), newWord);
			}
		}
	}

	public SmartForest getForest() {
		return this.sf;
	}

	/**
	 * 返回学习到的新词.
	 * 
	 * @param num
	 *            返回数目.0为全部返回
	 * @return
	 */
	public List> getTopTree(int num) {
		return getTopTree(num, null);
	}

	public List> getTopTree(int num, TermNatures nature) {
		if (sf.branches == null) {
			return null;
		}
		HashMap hm = new HashMap();
		for (int i = 0; i < sf.branches.length; i++) {
			valueResult(sf.branches[i], hm, nature);
		}
		List> sortMapByValue = CollectionUtil.sortMapByValue(hm, -1);
		if (num == 0) {
			return sortMapByValue;
		} else {
			num = Math.min(num, sortMapByValue.size());
			return sortMapByValue.subList(0, num);
		}
	}

	private void valueResult(SmartForest smartForest, HashMap hm, TermNatures nature) {
		// TODO Auto-generated method stub
		for (int i = 0; i < smartForest.branches.length; i++) {
			NewWord param = smartForest.branches[i].getParam();
			if (smartForest.branches[i].getStatus() == 3) {
				if (nature == null || param.getNature().equals(nature)) {
					hm.put(param.getName(), param.getScore());
				}
			} else if (smartForest.branches[i].getStatus() == 2) {
				if (nature == null || param.getNature().equals(nature)) {
					hm.put(param.getName(), param.getScore());
				}
				valueResult(smartForest.branches[i], hm, nature);
			} else {
				valueResult(smartForest.branches[i], hm, nature);
			}
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy