All downloads are free. The search and download features use the official Maven repository.

com.chenlb.mmseg4j.CharNode Maven / Gradle / Ivy

The newest version!
package com.chenlb.mmseg4j;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * All words are recorded under the node of their first character.
 * 
 * @author chenlb 2009-2-20 下午11:30:14
 */
public class CharNode {

	private int freq = -1;	// Degree of Morphemic Freedom of One-Character; only needed for single-character words
	private int maxLen = 0;	// length of the longest wordTail; raised by addWordTail, overridable via setMaxLen

	// Container for the word tails handed to addWordTail; all lookups below delegate to it.
	private KeyTree ktWordTails = new KeyTree();
	// Incremented once per addWordTail call.
	private int wordNum = 0;
	
	/**
	 * Creates an empty node; all state comes from the field initializers.
	 */
	public CharNode() {
	}
	
	/**
	 * Registers one word tail under this node.
	 * The tail is handed to the key tree, the word counter goes up by one,
	 * and the longest-tail length is raised when this tail is longer.
	 *
	 * @param wordTail characters of the word tail to add
	 */
	public void addWordTail(char[] wordTail) {
		this.ktWordTails.add(wordTail);
		this.wordNum += 1;
		// Keep the larger of the current maximum and this tail's length.
		this.maxLen = Math.max(this.maxLen, wordTail.length);
	}
	/**
	 * @return the stored frequency value; it is -1 until {@code setFreq} is called
	 */
	public int getFreq() {
		return this.freq;
	}
	
	/**
	 * Stores the frequency value for this node.
	 *
	 * @param freq the new frequency value
	 */
	public void setFreq(int freq) {
		this.freq = freq;
	}
	
	/**
	 * @return how many times {@code addWordTail} has been called on this node
	 */
	public int wordNum() {
		return this.wordNum;
	}
	
	/**
	 * Asks the key tree whether it matches {@code tailLen} characters of
	 * {@code sen}, starting one position after {@code offset}.
	 *
	 * @param sen the sentence, a run of text.
	 * @param offset position of the word in the sentence.
	 * @param tailLen length of the word tail (presumably the word length without its first character; confirm with callers).
	 * @return 1 when the key tree reports a match, otherwise -1
	 * @author chenlb 2009-4-8 11:10:30 PM
	 */
	public int indexOf(char[] sen, int offset, int tailLen) {
		// The tail begins right after the character at offset.
		final int tailStart = offset + 1;
		if (ktWordTails.match(sen, tailStart, tailLen)) {
			return 1;
		}
		return -1;
	}
	
	/**
	 * Delegates to the key tree's {@code maxMatch(sen, wordTailOffset)}.
	 *
	 * @param sen the sentence, a run of text.
	 * @param wordTailOffset position of the word in the sentence; per the original note, the search actually starts after the offset.
	 * @return the word-tail length, or 0 when there is none (as stated by the original documentation)
	 * @author chenlb 2009-4-10 10:45:51 PM
	 */
	public int maxMatch(char[] sen, int wordTailOffset) {
		final int tailLength = this.ktWordTails.maxMatch(sen, wordTailOffset);
		return tailLength;
	}
	
	/**
	 * Delegates to the key tree's {@code maxMatch(tailLens, sen, wordTailOffset)}
	 * and returns whatever list it yields.
	 *
	 * @param tailLens list passed through to the key tree
	 * @param sen the sentence, a run of text.
	 * @param wordTailOffset offset passed through to the key tree
	 * @return per the original documentation, at least one int is returned, which includes 0
	 * @author chenlb 2009-4-12 10:01:35 AM
	 */
	public ArrayList maxMatch(ArrayList tailLens, char[] sen, int wordTailOffset) {
		return this.ktWordTails.maxMatch(tailLens, sen, wordTailOffset);
	}
	
	/**
	 * @return the current longest word-tail length recorded for this node
	 */
	public int getMaxLen() {
		return this.maxLen;
	}
	/**
	 * Overwrites the recorded longest word-tail length.
	 *
	 * @param maxLen the new value
	 */
	public void setMaxLen(int maxLen) {
		this.maxLen = maxLen;
	}
	
	public static class KeyTree {
		TreeNode head = new TreeNode(' ');
		
		public void add(char[] w) {
			if(w.length < 1) {
				return;
			}
			TreeNode p = head;
			for(int i=0; i maxMatch(ArrayList tailLens, char[] sen, int offset) {
			TreeNode node = head;
			for(int i=offset; i subNodes;
		boolean alsoLeaf;
		/**
		 * Creates a node for the given character with an empty child map.
		 *
		 * @param key the character stored in this node
		 */
		public TreeNode(char key) {
			this.key = key;
			// Parameterized instead of a raw HashMap so the assignment is type-checked.
			// Key and value types follow born(char, TreeNode) and subNode(char).
			// NOTE(review): assumes the field is declared as Map<Character, TreeNode> - confirm.
			subNodes = new HashMap<Character, TreeNode>();
		}
		
		/**
		 * Stores {@code sub} as the child of this node under character {@code k},
		 * replacing any child already stored under that character.
		 *
		 * @param k the character that keys the child
		 * @param sub the child node to store
		 */
		public void born(char k, TreeNode sub) {
			// Explicit boxing; identical to what autoboxing does for a char key.
			subNodes.put(Character.valueOf(k), sub);
		}
		
		/**
		 * Looks up the child stored under character {@code k}.
		 *
		 * @param k the character to look up
		 * @return the map entry for {@code k}, or {@code null} when the map has none
		 */
		public TreeNode subNode(char k) {
			// Explicit boxing; identical to what autoboxing does for a char key.
			return subNodes.get(Character.valueOf(k));
		}
		/**
		 * @return the current value of the {@code alsoLeaf} flag
		 */
		public boolean isAlsoLeaf() {
			return this.alsoLeaf;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy