com.chenlb.mmseg4j.CharNode Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mmseg4j-core Show documentation
Show all versions of mmseg4j-core Show documentation
MMSEG core for Java Chinese analyzer
The newest version!
package com.chenlb.mmseg4j;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;
/**
 * Dictionary node for one leading character: every word is recorded under
 * the node of its first character, and only the remainder of the word (the
 * "word tail") is stored here, in a character trie.
 *
 * @author chenlb 2009-2-20 23:30:14
 */
public class CharNode {

    /** Degree of morphemic freedom of a one-character word; only needed for single characters. */
    private int freq = -1;

    /** Length of the longest word tail added so far. */
    private int maxLen = 0;

    /** Trie holding every word tail registered under this leading character. */
    private final KeyTree ktWordTails = new KeyTree();

    /** Number of {@link #addWordTail(char[])} calls made on this node. */
    private int wordNum = 0;

    public CharNode() {
    }

    /**
     * Registers one word tail (the word without its first character).
     *
     * @param wordTail characters of the word after its first character
     */
    public void addWordTail(char[] wordTail) {
        ktWordTails.add(wordTail);
        // Counted on every call, even when the trie ignores an empty tail.
        wordNum++;
        maxLen = Math.max(maxLen, wordTail.length);
    }

    public int getFreq() {
        return freq;
    }

    public void setFreq(int freq) {
        this.freq = freq;
    }

    /**
     * @return how many word tails have been added to this node
     */
    public int wordNum() {
        return wordNum;
    }

    /**
     * Tests whether the {@code tailLen} characters that follow position
     * {@code offset} form a registered word tail.
     *
     * @param sen     the sentence, a run of text
     * @param offset  position of the word (its first character) in the sentence
     * @param tailLen length of the word tail, i.e. the word length minus the first character
     * @return {@code 1} when the tail is registered, {@code -1} otherwise
     * @author chenlb 2009-4-8 23:10:30
     */
    public int indexOf(char[] sen, int offset, int tailLen) {
        // The tail starts right after the word's first character.
        return ktWordTails.match(sen, offset + 1, tailLen) ? 1 : -1;
    }

    /**
     * Finds the longest registered word tail that starts at {@code wordTailOffset}.
     *
     * @param sen            the sentence, a run of text
     * @param wordTailOffset position in the sentence where the tail search starts
     * @return length of the longest matching tail, or {@code 0} when none matches
     * @author chenlb 2009-4-10 22:45:51
     */
    public int maxMatch(char[] sen, int wordTailOffset) {
        return ktWordTails.maxMatch(sen, wordTailOffset);
    }

    /**
     * Appends to {@code tailLens} the length of every registered word tail
     * that starts at {@code wordTailOffset}, shortest first.
     *
     * <p>The original note states that the result holds at least one int,
     * including 0. This method never adds 0 itself, so presumably the caller
     * seeds the list with it - verify against the callers.
     *
     * @param tailLens       list that receives the matching tail lengths
     * @param sen            the sentence, a run of text
     * @param wordTailOffset position in the sentence where the tail search starts
     * @return the same {@code tailLens} instance
     * @author chenlb 2009-4-12 10:01:35
     */
    public ArrayList<Integer> maxMatch(ArrayList<Integer> tailLens, char[] sen, int wordTailOffset) {
        return ktWordTails.maxMatch(tailLens, sen, wordTailOffset);
    }

    public int getMaxLen() {
        return maxLen;
    }

    public void setMaxLen(int maxLen) {
        this.maxLen = maxLen;
    }

    /**
     * Character trie of word tails.
     *
     * <p>NOTE(review): the loop bodies of {@code add}, both {@code maxMatch}
     * overloads and {@code match}, as well as the generic type arguments, were
     * missing from the copy of this file that was reviewed. They are rebuilt
     * here from the contracts of the {@link CharNode} methods that delegate to
     * them - confirm against the upstream mmseg4j source.
     */
    public static class KeyTree {

        /** Root of the trie; its own key is a placeholder and is never matched. */
        TreeNode head = new TreeNode(' ');

        /**
         * Inserts a word tail, creating trie nodes as needed. Empty tails are ignored.
         *
         * @param w characters of the word tail
         */
        public void add(char[] w) {
            if (w.length < 1) {
                return;
            }
            TreeNode p = head;
            for (int i = 0; i < w.length; i++) {
                TreeNode n = p.subNode(w[i]);
                if (n == null) {
                    n = new TreeNode(w[i]);
                    p.born(w[i], n);
                }
                p = n;
            }
            // The last node ends a word even if longer words continue through it.
            p.alsoLeaf = true;
        }

        /**
         * @param sen    the sentence
         * @param offset index of the first tail character
         * @return length of the longest word tail starting at {@code offset},
         *         or {@code 0} when there is none
         */
        public int maxMatch(char[] sen, int offset) {
            int lastHit = offset - 1;
            TreeNode node = head;
            for (int i = offset; i < sen.length; i++) {
                node = node.subNode(sen[i]);
                if (node == null) {
                    break;
                }
                if (node.isAlsoLeaf()) {
                    lastHit = i;
                }
            }
            return lastHit - offset + 1;
        }

        /**
         * Collects the length of every word tail that starts at {@code offset}.
         *
         * @param tailLens list that receives the lengths, in increasing order
         * @param sen      the sentence
         * @param offset   index of the first tail character
         * @return the same {@code tailLens} instance
         */
        public ArrayList<Integer> maxMatch(ArrayList<Integer> tailLens, char[] sen, int offset) {
            TreeNode node = head;
            for (int i = offset; i < sen.length; i++) {
                node = node.subNode(sen[i]);
                if (node == null) {
                    break;
                }
                if (node.isAlsoLeaf()) {
                    tailLens.add(i - offset + 1);
                }
            }
            return tailLens;
        }

        /**
         * Tests whether exactly {@code len} characters starting at
         * {@code offset} form a registered word tail.
         *
         * @param sen    the sentence
         * @param offset index of the first tail character
         * @param len    number of characters to match
         * @return {@code true} only when the whole span is a registered tail;
         *         {@code false} also when the span does not fit inside {@code sen}
         */
        public boolean match(char[] sen, int offset, int len) {
            // A span that leaves the sentence cannot be a word; report "no match"
            // instead of running off the end of the array.
            if (len < 0 || offset < 0 || offset > sen.length - len) {
                return false;
            }
            TreeNode node = head;
            for (int i = 0; i < len; i++) {
                node = node.subNode(sen[offset + i]);
                if (node == null) {
                    return false;
                }
            }
            return node.isAlsoLeaf();
        }
    }

    /** One trie node: a character, its children, and an end-of-word marker. */
    private static class TreeNode {

        char key;
        Map<Character, TreeNode> subNodes;
        /** True when a word tail ends at this node (it may still have children). */
        boolean alsoLeaf;

        public TreeNode(char key) {
            this.key = key;
            subNodes = new HashMap<Character, TreeNode>();
        }

        /** Attaches {@code sub} as the child reached through character {@code k}. */
        public void born(char k, TreeNode sub) {
            subNodes.put(k, sub);
        }

        /** @return the child reached through {@code k}, or {@code null} when there is none */
        public TreeNode subNode(char k) {
            return subNodes.get(k);
        }

        public boolean isAlsoLeaf() {
            return alsoLeaf;
        }
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy