All downloads are free. The search and download functionality uses the official Maven repository.

monad.mmseg.CharNode Maven / Gradle / Ivy

There is a newer version: 5.1.0
Show newest version
package monad.mmseg;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Map;

/**
 * Dictionary node for a single leading character: every word is recorded
 * under the node of its first character, and only the remainder of the word
 * (the "word tail") is stored here, in a character trie.
 *
 * @author chenlb 2009-2-20 23:30:14
 */
public class CharNode {

    /** Degree of Morphemic Freedom of One-Character; only needed for single characters. -1 until set. */
    private int freq = -1;
    /** Length of the longest word tail added so far. */
    private int maxLen = 0;

    /** Trie holding all word tails registered under this character. */
    private KeyTree ktWordTails = new KeyTree();
    /** Number of calls to {@link #addWordTail(char[])} (duplicates and empty tails are counted too). */
    private int wordNum = 0;

    public CharNode() {

    }

    /**
     * Registers a word tail (the word without its first character) and
     * updates the word counter and the longest-tail length.
     *
     * @param wordTail characters of the word after its first character
     */
    public void addWordTail(char[] wordTail) {
        ktWordTails.add(wordTail);
        wordNum++;
        if (wordTail.length > maxLen) {
            maxLen = wordTail.length;
        }
    }

    public int getFreq() {
        return freq;
    }

    public void setFreq(int freq) {
        this.freq = freq;
    }

    /**
     * @return how many word tails have been added through {@link #addWordTail(char[])}
     */
    public int wordNum() {
        return wordNum;
    }

    /**
     * Checks whether the {@code tailLen} characters following {@code offset}
     * form a registered word tail.
     *
     * @param sen     the sentence, i.e. a run of text
     * @param offset  position of the word (its first character) in the sentence
     * @param tailLen length of the word tail, i.e. the word length without its first character
     * @return 1 if the tail is a registered word tail, otherwise -1
     *         (also -1 when the requested tail does not fit inside {@code sen})
     * @author chenlb 2009-4-8 23:10:30
     */
    public int indexOf(char[] sen, int offset, int tailLen) {
        return ktWordTails.match(sen, offset + 1, tailLen) ? 1 : -1;
    }

    /**
     * Finds the longest registered word tail starting at {@code wordTailOffset}.
     *
     * @param sen            the sentence, i.e. a run of text
     * @param wordTailOffset index in the sentence where the tail search starts
     *                       (the position right after the word's first character)
     * @return length of the longest matching word tail, or 0 if none matches
     * @author chenlb 2009-4-10 22:45:51
     */
    public int maxMatch(char[] sen, int wordTailOffset) {
        return ktWordTails.maxMatch(sen, wordTailOffset);
    }

    /**
     * Collects the lengths of all registered word tails starting at
     * {@code wordTailOffset}, in increasing order, by appending them to
     * {@code tailLens}.
     *
     * <p>NOTE(review): the original comment promised "at least one int,
     * including 0", but this method appends nothing when no tail matches;
     * presumably the caller seeds the list with 0 beforehand - confirm.
     *
     * @param tailLens       list that receives the matching tail lengths
     * @param sen            the sentence, i.e. a run of text
     * @param wordTailOffset index in the sentence where the tail search starts
     * @return the same {@code tailLens} instance
     * @author chenlb 2009-4-12 10:01:35
     */
    public ArrayList<Integer> maxMatch(ArrayList<Integer> tailLens, char[] sen, int wordTailOffset) {
        return ktWordTails.maxMatch(tailLens, sen, wordTailOffset);
    }

    public int getMaxLen() {
        return maxLen;
    }

    public void setMaxLen(int maxLen) {
        this.maxLen = maxLen;
    }

    /**
     * Character trie of word tails. A node flagged {@code alsoLeaf} marks the
     * end of a complete tail, even if longer tails continue below it.
     */
    public static class KeyTree {
        /** Root of the trie; its own key is a placeholder and is never matched. */
        TreeNode head = new TreeNode(' ');

        /**
         * Inserts a word tail into the trie. Empty tails are ignored.
         *
         * @param w characters of the tail
         */
        public void add(char[] w) {
            if (w.length < 1) {
                return;
            }
            TreeNode p = head;
            for (int i = 0; i < w.length; i++) {
                TreeNode n = p.subNode(w[i]);
                if (n == null) {
                    n = new TreeNode(w[i]);
                    p.born(w[i], n);
                }
                p = n;
            }
            // The last node ends a word, whether or not it has children.
            p.alsoLeaf = true;
        }

        /**
         * Walks the trie along {@code sen} starting at {@code offset} and
         * remembers the last position that completed a tail.
         *
         * @return length of the longest matching tail, or 0 if nothing matched
         */
        public int maxMatch(char[] sen, int offset) {
            // Index of the last character of the longest match so far;
            // offset - 1 means "no match yet" and yields a length of 0.
            int idx = offset - 1;
            TreeNode node = head;
            for (int i = offset; i < sen.length; i++) {
                node = node.subNode(sen[i]);
                if (node == null) {
                    break;
                }
                if (node.isAlsoLeaf()) {
                    idx = i;
                }
            }
            return idx - offset + 1;
        }

        /**
         * Walks the trie along {@code sen} starting at {@code offset} and
         * appends the length of every tail that completes on the way.
         *
         * @return the {@code tailLens} list that was passed in
         */
        public ArrayList<Integer> maxMatch(ArrayList<Integer> tailLens, char[] sen, int offset) {
            TreeNode node = head;
            for (int i = offset; i < sen.length; i++) {
                node = node.subNode(sen[i]);
                if (node == null) {
                    break;
                }
                if (node.isAlsoLeaf()) {
                    tailLens.add(i - offset + 1);
                }
            }
            return tailLens;
        }

        /**
         * Tests whether {@code sen[offset .. offset + len)} is exactly one of
         * the stored tails.
         *
         * @return {@code true} only for a complete stored tail; {@code false}
         *         if the range is not fully inside {@code sen}
         */
        public boolean match(char[] sen, int offset, int len) {
            // A range that leaves the sentence cannot be a word; report
            // "no match" instead of failing with an index error.
            if (offset < 0 || len < 0 || len > sen.length - offset) {
                return false;
            }
            TreeNode node = head;
            for (int i = 0; i < len; i++) {
                node = node.subNode(sen[offset + i]);
                if (node == null) {
                    return false;
                }
            }
            return node.isAlsoLeaf();
        }
    }

    /** One trie node: its character, its children, and an end-of-tail flag. */
    private static class TreeNode {
        char key;
        Map<Character, TreeNode> subNodes;
        /** True when a stored tail ends at this node. */
        boolean alsoLeaf;

        public TreeNode(char key) {
            this.key = key;
            subNodes = new HashMap<Character, TreeNode>();
        }

        /** Attaches {@code sub} as the child reached by character {@code k}. */
        public void born(char k, TreeNode sub) {
            subNodes.put(k, sub);
        }

        /** @return the child reached by character {@code k}, or {@code null} if there is none */
        public TreeNode subNode(char k) {
            return subNodes.get(k);
        }

        public boolean isAlsoLeaf() {
            return alsoLeaf;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy