All downloads are free. Search and download functionality uses the official Maven repository.

org.tinygroup.chinese.parsermanager.WordParserManagerImpl Maven / Gradle / Ivy

The newest version!
package org.tinygroup.chinese.parsermanager;

import java.util.List;

import org.tinygroup.binarytree.AVLTree;
import org.tinygroup.binarytree.impl.AVLTreeImpl;
import org.tinygroup.chinese.Character;
import org.tinygroup.chinese.ParserException;
import org.tinygroup.chinese.SentenceRank;
import org.tinygroup.chinese.Word;
import org.tinygroup.chinese.WordParserManager;
import org.tinygroup.chinese.WordParserType;

/**
 * {@link WordParserManager} implementation that keeps its dictionaries in
 * {@link AVLTree} instances.
 * <p>
 * Four trees are maintained: one for single characters, one for stop words and
 * two for words. Words are stored character by character: every
 * {@link WordDescription} holds one character and an optional child tree with
 * the characters that may follow it. A child whose character is
 * {@code (char) 0} marks the end of a complete word and carries the
 * {@link Word} itself. The "asc" tree is keyed by {@link Word#getWord()}, the
 * "desc" tree by the text of {@link Word#getReverseWord()}.
 * <p>
 * NOTE(review): the code relies on {@code AVLTree.contains} returning the
 * stored element, or {@code null} when nothing matches - confirm against the
 * AVLTree contract.
 * <p>
 * Created by luog on 15/4/14.
 */
public class WordParserManagerImpl implements WordParserManager {
    /** Character used by the node that terminates a complete word. */
    private static final char END_OF_WORD = (char) 0;

    private final AVLTree<Character> characterAVLTree = new AVLTreeImpl<Character>();
    private final AVLTree<WordDescription> wordAscAVLTree = new AVLTreeImpl<WordDescription>();
    private final AVLTree<WordDescription> wordDescAVLTree = new AVLTreeImpl<WordDescription>();
    private final AVLTree<String> stopWordAVLTree = new AVLTreeImpl<String>();

    /**
     * Registers a single stop word.
     *
     * @param word the stop word to add
     */
    public void addStopWord(String word) {
        stopWordAVLTree.add(word);
    }

    /**
     * Registers a character entry.
     *
     * @param character the character entry to add
     */
    public void addCharacter(Character character) {
        characterAVLTree.add(character);
    }

    /**
     * Registers a character entry built from a plain {@code char}.
     *
     * @param character the character to wrap and add
     */
    public void addCharacter(char character) {
        characterAVLTree.add(new Character(character));
    }

    /**
     * Registers every stop word of the given list.
     *
     * @param stopWordList the stop words to add
     */
    public void addStopWords(List<String> stopWordList) {
        for (String stopWord : stopWordList) {
            addStopWord(stopWord);
        }
    }

    /**
     * Adds a word to both word trees: the reversed text goes into the "desc"
     * tree and the normal text into the "asc" tree.
     *
     * @param word the word to add
     */
    public void addWord(Word word) {
        addWord(wordDescAVLTree, null, word, word.getReverseWord().getWord(), 0);
        addWord(wordAscAVLTree, null, word, word.getWord(), 0);
    }

    /**
     * Inserts the characters of {@code stringWord}, starting at {@code index},
     * into the tree structure and finishes with an end-of-word node carrying
     * {@code word}.
     *
     * @param repository the root tree, used when {@code wordDesc} is {@code null}
     * @param wordDesc   the node of the preceding character, or {@code null}
     *                   when inserting the first character
     * @param word       the word stored on the end-of-word node
     * @param stringWord the text that determines the path through the trees
     * @param index      position in {@code stringWord} to start from; nothing
     *                   happens when it is greater than the text length
     */
    private void addWord(AVLTree<WordDescription> repository, WordDescription wordDesc, Word word,
                         String stringWord, int index) {
        WordDescription parent = wordDesc;
        // One pass per remaining character plus a final pass for the end marker.
        for (int position = index; position <= stringWord.length(); position++) {
            boolean terminal = position == stringWord.length();
            char key = terminal ? END_OF_WORD : stringWord.charAt(position);

            // Pick the tree for this level, creating the child tree on demand.
            AVLTree<WordDescription> level;
            if (parent == null) {
                level = repository;
            } else {
                level = parent.getWordDescriptionAVLTree();
                if (level == null) {
                    level = new AVLTreeImpl<WordDescription>();
                    parent.setWordDescriptionAVLTree(level);
                }
            }

            WordDescription candidate = new WordDescription(key);
            if (terminal) {
                candidate.setWord(word);
            }
            WordDescription node = level.contains(candidate);
            if (node == null) {
                level.add(candidate);
                node = candidate;
            } else if (terminal) {
                // The same word was added before: refresh its content.
                node.setWord(word);
            }
            parent = node;
        }
    }

    /**
     * Adds a word given as plain text.
     *
     * @param word the text of the word to add
     */
    public void addWordString(String word) {
        addWord(new Word(word));
    }

    /**
     * Adds every text of the given list as a word.
     *
     * @param words the word texts to add
     */
    public void addWordString(List<String> words) {
        for (String word : words) {
            addWordString(word);
        }
    }

    /**
     * Adds every word of the given list.
     *
     * @param words the words to add
     */
    public void addWord(List<Word> words) {
        for (Word word : words) {
            addWord(word);
        }
    }

    /**
     * Tells whether the given text was registered as a stop word.
     *
     * @param word the text to look up
     * @return {@code true} when the stop-word tree contains the text
     */
    public boolean isStopWord(String word) {
        return stopWordAVLTree.contains(word) != null;
    }

    /**
     * Returns all spells registered for a character.
     *
     * @param c the character to look up
     * @return the spells of the character, or {@code null} when the character
     *         is not registered
     */
    public String[] getCharacterSpells(char c) {
        Character character = characterAVLTree.contains(new Character(c));
        if (character != null) {
            return character.getSpell();
        }
        return null;
    }

    /**
     * Returns one spell of a character, selected by a 1-based position.
     *
     * @param character the character to look up
     * @param index     1-based position of the wanted spell
     * @return the selected spell, or {@code null} when the character is not
     *         registered or {@code index} is outside {@code 1..spells.length}
     */
    public String getCharacterSpell(char character, int index) {
        String[] spells = getCharacterSpells(character);
        // index is 1-based while the array is 0-based, hence the -1; both
        // bounds are checked so index <= 0 can no longer read spells[-1].
        if (spells != null && index >= 1 && index <= spells.length) {
            return spells[index - 1];
        }
        return null;
    }

    /**
     * Returns the first spell of a character.
     *
     * @param character the character to look up
     * @return the first spell, or {@code null} when none is available
     */
    public String getCharacterSpell(char character) {
        return getCharacterSpell(character, 1);
    }

    /**
     * Returns the registered entry of a character.
     *
     * @param character the character to look up
     * @return the registered entry
     * @throws ParserException when the character is not registered
     */
    public Character getCharacter(char character) throws ParserException {
        Character ch = characterAVLTree.contains(new Character(character));
        if (ch == null) {
            throw new ParserException("词库中不存在指定的字:" + character);
        }
        return ch;
    }

    /**
     * Returns the spell of every character of a registered word.
     * <p>
     * When the word defines a spell selection ({@link Word#getSpell()}), the
     * entry for each position picks the spell of that character; positions
     * without an entry fall back to the character's first spell.
     *
     * @param word the text of the word
     * @return one spell per character; an element is {@code null} when no
     *         spell is available for that character
     * @throws ParserException when the word is not registered
     */
    public String[] getWordSpell(String word) throws ParserException {
        Word w = getWord(word);
        String[] result = new String[word.length()];
        if (w != null) {
            int[] spell = w.getSpell();
            for (int i = 0; i < word.length(); i++) {
                // Guard the selection array length so a short array cannot
                // cause an ArrayIndexOutOfBoundsException.
                if (spell != null && i < spell.length) {
                    result[i] = getCharacterSpell(word.charAt(i), spell[i]);
                } else {
                    result[i] = getCharacterSpell(word.charAt(i));
                }
            }
        }
        return result;
    }

    /**
     * Looks up a registered word by its exact text.
     *
     * @param word the text of the word
     * @return the registered word
     * @throws ParserException when the word is not registered
     */
    public Word getWord(String word) throws ParserException {
        return getWord(word, 0);
    }

    /**
     * Looks up the word formed by the characters of {@code word} from
     * {@code start} to the end of the text in the "asc" tree.
     * <p>
     * Only a complete match counts: if any character along the way is missing,
     * the lookup fails instead of returning a word registered for a shorter
     * prefix.
     *
     * @param word  the text to look up
     * @param start position of the first character to match
     * @return the registered word
     * @throws ParserException when no word with exactly that text is
     *                         registered (including empty or {@code null} text)
     */
    private Word getWord(String word, int start) throws ParserException {
        WordDescription terminal = null;
        if (word != null && start >= 0 && start < word.length()) {
            WordDescription node = wordAscAVLTree.contains(new WordDescription(word.charAt(start)));
            int position = start + 1;
            while (node != null) {
                AVLTree<WordDescription> children = node.getWordDescriptionAVLTree();
                if (children == null) {
                    // Nothing follows this node, so no word can end here.
                    break;
                }
                if (position == word.length()) {
                    // Whole text consumed: a word exists only if an end marker follows.
                    terminal = children.contains(new WordDescription(END_OF_WORD));
                    break;
                }
                node = children.contains(new WordDescription(word.charAt(position)));
                position++;
            }
        }
        if (terminal != null) {
            return terminal.getWord();
        }
        throw new ParserException("词库中不存在词组:" + word);
    }

    /**
     * Returns the root-level node of a character from one of the word trees.
     *
     * @param c          the character to look up
     * @param parserType {@link WordParserType#ASC} selects the "asc" tree, any
     *                   other value the "desc" tree
     * @return the matching node, or {@code null} when there is none
     */
    public WordDescription getWordDescription(char c, WordParserType parserType) {
        AVLTree<WordDescription> tree = parserType == WordParserType.ASC ? wordAscAVLTree : wordDescAVLTree;
        return tree.contains(new WordDescription(c));
    }

    /**
     * Returns the abbreviation of a word built from the first letter of the
     * spell of each character.
     *
     * @param word the text of the word
     * @return the first letters of all spells, in order
     * @throws ParserException when the word is not registered or a character
     *                         of it has no usable spell
     */
    public String getWordSpellShort(String word) throws ParserException {
        String[] spell = getWordSpell(word);
        if (spell != null) {
            char[] result = new char[word.length()];
            for (int i = 0; i < spell.length; i++) {
                String current = spell[i];
                if (current == null || current.length() == 0) {
                    // A missing spell would otherwise surface as a NullPointerException.
                    throw new ParserException("词库中找不到单词:" + word);
                }
                result[i] = current.charAt(0);
            }
            return new String(result);
        }
        throw new ParserException("词库中找不到单词:" + word);
    }

    /**
     * Hands this manager to the given sentence rank. The rank itself is not
     * retained by this class.
     *
     * @param sentenceRank the sentence rank that should use this manager
     */
    public void setSentenceRank(SentenceRank sentenceRank) {
        sentenceRank.setWordParserManager(this);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy