All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.segment.plugins.collector.SentenceCollector.kt Maven / Gradle / Ivy

package com.mayabot.nlp.segment.plugins.collector

import com.mayabot.nlp.Mynlp
import com.mayabot.nlp.common.utils.StringUtils
import com.mayabot.nlp.segment.WordTerm
import com.mayabot.nlp.segment.wordnet.Wordnet
import com.mayabot.nlp.segment.wordnet.Wordpath
import java.util.function.Consumer

/**
 * Nlp收集方式,不处理子词
 * 按照WordPath里面描述的唯一切分路径,构建WordTerm序列
 *
 * @author jimichan
 */
class SentenceCollector(val mynlp: Mynlp) : WordTermCollector {

    /**
     * 从wordnet中计算出子词的方法。
     */
    override var pickUpSubword: WordTermCollector.PickUpSubword? = null

    /**
     * 给一个初始化wordnet的机会,填充更多的可能性
     */
    override var fillSubword: WordTermCollector.FillSubword? = null

    override fun collect(txtChars: CharArray?, wordnet: Wordnet, wordPath: Wordpath, consumer: Consumer) {

        val vertexIterator = wordPath.iteratorVertex()

        fillSubword?.fill(wordnet, wordPath)

        while (vertexIterator.hasNext()) {
            val vertex = vertexIterator.next()

            val word = if(txtChars==null){
                vertex.realWord()
            }else {
                String(txtChars, vertex.offset(), vertex.length)
            }

            val term = WordTerm(word, vertex.nature, vertex.offset())

            if (StringUtils.isWhiteSpace(term.word)) {
                continue
            }

            val pick = pickUpSubword

            //给当前的term计算子词
            pick?.pickup(term, wordnet, wordPath)

            consumer.accept(term)
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy