All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.segment.FluentLexerBuilder.kt Maven / Gradle / Ivy

package com.mayabot.nlp.segment

import com.mayabot.nlp.Mynlp
import com.mayabot.nlp.segment.lexer.bigram.BigramLexerPlugin
import com.mayabot.nlp.segment.lexer.bigram.CoreDictionary
import com.mayabot.nlp.segment.lexer.perceptron.PerceptronSegmentPlugin
import com.mayabot.nlp.segment.pipeline.PipelineLexerBuilder
import com.mayabot.nlp.segment.pipeline.PipelineLexerPlugin
import com.mayabot.nlp.segment.plugins.collector.*
import com.mayabot.nlp.segment.plugins.correction.CorrectionDictionary
import com.mayabot.nlp.segment.plugins.correction.CorrectionPlugin
import com.mayabot.nlp.segment.plugins.customwords.CustomDictionary
import com.mayabot.nlp.segment.plugins.customwords.CustomDictionaryPlugin
import com.mayabot.nlp.segment.plugins.ner.NerPlugin
import com.mayabot.nlp.segment.plugins.personname.PersonNamePlugin
import com.mayabot.nlp.segment.plugins.pos.PosPlugin

/**
 * Fluent style
 * @author jimichan
 */
open class FluentLexerBuilder(val mynlp: Mynlp = Mynlp.instance()) : LexerBuilder {

    private val builder = PipelineLexerBuilder(mynlp)

    override fun build(): Lexer {
        return builder.build()
    }

    fun install(plugin: PipelineLexerPlugin) {
        builder.install(plugin)
    }

    @Deprecated(message = "使用bigram方法", replaceWith = ReplaceWith("bigram"), level = DeprecationLevel.WARNING)
    fun core(): FluentLexerBuilder {
        builder.install(BigramLexerPlugin(mynlp))
        return this@FluentLexerBuilder
    }

    @Deprecated(message = "使用bigram方法", replaceWith = ReplaceWith("bigram(dict)"), level = DeprecationLevel.WARNING)
    fun coreByDict(dict: CoreDictionary): FluentLexerBuilder {
        builder.install(BigramLexerPlugin(dict))
        return this@FluentLexerBuilder
    }

    fun bigram(): FluentLexerBuilder {
        builder.install(BigramLexerPlugin(mynlp))
        return this@FluentLexerBuilder
    }

    fun bigram(dict: CoreDictionary): FluentLexerBuilder {
        builder.install(BigramLexerPlugin(dict))
        return this@FluentLexerBuilder
    }

    fun perceptron(): FluentLexerBuilder {
        builder.install(PerceptronSegmentPlugin())
        return this@FluentLexerBuilder
    }

    fun withCorrection(): FluentLexerBuilder {
        builder.install(CorrectionPlugin())
        return this;
    }

    fun withCorrection(dict: CorrectionDictionary): FluentLexerBuilder {
        builder.install(CorrectionPlugin(dict))
        return this;
    }

    fun withPos(): FluentLexerBuilder {
        builder.install(PosPlugin())
        return this
    }

    fun withPersonName(): FluentLexerBuilder {
        builder.install(PersonNamePlugin())
        return this
    }

    fun withNer(): FluentLexerBuilder {
        builder.install(NerPlugin())
        return this
    }

    fun withCustomDictionary(dict: CustomDictionary): FluentLexerBuilder {
        builder.install(CustomDictionaryPlugin(dict))
        return this
    }

    fun withCustomDictionary(): FluentLexerBuilder {
        builder.install(CustomDictionaryPlugin())
        return this
    }

    fun with(plugin: PipelineLexerPlugin) : FluentLexerBuilder {
        builder.install(plugin)
        return this
    }

    /**
     * 保持字符原样输出
     */
    fun keepOriCharOutput() : FluentLexerBuilder {
        builder.isKeepOriCharOutput = true
        return this
    }

    fun collector(): CollectorBlock {
        return CollectorBlock()
    }

    inner class CollectorBlock {

        val collector: WordTermCollector = SentenceCollector(mynlp)

        fun pickUpSubword(pickUpSubword: WordTermCollector.PickUpSubword): CollectorBlock {
            collector.pickUpSubword = pickUpSubword
            return this
        }

        fun fillSubword(fillSubword: WordTermCollector.FillSubword): CollectorBlock {
            collector.fillSubword = fillSubword
            return this
        }

        @JvmOverloads
        fun indexPickup(minWordLen: Int = 2): CollectorBlock {
            val indexd = IndexPickUpSubword()
            indexd.minWordLength = minWordLen
            collector.pickUpSubword = indexd
            return this
        }

        @JvmOverloads
        fun smartPickup(block: (x: SmartPickUpSubword) -> Unit
                        = { _ -> Unit }
        ): CollectorBlock {
            val p = SmartPickUpSubword(mynlp)
            block(p)
            collector.pickUpSubword = p
            return this
        }

        @JvmOverloads
        fun fillSubwordDict(dbcms: CoreDictionary = mynlp.getInstance(CoreDictionary::class.java)): CollectorBlock {
            collector.fillSubword = DictBasedFillSubword(dbcms)
            return this
        }

        fun with(collector: WordTermCollector): CollectorBlock {
            builder.termCollector = collector
            return this
        }

        fun done(): FluentLexerBuilder {
            builder.termCollector = collector
            return this@FluentLexerBuilder
        }
    }
}






© 2015 - 2024 Weber Informatics LLC | Privacy Policy