All downloads are free. The search and download functionality uses the official Maven repository.

com.mayabot.nlp.segment.reader.StopWordDict.kt Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package com.mayabot.nlp.segment.reader

import com.mayabot.nlp.MynlpEnv
import com.mayabot.nlp.Mynlps
import com.mayabot.nlp.collection.dat.DoubleArrayTrieMap
import com.mayabot.nlp.injector.ImplementedBy
import com.mayabot.nlp.injector.Singleton
import java.util.*

/** Resource path of the stop word file that [SystemStopWordDict] requests from its `MynlpEnv`. */
const val StopWordDictPath = "stopwords.txt"

/**
 * Stop word dictionary interface.
 *
 * The dependency injector supplies [SystemStopWordDict] by default
 * (see the `@ImplementedBy` annotation below).
 *
 * @author jimichan
 */
@ImplementedBy(SystemStopWordDict::class)
interface StopWordDict {
    /** Returns `true` if [word] is currently treated as a stop word. */
    fun contains(word: String): Boolean

    /**
     * Adds [word] to the dictionary.
     *
     * In the implementations in this file the change is only visible to
     * [contains] after [commit] has been called.
     */
    fun add(word: String)

    /**
     * Removes [word] from the dictionary.
     *
     * In the implementations in this file the change is only visible to
     * [contains] after [commit] has been called.
     */
    fun remove(word: String)

    /** Applies the modifications made through [add] and [remove]. */
    fun commit()
}


/**
 * Stop word dictionary backed by a double-array trie ([DoubleArrayTrieMap]).
 *
 * Stop words can be added and removed at runtime. [add] and [remove] only
 * edit the pending word set; the trie queried by [contains] is rebuilt by
 * [commit], so modifications are not visible until [commit] has been called.
 *
 * @param set initial stop words; the set is copied, so later changes to the
 * caller's collection do not affect this dictionary
 * @author jimichan
 */
class DefaultStopWordDict(set: Set<String>) : StopWordDict {

    /** Pending word set, edited by [add]/[remove] and published by [commit]. */
    private val stopWordSet = HashSet(set)

    // Placeholder trie holding a single marker entry, used until the first
    // non-empty commit. Presumably the trie cannot be built from an empty
    // map -- TODO confirm against DoubleArrayTrieMap.
    private var dat = DoubleArrayTrieMap(
            TreeMap<String, Boolean>().apply { put("This Is Empty Flag", true) })

    /** `true` when the last [commit] saw no stop words; [dat] is then stale or a placeholder. */
    private var isEmpty = false

    init {
        commit()
    }

    /**
     * Publishes the pending word set to the trie used by [contains].
     *
     * When the pending set is empty the trie is not rebuilt; the dictionary
     * is only flagged as empty and [contains] answers `false` for every word.
     */
    override fun commit() {
        isEmpty = stopWordSet.isEmpty()
        if (isEmpty) {
            return
        }
        val treeMap = TreeMap<String, Boolean>()
        stopWordSet.forEach {
            treeMap[it] = true
        }
        dat = DoubleArrayTrieMap(treeMap)
    }

    /** Adds [word] to the pending set; takes effect on the next [commit]. */
    override fun add(word: String) {
        stopWordSet.add(word)
    }

    /** Removes [word] from the pending set; takes effect on the next [commit]. */
    override fun remove(word: String) {
        stopWordSet.remove(word)
    }

    /**
     * Returns `true` if [word] was a stop word at the time of the last [commit].
     *
     * The [isEmpty] guard is required because [dat] keeps the placeholder
     * entry (or the previous, now outdated, trie) while the dictionary is
     * empty; without it the placeholder or already-removed words would match.
     */
    override fun contains(word: String): Boolean = !isEmpty && dat.containsKey(word)

}

/**
 * Stop word dictionary whose initial content is loaded from the
 * [StopWordDictPath] resource of the given [MynlpEnv].
 *
 * All [StopWordDict] operations are forwarded to an internal
 * [DefaultStopWordDict], so changes made with [add] and [remove] become
 * visible to [contains] only after [commit].
 *
 * @property env environment used to look up the stop word resource
 * @author jimichan
 */
@Singleton
class SystemStopWordDict constructor(val env: MynlpEnv) : StopWordDict {

    private val stopDict = DefaultStopWordDict(loadStopword())

    /** Returns `true` if [word] is a stop word according to the wrapped dictionary. */
    override fun contains(word: String): Boolean {
        return stopDict.contains(word)
    }

    /** Publishes pending additions and removals to the wrapped dictionary. */
    override fun commit() {
        stopDict.commit()
    }

    /** Adds [word] to the wrapped dictionary; call [commit] to apply. */
    override fun add(word: String) {
        stopDict.add(word)
    }

    /** Removes [word] from the wrapped dictionary; call [commit] to apply. */
    override fun remove(word: String) {
        stopDict.remove(word)
    }

    /**
     * Reads the stop word resource, one word per line.
     *
     * Each line is trimmed and blank lines are dropped. If the resource is
     * absent, or any exception is thrown while loading it, the exception (if
     * any) is logged and an empty set is returned.
     *
     * @return the stop words found in the resource, or an empty set
     */
    private fun loadStopword(): Set<String> {
        return try {
            val resource = env.tryLoadResource(StopWordDictPath) ?: return emptySet()

            // useLines closes the reader (and the underlying stream) once the
            // lines have been consumed, including when reading fails.
            resource.inputStream().bufferedReader().useLines { lines ->
                lines.map { it.trim() }.filter { it.isNotBlank() }.toSet()
            }
        } catch (e: Exception) {
            Mynlps.logger.error("", e)
            emptySet()
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy