All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.module.nwd.ValueObjects.kt Maven / Gradle / Ivy

package com.mayabot.nlp.module.nwd

import kotlin.math.*

class IntCount {
    var value : Int = 1
}

/**
 * 词和词数量
 */
class WordCount(val word: String, val count: Int) : Comparable {
    override fun compareTo(other: WordCount): Int {
        return other.count.compareTo(count)
    }
}

data class NewWord(
        val word: String,
        val len: Int,
        val freq: Int,
        val docFreq: Int,
        val mi: Float,
        val avg_mi: Float,
        val entropy: Float,
        val le: Float,
        val re: Float,
        val idf: Float,
        val isBlock: Boolean
) {
    var score: Float = 0f

    /**
     * 内置打分公式
     */
    fun doScore() {
        var ac = abs(le - re)
        if (ac == 0.0f) {
            ac = 0.00000000001f
        }
        score = avg_mi + log2((le * exp(re) + re * exp(le)) / ac)
//        score = avg_mi + entropy + idf * 1.5f + len * freq / docFreq
        if (isBlock) score += 1000
    }
}

fun HashMap.addTo(key: Char, count: Int) {
    this.getOrPut(key) {
        IntCount()
    }.value += count
}

class WordInfo(val word: String) {

    companion object {
        val empty = HashMap()
        val emptySet = HashSet()
    }

    var count = 0
    var mi = 0f

    var mi_avg = 0f

    // 是否被双引号 书名号包围
    var isBlock = false

    var entropy = 0f

    var left = HashMap(10)
    var right = HashMap(10)

    var docSet = HashSet()

    var score = 0f

    var idf = 0f

    var doc = 0

    var le = 0f
    var re = 0f

//    var tfIdf =0f

    fun tfIdf(docCount: Double, ziCount: Double) {
        val doc = docSet.size + 1
        idf = log10(docCount / doc).toFloat()
//        tfIdf = (idf * (count/ziCount)).toFloat()
        docSet = emptySet
        this.doc = doc - 1
    }

    fun entropy() {

        var leftEntropy = 0f
        for (entry in left) {
            val p = entry.value.value / count.toFloat()
            leftEntropy -= (p * ln(p.toDouble())).toFloat()
        }

        var rightEntropy = 0f
        for (entry in right) {
            val p = entry.value.value / count.toFloat()
            rightEntropy -= (p * ln(p.toDouble())).toFloat()
        }

        le = leftEntropy
        re = rightEntropy

        entropy = min(leftEntropy, rightEntropy)

        left = empty
        right = empty
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy