All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.mayabot.nlp.segment.kotlin.KotlinLexers.kt Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package com.mayabot.nlp.segment.kotlin

import com.google.common.io.Files
import com.mayabot.nlp.segment.Lexers
import com.mayabot.nlp.segment.Sentence
import java.io.File

// Shared lexer built once via Lexers.core(); the extension functions in this file all use it.
private val defaultLexer = Lexers.core()

/**
 * Scans this string with the shared default lexer and returns the scanned
 * sentence's `toWordList()` result.
 */
// NOTE(review): the element type was missing in the source (bare `List` does not compile);
// `String` is assumed here — confirm against Sentence.toWordList().
fun String.segment(): List<String> = defaultLexer.scan(this).toWordList()

/**
 * Scans this string with the shared default lexer and returns the resulting [Sentence].
 */
fun String.lexer(): Sentence = defaultLexer.scan(this)


/**
 * Segments this file line by line and writes the result to [outPath].
 *
 * Blank lines are skipped. Every other line is scanned with a reader obtained from the
 * shared default lexer, and the words of that line are written as one output line,
 * joined by a single space. Missing parent directories of [outPath] are created first.
 *
 * Both the input reader and the output writer are closed when this function returns
 * or throws.
 *
 * @param outPath path of the file to write; an existing file is overwritten.
 */
fun File.segment(outPath: String) {
    val lexerReader = defaultLexer.reader()

    val outFile = File(outPath)
    Files.createParentDirs(outFile)

    // useLines closes the input reader even if scanning or writing fails,
    // so the file handle is not leaked.
    useLines { lines ->
        outFile.outputStream().bufferedWriter().use { writer ->
            lines.filter { it.isNotBlank() }
                .map { lexerReader.scan(it).toWordSequence() }
                .forEach { words ->
                    writer.write(words.joinToString(separator = " "))
                    writer.newLine()
                }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy