All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.kotlinnlp.nlpserver.commands.DetectLanguage.kt Maven / Gradle / Ivy

Go to download

NLPServer implements a server that listens for HTTP requests on different routes, in order to query different modules of the KotlinNLP library.

There is a newer version: 0.6.3
Show newest version
/* Copyright 2016-present The KotlinNLP Authors. All Rights Reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, you can obtain one at http://mozilla.org/MPL/2.0/.
 * ------------------------------------------------------------------*/

package com.kotlinnlp.nlpserver.commands

import com.beust.klaxon.JsonArray
import com.beust.klaxon.json
import com.kotlinnlp.languagedetector.LanguageDetector
import com.kotlinnlp.linguisticdescription.language.Language
import com.kotlinnlp.simplednn.simplemath.ndarray.dense.DenseNDArray

/**
 * The command executed on the route '/detect-language'.
 *
 * @property languageDetector a [LanguageDetector]
 */
class DetectLanguage(private val languageDetector: LanguageDetector) {

  /**
   * Detect the language of the given [text].
   * The return value is a JSON string containing the detected language iso-a2 code and the complete classification.
   *
   * The template of the JSON object:
   *  {
   *    "language": <STRING>, // iso-a2 code
   *    "classification": {
   *      "en": <DOUBLE>,
   *      "ar": <DOUBLE>,
   *      ...
   *    }
   *  }
   *
   * @param text the input text
   *
   * @return a [String] with a JSON object containing the detected language iso-a2 code and the complete classification
   */
  operator fun invoke(text: String): String {

    val prediction: DenseNDArray = this.languageDetector.predict(text)
    val language: Language = this.languageDetector.getLanguage(prediction)

    return json {
      obj(
        "language" to language.isoCode,
        "classification" to obj(*prediction.toLanguageScorePairs())
      )
    }.toJsonString()
  }

  /**
   * Classify the language for each token of the given [text].
   *
   * The return value is a JSON string containing a list of token classifications.
   * Each token classification is an object containing the token itself under the key "word" and its classification
   * under the key "classification".
   * The classification is an object containing the score of each language mapped to its iso-code ("languages") and
   * the array of chars importance ("charsImportance").
   *
   * The template of each JSON object of the list:
   *  {
   *    "word": <STRING>,
   *    "classification": {
   *      "languages": {
   *        "en": <DOUBLE>,
   *        "ar": <DOUBLE>,
   *        ...
   *      },
   *      "charsImportance": [<DOUBLE>, <DOUBLE>, ...] // same length of the token
   *    }
   *  }
   *
   * @param text the input text
   *
   * @return a [String] with a JSON list containing the language classification of each token
   */
  fun perToken(text: String): String {

    // The element type is left to inference: each element is only accessed through `first` (the token) and
    // `second` (an object exposing the `languages` and `charsImportance` arrays).
    val tokensClassifications = this.languageDetector.classifyTokens(text)

    return json {
      array(tokensClassifications.map { tokenClassification ->
        obj(
          "word" to tokenClassification.first,
          "classification" to obj(
            "languages" to obj(*tokenClassification.second.languages.toLanguageScorePairs()),
            "charsImportance" to tokenClassification.second.charsImportance.toJSONArray()
          )
        )
      })
    }.toJsonString()
  }

  /**
   * Convert a [DenseNDArray] to a [JsonArray] containing its values as doubles, in the order given by
   * `toDoubleArray()`.
   *
   * @return a [JsonArray] with the values of this array
   */
  // The labeled `this` is required: inside the `json { }` block a plain `this` is the JSON DSL receiver.
  private fun DenseNDArray.toJSONArray(): JsonArray<*> = json { array(this@toJSONArray.toDoubleArray().toList()) }

  /**
   * Convert a [DenseNDArray] representing a languages classification to an [Array] of [Pair]s
   * (language iso-code, score).
   *
   * @throws IllegalArgumentException if the length of this array differs from the number of supported languages
   *
   * @return an [Array] of [Pair]s (language iso-code, score)
   */
  private fun DenseNDArray.toLanguageScorePairs(): Array<Pair<String, Double>> {

    // NOTE(review): the original expression was lost when this file was extracted; it has been reconstructed as
    // the list of languages supported by the detector model - confirm against the LanguageDetector API in use.
    val languages = this@DetectLanguage.languageDetector.model.supportedLanguages

    require(this.length == languages.size) {
      "Invalid this (length %d, supported languages %d)".format(this.length, languages.size)
    }

    // The i-th score is paired with the iso-code of the i-th supported language.
    return this.toDoubleArray().mapIndexed { i, score ->
      Pair(languages[i].isoCode, score)
    }.toTypedArray()
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy