All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.kotlinnlp.neuralparser.helpers.labelerselector.MorphoSelector.kt Maven / Gradle / Ivy

/* Copyright 2018-present KotlinNLP Authors. All Rights Reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 * -----------------------------------------------------------------------------*/

package com.kotlinnlp.neuralparser.helpers.labelerselector

import com.kotlinnlp.linguisticdescription.GrammaticalConfiguration
import com.kotlinnlp.linguisticdescription.POSTag
import com.kotlinnlp.linguisticdescription.morphology.*
import com.kotlinnlp.linguisticdescription.syntax.SyntacticDependency
import com.kotlinnlp.linguisticdescription.syntax.dependencies.Unknown
import com.kotlinnlp.neuralparser.language.ParsingSentence
import com.kotlinnlp.neuralparser.parsers.lhrparser.neuralmodules.labeler.utils.ScoredGrammar
import com.kotlinnlp.utils.notEmptyOr

/**
 * The selector to use when the labeler predictions are defined as combinations of POS and syntactic types in the
 * Base format.
 */
object MorphoSelector : LabelerSelector {

  /**
   * Private val used to serialize the class (needed by Serializable).
   */
  @Suppress("unused")
  private const val serialVersionUID: Long = 1L

  /**
   * Get the list of scored grammatical configurations that are valid for a given attachment.
   *
   * @param configurations the list of grammatical configurations, sorted by descending score
   * @param sentence the input sentence
   * @param tokenIndex the index of the token to which the deprel must be assigned
   * @param headIndex the index of the token head (can be null)
   *
   * @return the valid grammatical configurations for the given attachment
   */
  override fun getValidConfigurations(configurations: List,
                                      sentence: ParsingSentence,
                                      tokenIndex: Int,
                                      headIndex: Int?): List {

    val possibleMorphologies: Morphologies = sentence.morphoAnalysis!!.allMorphologies[tokenIndex]
    val correctDirection = SyntacticDependency.Direction(tokenIndex = tokenIndex, headIndex = headIndex)
    val possibleConfigurations: List = configurations.filter { it.config.direction == correctDirection }
    val worstScore: Double = configurations.last().score

    return if (possibleMorphologies.isNotEmpty())
      possibleConfigurations
        .filter { sentence.areConfigurationCompatible(c = it.config, tokenIndex = tokenIndex) }
        .notEmptyOr {
          listOf(ScoredGrammar(
            config = possibleMorphologies.first().buildUnknownConfig(correctDirection),
            score = worstScore))
        }
    else
      possibleConfigurations.filter { it.config.isSingleContentWord() }.notEmptyOr {
        listOf(ScoredGrammar(
          config = GrammaticalConfiguration(GrammaticalConfiguration.Component(
            syntacticDependency = Unknown(correctDirection),
            pos = POSTag.Base(POS.Noun))),
          score = worstScore))
      }
  }

  /**
   * Get the morphologies of a given token that are compatible with the given grammatical configuration.
   *
   * @param sentence the input sentence
   * @param tokenIndex the index of a token of the sentence
   * @param configuration the grammatical configuration of the token
   *
   * @return the morphologies compatible with the given deprel
   */
  override fun getValidMorphologies(sentence: ParsingSentence,
                                    tokenIndex: Int,
                                    configuration: GrammaticalConfiguration): Morphologies {

    val possibleMorphologies = sentence.getCompatibleMorphologies(c = configuration, tokenIndex = tokenIndex)

    return when {

      possibleMorphologies.isNotEmpty() -> possibleMorphologies

      configuration.type == GrammaticalConfiguration.Type.Single -> {

        val pos: POSTag.Base = checkNotNull(configuration.components.single().pos as? POSTag.Base) {
          "The POS must be not null."
        }

        require(pos.type.isContentWord) {
          "Grammatical configuration for tokens without morphological analysis must define a content word."
        }

        Morphologies(Morphology(SingleMorphology(
          lemma = sentence.tokens[tokenIndex].form,
          pos = pos.type,
          allowIncompleteProperties = true)))
      }

      else -> Morphologies()
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy