All downloads are free. Search and download functionality uses the official Maven repository.

com.kotlinnlp.geolocation.helpers.ConfidenceHelper.kt Maven / Gradle / Ivy

Go to download

GeoLocation is a Kotlin library designed to support the identification of geo-locations in a text.

The newest version!
/* Copyright 2018-present The KotlinNLP Authors. All Rights Reserved.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 * ------------------------------------------------------------------*/

package com.kotlinnlp.geolocation.helpers

import com.kotlinnlp.geolocation.structures.ExtendedLocation
import com.kotlinnlp.geolocation.structures.Location
import java.lang.Math.pow

/**
 * The helper that calculates the confidence scores of the best locations.
 *
 * On construction it indexes the given locations (their ids, how many there are per type and, for each of them,
 * how many other best locations list it among their parents).
 * [setConfidences] then adds a series of boosts to the `confidence` of each location, in place, and normalizes it.
 *
 * @param bestLocations the list of best locations
 */
internal class ConfidenceHelper(private val bestLocations: List<ExtendedLocation>) {

  /**
   * The set of best locations ids.
   */
  private val bestLocationsIds: Set<String> = this.bestLocations.map { it.location.id }.toSet()

  /**
   * A map of best locations counts associated by type.
   * Only the types of at least one best location are present as keys.
   */
  private val bestLocationsCountPerType: Map<Location.Type, Int> =
    this.bestLocations.groupBy { it.location.type }.mapValues { it.value.size }

  /**
   * A map of sub-levels counts associated by location id.
   * It contains an entry (initially 0) for every best location; the counts are filled in the `init` block.
   */
  private val subLevelsCountPerLocation: MutableMap<String, Int> =
    this.bestLocationsIds.associateTo(mutableMapOf()) { it to 0 }

  /**
   * A map of sub-levels types associated by location id.
   * It contains an entry (initially empty) for every best location; the sets are filled in the `init` block.
   */
  private val subLevelsTypesPerLocation: MutableMap<String, MutableSet<Location.Type>> =
    this.bestLocationsIds.associateTo(mutableMapOf()) { it to mutableSetOf() }

  /**
   * Init internal variables.
   *
   * Each best location is registered as a sub-level of every one of its parents that is itself a best location.
   * Parents that are not best locations have no entry in the maps and are skipped.
   */
  init {

    this.bestLocations.forEach { subLevel ->
      subLevel.parents.forEach { parent ->
        this.subLevelsCountPerLocation.computeIfPresent(parent.id) { _, count -> count + 1 }
        this.subLevelsTypesPerLocation[parent.id]?.add(subLevel.location.type)
      }
    }
  }

  /**
   * Set the confidence of the [bestLocations].
   *
   * The boosts are added to the value that `confidence` already holds, then the result is normalized.
   * The borders boost is applied to countries only.
   */
  fun setConfidences() {

    this.bestLocations.forEach {

      this.boostByParents(it)
      this.boostBySubLevels(it)
      this.boostByBrothers(it)
      this.boostByOtherRelatives(it)

      if (it.location.isCountry) this.boostByBorders(it)

      this.normalize(it)
    }
  }

  /**
   * Boost the location confidence by its parents.
   *
   * The boost is the number of parents that are best locations, divided by the number of best locations whose type
   * is the type of one of those parents. No boost is given if no parent is a best location.
   *
   * @param location an extended location within the [bestLocations]
   */
  private fun boostByParents(location: ExtendedLocation) {

    // The parents of the given location that are best locations
    val bestParents: List<Location> = location.parents.filter { it.id in this.bestLocationsIds }
    val bestParentsTypes: Set<Location.Type> = bestParents.map { it.type }.toSet()

    // The count of best locations with type equal to the one of a best parent
    val sameTypeParentsCount: Int = this.bestLocationsCountPerType.filterKeys { it in bestParentsTypes }.values.sum()

    if (sameTypeParentsCount > 0) location.confidence += bestParents.size.toFloat() / sameTypeParentsCount
  }

  /**
   * Boost the location confidence by its sub-levels.
   *
   * The boost is the number of sub-levels of the location, divided by the number of best locations whose type is
   * the type of one of those sub-levels. No boost is given if the location has no sub-levels.
   *
   * @param location an extended location within the [bestLocations]
   *
   * @throws NoSuchElementException if the given location is not one of the [bestLocations]
   */
  private fun boostBySubLevels(location: ExtendedLocation) {

    val locId: String = location.location.id
    val subLevelsTypes: Set<Location.Type> = this.subLevelsTypesPerLocation.getValue(locId)

    // The count of best locations with type equal to the sub-levels of the given location
    val sameSubLevelsTypesCount: Int = this.bestLocationsCountPerType.filterKeys { it in subLevelsTypes }.values.sum()

    // Zero means that the location has no sub-levels: nothing to add (and nothing to divide by)
    if (sameSubLevelsTypesCount <= 0) return

    val subLevelsCount: Int = this.subLevelsCountPerLocation.getValue(locId)

    location.confidence += subLevelsCount.toFloat() / sameSubLevelsTypesCount
  }

  /**
   * Boost the location confidence by brother locations.
   *
   * The boost is the number of best locations that `isBrother` reports as brothers of the given one, divided by the
   * number of the other best locations of the same type.
   *
   * @param location an extended location within the [bestLocations]
   *
   * @throws NoSuchElementException if the type of the given location is not the type of any best location
   */
  private fun boostByBrothers(location: ExtendedLocation) {

    val sameTypeCount: Int = this.bestLocationsCountPerType.getValue(location.location.type)

    // The given location is the only best location of its type: there are no candidates (the divisor would be 0)
    if (sameTypeCount <= 1) return

    // The brothers of the given location that are best locations
    val bestBrothersCount: Int = this.bestLocations.count { it.isBrother(location) }

    // sameTypeCount - 1 = the count of best locations of the same type of the given location except itself
    location.confidence += bestBrothersCount.toFloat() / (sameTypeCount - 1)
  }

  /**
   * Boost the location confidence by other relatives.
   *
   * The boost is the number of best locations that `isRelative` reports as relatives of the given one, divided by
   * the number of best locations flagged as `isInsideCountry`.
   *
   * @param location an extended location within the [bestLocations]
   */
  private fun boostByOtherRelatives(location: ExtendedLocation) {

    val possibleRelativesCount: Int = this.bestLocations.count { it.location.isInsideCountry }

    // No best location is inside a country: skip the boost (the divisor would be 0)
    if (possibleRelativesCount <= 0) return

    val relativesCount: Int = this.bestLocations.count { it.isRelative(location) }

    location.confidence += relativesCount.toFloat() / possibleRelativesCount
  }

  /**
   * Boost the confidence of a country by its borders.
   *
   * The boost is the number of bordering countries that are best locations, divided by the number of the other
   * best countries.
   *
   * @param location an extended location within the [bestLocations]
   *
   * @throws NoSuchElementException if no best location has type [Location.Type.Country]
   */
  private fun boostByBorders(location: ExtendedLocation) {

    val bestCountriesCount: Int = this.bestLocationsCountPerType.getValue(Location.Type.Country)

    // The given location is the only best country: no border can be a best location (the divisor would be 0)
    if (bestCountriesCount <= 1) return

    // The border countries of the given location that are best locations
    val bestBordersCount: Int = location.location.borders.count { it in this.bestLocationsIds }

    // bestCountriesCount - 1 = the count of best countries except the given location itself
    location.confidence += bestBordersCount.toFloat() / (bestCountriesCount - 1)
  }

  /**
   * Normalize the confidence of a location.
   *
   * The confidence is divided by 5, the number of boost steps of [setConfidences] (the same divisor is used for the
   * locations that do not receive the borders boost), and then raised to the power of 0.333.
   *
   * @param location an extended location within the [bestLocations]
   */
  private fun normalize(location: ExtendedLocation) {

    location.confidence /= 5 // average of all the contributions

    // Approximately a cube root
    location.confidence = pow(location.confidence, 0.333)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy