com.kotlinnlp.tokensencoder.morpho.FeaturesCollector.kt Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tokensencoder Show documentation
Show all versions of tokensencoder Show documentation
TokensEncoder is a very simple-to-use token encoding library that uses neural networks from SimpleDNN.
/* Copyright 2017-present The KotlinNLP Authors. All Rights Reserved.
*
* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/.
* ------------------------------------------------------------------*/
package com.kotlinnlp.tokensencoder.morpho
import com.kotlinnlp.linguisticdescription.lexicon.LexiconDictionary
import com.kotlinnlp.linguisticdescription.sentence.MorphoSentence
import com.kotlinnlp.utils.DictionarySet
import com.kotlinnlp.utils.progressindicator.ProgressIndicatorBar
/**
 * Collect all the possible features from the given [sentences].
 *
 * NOTE(review): the generic type arguments of this class were missing from the reviewed text
 * (`List>`, bare `DictionarySet`, bare `Set`). They have been reconstructed as `MorphoSentence<*>`
 * and `String` — confirm them against the signatures of [FeaturesExtractor] and [DictionarySet].
 *
 * @param lexicalDictionary the lexicon dictionary (can be null)
 * @param sentences the list of sentences
 */
class FeaturesCollector(
  private val lexicalDictionary: LexiconDictionary?,
  private val sentences: List<MorphoSentence<*>>
) {

  /**
   * Run a [FeaturesExtractor] on every sentence, in order, and add each extracted feature to a
   * single dictionary set.
   * A [ProgressIndicatorBar] sized to the number of sentences is ticked once per sentence, before
   * the extraction of that sentence starts.
   *
   * @return the set of features collected from the sentences
   */
  fun collect(): DictionarySet<String> {

    val featuresDictionary = DictionarySet<String>()
    val progress = ProgressIndicatorBar(total = this.sentences.size)

    this.sentences.forEach { sentence ->

      progress.tick()

      // The extractor returns a list of feature sets for the sentence.
      val tokenFeatures: List<Set<String>> = FeaturesExtractor(
        sentence = sentence,
        lexicalDictionary = this.lexicalDictionary
      ).extractFeatures()

      tokenFeatures.forEach { featuresDictionary.addAll(it) }
    }

    return featuresDictionary
  }

  /**
   * Add the given [features] to the dictionary set, one element at a time.
   *
   * @param features a set of features
   */
  private fun DictionarySet<String>.addAll(features: Set<String>) {
    features.forEach { this.add(it) }
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy