org.nlpcraft.common.nlp.NCNlpSentenceToken.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nlpcraft Show documentation
Show all versions of nlpcraft Show documentation
An API to convert natural language into actions.
/*
* "Commons Clause" License, https://commonsclause.com/
*
* The Software is provided to you by the Licensor under the License,
* as defined below, subject to the following condition.
*
* Without limiting other conditions in the License, the grant of rights
* under the License will not include, and the License does not grant to
* you, the right to Sell the Software.
*
* For purposes of the foregoing, "Sell" means practicing any or all of
* the rights granted to you under the License to provide to third parties,
* for a fee or other consideration (including without limitation fees for
* hosting or consulting/support services related to the Software), a
* product or service whose value derives, entirely or substantially, from
* the functionality of the Software. Any license notice or attribution
* required by the License must also include this Commons Clause License
* Condition notice.
*
* Software: NLPCraft
* License: Apache 2.0, https://www.apache.org/licenses/LICENSE-2.0
* Licensor: Copyright (C) 2018 DataLingvo, Inc. https://www.datalingvo.com
*
* _ ____ ______ ______
* / | / / /___ / ____/________ _/ __/ /_
* / |/ / / __ \/ / / ___/ __ `/ /_/ __/
* / /| / / /_/ / /___/ / / /_/ / __/ /_
* /_/ |_/_/ .___/\____/_/ \__,_/_/ \__/
* /_/
*/
package org.nlpcraft.common.nlp
import java.util.{List ⇒ JList}
import org.nlpcraft.common.nlp.pos._
import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.language.implicitConversions
/**
* NLP token is a collection of NLP notes associated with that token.
*/
case class NCNlpSentenceToken(
index: Int,
notes: mutable.HashMap[String, NCNlpSentenceNote] = mutable.HashMap.empty[String, NCNlpSentenceNote]
) extends java.io.Serializable {
private var nlpNote: NCNlpSentenceNote = notes.values.find(_.isNlp).orNull
/**
* Simple word is a non synthetic word that's also not part of any domain-specific note type.
*/
// TODO: review all usage places. How is it correlated with user tokens?
def isSimpleWord: Boolean = notes.size == 1
def words: Int = origText.split(" ").length
// Shortcuts for some frequently used *mandatory* notes.
def normText: String = getNlpValue[String]("normText")
def startCharIndex: Int = getNlpValue[Int]("start").intValue() // Start character index.
def endCharIndex: Int = getNlpValue[Int]("end").intValue() // End character index.
def origText: String = getNlpValue[String]("origText")
def wordLength: Int = getNlpValue[Int]("wordLength").intValue()
def wordIndexes: Seq[Int] = getNlpValue[JList[Int]]("wordIndexes").asScala
def pos: String = getNlpValue[String]("pos")
def posDescription: String = getNlpValue[String]( "posDesc")
def lemma: String = getNlpValue[String]("lemma")
def stem: String = getNlpValue[String]("stem")
def isStopword: Boolean = getNlpValue[Boolean]("stopWord")
def isBracketed: Boolean = getNlpValue[Boolean]("bracketed")
def isDirect: Boolean = getNlpValue[Boolean]("direct")
def isQuoted: Boolean = getNlpValue[Boolean]("quoted")
def isSynthetic: Boolean = NCPennTreebank.isSynthetic(pos)
def isKnownWord: Boolean = getNlpValue[Boolean]("dict")
/**
*
* @param noteType Note type.
*/
def getNotes(noteType: String): Iterable[NCNlpSentenceNote] = notes.values.filter(_.noteType == noteType)
/**
* Clones note.
* Shallow copy.
*/
def clone(index: Int): NCNlpSentenceToken = NCNlpSentenceToken(index, notes.clone())
/**
* Clones note.
* Shallow copy.
*/
override def clone(): NCNlpSentenceToken = clone(index)
/**
* Removes note with given ID. No-op if ID wasn't found.
*
* @param id Note ID.
*/
def remove(id: String): Unit = notes -= id
/**
* Removes notes with given IDs. No-op if ID wasn't found.
*
* @param ids Note IDs.
*/
def remove(ids: Iterable[String]): Unit = notes --= ids
/**
* Tests whether or not this token contains note with given ID.
*/
def contains(id: String): Boolean = notes.contains(id)
/**
*
* @param noteType Note type.
* @param noteName Note name.
*/
def getNoteOpt(noteType: String, noteName: String): Option[NCNlpSentenceNote] = {
val ns = getNotes(noteType).filter(_.contains(noteName))
ns.size match {
case 0 ⇒ None
case 1 ⇒ Some(ns.head)
case _ ⇒
throw new AssertionError(
s"Multiple notes found [type=$noteType, name=$noteName, token=$notes]"
)
}
}
/**
* Gets note with given type and name.
*
* @param noteType Note type.
* @param noteName Note name.
*/
def getNote(noteType: String, noteName: String): NCNlpSentenceNote =
getNoteOpt(noteType, noteName) match {
case Some(n) ⇒ n
case None ⇒
throw new AssertionError(s"Note not found [type=$noteType, name=$noteName, token=$notes]")
}
/**
* Gets NLP note.
*/
def getNlpNote: NCNlpSentenceNote = {
require(nlpNote != null)
nlpNote
}
/**
*
* @param noteName Note name.
* @tparam T Type of the note value.
*/
def getNlpValueOpt[T: Manifest](noteName: String): Option[T] =
getNlpNote.get(noteName) match {
case Some(v) ⇒ Some(v.asInstanceOf[T])
case None ⇒ None
}
/**
*
* @param noteName Note name.
* @tparam T Type of the note value.
*/
def getNlpValue[T: Manifest](noteName: String): T = getNlpNote(noteName).asInstanceOf[T]
/**
* Tests if this token has any notes of given type(s).
*
* @param nodeTypes Note type(s) to check.
*/
def isTypeOf(nodeTypes: String*): Boolean = {
var found = false
for (noteType ← nodeTypes if !found)
if (getNotes(noteType).nonEmpty)
found = true
found
}
/**
* Adds element.
*
* @param elem Element.
*/
def add(elem: NCNlpSentenceNote): Unit = {
notes += elem.id → elem
if (elem.isNlp)
nlpNote = elem
}
override def toString: String =
notes.values.toSeq.sortBy(t ⇒ (if (t.isNlp) 0 else 1, t.noteType)).mkString("NLP token [", "|", "]")
}
object NCNlpSentenceToken {
implicit def toNotes(x: NCNlpSentenceToken): Iterable[NCNlpSentenceNote] = x.notes.values
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy