All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.johnsnowlabs.nlp.annotators.common.DependencyParsed.scala Maven / Gradle / Ivy

/*
 * Copyright 2017-2022 John Snow Labs
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.johnsnowlabs.nlp.annotators.common

import com.johnsnowlabs.nlp.{Annotation, AnnotatorType}

case class DependencyParsedSentence(tokens: Array[WordWithDependency])

case class WordWithDependency(word: String, begin: Int, end: Int, head: Int)

object DependencyParsed extends Annotated[DependencyParsedSentence] {

  override def annotatorType: String = AnnotatorType.DEPENDENCY

  override def unpack(annotations: Seq[Annotation]): Seq[DependencyParsedSentence] = {
    val sentences = TokenizedWithSentence.unpack(annotations)
    val depAnnotations = annotations
      .filter(a => a.annotatorType == annotatorType)
      .sortBy(a => a.begin)

    var last = 0
    sentences.map { sentence =>
      val sorted = sentence.indexedTokens.sortBy(t => t.begin)
      val dependencies = (last until (last + sorted.length)).map { i =>
        depAnnotations(i).metadata("head").toInt
      }

      last += sorted.length

      val words = sorted.zip(dependencies).map { case (token, dependency) =>
        WordWithDependency(token.token, token.begin, token.end, dependency)
      }

      DependencyParsedSentence(words)
    }
  }

  override def pack(items: Seq[DependencyParsedSentence]): Seq[Annotation] = {
    items.zipWithIndex.flatMap { case (sentence, index) =>
      sentence.tokens.map { token =>
        val headData = getHeadData(token.head, sentence)
        val realHead = if (token.head == sentence.tokens.length) 0 else token.head + 1
        Annotation(
          annotatorType,
          token.begin,
          token.end,
          headData.word,
          Map(
            "head" -> realHead.toString,
            "head.begin" -> headData.begin.toString,
            "head.end" -> headData.end.toString,
            "sentence" -> index.toString))
      }
    }
  }

  def getHeadData(head: Int, sentence: DependencyParsedSentence): WordWithDependency = {
    val root: WordWithDependency = WordWithDependency("ROOT", -1, -1, -1)
    sentence.tokens.lift(head).getOrElse(root)
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy