All downloads are free. Search and download functionality uses the official Maven repository.

com.johnsnowlabs.nlp.pretrained.pipelines.PretrainedPipeline.scala Maven / Gradle / Ivy

There is a newer version: 1.6.2
Show newest version
package com.johnsnowlabs.nlp.pretrained.pipelines

import com.johnsnowlabs.nlp.pretrained.ResourceDownloader
import com.johnsnowlabs.nlp.{Finisher, LightPipeline}
import org.apache.spark.ml.PipelineModel
import org.apache.spark.sql.DataFrame

/**
  * Base class for pipelines that are fetched through [[ResourceDownloader]] and
  * exposed through a small set of `annotate` entry points.
  *
  * @param downloadName name handed to `ResourceDownloader.downloadPipeline`
  * @param language     optional language handed to `ResourceDownloader.downloadPipeline`;
  *                     defaults to `None`
  * @param folder       folder handed to `ResourceDownloader.downloadPipeline`;
  *                     defaults to `ResourceDownloader.publicFolder`
  */
abstract class PretrainedPipeline(
  downloadName: String,
  language: Option[String] = None,
  folder: String = ResourceDownloader.publicFolder
) {

  /** Pipeline model returned by the downloader; resolved on first access and reused afterwards. */
  protected lazy val modelCache: PipelineModel =
    ResourceDownloader.downloadPipeline(downloadName, language, folder)

  /**
    * Light-weight wrapper around [[modelCache]], built once on first use so that the
    * String-based `annotate` overloads do not allocate a new wrapper on every call.
    */
  private lazy val lightPipeline: LightPipeline = new LightPipeline(modelCache)

  /**
    * Runs the pipeline model over a DataFrame.
    *
    * The column named by `inputColumn` is renamed to `"text"` before the model is applied;
    * it is not renamed back here.
    *
    * @param dataset     data to transform
    * @param inputColumn name of the column in `dataset` to feed to the pipeline
    * @return the DataFrame produced by the model's `transform`
    */
  def annotate(dataset: DataFrame, inputColumn: String): DataFrame =
    modelCache.transform(dataset.withColumnRenamed(inputColumn, "text"))

  /**
    * Annotates a single string through the light pipeline.
    *
    * @param target text to annotate
    * @return the map produced by `LightPipeline.annotate` for `target`
    */
  def annotate(target: String): Map[String, Seq[String]] =
    lightPipeline.annotate(target)

  /**
    * Annotates several strings through the light pipeline.
    *
    * @param target texts to annotate
    * @return the array of maps produced by `LightPipeline.annotate` for `target`
    */
  def annotate(target: Array[String]): Array[Map[String, Seq[String]]] =
    lightPipeline.annotate(target)

  /** @return the underlying pipeline model, triggering its resolution if not yet accessed */
  def pretrained(): PipelineModel = modelCache

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy