All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.allenai.pipeline.WriteHelpers.scala Maven / Gradle / Ivy

The newest version!
package org.allenai.pipeline

import spray.json.JsonFormat

import scala.reflect.ClassTag

import java.io.File

trait WriteHelpers {

  /** Factory interface for creating flat Artifact instances.
    *
    * The implementations in this trait produce a single file or a single S3 object
    * as the flat artifact.
    *
    * @tparam T the type of value that identifies the artifact to create
    *           (the implementations in this trait use `String` paths and `File`s)
    */
  trait FlatArtifactFactory[T] {
    /** Returns the [[FlatArtifact]] identified by `input`. */
    def flatArtifact(input: T): FlatArtifact
  }

  /** Factory interface for creating structured Artifact instances.
    *
    * The implementations in this trait produce a directory, a zip file, or an S3 zip
    * object as the structured artifact.
    *
    * @tparam T the type of value that identifies the artifact to create
    *           (the implementations in this trait use `String` paths and `File`s)
    */
  trait StructuredArtifactFactory[T] {
    /** Returns the [[StructuredArtifact]] identified by `input`. */
    def structuredArtifact(input: T): StructuredArtifact
  }

  /** A factory that can create both flat and structured artifacts from the same kind of input.
    *
    * Adds no members of its own; it only combines [[FlatArtifactFactory]] and
    * [[StructuredArtifactFactory]].
    */
  trait ArtifactFactory[T] extends FlatArtifactFactory[T] with StructuredArtifactFactory[T]

  /** Artifact factory for the local file system that resolves every name relative to
    * a fixed root directory.
    *
    * @param rootDir the directory against which artifact names are resolved
    */
  class RelativeFileSystem(rootDir: File)
      extends ArtifactFactory[String] {

    /** Creates a [[FileArtifact]] for the file called `name` under the root directory. */
    override def flatArtifact(name: String): FlatArtifact =
      new FileArtifact(resolve(name))

    /** Creates a [[DirectoryArtifact]] when `name` denotes an existing directory under the
      * root, and a [[ZipFileArtifact]] in every other case (including when nothing exists
      * at that location yet).
      */
    override def structuredArtifact(name: String): StructuredArtifact = {
      val target = resolve(name)
      // File.isDirectory is already false for a path that does not exist.
      if (target.isDirectory) new DirectoryArtifact(target) else new ZipFileArtifact(target)
    }

    /** Maps a name relative to `rootDir` onto a concrete `File`. */
    private def resolve(relativePath: String): File = new File(rootDir, relativePath)
  }

  /** Artifact factory for the local file system that takes `File` instances as given,
    * without resolving them against any root directory.
    */
  object AbsoluteFileSystem extends ArtifactFactory[File] {

    /** Creates a [[FileArtifact]] wrapping `file`. */
    override def flatArtifact(file: File): FlatArtifact = new FileArtifact(file)

    /** Creates a [[DirectoryArtifact]] when `file` is an existing directory, and a
      * [[ZipFileArtifact]] in every other case (including when `file` does not exist yet).
      */
    override def structuredArtifact(file: File): StructuredArtifact =
      file match {
        // File.isDirectory is already false for a path that does not exist.
        case dir if dir.isDirectory => new DirectoryArtifact(dir)
        case archive => new ZipFileArtifact(archive)
      }

    /** A view of this factory that accepts path strings instead of `File`s.
      *
      * Each call returns a new factory; every path it receives is wrapped in a `File`
      * and handed to the corresponding method of this object.
      */
    def usingPaths: ArtifactFactory[String] = {
      val byFile = this
      new ArtifactFactory[String] {
        override def flatArtifact(path: String): FlatArtifact =
          byFile.flatArtifact(new File(path))

        override def structuredArtifact(path: String): StructuredArtifact =
          byFile.structuredArtifact(new File(path))
      }
    }
  }

  /** Implicit factory whose input is already a [[FlatArtifact]].
    *
    * It satisfies any implicit `FlatArtifactFactory[FlatArtifact]` requirement where the
    * members of this trait are in scope.
    */
  implicit object IdentityFlatArtifactFactory
      extends FlatArtifactFactory[FlatArtifact] {
    /** Returns `a` unchanged. */
    override def flatArtifact(a: FlatArtifact): FlatArtifact = a
  }

  /** Implicit factory whose input is already a [[StructuredArtifact]].
    *
    * It satisfies any implicit `StructuredArtifactFactory[StructuredArtifact]` requirement
    * where the members of this trait are in scope.
    */
  implicit object IdentityStructuredArtifactFactory
      extends StructuredArtifactFactory[StructuredArtifact] {
    /** Returns `a` unchanged. */
    override def structuredArtifact(a: StructuredArtifact): StructuredArtifact = a
  }

  /** Artifact factory that creates S3-backed artifacts.
    *
    * @param config   S3 settings, passed unchanged to every artifact this factory creates
    * @param rootPath optional prefix under which all paths are resolved; leading and
    *                 trailing slashes on it are ignored
    */
  class S3(config: S3Config, rootPath: Option[String] = None)
      extends ArtifactFactory[String] {

    /** Resolves `path` against `rootPath`.
      *
      * The root has its leading and trailing slashes dropped and is then joined to `path`
      * with a single '/'. When there is no root, or the root is empty once stripped
      * (e.g. "" or "/"), `path` is returned unchanged so that the result never gains a
      * spurious leading '/'.
      */
    private def toPath(path: String): String =
      rootPath
        .map(root => root.dropWhile(_ == '/').reverse.dropWhile(_ == '/').reverse)
        .filter(_.nonEmpty)
        .fold(path)(base => s"$base/$path")

    /** Creates an [[S3FlatArtifact]] for `path`, resolved against the root prefix. */
    override def flatArtifact(path: String): FlatArtifact =
      new S3FlatArtifact(toPath(path), config)

    /** Creates an [[S3ZipArtifact]] for `path`, resolved against the root prefix. */
    override def structuredArtifact(path: String): StructuredArtifact =
      new S3ZipArtifact(toPath(path), config)
  }

  object Persist {
    // Short alias for FlatArtifactFactory, used to reduce the line length of the
    // implicit parameter lists in the helpers below.
    type FAF[T] = FlatArtifactFactory[T]

    /** Persistence helpers for steps that produce an `Iterator[T]`.
      *
      * Every method wraps `step` via `step.persisted`, pairing a [[LineIteratorIo]] format
      * with a flat artifact obtained from the implicit factory.
      */
    object Iterator {

      /** Persists `step` with `LineIteratorIo.text`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asText[T: StringSerializable: ClassTag](
        step: Producer[Iterator[T]],
        path: String
      )(implicit faf: FAF[String]): PersistedProducer[Iterator[T], FlatArtifact] =
        step.persisted(LineIteratorIo.text[T], faf.flatArtifact(path))

      /** Persists `step` with `LineIteratorIo.json`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asJson[T: JsonFormat: ClassTag](
        step: Producer[Iterator[T]],
        path: String
      )(implicit faf: FAF[String]): PersistedProducer[Iterator[T], FlatArtifact] =
        step.persisted(LineIteratorIo.json[T], faf.flatArtifact(path))

      /** Persists `step` with `LineIteratorIo.text`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "txt")`.
        */
      def asText[T: StringSerializable: ClassTag](step: Producer[Iterator[T]])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[Iterator[T], FlatArtifact] =
        step.persisted(LineIteratorIo.text[T], faf.flatArtifact((step.signature, "txt")))

      /** Persists `step` with `LineIteratorIo.json`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "json")`.
        */
      def asJson[T: JsonFormat: ClassTag](step: Producer[Iterator[T]])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[Iterator[T], FlatArtifact] =
        step.persisted(LineIteratorIo.json[T], faf.flatArtifact((step.signature, "json")))
    }

    /** Persistence helpers for steps that produce an `Iterable[T]`.
      *
      * Every method wraps `step` via `step.persisted`, pairing a [[LineCollectionIo]] format
      * with a flat artifact obtained from the implicit factory.
      */
    object Collection {

      /** Persists `step` with `LineCollectionIo.text`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asText[T: StringSerializable: ClassTag](
        step: Producer[Iterable[T]],
        path: String
      )(implicit faf: FAF[String]): PersistedProducer[Iterable[T], FlatArtifact] =
        step.persisted(LineCollectionIo.text[T], faf.flatArtifact(path))

      /** Persists `step` with `LineCollectionIo.json`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asJson[T: JsonFormat: ClassTag](
        step: Producer[Iterable[T]],
        path: String
      )(implicit faf: FAF[String]): PersistedProducer[Iterable[T], FlatArtifact] =
        step.persisted(LineCollectionIo.json[T], faf.flatArtifact(path))

      /** Persists `step` with `LineCollectionIo.text`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "txt")`.
        */
      def asText[T: StringSerializable: ClassTag](step: Producer[Iterable[T]])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[Iterable[T], FlatArtifact] =
        step.persisted(LineCollectionIo.text[T], faf.flatArtifact((step.signature, "txt")))

      /** Persists `step` with `LineCollectionIo.json`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "json")`.
        */
      def asJson[T: JsonFormat: ClassTag](step: Producer[Iterable[T]])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[Iterable[T], FlatArtifact] =
        step.persisted(LineCollectionIo.json[T], faf.flatArtifact((step.signature, "json")))
    }

    /** Persistence helpers for steps that produce a single value of type `T`.
      *
      * Every method wraps `step` via `step.persisted`, pairing a [[SingletonIo]] format
      * with a flat artifact obtained from the implicit factory.
      */
    object Singleton {

      /** Persists `step` with `SingletonIo.text`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asText[T: StringSerializable: ClassTag](
        step: Producer[T],
        path: String
      )(implicit factory: FAF[String]): PersistedProducer[T, FlatArtifact] =
        step.persisted(SingletonIo.text[T], factory.flatArtifact(path))

      /** Persists `step` with `SingletonIo.json`, in the flat artifact the implicit
        * factory creates for `path`.
        */
      def asJson[T: JsonFormat: ClassTag](
        step: Producer[T],
        path: String
      )(implicit factory: FAF[String]): PersistedProducer[T, FlatArtifact] =
        step.persisted(SingletonIo.json[T], factory.flatArtifact(path))

      /** Persists `step` with `SingletonIo.text`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "txt")`.
        */
      def asText[T: StringSerializable: ClassTag](step: Producer[T])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[T, FlatArtifact] =
        step.persisted(SingletonIo.text[T], faf.flatArtifact((step.signature, "txt")))

      /** Persists `step` with `SingletonIo.json`. No path is given: the artifact is
        * the one the implicit factory creates for the pair `(step.signature, "json")`.
        */
      def asJson[T: JsonFormat: ClassTag](step: Producer[T])(
        implicit faf: FAF[(Signature, String)]
      ): PersistedProducer[T, FlatArtifact] =
        step.persisted(SingletonIo.json[T], faf.flatArtifact((step.signature, "json")))
    }

  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy