All Downloads are FREE. Search and download functionalities are using the official Maven repository.

geotrellis.spark.io.hadoop.SaveToHadoopMethods.scala Maven / Gradle / Ivy

Go to download

GeoTrellis is an open source geographic data processing engine for high performance applications.

The newest version!
package geotrellis.spark.io.hadoop

import geotrellis.spark.render._
import geotrellis.spark.{LayerId, SpatialKey}

import java.net.URI
import org.apache.hadoop.conf.Configuration
import org.apache.hadoop.fs.{FileSystem, Path}
import org.apache.spark.rdd.RDD


class SaveToHadoopMethods[K, V](rdd: RDD[(K, V)]) {
  /** Sets up saving to Hadoop, but returns an RDD so that writes can be chained.
    *
    * @param keyToUri      maps each key to full hadoop supported path
    * @param getBytes  K and V both provided in case K contains required information, like extent.
    */
  def setupSaveToHadoop(keyToUri: K => String)(getBytes: (K, V) => Array[Byte]): RDD[(K, V)] =
    SaveToHadoop.setup(rdd, keyToUri, getBytes)

  /** Saves to Hadoop, but returns a count of records saved.
    *
    * @param keyToUri      maps each key to full hadoop supported path
    * @param getBytes  K and V both provided in case K contains required information, like extent.
    */
  def saveToHadoop(keyToUri: K => String)(getBytes: (K, V) => Array[Byte]): Long =
    SaveToHadoop(rdd, keyToUri, getBytes)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy