All downloads are free. Search and download functionality uses the official Maven repository.

geotrellis.spark.io.s3.slippy.S3SlippyTileReader.scala Maven / Gradle / Ivy

Go to download

GeoTrellis is an open source geographic data processing engine for high performance applications.

The newest version!
package geotrellis.spark.io.slippy

import geotrellis.vector._
import geotrellis.raster._
import geotrellis.raster.io.geotiff._
import geotrellis.spark._
import geotrellis.spark.io.s3._
import geotrellis.util.Filesystem

import org.apache.commons.io.FileUtils
import org.apache.commons.io.filefilter._
import org.apache.spark._
import org.apache.spark.rdd._
import java.io.File


/**
 * Reads slippy-map tiles from an S3 bucket, where each tile is stored under
 * `{prefix}/{zoom}/{col}/{row}` (optionally followed by a `.ext` file extension).
 *
 * @param uri       location of the tile set; the URI host is used as the bucket name and the
 *                  URI path, without its leading slash, as the key prefix
 * @param fromBytes decodes the raw bytes of a single tile, given that tile's spatial key
 * @tparam T        type produced by `fromBytes` for each tile
 */
class S3SlippyTileReader[T](uri: String)(fromBytes: (SpatialKey, Array[Byte]) => T) extends SlippyTileReader[T] {
  import SlippyTileReader.TilePath

  val client = S3Client.default
  val parsed = new java.net.URI(uri)
  val bucket = parsed.getHost
  // `stripPrefix` instead of `substring(1, ...)`: a URI without a path (empty or null
  // `getPath`) now yields an empty prefix rather than throwing.
  val prefix = Option(parsed.getPath).getOrElse("").stripPrefix("/")

  /**
   * Joins the key prefix and `parts` into an S3 key using '/' as the separator.
   *
   * S3 keys are plain strings, so this deliberately avoids `java.io.File`, whose separator is
   * platform dependent and which turns an empty parent into a leading "/" on Unix. Empty
   * segments (including an empty prefix) are dropped.
   */
  private def s3Path(parts: String*): String =
    (prefix.stripSuffix("/") +: parts).filter(_.nonEmpty).mkString("/")

  /**
   * Reads and decodes the single tile at `zoom` / `key.col` / `key.row`.
   *
   * @param zoom zoom level of the tile
   * @param key  column and row of the tile
   * @return the tile decoded by `fromBytes`
   * @throws RuntimeException (via `sys.error`) if no stored key, or more than one,
   *                          corresponds to the requested tile
   */
  def read(zoom: Int, key: SpatialKey): T = {
    val s3key = s3Path(zoom.toString, key.col.toString, key.row.toString)

    // The listing is by key prefix, so asking for ".../1/2" also returns siblings such as
    // ".../1/20.png". Keep only the exact key or the key followed by a file extension.
    val candidates = client.listKeys(bucket, s3key).filter { listed =>
      val rest = listed.stripPrefix(s3key)
      rest.isEmpty || rest.startsWith(".")
    }

    candidates match {
      case Seq() => sys.error(s"KeyNotFound: $s3key not found in bucket $bucket")
      case Seq(tileKey) => fromBytes(key, client.readBytes(bucket, tileKey))
      case _ => sys.error(s"Multiple keys found for prefix $s3key in bucket $bucket")
    }
  }

  /**
   * Reads and decodes every tile of one zoom level as an RDD.
   *
   * Keys are listed on the driver; the tile bytes are fetched and decoded inside the Spark
   * partitions. Listed keys that do not match `TilePath` are ignored.
   *
   * @param zoom zoom level to read
   * @param sc   Spark context used to build the RDD
   * @return pairs of spatial key and decoded tile, hash-partitioned by spatial key
   */
  def read(zoom: Int)(implicit sc: SparkContext): RDD[(SpatialKey, T)] = {
    // The trailing "/" keeps the prefix listing inside this zoom level; without it,
    // zoom 1 would also pick up keys under ".../10/", ".../11/", and so on.
    val keys =
      client.listKeys(bucket, s3Path(zoom.toString) + "/")
        .collect { case key @ TilePath(x, y) => (SpatialKey(x.toInt, y.toInt), key) }
        .toSeq

    // Local copies so the task closure captures only these values and not `this`
    // (which would drag the driver-side `client` into serialization).
    val bucketName = bucket
    val decode = fromBytes

    // Roughly ten keys per partition, at least 50 partitions, but never more than one per key.
    val numPartitions = math.min(keys.size, math.max(keys.size / 10, 50))
    sc.parallelize(keys)
      .partitionBy(new HashPartitioner(numPartitions))
      .mapPartitions({ partition =>
        // One client per partition, obtained where the task runs.
        val client = S3Client.default

        partition.map { case (spatialKey, s3Key) =>
          (spatialKey, decode(spatialKey, client.readBytes(bucketName, s3Key)))
        }
      }, preservesPartitioning = true)
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy