All downloads are free. The search and download functionality uses the official Maven repository.

geotrellis.spark.partition.SpacePartitioner.scala Maven / Gradle / Ivy

Go to download

GeoTrellis is an open source geographic data processing engine for high performance applications.

The newest version!
package geotrellis.spark.partition

import geotrellis.spark._
import geotrellis.spark.io.index._
import geotrellis.spark.io.index.zcurve.{Z3, Z2, ZSpatialKeyIndex}
import geotrellis.util._

import org.apache.spark._
import org.apache.spark.rdd.{ShuffledRDD, RDD}

import scala.collection.mutable.ArrayBuffer
import scala.reflect._

/**
  * A Spark `Partitioner` that places each key into the partition of the region
  * index computed for it by the implicit `PartitionerIndex`.
  *
  * Every region index covered by `bounds` becomes one partition; the partition
  * number of a region is its position in `regions`.
  *
  * @param bounds key bounds whose index ranges define the partitions;
  *               `EmptyBounds` produces a partitioner with zero partitions
  * @param index  translates keys to region indices and key bounds to index ranges
  */
case class SpacePartitioner[K: Boundable](bounds: Bounds[K])
  (implicit index: PartitionerIndex[K]) extends Partitioner {

  /** All region indices covered by `bounds`, in the order the index reports its ranges. */
  val regions: Array[Long] =
    bounds match {
      case b: KeyBounds[K] =>
        // Expand every inclusive (start, end) range into its individual region indices.
        for {
          (start, end) <- index.indexRanges(b).toArray
          p <- start to end
        } yield p

      case EmptyBounds =>
        Array.empty
    }

  /** Number of partitions: one per entry of `regions`. */
  def numPartitions: Int = regions.length

  /**
    * Partition number for `key`.
    *
    * Keys whose region is listed in `regions` go to the partition at that region's position.
    * Any other key overflows to `region modulo numPartitions`; the result is kept non-negative
    * even for a negative region index, so it is always a valid partition number.
    *
    * Note: an unknown key on a partitioner with zero regions still fails with an
    * `ArithmeticException` (division by zero), as there is no partition to assign it to.
    *
    * @param key a key of type `K`; it is cast without a check
    */
  def getPartition(key: Any): Int = {
    val region = index.toIndex(key.asInstanceOf[K])
    regionIndex(region).getOrElse {
      // overflow for keys, at this point this should no longer be considered spatially partitioned
      val remainder = region % numPartitions
      // `%` keeps the sign of the dividend; shift negative remainders into [0, numPartitions).
      (if (remainder < 0) remainder + numPartitions else remainder).toInt
    }
  }

  /**
    * Whether the region of `key` is one of the regions of this partitioner.
    *
    * @param key a key of type `K`; it is cast without a check
    */
  def containsKey(key: Any): Boolean =
    regionIndex(index.toIndex(key.asInstanceOf[K])).isDefined

  /**
    * Position of `region` in `regions`, i.e. its partition number.
    *
    * @return `Some(position)` of the first occurrence, or `None` if the region is not covered
    */
  def regionIndex(region: Long): Option[Int] = {
    // Note: Consider future design where region can overlap several partitions, would change Option -> List
    val i = regions.indexOf(region)
    if (i > -1) Some(i) else None
  }

  /**
    * Use this partitioner as a partitioner for rdd.
    * The rdd may have a SpacePartitioner already.
    * If it is in sync with Bounds in the Metadata we assume it to be valid.
    * Otherwise we assume it has degraded to be a hash partitioner and we must perform a shuffle.
    *
    * Records whose key is not covered by this partitioner (see `containsKey`) are dropped.
    *
    * @param rdd keyed rdd whose metadata exposes a `Bounds[K]` component
    * @return the remaining records, partitioned by this partitioner, with this partitioner's
    *         `bounds` as metadata
    */
  def apply[V, M: GetComponent[?, Bounds[K]]](rdd: RDD[(K, V)] with Metadata[M]): RDD[(K, V)] with Metadata[Bounds[K]] = {
    val kb: Bounds[K] = rdd.metadata.getComponent[Bounds[K]]
    // Both paths below only keep the records this partitioner has a region for.
    val covered = rdd.filter(r => containsKey(r._1))
    rdd.partitioner match {
      // The type argument is erased at runtime, so match on the wildcard and compare bounds instead.
      case Some(part: SpacePartitioner[_]) if part.bounds == kb =>
        ContextRDD(
          new ReorderedSpaceRDD(covered, this),
          bounds)

      case _ =>
        ContextRDD(
          new ShuffledRDD(covered, this).asInstanceOf[RDD[(K, V)]],
          bounds)
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy