
geotrellis.spark.summary.StatsTileRDDMethods.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of geotrellis-spark_2.10 Show documentation
Show all versions of geotrellis-spark_2.10 Show documentation
GeoTrellis is an open source geographic data processing engine for high performance applications.
The newest version!
package geotrellis.spark.summary
import geotrellis.raster._
import geotrellis.raster.histogram._
import geotrellis.raster.mapalgebra.local._
import geotrellis.raster.summary._
import geotrellis.spark._
import geotrellis.spark.mapalgebra._
import org.apache.spark.Partitioner
import org.apache.spark.rdd.RDD
import org.apache.spark.SparkContext._
/** Summary-statistics operations over the keyed tile RDD exposed as `self`:
  * per-key averaging, histograms, quantile class breaks and overall min/max.
  *
  * NOTE(review): the histogram and min/max operations end in `RDD.reduce`,
  * which presumably fails on an empty RDD — confirm against Spark semantics.
  */
trait StatsTileRDDMethods[K] extends TileRDDMethods[K] {

  /** Averages all tiles that share a key.
    *
    * Tiles with the same key are summed with the tile `+` operator while the
    * number of contributing tiles is counted; the sum is then divided by
    * that count with the tile `/` operator.
    *
    * @param partitioner optional partitioner handed to `combineByKey`; when
    *                    `None`, the `combineByKey` overload without a
    *                    partitioner is used.
    * @return one averaged tile per key.
    */
  def averageByKey(partitioner: Option[Partitioner] = None): RDD[(K, Tile)] = {
    // Accumulator is (running sum of tiles, number of tiles summed so far).
    val createCombiner: Tile => (Tile, Int) =
      first => (first, 1)

    val mergeValue: ((Tile, Int), Tile) => (Tile, Int) = {
      case ((sum, n), next) => (sum + next, n + 1)
    }

    val mergeCombiners: ((Tile, Int), (Tile, Int)) => (Tile, Int) = {
      case ((sumA, nA), (sumB, nB)) => (sumA + sumB, nA + nB)
    }

    val summed = partitioner match {
      case Some(p) => self.combineByKey(createCombiner, mergeValue, mergeCombiners, p)
      case None    => self.combineByKey(createCombiner, mergeValue, mergeCombiners)
    }

    summed.mapValues { case (total, n) => total / n }
  }

  /** Gives a double-valued histogram of every tile in the RDD, built with
    * `StreamingHistogram.DEFAULT_NUM_BUCKETS` buckets.
    */
  def histogram(): Histogram[Double] =
    histogram(StreamingHistogram.DEFAULT_NUM_BUCKETS)

  /** Gives a double-valued histogram of every tile in the RDD.
    *
    * Each tile produces its own histogram via `histogramDouble(numBuckets)`;
    * the per-tile histograms are then merged pairwise into one.
    *
    * @param numBuckets bucket count passed to each per-tile histogram.
    */
  def histogram(numBuckets: Int): Histogram[Double] = {
    val perTile = self.map { case (_, tile) => tile.histogramDouble(numBuckets) }
    perTile.reduce { (left, right) => left merge right }
  }

  /** Gives a histogram that uses exact counts of integer values.
    *
    * @note This cannot handle counts that are larger than Int.MaxValue, and
    *       should not be used with very large datasets whose counts will overflow.
    *       These histograms can get very large with a wide range of values.
    */
  def histogramExactInt: Histogram[Int] = {
    val perTile = self.map { case (_, tile) => tile.histogram }
    perTile.reduce { (left, right) => left merge right }
  }

  /** Gives integer class breaks: the values of [[classBreaksDouble]], each
    * converted with `toInt`.
    *
    * @param numBreaks number of breaks requested.
    */
  def classBreaks(numBreaks: Int): Array[Int] =
    for (break <- classBreaksDouble(numBreaks)) yield break.toInt

  /** Gives quantile class breaks computed from the double-valued histogram.
    *
    * `numBreaks` is used both as the bucket count of the histogram and as the
    * number of quantile breaks requested from it.
    *
    * @param numBreaks number of breaks requested.
    */
  def classBreaksDouble(numBreaks: Int): Array[Double] = {
    val hist = histogram(numBreaks)
    hist.quantileBreaks(numBreaks)
  }

  /** Gives class breaks using a histogram that uses exact counts of integer values.
    *
    * @note This cannot handle counts that are larger than Int.MaxValue, and
    *       should not be used with very large datasets whose counts will overflow.
    *       These histograms can get very large with a wide range of values.
    */
  def classBreaksExactInt(numBreaks: Int): Array[Int] = {
    val exact = histogramExactInt
    exact.quantileBreaks(numBreaks)
  }

  /** Gives the overall integer (min, max) across every tile in the RDD.
    *
    * Per-tile results from `findMinMax` are merged pairwise. A bound for which
    * `isNoData` is true is dropped in favour of the other side's bound; if
    * both sides are NoData the merged bound stays NoData.
    */
  def minMax: (Int, Int) =
    self
      .map { case (_, tile) => tile.findMinMax }
      .reduce { case ((loA, hiA), (loB, hiB)) =>
        val lo =
          if (isNoData(loA)) loB
          else if (isNoData(loB)) loA
          else math.min(loA, loB)

        val hi =
          if (isNoData(hiA)) hiB
          else if (isNoData(hiB)) hiA
          else math.max(hiA, hiB)

        (lo, hi)
      }

  /** Gives the overall double (min, max) across every tile in the RDD.
    *
    * Per-tile results from `findMinMaxDouble` are merged pairwise. A bound for
    * which `isNoData` is true is dropped in favour of the other side's bound;
    * if both sides are NoData the merged bound stays NoData.
    */
  def minMaxDouble: (Double, Double) =
    self
      .map { case (_, tile) => tile.findMinMaxDouble }
      .reduce { case ((loA, hiA), (loB, hiB)) =>
        val lo =
          if (isNoData(loA)) loB
          else if (isNoData(loB)) loA
          else math.min(loA, loB)

        val hi =
          if (isNoData(hiA)) hiB
          else if (isNoData(hiB)) hiA
          else math.max(hiA, hiB)

        (lo, hi)
      }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy