All downloads are free. The search and download features use the official Maven repository.

edu.ucr.cs.bdlab.beast.cg.CGOperationsMixin.scala Maven / Gradle / Ivy

/*
 * Copyright 2020 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.cg

import edu.ucr.cs.bdlab.beast.cg.Reprojector.TransformationInfo
import edu.ucr.cs.bdlab.beast.cg.SpatialDataTypes.SpatialRDD
import edu.ucr.cs.bdlab.beast.synopses
import edu.ucr.cs.bdlab.beast.synopses._
import edu.ucr.cs.bdlab.beast.geolite.{EnvelopeNDLite, GeometryHelper, IFeature}
import org.apache.spark.beast.CRSServer
import org.opengis.referencing.crs.CoordinateReferenceSystem

/**
 * A mixin that defines implicit functions to run spatial operations on Spatial RDDs
 */
trait CGOperationsMixin {

  /**
   * Additional functions for SpatialRDD
   * @param rdd the underlying RDD
   */
  implicit class RDDCGOperations(rdd: SpatialRDD) {
    /**
     * Compute the geometric summary of a set of features which includes size (in bytes), number of records,
     * number of points, number of non empty geometries, average side length (width and height), and the geometry type.
     *
     * @return the computed summaries
     */
    def summary: Summary = Summary.computeForFeatures(rdd)

    /**
     * Computes a uniform histogram with the given size that counts number of features in each cell.
     * This is [[uniformHistogramSize]] with a size function that returns 1 for every feature.
     *
     * @param histogramSize the size of the histogram as the number of partitions along each dimension
     * @param prefixSum     compute the prefix sum on the result to speed up range tests
     * @return the created histogram
     */
    def uniformHistogramCount(histogramSize: Array[Int], prefixSum: Boolean = false): AbstractHistogram =
      uniformHistogramSize(histogramSize, prefixSum, sizeFunction = _ => 1)

    /**
     * Computes a uniform histogram with the given size that calculates the size of the data in each cell
     *
     * @param histogramSize the size of the histogram as the number of partitions along each dimension.
     *                      Must contain at least one dimension.
     * @param prefixSum     compute the prefix sum on the result to speed up range tests
     * @param sizeFunction  an optional function that computes the size of a feature.
     *                      Defaults to the storage size reported by the feature.
     * @return the created histogram
     * @throws IllegalArgumentException if `histogramSize` is empty
     */
    def uniformHistogramSize(histogramSize: Array[Int], prefixSum: Boolean = false,
                             sizeFunction: IFeature => Int = _.getStorageSize): AbstractHistogram = {
      require(histogramSize.nonEmpty, "histogramSize must contain at least one dimension")
      // Accumulate the total number of bins as a Long. Multiplying in Int silently overflows for large grids
      // (e.g., 65536 x 65536 wraps around to 0) which would wrongly select the method intended for small histograms.
      val numBins: Long = histogramSize.foldLeft(1L)(_ * _)
      // Histograms with fewer than 100,000 bins use the two-pass method; bigger ones use the sparse method
      val computationMethod: HistogramOP.ComputationMethod =
        if (numBins < 100000) HistogramOP.TwoPass else HistogramOP.Sparse
      val histogram = HistogramOP.computeHistogram(rdd, sizeFunction, computationMethod,
        HistogramOP.PointHistogram, histogramSize:_*)
      if (prefixSum)
        new Prefix2DHistogram(histogram.asInstanceOf[UniformHistogram])
      else
        histogram
    }

    /**
     * Computes an Euler histogram that works better for geometries with extents, i.e., envelopes,
     * which calculates the number of records in each cell.
     * This is [[eulerHistogramSize]] with a size function that returns 1 for every feature.
     *
     * @param histogramSize the size of the histogram as the number of partitions along each dimension
     * @param prefixSum     compute the prefix sum on the result to speed up range tests
     * @return the created histogram
     */
    def eulerHistogramCount(histogramSize: Array[Int], prefixSum: Boolean = false): AbstractHistogram =
      eulerHistogramSize(histogramSize, prefixSum, sizeFunction = _ => 1)

    /**
     * Computes an Euler histogram that works better for geometries with extents, i.e., envelopes,
     * which calculates the total size of features in each cell
     *
     * @param histogramSize the size of the histogram as the number of partitions along each dimension
     * @param prefixSum     compute the prefix sum on the result to speed up range tests
     * @param sizeFunction  an optional function that computes the size of a feature.
     *                      Defaults to the storage size reported by the feature.
     * @return the created histogram
     */
    def eulerHistogramSize(histogramSize: Array[Int], prefixSum: Boolean = false,
                           sizeFunction: IFeature => Int = _.getStorageSize): AbstractHistogram = {
      val histogram = HistogramOP.computeHistogram(rdd, sizeFunction, HistogramOP.TwoPass,
        HistogramOP.EulerHistogram, histogramSize:_*)
      if (prefixSum)
        new synopses.PrefixEulerHistogram2D(histogram.asInstanceOf[EulerHistogram2D])
      else
        histogram
    }


    /**
     * Reproject the geometries in the given RDD from the source CRS to the target CRS.
     * The geometry of each feature is replaced on the feature itself and that same feature object is emitted.
     *
     * @param sourceCRS the source coordinate reference system (CRS). The function will assume that all geometries
     *                  in this RDD are in the sourceCRS
     * @param targetCRS the target coordinate reference system (CRS). The returned RDD will have all geometries
     *                  in the targetCRS
     * @return the RDD that contains the converted geometries
     */
    def reproject(sourceCRS: CoordinateReferenceSystem, targetCRS: CoordinateReferenceSystem): SpatialRDD = {
      // For efficiency, the transformation info is looked up once here and captured by the closure below
      val transform: TransformationInfo = Reprojector.findTransformationInfo(sourceCRS, targetCRS, rdd.sparkContext.getConf)
      rdd.map(f => {
        val geom = f.getGeometry
        val projectedGeometry = Reprojector.reprojectGeometry(geom, transform)
        f.setGeometry(projectedGeometry)
        f
      })
    }

    /**
     * Reprojects the geometries in this SpatialRDD to the target CRS.
     * The source CRS is derived from the SRID of the first feature in the RDD.
     * If the RDD is empty, there is nothing to convert and the RDD is returned as-is.
     *
     * @param targetCRS the target coordinate reference system
     * @return a new RDD after geometries are transformed
     */
    def reproject(targetCRS: CoordinateReferenceSystem): SpatialRDD =
      // take(1) is used instead of first() because first() throws an exception on an empty RDD
      rdd.take(1).headOption match {
        case None => rdd
        case Some(sample) =>
          // We assume that all geometries in the RDD have the same SRID which is generally true
          val sourceCRS = CRSServer.sridToCRS(sample.getGeometry.getSRID, rdd.sparkContext.getConf)
          reproject(sourceCRS, targetCRS)
      }
  }

  /**
   * Additional functions for arrays of features
   * @param records an array of features
   */
  implicit class TraversableSpatialFunctions(records: Array[_ <: IFeature]) {
    /**
     * Merges the geometries of all the features in the array into one envelope.
     * The number of dimensions of the envelope is taken from the geometry of the first feature.
     *
     * @return the envelope after merging all geometries, or a newly constructed two-dimensional envelope
     *         if the array contains no features
     */
    def mbr : EnvelopeNDLite = {
      val features = records.iterator
      if (features.hasNext) {
        val head = features.next()
        val envelope = new EnvelopeNDLite(GeometryHelper.getCoordinateDimension(head.getGeometry))
        envelope.merge(head.getGeometry)
        // Fold the remaining features into the same envelope
        while (features.hasNext)
          envelope.merge(features.next().getGeometry)
        envelope
      } else {
        new EnvelopeNDLite(2)
      }
    }
  }
}

/**
 * Companion object of the [[CGOperationsMixin]] trait. It extends the trait without adding any members so that
 * the implicit classes defined in the trait can be brought into scope with
 * `import edu.ucr.cs.bdlab.beast.cg.CGOperationsMixin._`
 */
object CGOperationsMixin extends CGOperationsMixin




© 2015 - 2025 Weber Informatics LLC | Privacy Policy