All Downloads are FREE. Search and download functionalities are using the official Maven repository.

edu.ucr.cs.bdlab.beast.generator.JavaSpatialGeneratorBuilder.scala Maven / Gradle / Ivy

There is a newer version: 0.10.1-RC2
Show newest version
/*
 * Copyright 2021 University of California, Riverside
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package edu.ucr.cs.bdlab.beast.generator

import edu.ucr.cs.bdlab.beast.cg.SpatialDataTypes.JavaSpatialRDD
import edu.ucr.cs.bdlab.beast.common.BeastOptions
import org.apache.spark.SparkContext
import org.apache.spark.api.java.{JavaRDD, JavaSparkContext}

/**
 * A fluent builder that generates synthetic spatial data as a [[JavaSpatialRDD]].
 * It is the Java-facing entry point of the spatial data generator.
 *
 * Typical usage: configure the builder through the chainable setters ([[config(key:String*]],
 * `distribution`, `numPartitions`, `makeBoxes`) and then call [[generate]] or one of the
 * distribution-specific shortcuts such as [[uniform]] or [[gaussian]].
 *
 * Note that the builder is mutable: every setter updates this instance and returns it, and the
 * `opts` object given to the constructor is updated in place by the `config` methods.
 *
 * @param sparkContext the Spark context used to create the generated RDD
 * @param opts the generation options; defaults to a new empty set of options
 */
class JavaSpatialGeneratorBuilder(sparkContext: SparkContext, opts: BeastOptions = new BeastOptions()) {
  /**
   * Creates a builder from a Java Spark context using the default (empty) options.
   * @param jsc the Java Spark context that wraps the Spark context to use
   */
  def this(jsc: JavaSparkContext) = this(jsc.sc)

  /**
   * Sets a single configuration entry for the generated data.
   * @param key the name of the configuration entry
   * @param value the value to set; it is stored as its `toString` representation
   * @return this builder to allow chaining
   */
  def config(key: String, value: Any): JavaSpatialGeneratorBuilder = {
    opts.set(key, value.toString)
    this
  }

  /**
   * Merges all the given options into the options of this builder.
   * @param opts the additional options to merge into this builder's options
   * @return this builder to allow chaining
   */
  def config(opts: BeastOptions): JavaSpatialGeneratorBuilder = {
    // The parameter shadows the constructor field, hence the explicit `this.opts`
    this.opts.mergeWith(opts)
    this
  }

  /** The type of distribution to generate; stays `null` until one is set */
  var distribution: DistributionType = _

  /** Number of partitions to generate; zero (the default) means it was not set */
  var numPartitions: Int = 0

  /**
   * Set the distribution of the generated data
   * @param distribution the distribution of the generated data as one of {[[UniformDistribution]],
   *                     [[DiagonalDistribution]], [[GaussianDistribution]], [[BitDistribution]],
   *                     [[SierpinskiDistribution]], [[ParcelDistribution]]}
   * @return this builder to allow chaining
   */
  def distribution(distribution: DistributionType): JavaSpatialGeneratorBuilder = {
    this.distribution = distribution
    this
  }

  /**
   * Set the number of partitions in the output. If not set or set to zero, one partition will be generated
   * for each one million records
   * @param num the number of partitions in the generated RDD
   * @return this builder to allow chaining
   */
  def numPartitions(num: Int): JavaSpatialGeneratorBuilder = {
    this.numPartitions = num
    this
  }

  /**
   * Generate boxes around each generated point. This sets the geometry type option to "box" and
   * the maximum size option to the given values joined by commas.
   *
   * The method is annotated with `@varargs` so that Java callers can invoke it with a regular
   * Java variable-length argument list, e.g., `builder.makeBoxes(1, 2)`.
   * @param maxSize the maximum size for each side length of the generated box
   * @return this builder to allow chaining
   */
  @scala.annotation.varargs
  def makeBoxes(maxSize: Int*): JavaSpatialGeneratorBuilder =
    this.config(UniformDistribution.GeometryType, "box")
      .config(UniformDistribution.MaxSize, maxSize.mkString(","))

  /**
   * Generate the data as an RDD using the distribution, number of partitions, and options that
   * are currently set on this builder.
   * @param cardinality the number of records to generate
   * @return the RDD that contains the generated data
   * @throws IllegalArgumentException if no distribution has been set on this builder
   */
  def generate(cardinality: Long): JavaSpatialRDD = {
    require(distribution != null, "Distribution is not specified for generated data")
    JavaRDD.fromRDD(new RandomSpatialRDD(sparkContext, distribution, cardinality, numPartitions, opts))
  }

  /**
   * Generate uniformly distributed data. This also sets the distribution of this builder to
   * [[UniformDistribution]].
   * @param cardinality the number of records to generate
   * @return the RDD that contains the generated data
   */
  def uniform(cardinality: Long): JavaSpatialRDD = this.distribution(UniformDistribution).generate(cardinality)

  /**
   * Generate diagonally distributed data. This also sets the distribution of this builder to
   * [[DiagonalDistribution]] and stores the two parameters in its options.
   * @param cardinality the number of records to generate
   * @param percentage the percentage of records exactly on the diagonal line
   * @param buffer the buffer around the diagonal line in which records can be generated
   * @return the RDD that contains the generated data
   */
  def diagonal(cardinality: Long, percentage: Double = 0.5, buffer: Double = 0.2): JavaSpatialRDD =
    this.distribution(DiagonalDistribution)
      .config(DiagonalDistribution.Percentage, percentage)
      .config(DiagonalDistribution.Buffer, buffer)
      .generate(cardinality)

  /**
   * Generate Gaussian distributed data. This also sets the distribution of this builder to
   * [[GaussianDistribution]].
   * @param cardinality the number of records to generate
   * @return the RDD that contains the generated data
   */
  def gaussian(cardinality: Long): JavaSpatialRDD =
    this.distribution(GaussianDistribution)
      .generate(cardinality)

  /**
   * Generate data from the Sierpinski distribution. This also sets the distribution of this
   * builder to [[SierpinskiDistribution]].
   * @param cardinality the number of records to generate
   * @return the RDD that contains the generated data
   */
  def sierpinski(cardinality: Long): JavaSpatialRDD =
    this.distribution(SierpinskiDistribution)
      .generate(cardinality)

  /**
   * Generate data from the bit distribution. This also sets the distribution of this builder to
   * [[BitDistribution]] and stores the two parameters in its options.
   * @param cardinality the number of records to generate
   * @param digits the number of digits to set per coordinate
   * @param probability the probability of setting each bit
   * @return the RDD that contains the generated data
   */
  def bit(cardinality: Long, digits: Int = 10, probability: Double = 0.2): JavaSpatialRDD =
    this.distribution(BitDistribution)
      .config(BitDistribution.Digits, digits)
      .config(BitDistribution.Probability, probability)
      .generate(cardinality)

  /**
   * Generates boxes from the parcel distribution. This also sets the distribution of this builder
   * to [[ParcelDistribution]] and stores the two parameters in its options.
   * @param cardinality the number of records to generate
   * @param dither the amount of randomization to add to each generated box
   * @param splitRange the range of splitting each box
   * @return the RDD that contains the generated data
   */
  def parcel(cardinality: Long, dither: Double = 0.2, splitRange: Double = 0.2): JavaSpatialRDD =
    this.distribution(ParcelDistribution)
      .config(ParcelDistribution.Dither, dither)
      .config(ParcelDistribution.SplitRange, splitRange)
      .generate(cardinality)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy