
vectorpipe.functions.package.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vectorpipe_2.11 Show documentation
Show all versions of vectorpipe_2.11 Show documentation
Import OSM data and output to VectorTiles with GeoTrellis.
The newest version!
package vectorpipe
import org.apache.spark.sql.Column
import org.apache.spark.sql.expressions.UserDefinedFunction
import org.apache.spark.sql.functions._
import org.apache.spark.sql.types.{DoubleType, FloatType}
import vectorpipe.util._
package object functions {
// A brief note about style:
// Spark functions are conventionally named using snake_case, so the UDFs defined here are too.
// Internal helper functions follow standard Scala naming conventions.
// UDF that merges two String -> Int count maps by delegating to _mergeCounts, which treats a
// null map as empty before combining the two sides.
@transient lazy val merge_counts: UserDefinedFunction =
  udf { (left: Map[String, Int], right: Map[String, Int]) => _mergeCounts(left, right) }
// UDF that collapses a collection of String -> Int count maps into a single map by pairwise
// application of _mergeCounts.
// A null or empty collection yields an empty map: `reduce` would throw
// UnsupportedOperationException on an empty collection, so `reduceOption` is used with an
// empty-map fallback, and a null column is treated as empty (mirroring how _mergeCounts treats
// null maps).
@transient lazy val sum_counts: UserDefinedFunction = udf { counts: Iterable[Map[String, Int]] =>
  val maps = Option(counts).getOrElse(Iterable.empty[Map[String, Int]])
  maps.reduceOption(_mergeCounts(_, _)).getOrElse(Map.empty[String, Int])
}
// Convert BigDecimals to doubles.
// Reduces the size taken for representation at the expense of some precision loss.
// Non-null values are cast to DoubleType; null values become NaN. The resulting column is
// aliased as "asDouble(<value>)".
def asDouble(value: Column): Column = {
  val converted = when(value.isNotNull, value.cast(DoubleType)).otherwise(lit(Double.NaN))
  converted.as(s"asDouble($value)")
}
// Convert BigDecimals to floats.
// Reduces the size taken for representation at the expense of more precision loss.
// Non-null values are cast to FloatType; null values become NaN. The resulting column is
// aliased as "asFloat(<value>)".
def asFloat(value: Column): Column = {
  val converted = when(value.isNotNull, value.cast(FloatType)).otherwise(lit(Float.NaN))
  converted.as(s"asFloat($value)")
}
// UDF that counts how many times each distinct string occurs in the input sequence, returning
// a map from value to occurrence count.
// Uses a strict `map` rather than `mapValues`: on Scala 2.11/2.12 `mapValues` returns a lazy
// view that is re-evaluated on every access and is not serializable.
@transient lazy val count_values: UserDefinedFunction = udf { values: Seq[String] =>
  values.groupBy(identity).map { case (value, occurrences) => value -> occurrences.size }
}
// UDF that concatenates a sequence of string sequences into one flat sequence, preserving
// element order and duplicates.
@transient lazy val flatten: UserDefinedFunction = udf { nested: Seq[Seq[String]] =>
  nested.flatMap(inner => inner)
}
// UDF that concatenates a sequence of string sequences into one flat sequence and then drops
// duplicate elements, keeping the first occurrence of each value.
@transient lazy val flatten_set: UserDefinedFunction = udf { nested: Seq[Seq[String]] =>
  val combined = nested.flatMap(inner => inner)
  combined.distinct
}
// UDF that returns the union of two string collections as an array without duplicates.
// A null argument is treated as an empty collection.
@transient lazy val merge_sets: UserDefinedFunction = udf { (a: Iterable[String], b: Iterable[String]) =>
  val left = Option(a).fold(Set.empty[String])(_.toSet)
  val right = Option(b).fold(Set.empty[String])(_.toSet)
  (left ++ right).toArray
}
// UDF that returns the input sequence with every element equal to the given string removed;
// the order of the remaining elements is preserved.
@transient lazy val without: UserDefinedFunction = udf { (values: Seq[String], excluded: String) =>
  values.filter(_ != excluded)
}
// Internal helper shared by the count-merging UDFs: substitutes an empty map for a null
// argument on either side, then combines the two maps with mergeMaps, using integer addition
// as the combining function (presumably applied to keys present in both maps; see
// vectorpipe.util.mergeMaps).
private val _mergeCounts = (a: Map[String, Int], b: Map[String, Int]) => {
  val left = Option(a).getOrElse(Map.empty[String, Int])
  val right = Option(b).getOrElse(Map.empty[String, Int])
  mergeMaps(left, right)((x, y) => x + y)
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy