com.sparkutils.quality.impl.mapLookup.MapLookupImports.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quality_2.4_2.11 Show documentation
Show all versions of quality_2.4_2.11 Show documentation
A Spark library for managing in-process data quality rules via Spark SQL
The newest version!
package com.sparkutils.quality.impl.mapLookup
import com.sparkutils.quality.impl.mapLookup.MapLookupFunctions.MapLookups
import com.sparkutils.quality.{DataFrameLoader, Id}
import com.sparkutils.quality.impl.util.ConfigLoader
import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.{Column, DataFrame}
/**
 * Entry points for creating, loading and registering map lookups.
 * Every member simply forwards to MapLookupFunctions or ConfigLoader.
 */
trait MapLookupImports {

  /**
   * The parameter type used when loading map lookups - note the type of the broadcast is always
   * Map[AnyRef, AnyRef]
   */
  type MapLookups = MapLookupFunctions.MapLookups

  /** Alias of MapLookupFunctions.MapCreator, the value type accepted by mapLookupsFromDFs. */
  type MapCreator = MapLookupFunctions.MapCreator

  /**
   * Forwards the supplied lookups to MapLookupFunctions.registerMapLookupsAndFunction.
   *
   * @param mapLookups the map lookups to register
   */
  def registerMapLookupsAndFunction(mapLookups: MapLookups) = {
    MapLookupFunctions.registerMapLookupsAndFunction(mapLookups)
  }

  /**
   * Builds the maps to broadcast; each individual dataframe may carry its own associated expressions.
   *
   * @param creators a map of string id to MapCreator
   * @return a map of id to broadcast variables needed for exact lookup and mapping checks
   */
  def mapLookupsFromDFs(creators: Map[String, MapCreator]): MapLookups = {
    MapLookupFunctions.mapLookupsFromDFs(creators)
  }

  // NOTE(review): presumably these are the implicit instances ConfigLoader.loadConfigs resolves for
  // MapConfig / MapRow - confirm against MapLookupFunctions. Must stay ahead of the loadMapConfigs
  // overloads, as an import in a template body is only visible after its declaration.
  import MapLookupFunctions.{factory, mapRowEncoder}

  /**
   * Reads the map configurations for ruleSuiteId out of viewDF. Wherever token is present the loader
   * will be called and the filter optionally applied.
   *
   * @param loader called for rows in which token is present
   * @param viewDF the DataFrame holding the configuration rows
   * @param ruleSuiteId the rule suite whose configurations should be loaded
   * @return A tuple of MapConfig's and the names of rows which had unexpected content (either token
   *         or sql must be present)
   */
  def loadMapConfigs(loader: DataFrameLoader, viewDF: DataFrame,
                     ruleSuiteIdColumn: Column,
                     ruleSuiteVersionColumn: Column,
                     ruleSuiteId: Id,
                     name: Column,
                     token: Column,
                     filter: Column,
                     sql: Column,
                     key: Column,
                     value: Column
                    ): (Seq[MapConfig], Set[String]) = {
    // every argument is handed over unchanged and in declaration order
    ConfigLoader.loadConfigs[MapConfig, MapRow](
      loader, viewDF,
      ruleSuiteIdColumn, ruleSuiteVersionColumn, ruleSuiteId,
      name, token, filter, sql, key, value
    )
  }

  /**
   * Reads the map configurations out of viewDF without any rule suite restriction. Wherever token is
   * present the loader will be called and the filter optionally applied.
   *
   * @param loader called for rows in which token is present
   * @param viewDF the DataFrame holding the configuration rows
   * @return A tuple of MapConfig's and the names of rows which had unexpected content (either token
   *         or sql must be present)
   */
  def loadMapConfigs(loader: DataFrameLoader, viewDF: DataFrame,
                     name: Column,
                     token: Column,
                     filter: Column,
                     sql: Column,
                     key: Column,
                     value: Column
                    ): (Seq[MapConfig], Set[String]) = {
    // every argument is handed over unchanged and in declaration order
    ConfigLoader.loadConfigs[MapConfig, MapRow](
      loader, viewDF,
      name, token, filter, sql, key, value
    )
  }

  /**
   * Forwards the configurations to MapLookupFunctions.loadMaps.
   *
   * @param configs the map configurations, e.g. the first element returned by loadMapConfigs
   * @return the MapLookups produced by MapLookupFunctions.loadMaps
   */
  def loadMaps(configs: Seq[MapConfig]): MapLookups = {
    MapLookupFunctions.loadMaps(configs)
  }
}
/**
 * Column-level functions for working with previously loaded map lookups.
 */
trait MapLookupFunctionImports {

  /**
   * Retrieves the stored value from a map via the name mapLookupName and 'key' lookupKey
   *
   * @param mapLookupName name of the map to look in, passed on as a literal column
   * @param lookupKey column providing the key to look up
   * @param mapLookups the map lookups handed to MapLookup
   * @return the Column produced by MapLookup for the given name and key
   */
  def map_lookup(mapLookupName: String, lookupKey: Column, mapLookups: MapLookups): Column = {
    // the name travels as a literal column alongside the key column
    val nameLiteral = lit(mapLookupName)
    MapLookup(nameLiteral, lookupKey, mapLookups)
  }

  /**
   * Tests if there is a stored value from a map via the name mapLookupName and 'key' lookupKey.
   * Implementation is map_lookup.isNotNull
   *
   * @param mapLookupName name of the map to look in
   * @param lookupKey column providing the key to test
   * @param mapLookups the map lookups handed to map_lookup
   * @return the isNotNull test applied to the map_lookup column
   */
  def map_contains(mapLookupName: String, lookupKey: Column, mapLookups: MapLookups): Column = {
    val lookedUp = map_lookup(mapLookupName, lookupKey, mapLookups)
    lookedUp.isNotNull
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy