// NOTE(review): the two lines below are non-Scala marketing/spam text that precede the
// `package` clause and break compilation; commented out here — they should be removed entirely.
// Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
// You can buy this project and download/modify it how often you want.
package io.prophecy.libs.python
import org.apache.spark.sql._
import org.apache.spark.sql.expressions.UserDefinedFunction
/**
 * Thin forwarding facade over the lookup helpers in `io.prophecy.libs`, exposing them with
 * `List[Column]` parameters (each delegate takes varargs, hence the `cols: _*` expansion).
 * Intended as the entry point used by generated/Python-facing code (package `...python`).
 */
object UDFUtils {
/**
 * Performs a lookup against the named, previously registered lookup (see [[createLookup]]).
 * By default returns only the first matching record.
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 * @return Column holding the first matching record
 */
def lookup(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup(lookupName, cols: _*)
/**
 * Returns the last matching record for the given keys.
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 * @return Column holding the last matching record
 */
def lookup_last(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_last(lookupName, cols: _*)
/**
 * Tests whether any record matches the given keys (per [[createLookup]]'s docs:
 * 0 when there is no matching row, 1 when at least one row matches).
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 * @return Boolean Column
 */
def lookup_match(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_match(lookupName, cols: _*)
/**
 * Counts all rows matching the given keys.
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 * @return Column holding the count of matching rows
 */
def lookup_count(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_count(lookupName, cols: _*)
/**
 * Returns all rows matching the given keys (see [[createLookup]]).
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 * @return Column holding the matching rows
 */
def lookup_row(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_row(lookupName, cols: _*)
/**
 * Delegates to `io.prophecy.libs.lookup_row_reverse`.
 * NOTE(review): presumably returns the matching rows in reverse order — not documented
 * in this file; confirm against the `io.prophecy.libs` implementation.
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 */
def lookup_row_reverse(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_row_reverse(lookupName, cols: _*)
/**
 * Delegates to `io.prophecy.libs.lookup_nth`.
 * NOTE(review): presumably selects the n-th matching record — not documented in this
 * file; confirm semantics (and where `n` comes from) against the delegate.
 *
 * @param lookupName name under which the lookup was registered
 * @param cols key columns used to match rows in the lookup
 */
def lookup_nth(lookupName: String, cols: List[Column]): Column =
io.prophecy.libs.lookup_nth(lookupName, cols: _*)
/**
 * Function registers 4 different UDFs with spark registry. UDF for lookup_match, lookup_count,
 * lookup_row and lookup functions are registered. This function stores the data of input dataframe in
 * a broadcast variable, then uses this broadcast variable in different lookup functions.
 *
 * lookup : This function returns the first matching row for given input keys
 * lookup_count : This function returns the count of all matching rows for given input keys.
 * lookup_match : This function returns 0 if there is no matching row and 1 for some matching rows for given input keys.
 * lookup_row : This function returns all the matching rows for given input keys.
 *
 * This function registers for upto 10 matching keys as input to these lookup functions.
 *
 * @param name UDF Name
 * @param df input dataframe
 * @param spark spark session
 * @param keyCols columns to be used as keys in lookup functions.
 * @param rowCols schema of entire row which will be stored for each matching key.
 * @return registered UDF definitions for lookup functions. These UDF functions returns different results depending
 * on the lookup function.
 */
def createLookup(
name: String,
df: DataFrame,
spark: SparkSession,
keyCols: List[String],
rowCols: List[String]
): UserDefinedFunction =
io.prophecy.libs.createLookup(name, df, spark, keyCols, rowCols: _*)
/**
 * Method to create UDF which looks for passed input double in input dataframe. This function first
 * loads the data of dataframe in broadcast variable and then defines a UDF which looks for input double
 * value in the data stored in broadcast variable. If input double lies between passed col1 and col2 values
 * then it adds corresponding row in the returned result. If value of input double doesn't lie between col1 and
 * col2 then it simply returns null for current row in result.
 *
 * @param name created UDF name
 * @param df input dataframe
 * @param spark spark session
 * @param minColumn column whose value to be considered as minimum in comparison.
 * @param maxColumn column whose value to be considered as maximum in comparison.
 * @param valueColumns remaining column names to be part of result.
 * @return registers UDF which in turn returns rows corresponding to each row in dataframe on which range UDF is called.
 */
def createRangeLookup(
name: String,
df: DataFrame,
spark: SparkSession,
minColumn: String,
maxColumn: String,
valueColumns: List[String]
): UserDefinedFunction =
io.prophecy.libs.createRangeLookup(name, df, spark, minColumn, maxColumn, valueColumns: _*)
}