
com.audienceproject.spark.dynamodb.msd.datasource.DynamoDbScanBuilder.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of mega-spark-diff Show documentation
Show all versions of mega-spark-diff Show documentation
A diff analysis tool that compares data sets at scale of various types and can be executed in CLOUD or
locally
The newest version!
package com.audienceproject.spark.dynamodb.msd.datasource
import com.audienceproject.spark.dynamodb.connector.{DynamoConnector, FilterPushdown}
import org.apache.spark.sql.connector.expressions.filter.Predicate
import org.apache.spark.sql.connector.read.{Scan, ScanBuilder, SupportsPushDownFilters, SupportsPushDownRequiredColumns, SupportsPushDownV2Filters}
import org.apache.spark.sql.sources.Filter
import org.apache.spark.sql.types.StructType
class DynamoDbScanBuilder(connector: DynamoConnector, schema: StructType)
extends ScanBuilder
with SupportsPushDownRequiredColumns
with SupportsPushDownFilters {
private var pushedFilter = Array.empty[Filter]
private var finalSchema = schema
/**
* code from com.audienceproject:spark.dynamodb
* DynamoScanBuilder
*
* @return DynamoDbScan instance
*/
override def build(): Scan = new DynamoDbScan(connector, pushedFilters(), finalSchema)
/**
* code from com.audienceproject:spark.dynamodb
* DynamoScanBuilder
*/
override def pruneColumns(requiredSchema: StructType): Unit = {
val keyColumns = Seq(Some(connector.keySchema.hashKeyName), connector.keySchema.rangeKeyName).flatten
.flatMap(keyName => finalSchema.fields.find(_.name == keyName))
val requiredColumns = keyColumns ++ requiredSchema.fields
val newFields = finalSchema.fields.filter(requiredColumns.contains)
finalSchema = StructType(newFields)
}
/**
* code from com.audienceproject:spark.dynamodb
* DynamoScanBuilder
*
* @return array of filters
*/
override def pushFilters(filters: Array[Filter]): Array[Filter] = {
if (connector.filterPushdownEnabled) {
val (acceptedFilters, postScanFilters) = FilterPushdown.acceptFilters(filters)
this.pushedFilter = acceptedFilters
postScanFilters
} else {
filters
}
}
/**
* code from com.audienceproject:spark.dynamodb
* DynamoScanBuilder
*
* @return array of filters
*/
override def pushedFilters(): Array[Filter] = pushedFilter
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy