templates.data-delivery-pyspark.persist.py.vm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of foundation-mda Show documentation
Model driven architecture artifacts for aiSSEMBLE
The newest version!
## Choose the second parameter of the generated save_dataset(..) signature based
## on the step's persist type: Elasticsearch targets an index/resource string,
## Neo4j targets a list of node labels, and everything else (Hive, Delta Lake,
## RDBMS, ...) targets a table name.
#if ($step.isPersistTypeElasticsearch())
#set ($saveParam = "es_resource: str")
#elseif ($step.isPersistTypeNeo4j())
#set ($saveParam = "labels: list")
#else
#set ($saveParam = "table_name: str")
#end
## Emit the save_dataset(..) signature. Three mutually exclusive shapes:
##   1. A collection type is configured: dataset is wrapped, e.g.
##      DataFrame-like ${shortCollectionType}[${shortRecordType}].
##   2. RDBMS persist type: emit a get_data_source_configs() helper plus the
##      complete JDBC save implementation inline (the shared body below at the
##      persist-type dispatch does not cover RDBMS).
##   3. Otherwise: plain dataset parameter; body is emitted by the dispatch below.
#if ($step.persist.shortCollectionType)
def save_dataset(self, dataset: ${step.persist.shortCollectionType}[${step.persist.shortRecordType}], ${saveParam}) -> None:
#elseif ($step.isPersistTypeRdbms())
## Collects JDBC connection settings from the step's spark_rdbms_config so the
## writer below can pass them to DataFrameWriter.jdbc().
def get_data_source_configs(self) -> dict:
return {
"url": self.spark_rdbms_config.jdbc_url(),
"properties": {
"user": self.spark_rdbms_config.user(),
"password": self.spark_rdbms_config.password(),
"driver": self.spark_rdbms_config.jdbc_driver()
},
}
def save_dataset(self, dataset: ${step.persist.shortRecordType}, ${saveParam}) -> None:
${step.capitalizedName}Base.logger.info('Saving %s To RDBMS...' % self.descriptive_label)
## ${step.persist.mode} is baked into the generated code as the Spark save mode
## (e.g. append/overwrite) configured on the persist element of the model.
mode = "${step.persist.mode}"
config = self.get_data_source_configs()
dataset.write.jdbc(
url=config.get("url"), table=table_name, mode=mode, properties=config.get("properties")
)
${step.capitalizedName}Base.logger.info('Saved %s to RDBMS' % self.descriptive_label)
#else
def save_dataset(self, dataset: ${step.persist.shortRecordType}, ${saveParam}) -> None:
#end
## Emit a method docstring for every variant except RDBMS: in the RDBMS case the
## implementation (with its logger calls) was already emitted directly under the
## def above, so inserting a docstring here would land after executable code.
#if (!$step.isPersistTypeRdbms())
"""
Saves a dataset.
"""
#end
## Dispatch on persist type to emit the save_dataset(..) body.
## RDBMS is intentionally absent here (its body was emitted with its signature
## above); the final branch is a catch-all warning for unsupported types.
#if ($step.isPersistTypeHive())
${step.capitalizedName}Base.logger.info('Saving %s To Hive...' % self.descriptive_label)
dataset.write.format('hive').mode('${step.persist.mode}').saveAsTable(table_name)
${step.capitalizedName}Base.logger.info('Saved %s to Hive' % self.descriptive_label)
#elseif ($step.isPersistTypeDeltaLake())
${step.capitalizedName}Base.logger.info('Saving %s To Delta Lake...' % self.descriptive_label)
## Write to a path under the configured delta output directory, then register
## the location as a catalog table if it is not already known.
path = self.delta_output_directory + table_name
dataset.write.format('delta').mode('${step.persist.mode}').save(path)
# PySpark 3.0 does not have a tableExists method
if table_name not in [t.name for t in self.spark.catalog.listTables()]:
self.spark.catalog.createTable(table_name, path, "delta")
${step.capitalizedName}Base.logger.info('Saved %s to Delta Lake' % self.descriptive_label)
#elseif ($step.isPersistTypeElasticsearch())
${step.capitalizedName}Base.logger.info('Saving %s To Elasticsearch...' % self.descriptive_label)
## Connection options come from the step's spark_elasticsearch_config;
## es_resource names the target index/resource.
options = self.spark_elasticsearch_config.get_es_configs()
dataset.write.format('es').options(**options).mode('${step.persist.mode}').save(es_resource)
${step.capitalizedName}Base.logger.info('Saved %s to Elasticsearch' % self.descriptive_label)
#elseif ($step.isPersistTypeNeo4j())
${step.capitalizedName}Base.logger.info('Saving %s To Neo4j...' % self.descriptive_label)
## Node labels are joined with ':' per the Neo4j Spark connector's
## multi-label option format.
options = self.spark_neo4j_config.get_spark_options()
dataset.write \
.format(SparkNeo4jConfig.NEO4J_FORMAT) \
.options(**options) \
.option(SparkNeo4jConfig.LABELS_OPTION, ':'.join(labels)) \
.mode('${step.persist.mode}') \
.save()
${step.capitalizedName}Base.logger.info('Saved %s to Neo4j' % self.descriptive_label)
#elseif (!$step.isPersistTypeRdbms())
## NOTE(review): the phrase 'Persist type for test "..."' reads oddly —
## possibly meant 'Persist type "..."'. It is a generated runtime log string,
## so it is left untouched here; confirm intent against the template history.
${step.capitalizedName}Base.logger.warn('Persist type for test "${step.persist.type}" is not yet supported by the aiSSEMBLE Solution Baseline!')
${step.capitalizedName}Base.logger.warn('Please encode persistence logic manually by overriding save_dataset(..)')
#end
© 2015 - 2025 Weber Informatics LLC | Privacy Policy