All Downloads are FREE. Search and download functionalities are using the official Maven repository.

templates.pipeline.base.py.vm Maven / Gradle / Ivy

from krausening.logging import LogManager
## NB: this template is currently used for both data delivery and machine learning, because the needs from each overlap
## significantly.  If we ever need more specialized functionality that differs between the two, we should just split this
## into two templates.
#if ($pipeline.getDataLineage() || $pipeline.isModelLineageSupportNeeded())
from typing import Optional
from uuid import uuid4, UUID
from openlineage.client.facet import ParentRunFacet
from aissemble_data_lineage import Emitter, LineageUtil
#end

class PipelineBase:
    """
    Performs pipeline level process for ${pipeline.capitalizedName}.

    GENERATED CODE - DO NOT MODIFY

    Generated from: ${templateName}

    The class is a singleton: every call to PipelineBase() returns the same
    instance, so the state kept on it is shared by all callers.
    """

    _instance = None
#if ($pipeline.getDataLineage() || $pipeline.isModelLineageSupportNeeded())
    # Run id of the current pipeline job run; created lazily by get_pipeline_run_id()
    # and cleared again when a complete or fail event is recorded.
    _pipeline_run_id: Optional[UUID] = None
    _pipeline_job_namespace: str = "${pipeline.capitalizedName}"
    _pipeline_name: str = "${pipeline.capitalizedName}"
    # True between a recorded start event and the matching complete/fail event.
    _pipeline_event_started = False
    # Created on first use, see _get_emitter().
    _emitter: Optional[Emitter] = None
    _lineage_util = LineageUtil()
#end
    logger = LogManager.get_instance().get_logger('PipelineBase')

    def __new__(cls):
        """
        Create a singleton class for pipeline level process

        Returns:
            the single shared instance, created on the first call
        """
        if cls._instance is None:
            cls.logger.info("Creating the PipelineBase")
            cls._instance = super().__new__(cls)
        return cls._instance

#if ($pipeline.getDataLineage() || $pipeline.isModelLineageSupportNeeded())
    def _get_emitter(self) -> Emitter:
        """
        Get the lineage emitter, creating it on first use

        Returns:
            the emitter used to record lineage events
        """
        if self._emitter is None:
            self._emitter = Emitter()
        return self._emitter

    def _record_pipeline_lineage_end_event(self, create_run_event, log_message: str):
        """
        Record the event that ends the current pipeline job run and reset the run state

        Shared by the Complete and Fail events, which differ only in the event
        factory and in the message that is logged afterwards.

        Args:
            create_run_event: factory called with run_id, job_name and
                default_namespace keyword arguments to build the run event
            log_message: message logged once the event has been recorded
        """
        if not self._pipeline_event_started:
            self.logger.warning("Pipeline hasn't recorded a lineage start event")
            return

        emitter = self._get_emitter()
        run_event = create_run_event(
            run_id=self.get_pipeline_run_id(),
            job_name=self._pipeline_name,
            default_namespace=self._pipeline_job_namespace,
        )
        self._lineage_util.record_lineage(emitter, run_event)
        # the run is over: clear the state so the next start event gets a new run id
        self._pipeline_event_started = False
        self._pipeline_run_id = None
        self.logger.info(log_message)

    def record_pipeline_lineage_complete_event(self):
        """
        Record a pipeline lineage Complete event

        Only a warning is logged when no start event has been recorded.
        """
        self._record_pipeline_lineage_end_event(
            self._lineage_util.create_complete_run_event,
            'Complete pipeline job run..',
        )

    def record_pipeline_lineage_fail_event(self):
        """
        Record a pipeline lineage Fail event

        Only a warning is logged when no start event has been recorded.
        """
        self._record_pipeline_lineage_end_event(
            self._lineage_util.create_fail_run_event,
            'Fail pipeline job run..',
        )

    def record_pipeline_lineage_start_event(self):
        """
        Record a pipeline lineage Start event

        Only a warning is logged when a start event has already been recorded
        and not yet been followed by a complete or fail event.
        """
        emitter = self._get_emitter()

        # always make sure one job run at a time
        if self._pipeline_event_started:
            self.logger.warning("Pipeline has recorded a lineage start event")
            return

        run_event = self._lineage_util.create_start_run_event(
            run_id=self.get_pipeline_run_id(),
            job_name=self._pipeline_name,
            default_namespace=self._pipeline_job_namespace,
        )
        # store the namespace carried by the start event's job; the later
        # complete/fail events and the parent run facet read it from here
        self._pipeline_job_namespace = run_event.job.get_open_lineage_job().namespace
        self._lineage_util.record_lineage(emitter, run_event)
        self._pipeline_event_started = True
        self.logger.info('Start pipeline job run..')

    def get_pipeline_run_id(self) -> UUID:
        """
        Get the pipeline run id, generating a new one when none is set

        Returns:
            pipeline run id
        """
        if self._pipeline_run_id is None:
            self._pipeline_run_id = uuid4()
        return self._pipeline_run_id

    def get_pipeline_name(self) -> str:
        """
        Get the pipeline name

        Returns:
            pipeline name
        """
        return self._pipeline_name

    def get_pipeline_job_namespace(self) -> str:
        """
        Get the pipeline Job's namespace

        Returns:
            Namespace for the pipeline job
        """
        return self._pipeline_job_namespace

    def get_pipeline_run_as_parent_run_facet(self) -> Optional[ParentRunFacet]:
        """
        Get the pipeline run event information as a ParentRunFacet

        Returns:
            ParentRunFacet created from pipeline run event information, or
            None when there is currently no pipeline run id
        """
        if self._pipeline_run_id is None:
            return None

        return ParentRunFacet(
            run={"runId": str(self._pipeline_run_id)},
            job={
                "namespace": self._pipeline_job_namespace,
                "name": self._pipeline_name,
            },
        )
#end









© 2015 - 2025 Weber Informatics LLC | Privacy Policy