All downloads are FREE. Search and download functionality uses the official Maven repository.

json.schema.entity.services.ingestionPipelines.ingestionPipeline.json Maven / Gradle / Ivy

There is a newer version: 1.6.0-rc1
Show newest version
{
  "$id": "https://open-metadata.org/schema/entity/services/ingestionPipelines/ingestionPipeline.json",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "IngestionPipeline",
  "description": "Ingestion Pipeline Config is used to set up a DAG and deploy. This entity is used to setup metadata/quality pipelines on Apache Airflow.",
  "type": "object",
  "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.IngestionPipeline",
  "javaInterfaces": ["org.openmetadata.schema.EntityInterface"],
  "definitions": {
    "pipelineType": {
      "description": "Type of Pipeline - metadata, usage",
      "type": "string",
      "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.PipelineType",
      "enum": ["metadata", "usage", "lineage", "profiler", "autoClassification", "TestSuite", "dataInsight", "elasticSearchReindex", "dbt", "application"]
    },
    "pipelineStatus": {
      "type": "object",
      "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatus",
      "description": "This defines runtime status of Pipeline.",
      "properties": {
        "runId": {
          "description": "Pipeline unique run ID.",
          "type": "string"
        },
        "pipelineState": {
          "description": "Pipeline status denotes if its failed or succeeded.",
          "type": "string",
          "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.PipelineStatusType",
          "enum": ["queued","success","failed","running","partialSuccess"]
        },
        "startDate": {
          "description": "startDate of the pipeline run for this particular execution.",
          "$ref": "../../../type/basic.json#/definitions/timestamp"
        },
        "timestamp": {
          "description": "executionDate of the pipeline run for this particular execution.",
          "$ref": "../../../type/basic.json#/definitions/timestamp"
        },
        "endDate": {
          "description": "endDate of the pipeline run for this particular execution.",
          "$ref": "../../../type/basic.json#/definitions/timestamp"
        },
        "status": {
          "description": "Ingestion Pipeline summary status. Informed at the end of the execution.",
          "$ref": "status.json#/definitions/ingestionStatus"
        }
      },
      "additionalProperties": false
    },
    "airflowConfig": {
      "description": "Properties to configure the Airflow pipeline that will run the workflow.",
      "type": "object",
      "javaType": "org.openmetadata.schema.entity.services.ingestionPipelines.AirflowConfig",
      "properties": {
        "pausePipeline": {
          "description": "pause the pipeline from running once the deploy is finished successfully.",
          "type": "boolean",
          "default": false
        },
        "concurrency": {
          "description": "Concurrency of the Pipeline.",
          "type": "integer",
          "default": 1
        },
        "startDate": {
          "description": "Start date of the pipeline.",
          "$ref": "../../../type/basic.json#/definitions/dateTime"
        },
        "endDate": {
          "description": "End Date of the pipeline.",
          "$ref": "../../../type/basic.json#/definitions/dateTime"
        },
        "pipelineTimezone": {
          "description": "Timezone in which pipeline going to be scheduled.",
          "type": "string",
          "default": "UTC"
        },
        "retries": {
          "description": "Retry pipeline in case of failure.",
          "type": "integer",
          "default": 0
        },
        "retryDelay": {
          "description": "Delay between retries in seconds.",
          "type": "integer",
          "default": 300
        },
        "pipelineCatchup": {
          "description": "Run past executions if the start date is in the past.",
          "type": "boolean",
          "default": false
        },
        "scheduleInterval": {
          "description": "Scheduler Interval for the pipeline in cron format.",
          "type": "string"
        },
        "maxActiveRuns": {
          "description": "Maximum Number of active runs.",
          "type": "integer",
          "default": 1
        },
        "workflowTimeout": {
          "description": "Timeout for the workflow in seconds.",
          "type": "integer",
          "default": null
        },
        "workflowDefaultView": {
          "description": "Default view in Airflow.",
          "type": "string",
          "default": "tree"
        },
        "workflowDefaultViewOrientation": {
          "description": "Default view Orientation in Airflow.",
          "type": "string",
          "default": "LR"
        },
        "email": {
          "description": "Email to notify workflow status.",
          "$ref": "../../../type/basic.json#/definitions/email"
        }
      },
      "additionalProperties": false
    }
  },
  "properties": {
    "id": {
      "description": "Unique identifier that identifies this pipeline.",
      "$ref": "../../../type/basic.json#/definitions/uuid"
    },
    "name": {
      "description": "Name that identifies this pipeline instance uniquely.",
      "$ref": "../../../type/basic.json#/definitions/entityName"
    },
    "displayName": {
      "description": "Display Name that identifies this Pipeline.",
      "type": "string"
    },
    "description": {
      "description": "Description of the Pipeline.",
      "$ref": "../../../type/basic.json#/definitions/markdown"
    },
    "pipelineType": {
      "$ref": "#/definitions/pipelineType"
    },
    "owners": {
      "description": "Owners of this Pipeline.",
      "$ref": "../../../type/entityReferenceList.json",
      "default": null
    },
    "fullyQualifiedName": {
      "description": "Name that uniquely identifies a Pipeline.",
      "$ref": "../../../type/basic.json#/definitions/fullyQualifiedEntityName"
    },
    "sourceConfig": {
      "$ref": "../../../metadataIngestion/workflow.json#/definitions/sourceConfig"
    },
    "openMetadataServerConnection": {
      "$ref": "../connections/metadata/openMetadataConnection.json"
    },
    "airflowConfig": {
      "$ref": "#/definitions/airflowConfig"
    },
    "service": {
      "description": "Link to the service (such as database, messaging, storage services, etc. for which this ingestion pipeline ingests the metadata from.",
      "$ref": "../../../type/entityReference.json"
    },
    "pipelineStatuses": {
      "description": "Last of executions and status for the Pipeline.",
      "$ref": "#/definitions/pipelineStatus"
    },
    "loggerLevel": {
      "description": "Set the logging level for the workflow.",
      "$ref": "../../../metadataIngestion/workflow.json#/definitions/logLevels"
    },
    "deployed": {
      "description": "Indicates if the workflow has been successfully deployed to Airflow.",
      "type": "boolean",
      "default": false
    },
    "enabled": {
      "description": "True if the pipeline is ready to be run in the next schedule. False if it is paused.",
      "type": "boolean",
      "default": true
    },
    "href": {
      "description": "Link to this ingestion pipeline resource.",
      "$ref": "../../../type/basic.json#/definitions/href"
    },
    "version": {
      "description": "Metadata version of the entity.",
      "$ref": "../../../type/entityHistory.json#/definitions/entityVersion"
    },
    "updatedAt": {
      "description": "Last update time corresponding to the new version of the entity in Unix epoch time milliseconds.",
      "$ref": "../../../type/basic.json#/definitions/timestamp"
    },
    "updatedBy": {
      "description": "User who made the update.",
      "type": "string"
    },
    "changeDescription": {
      "description": "Change that led to this version of the entity.",
      "$ref": "../../../type/entityHistory.json#/definitions/changeDescription"
    },
    "deleted": {
      "description": "When `true` indicates the entity has been soft deleted.",
      "type": "boolean",
      "default": false
    },
    "provider" : {
      "$ref": "../../../type/basic.json#/definitions/providerType"
    },
    "domain" : {
      "description": "Domain the asset belongs to. When not set, the asset inherits the domain from the parent it belongs to.",
      "$ref": "../../../type/entityReference.json"
    },
    "applicationType": {
      "description": "Type of the application when pipelineType is 'application'.",
      "type": "string"
    }
  },
  "required": [
    "name",
    "pipelineType",
    "sourceConfig",
    "airflowConfig"
  ],
  "additionalProperties": false
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy