json.schema.entity.data.pipeline.json Maven / Gradle / Ivy
{
"$id": "https://open-metadata.org/schema/entity/data/pipeline.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "Pipeline",
"$comment": "@om-entity-type",
"description": "This schema defines the Pipeline entity. A pipeline enables the flow of data from source to destination through a series of processing steps. ETL is a type of pipeline where the series of steps Extract, Transform and Load the data.",
"type": "object",
"javaType": "org.openmetadata.schema.entity.data.Pipeline",
"javaInterfaces": ["org.openmetadata.schema.EntityInterface"],
"definitions": {
"statusType": {
"javaType": "org.openmetadata.schema.type.StatusType",
"description": "Enum defining the possible Status.",
"type": "string",
"enum": ["Successful", "Failed", "Pending", "Skipped"],
"javaEnums": [
{
"name": "Successful"
},
{
"name": "Failed"
},
{
"name": "Pending"
},
{
"name": "Skipped"
}
]
},
"taskStatus": {
"type": "object",
"javaType": "org.openmetadata.schema.type.Status",
"description": "This schema defines a time series of the status of a Pipeline or Task.",
"properties": {
"name": {
"description": "Name of the Task.",
"type": "string"
},
"executionStatus": {
"description": "Status at a specific execution date.",
"$ref": "#/definitions/statusType"
},
"startTime": {
"description": "Task start time",
"$ref": "../../type/basic.json#/definitions/timestamp"
},
"endTime": {
"description": "Task end time",
"$ref": "../../type/basic.json#/definitions/timestamp"
},
"logLink": {
"description": "Link to the logs of this task execution.",
"type": "string",
"format": "uri"
}
},
"additionalProperties": false,
"required": ["name", "executionStatus"]
},
"task": {
"type": "object",
"javaType": "org.openmetadata.schema.type.Task",
"properties": {
"name": {
"description": "Name that identifies this task instance uniquely.",
"type": "string"
},
"displayName": {
"description": "Display Name that identifies this Task. It could be title or label from the pipeline services.",
"type": "string"
},
"fullyQualifiedName": {
"description": "A unique name that identifies a task in the format 'ServiceName.PipelineName.TaskName'.",
"type": "string"
},
"description": {
"description": "Description of this Task.",
"$ref": "../../type/basic.json#/definitions/markdown"
},
"sourceUrl": {
"description": "Task URL to visit/manage. This URL points to respective pipeline service UI.",
"$ref": "../../type/basic.json#/definitions/sourceUrl"
},
"downstreamTasks": {
"description": "All the tasks that are downstream of this task.",
"type": "array",
"items": {
"type": "string"
},
"default": null
},
"taskType": {
"description": "Type of the Task. Usually refers to the class it implements.",
"type": "string"
},
"taskSQL": {
"description": "SQL used in the task. Can be used to determine the lineage.",
"$ref": "../../type/basic.json#/definitions/sqlQuery"
},
"startDate": {
"description": "Start date for the task.",
"type": "string"
},
"endDate": {
"description": "End date for the task.",
"type": "string"
},
"tags": {
"description": "Tags for this task.",
"type": "array",
"items": {
"$ref": "../../type/tagLabel.json"
},
"default": null
},
"owners": {
"description": "Owners of this task.",
"$ref": "../../type/entityReferenceList.json"
}
},
"required": ["name"],
"additionalProperties": false
},
"pipelineStatus": {
"description": "Series of pipeline executions, its status and task status.",
"type": "object",
"properties": {
"timestamp": {
"description": "Timestamp when the job was executed.",
"$ref": "../../type/basic.json#/definitions/timestamp"
},
"executionStatus": {
"description": "Status at a specific execution date.",
"$ref": "#/definitions/statusType"
},
"taskStatus": {
"description": "Series of task executions and their status.",
"type": "array",
"items": {
"$ref": "#/definitions/taskStatus"
},
"default": null
}
},
"additionalProperties": false,
"required": ["timestamp", "executionStatus"]
}
},
"properties": {
"id": {
"description": "Unique identifier that identifies a pipeline instance.",
"$ref": "../../type/basic.json#/definitions/uuid"
},
"name": {
"description": "Name that identifies this pipeline instance uniquely.",
"$ref": "../../type/basic.json#/definitions/entityName"
},
"displayName": {
"description": "Display Name that identifies this Pipeline. It could be title or label from the source services.",
"type": "string"
},
"fullyQualifiedName": {
"description": "A unique name that identifies a pipeline in the format 'ServiceName.PipelineName'.",
"$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName"
},
"description": {
"description": "Description of this Pipeline.",
"$ref": "../../type/basic.json#/definitions/markdown"
},
"dataProducts" : {
"description": "List of data products this entity is part of.",
"$ref" : "../../type/entityReferenceList.json"
},
"version": {
"description": "Metadata version of the entity.",
"$ref": "../../type/entityHistory.json#/definitions/entityVersion"
},
"updatedAt": {
"description": "Last update time corresponding to the new version of the entity in Unix epoch time milliseconds.",
"$ref": "../../type/basic.json#/definitions/timestamp"
},
"updatedBy": {
"description": "User who made the update.",
"type": "string"
},
"sourceUrl": {
"description": "Pipeline URL to visit/manage. This URL points to respective pipeline service UI.",
"$ref": "../../type/basic.json#/definitions/sourceUrl"
},
"concurrency": {
"description": "Concurrency of the Pipeline.",
"type": "integer"
},
"pipelineLocation": {
"description": "Pipeline Code Location.",
"type": "string"
},
"startDate": {
"description": "Start date of the workflow.",
"$ref": "../../type/basic.json#/definitions/dateTime"
},
"tasks": {
"description": "All the tasks that are part of pipeline.",
"type": "array",
"items": {
"$ref": "#/definitions/task"
},
"default": null
},
"pipelineStatus": {
"description": "Latest Pipeline Status.",
"$ref": "#/definitions/pipelineStatus",
"default": null
},
"followers": {
"description": "Followers of this Pipeline.",
"$ref": "../../type/entityReferenceList.json"
},
"tags": {
"description": "Tags for this Pipeline.",
"type": "array",
"items": {
"$ref": "../../type/tagLabel.json"
},
"default": null
},
"href": {
"description": "Link to the resource corresponding to this entity.",
"$ref": "../../type/basic.json#/definitions/href"
},
"owners": {
"description": "Owners of this pipeline.",
"$ref": "../../type/entityReferenceList.json"
},
"service": {
"description": "Link to the service where this pipeline is hosted.",
"$ref": "../../type/entityReference.json"
},
"serviceType": {
"description": "Service type where this pipeline is hosted.",
"$ref": "../services/pipelineService.json#/definitions/pipelineServiceType"
},
"changeDescription": {
"description": "Change that led to this version of the entity.",
"$ref": "../../type/entityHistory.json#/definitions/changeDescription"
},
"deleted": {
"description": "When `true` indicates the entity has been soft deleted.",
"type": "boolean",
"default": false
},
"extension": {
"description": "Entity extension data with custom attributes added to the entity.",
"$ref": "../../type/basic.json#/definitions/entityExtension"
},
"scheduleInterval": {
"description": "Scheduler Interval for the pipeline in cron format.",
"type": "string",
"default": null
},
"domain" : {
"description": "Domain the Pipeline belongs to. When not set, the pipeline inherits the domain from the Pipeline service it belongs to.",
"$ref": "../../type/entityReference.json"
},
"votes" : {
"description": "Votes on the entity.",
"$ref": "../../type/votes.json"
},
"lifeCycle": {
"description": "Life Cycle properties of the entity",
"$ref": "../../type/lifeCycle.json"
},
"sourceHash": {
"description": "Source hash of the entity",
"type": "string",
"minLength": 1,
"maxLength": 32
}
},
"required": ["id", "name", "service"],
"additionalProperties": false
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy