All Downloads are FREE. Search and download functionalities are using the official Maven repository.

json.schema.entity.data.pipeline.json Maven / Gradle / Ivy

There is a newer version: 1.6.1
Show newest version
{
  "$id": "https://open-metadata.org/schema/entity/data/pipeline.json",
  "$schema": "http://json-schema.org/draft-07/schema#",
  "title": "Pipeline",
  "$comment": "@om-entity-type",
  "description": "This schema defines the Pipeline entity. A pipeline enables the flow of data from source to destination through a series of processing steps. ETL is a type of pipeline where the series of steps Extract, Transform and Load the data.",
  "type": "object",
  "javaType": "org.openmetadata.schema.entity.data.Pipeline",
  "javaInterfaces": ["org.openmetadata.schema.EntityInterface"],
  "definitions": {
    "statusType": {
      "javaType": "org.openmetadata.schema.type.StatusType",
      "description": "Enum defining the possible Status.",
      "type": "string",
      "enum": ["Successful", "Failed", "Pending", "Skipped"],
      "javaEnums": [
        {
          "name": "Successful"
        },
        {
          "name": "Failed"
        },
        {
          "name": "Pending"
        },
        {
          "name": "Skipped"
        }
      ]
    },
    "taskStatus": {
      "type": "object",
      "javaType": "org.openmetadata.schema.type.Status",
      "description": "This schema defines a time series of the status of a Pipeline or Task.",
      "properties": {
        "name": {
          "description": "Name of the Task.",
          "type": "string"
        },
        "executionStatus": {
          "description": "Status at a specific execution date.",
          "$ref": "#/definitions/statusType"
        },
        "startTime": {
          "description": "Task start time",
          "$ref": "../../type/basic.json#/definitions/timestamp"
        },
        "endTime": {
          "description": "Task end time",
          "$ref": "../../type/basic.json#/definitions/timestamp"
        },
        "logLink": {
          "description": "Task end time",
          "type": "string",
          "format": "uri"
        }
      },
      "additionalProperties": false,
      "required": ["name", "executionStatus"]
    },
    "task": {
      "type": "object",
      "javaType": "org.openmetadata.schema.type.Task",
      "properties": {
        "name": {
          "description": "Name that identifies this task instance uniquely.",
          "type": "string"
        },
        "displayName": {
          "description": "Display Name that identifies this Task. It could be title or label from the pipeline services.",
          "type": "string"
        },
        "fullyQualifiedName": {
          "description": "A unique name that identifies a pipeline in the format 'ServiceName.PipelineName.TaskName'.",
          "type": "string"
        },
        "description": {
          "description": "Description of this Task.",
          "$ref": "../../type/basic.json#/definitions/markdown"
        },
        "sourceUrl": {
          "description": "Task URL to visit/manage. This URL points to respective pipeline service UI.",
          "$ref": "../../type/basic.json#/definitions/sourceUrl"
        },
        "downstreamTasks": {
          "description": "All the tasks that are downstream of this task.",
          "type": "array",
          "items": {
            "type": "string"
          },
          "default": null
        },
        "taskType": {
          "description": "Type of the Task. Usually refers to the class it implements.",
          "type": "string"
        },
        "taskSQL": {
          "description": "SQL used in the task. Can be used to determine the lineage.",
          "$ref": "../../type/basic.json#/definitions/sqlQuery"
        },
        "startDate": {
          "description": "start date for the task.",
          "type": "string"
        },
        "endDate": {
          "description": "end date for the task.",
          "type": "string"
        },
        "tags": {
          "description": "Tags for this task.",
          "type": "array",
          "items": {
            "$ref": "../../type/tagLabel.json"
          },
          "default": null
        },
        "owners": {
          "description": "Owners of this task.",
          "$ref": "../../type/entityReferenceList.json"
        }
      },
      "required": ["name"],
      "additionalProperties": false
    },
    "pipelineStatus": {
      "description": "Series of pipeline executions, its status and task status.",
      "type": "object",
      "properties": {
        "timestamp": {
          "description": "Timestamp where the job was executed.",
          "$ref": "../../type/basic.json#/definitions/timestamp"
        },
        "executionStatus": {
          "description": "Status at a specific execution date.",
          "$ref": "#/definitions/statusType"
        },
        "taskStatus": {
          "description": "Series of task executions and its status.",
          "type": "array",
          "items": {
            "$ref": "#/definitions/taskStatus"
          },
          "default": null
        }
      },
      "additionalProperties": false,
      "required": ["timestamp", "executionStatus"]
    }
  },
  "properties": {
    "id": {
      "description": "Unique identifier that identifies a pipeline instance.",
      "$ref": "../../type/basic.json#/definitions/uuid"
    },
    "name": {
      "description": "Name that identifies this pipeline instance uniquely.",
      "$ref": "../../type/basic.json#/definitions/entityName"
    },
    "displayName": {
      "description": "Display Name that identifies this Pipeline. It could be title or label from the source services.",
      "type": "string"
    },
    "fullyQualifiedName": {
      "description": "A unique name that identifies a pipeline in the format 'ServiceName.PipelineName'.",
      "$ref": "../../type/basic.json#/definitions/fullyQualifiedEntityName"
    },
    "description": {
      "description": "Description of this Pipeline.",
      "$ref": "../../type/basic.json#/definitions/markdown"
    },
    "dataProducts" : {
      "description": "List of data products this entity is part of.",
      "$ref" : "../../type/entityReferenceList.json"
    },
    "version": {
      "description": "Metadata version of the entity.",
      "$ref": "../../type/entityHistory.json#/definitions/entityVersion"
    },
    "updatedAt": {
      "description": "Last update time corresponding to the new version of the entity in Unix epoch time milliseconds.",
      "$ref": "../../type/basic.json#/definitions/timestamp"
    },
    "updatedBy": {
      "description": "User who made the update.",
      "type": "string"
    },
    "sourceUrl": {
      "description": "Pipeline  URL to visit/manage. This URL points to respective pipeline service UI.",
      "$ref": "../../type/basic.json#/definitions/sourceUrl"
    },
    "concurrency": {
      "description": "Concurrency of the Pipeline.",
      "type": "integer"
    },
    "pipelineLocation": {
      "description": "Pipeline Code Location.",
      "type": "string"
    },
    "startDate": {
      "description": "Start date of the workflow.",
      "$ref": "../../type/basic.json#/definitions/dateTime"
    },
    "tasks": {
      "description": "All the tasks that are part of pipeline.",
      "type": "array",
      "items": {
        "$ref": "#/definitions/task"
      },
      "default": null
    },
    "pipelineStatus": {
      "description": "Latest Pipeline Status.",
      "$ref": "#/definitions/pipelineStatus",
      "default": null
    },
    "followers": {
      "description": "Followers of this Pipeline.",
      "$ref": "../../type/entityReferenceList.json"
    },
    "tags": {
      "description": "Tags for this Pipeline.",
      "type": "array",
      "items": {
        "$ref": "../../type/tagLabel.json"
      },
      "default": null
    },
    "href": {
      "description": "Link to the resource corresponding to this entity.",
      "$ref": "../../type/basic.json#/definitions/href"
    },
    "owners": {
      "description": "Owners of this pipeline.",
      "$ref": "../../type/entityReferenceList.json"
    },
    "service": {
      "description": "Link to service where this pipeline is hosted in.",
      "$ref": "../../type/entityReference.json"
    },
    "serviceType": {
      "description": "Service type where this pipeline is hosted in.",
      "$ref": "../services/pipelineService.json#/definitions/pipelineServiceType"
    },
    "changeDescription": {
      "description": "Change that lead to this version of the entity.",
      "$ref": "../../type/entityHistory.json#/definitions/changeDescription"
    },
    "deleted": {
      "description": "When `true` indicates the entity has been soft deleted.",
      "type": "boolean",
      "default": false
    },
    "extension": {
      "description": "Entity extension data with custom attributes added to the entity.",
      "$ref": "../../type/basic.json#/definitions/entityExtension"
    },
    "scheduleInterval": {
      "description": "Scheduler Interval for the pipeline in cron format.",
      "type": "string",
      "default": null
    },
    "domain" : {
      "description": "Domain the Pipeline belongs to. When not set, the pipeline inherits the domain from the Pipeline service it belongs to.",
      "$ref": "../../type/entityReference.json"
    },
    "votes" : {
      "description": "Votes on the entity.",
      "$ref": "../../type/votes.json"
    },
    "lifeCycle": {
      "description": "Life Cycle properties of the entity",
      "$ref": "../../type/lifeCycle.json"
    },
    "sourceHash": {
      "description": "Source hash of the entity",
      "type": "string",
      "minLength": 1,
      "maxLength": 32
    }
  },
  "required": ["id", "name", "service"],
  "additionalProperties": false
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy