json.schema.metadataIngestion.databaseServiceProfilerPipeline.json Maven / Gradle / Ivy
{
"$id": "https://open-metadata.org/schema/metadataIngestion/databaseServiceProfilerPipeline.json",
"$schema": "http://json-schema.org/draft-07/schema#",
"title": "DatabaseServiceProfilerPipeline",
"description": "DatabaseService Profiler Pipeline Configuration.",
"type": "object",
"definitions": {
"profilerConfigType": {
"description": "Profiler Source Config Pipeline type",
"type": "string",
"enum": ["Profiler"],
"default": "Profiler"
}
},
"properties": {
"type": {
"description": "Pipeline type",
"$ref": "#/definitions/profilerConfigType",
"default": "Profiler"
},
"schemaFilterPattern": {
"description": "Regex to only fetch tables or databases that matches the pattern.",
"$ref": "../type/filterPattern.json#/definitions/filterPattern",
"title": "Schema Filter Pattern"
},
"tableFilterPattern": {
"description": "Regex exclude tables or databases that matches the pattern.",
"$ref": "../type/filterPattern.json#/definitions/filterPattern",
"title": "Table Filter Pattern"
},
"databaseFilterPattern": {
"description": "Regex to only fetch databases that matches the pattern.",
"$ref": "../type/filterPattern.json#/definitions/filterPattern",
"title": "Database Filter Pattern"
},
"includeViews": {
"description": "Optional configuration to turn off fetching metadata for views.",
"type": "boolean",
"default": true,
"title": "Include Views"
},
"useFqnForFiltering": {
"description": "Regex will be applied on fully qualified name (e.g service_name.db_name.schema_name.table_name) instead of raw name (e.g. table_name)",
"type": "boolean",
"default": false,
"title": "Use FQN For Filtering"
},
"generateSampleData": {
"description": "Option to turn on/off generating sample data. If enabled, profiler will ingest sample data for each table.",
"type": "boolean",
"default": true,
"title": "Generate Sample Data"
},
"computeMetrics": {
"description": "Option to turn on/off computing profiler metrics.",
"type": "boolean",
"default": true,
"title": "Compute Metrics"
},
"processPiiSensitive": {
"description": "Optional configuration to automatically tag columns that might contain sensitive information",
"type": "boolean",
"default": false,
"title": "Auto Tag PII"
},
"confidence": {
"description": "Set the Confidence value for which you want the column to be tagged as PII. Confidence value ranges from 0 to 100. A higher number will yield less false positives but more false negatives. A lower number will yield more false positives but less false negatives.",
"type": "number",
"default": 80,
"title": "PII Inference Confidence Level"
},
"profileSampleType": {
"$ref": "../entity/data/table.json#/definitions/profileSampleType",
"title": "Profile Sample Type"
},
"profileSample": {
"description": "Percentage of data or no. of rows used to compute the profiler metrics and run data quality tests",
"type": "number",
"default": null,
"title": "Profile Sample"
},
"sampleDataCount": {
"description": "Number of sample rows to ingest when 'Generate Sample Data' is enabled",
"type": "integer",
"default": 50,
"title": "Sample Data Rows Count"
},
"threadCount": {
"description": "Number of threads to use during metric computations",
"type": "number",
"default": 5,
"title": "Thread Count"
},
"timeoutSeconds": {
"description": "Profiler Timeout in Seconds",
"type": "integer",
"default": 43200,
"title": "Timeout (in sec.)"
}
},
"additionalProperties": false
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy