# All Downloads are FREE. Search and download functionalities are using the official Maven repository.

# line.agent.spark.agent-core_2.12.2.2.0.source-code.spline.default.yaml Maven / Gradle / Ivy

#
# Copyright 2017 ABSA Group Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# ===================================================================================================
#                      THIS FILE CONTAINS DEFAULT SPLINE SPARK AGENT PROPERTIES.
# ===================================================================================================

spline:

  # Spline agent mode. Allowed values: ENABLED | DISABLED
  mode: ENABLED

  # The UUID version that is used for the ExecutionPlan ID.
  # (DON'T MODIFY UNLESS YOU UNDERSTAND THE IMPLICATIONS)
  internal.execPlan.uuid.version: 5

  # Which failed executions the agent should capture:
  # - NONE        (only capture successful executions)
  # - NON_FATAL   (capture successful executions, and failed executions, but only when the error is non-fatal)
  # - ALL         (capture all executions regardless of the error type)
  sql.failure.capture: NON_FATAL

  # Strategy for dealing with writes in ignore mode that don't provide info on whether the file was actually written
  IWDStrategy:
    # Name of the strategy in use; refers to an entry defined below.
    # NOTE(review): assumed to follow the same root-value convention as `lineageDispatcher` - confirm.
    ~: default
    default:
      className: za.co.absa.spline.harvester.iwd.DefaultIgnoredWriteDetectionStrategy
      # Options: CAPTURE_LINEAGE | IGNORE_LINEAGE
      onMissingMetrics: IGNORE_LINEAGE

  # ===========================================
  # Lineage Dispatchers
  # ===========================================

  # Root lineage dispatcher.
  # Use one of the logical dispatcher names registered below (http, kafka, console, etc.), or your custom dispatcher name.
  lineageDispatcher:
    ~: http

    # -------------------------------------------
    # HTTP dispatcher
    # -------------------------------------------
    http:
      className: za.co.absa.spline.harvester.dispatcher.HttpLineageDispatcher
      # Base URL of the Spline Producer REST API endpoint (empty by default)
      producer.url:
      # Connection and read timeouts.
      # NOTE(review): presumably in milliseconds - confirm.
      timeout.connection: 2000
      timeout.read: 120000
      disableSslValidation: false

      # Spline Producer API version to use.
      # If unspecified, the protocol version is determined from the endpoint handshake response.
      # If unspecified and a custom (non-Spline) endpoint is used, the Producer API version is automatically set to 1 (the oldest one supported).
      apiVersion: # 1 | 1.1 | LATEST

      # Whether the payload should be compressed.
      # If unspecified, the request compression is negotiated with the endpoint, otherwise it's disabled.
      requestCompression: # true | false

      # Authentication config
      authentication:
        type: NONE # NONE | OAUTH
        # For the OAUTH type the following properties should be set:
        # ---
        # grantType: client_credentials
        # clientId:
        # clientSecret:
        # scope:  (optional)
        # tokenUrl:

      # Additional HTTP headers to be sent by the dispatcher (none by default)
      header:
      #   X-CUSTOM-HEADER: custom-header-value

    # -------------------------------------------
    # Kafka dispatcher
    # -------------------------------------------
    kafka:
      className: za.co.absa.spline.harvester.dispatcher.KafkaLineageDispatcher
      # Spline Producer API version
      apiVersion: "1.2"
      # Producer configs as defined by Kafka (bootstrap.servers, key.serializer, etc.).
      # All Kafka configs are supported.
      producer.bootstrap.servers:
      producer.key.serializer: org.apache.kafka.common.serialization.StringSerializer
      producer.value.serializer: org.apache.kafka.common.serialization.StringSerializer
      producer.max.in.flight.requests.per.connection: 1
      # Topic name (empty by default)
      topic:

    # -------------------------------------------
    # Console dispatcher
    # -------------------------------------------
    console:
      className: za.co.absa.spline.harvester.dispatcher.ConsoleLineageDispatcher
      # (OUT | ERR) - whether the lineage data should be sent to the standard output or to the standard error
      stream: OUT

    # -------------------------------------------
    # Logging dispatcher
    # -------------------------------------------
    logging:
      className: za.co.absa.spline.harvester.dispatcher.LoggingLineageDispatcher
      # (ERROR | WARN | INFO | DEBUG | TRACE) - the logging level at which the lineage data should be logged
      level: INFO

    # -------------------------------------------
    # Composite dispatcher
    # -------------------------------------------
    composite:
      className: za.co.absa.spline.harvester.dispatcher.CompositeLineageDispatcher
      # A comma-separated list of logical dispatcher names to which the lineage data will be delegated (empty by default)
      dispatchers:
      # (true | false) - whether the dispatcher should fail fast on the first error that occurs, or log it and continue
      failOnErrors: false

    # -------------------------------------------
    # Fallback dispatcher
    # -------------------------------------------
    fallback:
      className: za.co.absa.spline.harvester.dispatcher.FallbackLineageDispatcher
      # Logical name of the dispatcher to use by default (for example: http)
      primaryDispatcher:
      # Logical name of the dispatcher that will be used only when the primary dispatcher fails to send the data (for example: console)
      fallbackDispatcher:

    # -------------------------------------------
    # HDFS dispatcher (experimental, tailored for specific use case, not recommended for general production usage)
    # -------------------------------------------
    hdfs:
      className: za.co.absa.spline.harvester.dispatcher.HDFSLineageDispatcher
      # The output lineage file will be created alongside the target data file
      fileName: _LINEAGE
      # The size of the buffer to be used.
      # NOTE(review): presumably in bytes - confirm.
      fileBufferSize: 4096
      # File/directory permissions, provided as a umask string, either in octal or symbolic format.
      # NOTE(review): the value below is an unquoted YAML number; quote any replacement value
      # that has a leading zero (e.g. '0644') so that the YAML parser does not re-interpret it.
      filePermissions: 777

    # -------------------------------------------
    # Open Lineage HTTP dispatcher
    # -------------------------------------------
    httpOpenLineage:
      className: za.co.absa.spline.harvester.dispatcher.HttpOpenLineageDispatcher
      # Connection and read timeouts.
      # NOTE(review): presumably in milliseconds - confirm.
      timeout.connection: 1000
      timeout.read: 20000
      disableSslValidation: false
      namespace: default
      # Spline Producer API version
      apiVersion: "1.2"
      # Base URL of the REST API endpoint (empty by default).
      # NOTE(review): the original comment said "Spline Producer REST API endpoint", which looks copied from
      # the `http` dispatcher; presumably this is the Open Lineage endpoint - confirm.
      api.url:
      # Authentication config
      authentication:
        type: NONE # NONE | OAUTH
        # For the OAUTH type the following properties should be set:
        # ---
        # grantType: client_credentials
        # clientId:
        # clientSecret:
        # scope:  (optional)
        # tokenUrl:


    # -------------------------------------------
    # User Custom dispatcher (example)
    # -------------------------------------------
    # Fully specified class name implementing {LineageDispatcher} trait.
    # The class must have a constructor with a single parameter of type {org.apache.commons.configuration.Configuration}

    # my-dispatcher:
    #   className: org.example.spline.MyDispatcherImpl
    #   prop1: value1
    #   prop2: value2

  # ===========================================
  # Post-Processing Filters
  # ===========================================

  # Root post-processing filter.
  # Note:
  #   When overriding the root filter the default filters (like dsPasswordReplace for example) will not be automatically applied.
  #   If you want to keep default filters active and only add your own filter to the chain then you must use a composite filter,
  #   and add the "default" to the chain explicitly.
  #   For example:
  #     spline.postProcessingFilter=composite
  #     spline.postProcessingFilter.composite.filters=yourFilter, default
  postProcessingFilter:
    # The root filter, given by the logical name of one of the filters registered below
    ~: default

    # -------------------------------------------
    # Default Filter
    # -------------------------------------------
    default:
      className: za.co.absa.spline.harvester.postprocessing.CompositePostProcessingFilter
      # Logical names of the filters that make up the default chain (see `dsPasswordReplace` below)
      filters:
        - dsPasswordReplace

    # -------------------------------------------
    # Composite Filter
    # -------------------------------------------
    composite:
      className: za.co.absa.spline.harvester.postprocessing.CompositePostProcessingFilter

      # A comma-separated list of user lineage filters {za.co.absa.spline.harvester.postprocessing.PostProcessingFilter}.
      # The classes must have a constructor with a single parameter of type {org.apache.commons.configuration.Configuration}.
      # Empty by default.
      #
      # Example:
      # spline.postProcessingFilter.composite.filters=yourCustomFilter, otherFilter
      filters:

    # -------------------------------------------
    # Password Replacing Filter
    # -------------------------------------------
    # This filter replaces password occurrences in the data source URIs
    dsPasswordReplace:
      className: za.co.absa.spline.harvester.postprocessing.DataSourcePasswordReplacingFilter
      # The text put in place of a detected password
      replacement: '*****'
      # Case-insensitive patterns matching the words "password" and "passphrase".
      # NOTE(review): presumably matched against property/option names - confirm.
      nameRegexes:
        - (?i)password
        - (?i)passphrase
      # Patterns locating a secret inside a value; the secret itself is the capturing group.
      # NOTE(review): presumably only the captured group gets replaced - confirm.
      #   1. a `password=` or `passphrase=` parameter (case-insensitive) preceded by `?`, `&` or `;`;
      #      the group captures everything up to the next `&` or `;`
      #   2. a `/user:secret@host` fragment; the group captures the text between `:` and `@`
      valueRegexes:
        - (?i)[?&;](?:password|passphrase)=([^&;]*)
        - /[^:]*:([^@]*)@(?:\w+\.)*\w+

    # -------------------------------------------
    # User extra metadata capturing filter
    # -------------------------------------------
    userExtraMeta:
      className: za.co.absa.spline.harvester.postprocessing.metadata.MetadataCollectingFilter
      # The rules are defined in JSON format.
      # The value can be either a JSON string or a URL to a JSON file (no rules by default).
      rules: { }

  # Plugins configuration.
  # The `key` is the fully specified plugin class name.
  # For example:
  # ---
  #  plugins:
  #    com.example.MyPlugin:
  #      prop1: foo
  #      prop2: bar
  plugins:
    za.co.absa.spline.harvester.plugin.embedded.NonPersistentActionsCapturePlugin:
      # This plugin is disabled by default
      enabled: false
      # NOTE(review): presumably the names of the non-persistent actions that the plugin captures
      # when enabled - confirm.
      funcNames:
        - head
        - count
        - collect
        - collectAsList
        - toLocalIterator




# © 2015 - 2024 Weber Informatics LLC | Privacy Policy