# reference.conf for comet-spark3_2.12 0.1.20
root = "/tmp"
root = ${?COMET_ROOT}
datasets = ${root}"/datasets"
datasets = ${?COMET_DATASETS}
metadata = ${root}"/metadata"
metadata = ${?COMET_METADATA}
tmpdir = ${root}"/comet_tmp"
tmpdir = ${?COMET_TMPDIR}
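# Example (hypothetical values): launching with COMET_ROOT=/data/comet and no other
# overrides resolves datasets to /data/comet/datasets, metadata to /data/comet/metadata
# and tmpdir to /data/comet/comet_tmp, because the ${root} substitution is evaluated
# only after the optional env overrides above have been merged in.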
archive = true
archive = ${?COMET_ARCHIVE}
# Both "airflow" and "simple" launchers are supported; since the last assignment
# wins in HOCON, "simple" is the effective default unless COMET_LAUNCHER is set.
launcher = airflow
launcher = simple
launcher = ${?COMET_LAUNCHER}
hive = false
hive = ${?COMET_HIVE}
grouped = false
grouped = ${?COMET_GROUPED}
analyze = true
analyze = ${?COMET_ANALYZE}
# Save output in CSV format using coalesce(1)
csv-output = false
csv-output = ${?COMET_CSV_OUTPUT}
privacy-only = false
privacy-only = ${?COMET_PRIVACY_ONLY}
default-write-format = parquet
default-write-format = ${?COMET_DEFAULT_WRITE_FORMAT}
merge-force-distinct = false
merge-force-distinct = ${?COMET_MERGE_FORCE_DISTINCT}
file-system = "file://"
file-system = ${?COMET_FS}
udfs = ${?COMET_UDFS}
metadata-file-system = ${file-system}
metadata-file-system = ${?COMET_METADATA_FS}
chewer-prefix = "comet.chewer"
chewer-prefix = ${?COMET_CHEWER_PREFIX}
row-validator-class = "com.ebiznext.comet.job.ingest.DsvIngestionUtil"
row-validator-class = ${?COMET_ROW_VALIDATOR_CLASS}
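# Illustrative only: a different row validator can be plugged in through
# COMET_ROW_VALIDATOR_CLASS; the class name below is hypothetical, not part of this project:
#   row-validator-class = "com.mycompany.validation.MyRowValidator"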
lock {
path = ${root}"/locks"
path = ${?COMET_LOCK_PATH}
ingestion-timeout = -1
ingestion-timeout = ${?COMET_LOCK_INGESTION_TIMEOUT}
metrics-timeout = -1
metrics-timeout = ${?COMET_LOCK_METRICS_TIMEOUT}
}
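# Illustrative only: both lock timeouts can be overridden at launch, e.g.
# COMET_LOCK_INGESTION_TIMEOUT=600; the default of -1 presumably means the job
# waits for the lock without a timeout (units are not documented here).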
hadoop {
}
audit {
path = ${root}"/audit"
path = ${?COMET_AUDIT_PATH}
audit-timeout = -1
audit-timeout = ${?COMET_LOCK_AUDIT_TIMEOUT}
max-errors = 100
sink {
type = "NoneSink" # can be BigQuerySink or JdbcSink or NoneSink or EsSink
type = ${?COMET_AUDIT_SINK_TYPE}
name = "audit" // serves as dataset name for BigQuery or Elasticsearch index name
## BigQuery options
# location = "EU"
# timestamp = "_PARTITIONTIME"
# clustering = "???"
# days = 7
# require-partition-filter = false
# Jdbc options
connection = "audit"
partitions = 1
batchSize = 1000
}
}
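# Illustrative only (kept commented so the defaults above are unchanged): routing
# audit records to the JDBC sink would combine the options above with the "audit"
# connection declared in the connections block below, for example:
#   sink {
#     type = "JdbcSink"
#     name = "audit"
#     connection = "audit"
#     partitions = 1
#     batchSize = 1000
#   }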
metrics {
active = false
active = ${?COMET_METRICS_ACTIVE}
# path = ${root}"/metrics/{domain}/{schema}"
path = ${root}"/metrics/{domain}"
path = ${?COMET_METRICS_PATH}
discrete-max-cardinality = 10
discrete-max-cardinality = ${?COMET_METRICS_DISCRETE_MAX_CARDINALITY}
sink {
type = "NoneSink" # can be BigQuerySink or JdbcSink or NoneSink or EsSink
type = ${?COMET_METRICS_SINK_TYPE}
name = "metrics" // serves as dataset name for BigQuery or Elasticsearch index name
## BigQuery options
# location = "EU"
# timestamp = "_PARTITIONTIME"
# clustering = "???"
# days = 7
# require-partition-filter = false
# Jdbc options
connection = "metrics"
partitions = 1
batch-size = 1000
}
}
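# Illustrative only: metrics collection is off by default; it can be switched on
# without editing this file by exporting COMET_METRICS_ACTIVE=true, in which case
# results are written under the path above and, if a sink other than NoneSink is
# configured, to that sink as well.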
connections = {
"audit": {
"format" = "jdbc"
# "mode" = "Append"
options = {
"url": "jdbc:postgresql://127.0.0.1:5403/comet?user=postgres&password=ficpug-Podbid-7fobnu",
"user": "postgres",
"password": "ficpug-Podbid-7fobnu",
"driver": "org.postgresql.Driver"
# driver = "org.h2.Driver"
}
}
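  # Illustrative only (commented out): the metrics sink above references a connection
  # named "metrics". If a JdbcSink is used for metrics, a matching entry would be
  # needed here; the values below are hypothetical placeholders:
  #   "metrics": {
  #     "format" = "jdbc"
  #     options = {
  #       "url": "jdbc:postgresql://127.0.0.1:5403/comet",
  #       "user": "postgres",
  #       "password": "<password>",
  #       "driver": "org.postgresql.Driver"
  #     }
  #   }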
}
jdbc-engines {
h2 {
tables = {
"audit": {
create-sql = """CREATE TABLE IF NOT EXISTS AUDIT (
jobid VARCHAR(255) not NULL,
paths VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
success BOOLEAN not NULL,
count BIGINT not NULL,
countAccepted BIGINT not NULL,
countRejected BIGINT not NULL,
timestamp TIMESTAMP not NULL,
duration INTEGER not NULL,
message VARCHAR(255) not NULL
)
"""
},
"rejected": {
create-sql = """CREATE TABLE IF NOT EXISTS REJECTED (
jobid VARCHAR(255) not NULL,
timestamp TIMESTAMP not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
error VARCHAR(255) not NULL,
path VARCHAR(255) not NULL
)
"""
},
"frequencies": {
create-sql = """CREATE TABLE IF NOT EXISTS frequencies (
attribute VARCHAR(255) not NULL,
category TEXT NULL,
count BIGINT not NULL,
frequency DOUBLE PRECISION NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
},
"continuous": {
create-sql = """CREATE TABLE IF NOT EXISTS continuous (
attribute VARCHAR(255) not NULL,
min DOUBLE PRECISION NULL,
max DOUBLE PRECISION NULL,
mean DOUBLE PRECISION NULL,
missingValues BIGINT NULL,
variance DOUBLE PRECISION NULL,
standardDev DOUBLE PRECISION NULL,
sum DOUBLE PRECISION NULL,
skewness DOUBLE PRECISION NULL,
kurtosis DOUBLE PRECISION NULL,
percentile25 DOUBLE PRECISION NULL,
median DOUBLE PRECISION NULL,
percentile75 DOUBLE PRECISION NULL,
cometMetric VARCHAR(255) not NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
count BIGINT not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
},
"discrete": {
create-sql = """CREATE TABLE IF NOT EXISTS discrete (
attribute VARCHAR(255) not NULL,
countDistinct BIGINT NULL,
missingValuesDiscrete BIGINT NULL,
cometMetric VARCHAR(255) not NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
count BIGINT not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
}
}
}
postgresql {
tables = {
"audit": {
create-sql = """CREATE TABLE IF NOT EXISTS audit (
jobid VARCHAR(255) not NULL,
paths VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
success BOOLEAN not NULL,
count BIGINT not NULL,
countAccepted BIGINT not NULL,
countRejected BIGINT not NULL,
timestamp TIMESTAMP not NULL,
duration INTEGER not NULL,
message VARCHAR(255) not NULL
)
"""
},
"rejected": {
create-sql = """CREATE TABLE IF NOT EXISTS rejected (
jobid VARCHAR(255) not NULL,
timestamp TIMESTAMP not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
error VARCHAR(255) not NULL,
path VARCHAR(255) not NULL
)
"""
},
"frequencies": {
create-sql = """CREATE TABLE IF NOT EXISTS frequencies (
attribute VARCHAR(255) not NULL,
category TEXT NULL,
count BIGINT not NULL,
frequency DOUBLE PRECISION NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
},
"continuous": {
create-sql = """CREATE TABLE IF NOT EXISTS continuous (
attribute VARCHAR(255) not NULL,
min DOUBLE PRECISION NULL,
max DOUBLE PRECISION NULL,
mean DOUBLE PRECISION NULL,
missingValues BIGINT NULL,
variance DOUBLE PRECISION NULL,
standardDev DOUBLE PRECISION NULL,
sum DOUBLE PRECISION NULL,
skewness DOUBLE PRECISION NULL,
kurtosis DOUBLE PRECISION NULL,
percentile25 DOUBLE PRECISION NULL,
median DOUBLE PRECISION NULL,
percentile75 DOUBLE PRECISION NULL,
cometMetric VARCHAR(255) not NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
count BIGINT not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
},
"discrete": {
name = "discrete"
create-sql = """CREATE TABLE IF NOT EXISTS discrete (
attribute VARCHAR(255) not NULL,
countDistinct BIGINT NULL,
missingValuesDiscrete BIGINT NULL,
cometMetric VARCHAR(255) not NULL,
jobId VARCHAR(255) not NULL,
domain VARCHAR(255) not NULL,
schema VARCHAR(255) not NULL,
count BIGINT not NULL,
cometTime BIGINT not NULL,
cometStage VARCHAR(255) not NULL
)
"""
}
}
}
}
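# Each engine entry above maps a JDBC dialect to the DDL used to create the audit,
# rejected and metrics tables. Supporting another database would presumably mean
# adding a sibling block with the same table keys (engine name illustrative only),
# e.g. mysql { tables = { "audit": { create-sql = """...""" }, ... } }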
area {
pending = "pending"
pending = ${?COMET_AREA_PENDING}
unresolved = "unresolved"
unresolved = ${?COMET_AREA_UNRESOLVED}
archive = "archive"
archive = ${?COMET_AREA_ARCHIVE}
ingesting = "ingesting"
ingesting = ${?COMET_AREA_INGESTING}
accepted = "accepted"
accepted = ${?COMET_AREA_ACCEPTED}
rejected = "rejected"
rejected = ${?COMET_AREA_REJECTED}
business = "business"
business = ${?COMET_AREA_BUSINESS}
}
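# These values are the folder names used for each processing area under the datasets
# directory; with the defaults above, incoming files would typically land under
# ${datasets}/pending/<domain> (illustrative layout, assuming the default naming).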
privacy {
options = {
"none": "com.ebiznext.comet.privacy.No",
"hide": "com.ebiznext.comet.privacy.Hide",
"hide10X": "com.ebiznext.comet.privacy.Hide(\"X\",10)",
"approxLong20": "com.ebiznext.comet.privacy.ApproxLong(20)",
"md5": "com.ebiznext.comet.privacy.Md5",
"sha1": "com.ebiznext.comet.privacy.Sha1",
"sha256": "com.ebiznext.comet.privacy.Sha256",
"sha512": "com.ebiznext.comet.privacy.Sha512",
"initials": "com.ebiznext.comet.privacy.Initials"
}
}
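# Illustrative only: additional masking strategies can be registered in the options
# map above by mapping a name to a class available on the classpath; the class below
# is hypothetical, not part of this project:
#   "maskEmail": "com.mycompany.privacy.MaskEmail"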
elasticsearch {
active = false
active = ${?COMET_ES_ACTIVE}
options = {
"es.nodes": "localhost",
"es.port": "9200",
    # "es.net.http.auth.user": "",
    # "es.net.http.auth.pass": "",
"es.net.ssl": "false",
"es.net.ssl.cert.allow.self.signed": "false",
"es.batch.size.entries": "1000",
"es.batch.size.bytes": "1mb",
"es.batch.write.retry.count": "3",
"es.batch.write.retry.wait": "10s"
}
}
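# Illustrative only (commented out): indexing into a secured cluster might look like
# the following, with hypothetical host and credentials:
#   active = true
#   options = {
#     "es.nodes": "es.example.internal",
#     "es.port": "9200",
#     "es.net.http.auth.user": "elastic",
#     "es.net.http.auth.pass": "changeme",
#     "es.net.ssl": "true"
#   }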
atlas {
uri = "http://127.0.0.1:21000"
uri = ${?COMET_ATLAS_URI}
user = "admin"
user = ${?COMET_ATLAS_USER}
password = "admin"
password = ${?COMET_ATLAS_PASSWORD}
owner = "system"
owner = ${?COMET_ATLAS_OWNER}
}
spark {
# sql.hive.convertMetastoreParquet = false
#extraListeners = com.hortonworks.spark.atlas.SparkAtlasEventTracker
#sql.queryExecutionListeners = com.hortonworks.spark.atlas.SparkAtlasEventTracker
#sql.streaming.streamingQueryListeners=com.hortonworks.spark.atlas.SparkAtlasStreamingQueryEventTracker
# yarn.principal = "invalid"
# yarn.keytab = "invalid"
# yarn.principal = ${?SPARK_YARN_PRINCIPAL}
# yarn.keytab = ${?SPARK_YARN_KEYTAB}
debug.maxToStringFields = 100
#master = "local[*]"
# sql.catalogImplementation="hive"
# sql.catalogImplementation="in-memory"
sql.legacy.parquet.datetimeRebaseModeInWrite = "CORRECTED"
viewsEnabled = "true"
}
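# The keys in the spark block above are assumed to be applied to the SparkConf with
# the "spark." prefix (e.g. spark.debug.maxToStringFields = 100), which is why the
# prefix is omitted here.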
# curl -v -H 'Cache-Control: no-cache' -H 'Content-Type: application/json' -XPOST localhost:8080/api/experimental/dags/comet_validator/dag_runs -d '{"conf":"{\"key\":\"value\"}"}'
airflow {
ingest = "comet_ingest"
ingest = ${?AIRFLOW_INGEST}
endpoint = "http://127.0.0.1:8080/api/experimental"
endpoint = ${?AIRFLOW_ENDPOINT}
}
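# Illustrative only: with the defaults above and launcher = "airflow", ingestion is
# presumably triggered by calling the experimental Airflow REST API at
# http://127.0.0.1:8080/api/experimental for the DAG named by `ingest`
# ("comet_ingest"), much like the curl example above.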
internal {
# See https://spark.apache.org/docs/latest/api/java/index.html?org/apache/spark/storage/StorageLevel.html
cache-storage-level = "MEMORY_AND_DISK"
cache-storage-level = ${?COMET_INTERNAL_CACHE_STORAGE_LEVEL}
}
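# Other values accepted for cache-storage-level are the Spark StorageLevel names
# documented at the link above, e.g. MEMORY_ONLY, MEMORY_ONLY_SER, MEMORY_AND_DISK_SER,
# DISK_ONLY, OFF_HEAP and NONE.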