# secor.common.properties
# Kafka to s3/gs/swift logs exporter
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
############
# MUST SET #
############
# Regular expression matching names of consumed topics.
secor.kafka.topic_filter=.*
secor.kafka.topic_blacklist=
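# Example (illustrative values, not defaults): consume every topic starting with
# "events_" except the debug ones:
# secor.kafka.topic_filter=events_.*
# secor.kafka.topic_blacklist=events_debug.*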
# Choose the cloud service you are using.
# Valid values are S3, GS, Swift, or Azure.
cloud.service=S3
# AWS authentication credentials.
# Leave empty if using IAM role-based authentication with s3a filesystem.
aws.access.key=
aws.secret.key=
# Session token only required if using temporary S3 access keys
aws.session.token=
aws.role=
# Optional Proxy Setting. Set to true to enable proxy
# Only applicable to S3UploadManager
aws.proxy.isEnabled=false
aws.proxy.http.host=
aws.proxy.http.port=
################
# END MUST SET #
################
# AWS region or endpoint. region should be a known region name (e.g.
# us-east-1). endpoint should be a known S3 endpoint url. If neither
# is specified, then the default region (us-east-1) is used. If both
# are specified, then endpoint is used.
#
# Only applies if the S3UploadManager is used - see
# secor.upload.manager.class.
#
# http://docs.aws.amazon.com/general/latest/gr/rande.html#s3_region
aws.region=
aws.endpoint=
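# Example (illustrative values only; pick the region or endpoint of your bucket):
# aws.region=us-west-2
# aws.endpoint=https://s3.us-west-2.amazonaws.com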
# Toggle the AWS S3 client between virtual host style access and path style
# access. See http://docs.aws.amazon.com/AmazonS3/latest/dev/VirtualHosting.html
aws.client.pathstyleaccess=false
###########################
# START AWS S3 ENCRYPTION #
###########################
# Type of AWS S3 server-side encryption to use, if any.
# Set to S3 to enable S3-managed encryption.
# Set to KMS to enable AWS KMS-managed encryption (see aws.sse.kms.key).
# Set to customer to enable customer-managed encryption (see aws.sse.customer.key).
# Leave empty to disable encryption.
aws.sse.type=
# Key to use for S3 server-side encryption, base64-encoded
# Note: requires aws.sse.type to be set to customer to be used
aws.sse.customer.key=
# KMS Key to use for S3 server-side encryption, base64-encoded
# Leave empty to use default generated key
# Note: requires aws.sse.type to be set to KMS to be used
aws.sse.kms.key=
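# Example (illustrative): enable S3-managed encryption, which needs no key:
# aws.sse.type=S3
# Or use KMS with a specific key (placeholder value shown):
# aws.sse.type=KMS
# aws.sse.kms.key=<base64-encoded key>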
#########################
# END AWS S3 ENCRYPTION #
#########################
# Hadoop filesystem to use. Choices are s3n or s3a.
# See https://wiki.apache.org/hadoop/AmazonS3 for details.
secor.s3.filesystem=s3n
# Swift config, MUST configure if cloud.service=Swift
# Swift Login Details:
swift.use.get.auth=true
swift.auth.url=
swift.tenant=
swift.username=
swift.port=8080
swift.public=true
# only needed if "swift.use.get.auth" = false
swift.password=
# only needed if "swift.use.get.auth" = true
swift.api.key=
# GS config, MUST configure if cloud.service=GS
# Name of the Google cloud storage bucket where log files are stored.
secor.gs.bucket=secor_gs
# Google cloud storage path where files are stored within the bucket.
secor.gs.path=data
# Use direct uploads
# WARNING: disables resumable uploads, files are uploaded in a single request
# This may help prevent IOException: insufficient data written,
# see https://github.com/pinterest/secor/issues/177
# https://cloud.google.com/storage/docs/json_api/v1/how-tos/upload
secor.gs.upload.direct=false
# Application credentials configuration file
# https://developers.google.com/identity/protocols/application-default-credentials
# It can be left empty when secor is running on Google Cloud VMs with the proper scopes
secor.gs.credentials.path=
# Zookeeper config.
zookeeper.session.timeout.ms=3000
zookeeper.sync.time.ms=200
# Zookeeper path (chroot) under which secor data will be placed.
secor.zookeeper.path=/
# Impacts how frequently the upload logic is triggered if no messages are delivered.
kafka.consumer.timeout.ms=10000
# Where consumer should read from if no committed offset in zookeeper.
# "smallest" -> read from earliest offset
# "largest" -> read from latest offset
# Always use "smallest" unless you know what you're doing and are willing to risk
# data loss for new topics or topics whose number of partitions has changed.
# See the kafka docs for "auto.offset.reset".
kafka.consumer.auto.offset.reset=smallest
# Same as the old configuration above, except the accepted values are "earliest" and "latest"
# instead of "smallest" and "largest".
kafka.new.consumer.auto.offset.reset=earliest
# Comma-separated list of topics to consume. Please note that this is not a regular expression.
# If that's what you want, you can use "secor.kafka.topic_filter" instead.
kafka.new.consumer.topic.list=
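# Example (illustrative topic names): consume exactly these two topics with the new consumer:
# kafka.new.consumer.topic.list=access_log,request_log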
kafka.new.consumer.poll.timeout.seconds=10
kafka.new.consumer.request.timeout.ms=
kafka.new.consumer.ssl.key.password=
kafka.new.consumer.ssl.keystore.location=
kafka.new.consumer.ssl.keystore.password=
kafka.new.consumer.ssl.truststore.location=
kafka.new.consumer.ssl.truststore.password=
kafka.new.consumer.isolation.level=
kafka.new.consumer.max.poll.records=
kafka.new.consumer.sasl.client.callback.handler.class=
kafka.new.consumer.sasl.jaas.config=
kafka.new.consumer.sasl.kerberos.service.name=
kafka.new.consumer.sasl.login.callback.handler.class=
kafka.new.consumer.sasl.login.class=
kafka.new.consumer.sasl.mechanism=
kafka.new.consumer.security.protocol=
kafka.new.consumer.ssl.enabled.protocols=
kafka.new.consumer.ssl.keystore.type=
kafka.new.consumer.ssl.protocol=
kafka.new.consumer.ssl.provider=
kafka.new.consumer.ssl.truststore.type=
kafka.new.consumer.partition.assignment.strategy.class=
# Choose between the range and roundrobin partition assignment strategies for kafka
# high level consumers. Check PartitionAssignor.scala in the kafka 821 module for
# the differences between the two.
# In kafka 811, only the range strategy is supported.
kafka.partition.assignment.strategy=range
# Max number of retries during rebalance.
kafka.rebalance.max.retries=
# Rebalance backoff.
kafka.rebalance.backoff.ms=
# Kafka consumer receive buffer size (socket.receive.buffer.bytes)
kafka.socket.receive.buffer.bytes=
# Kafka fetch max size (fetch.message.max.bytes)
kafka.fetch.message.max.bytes=
# Kafka fetch min bytes (fetch.min.bytes)
kafka.fetch.min.bytes=
kafka.fetch.max.bytes=
# Kafka fetch max wait ms (fetch.max.wait.ms)
kafka.fetch.wait.max.ms=
# Port of the broker serving topic partition metadata.
kafka.seed.broker.port=9092
# Zookeeper path at which kafka is registered. In Zookeeper parlance, this is referred
# to as the chroot.
kafka.zookeeper.path=/
# URL of a Confluent Schema Registry: https://docs.confluent.io/current/schema-registry/docs/index.html
# Only used for decoding Avro messages.
schema.registry.url=
# Store offset in zookeeper and kafka consumer topic.
# Only used if kafka.offsets.storage is set to "kafka"
# http://kafka.apache.org/documentation.html#oldconsumerconfigs
# Possible values: true or false
kafka.dual.commit.enabled=true
# Offset storage.
# Possible values: "zookeeper" to read offset from zookeeper or "kafka" to read offset from kafka consumer topic
kafka.offsets.storage=zookeeper
include=kafka.properties
# Secor generation is a version that should be incremented during non-backwards-compatible
# Secor releases. Generation number is one of the components of generated log file names.
# Generation number makes sure that outputs of different Secor versions are isolated.
secor.generation=1
# Number of consumer threads per Secor process.
secor.consumer.threads=7
# Consumption rate limit enforced at the process level (not a consumer-thread level).
secor.messages.per.second=10000
# Used by the "backup" consumer group only.
# Number of consecutive message offsets that constitute a single offset= partition on s3.
# Example:
# if set to 10,
# messages with offsets 0 to 9 will be written to s3 path s3n://.../offset=0/...
# messages with offsets 10 to 19 will be written to s3 path s3n://.../offset=10/...
# ...
secor.offsets.per.partition=10000000
secor.offsets.prefix=offset=
# How long it takes for secor to forget a topic partition. Applies to stats generation only.
secor.topic_partition.forget.seconds=600
# Set this to true to make the partitioner use hourly partitions.
# By default, the partitioner creates daily partitions, so the data will be
# written into
# s3n://.../topic/dt=2015-07-07/
# If this parameter is set to true, the data will be written into
# s3n://.../topic/dt=2015-07-07/hr=02
# The hour folder ranges from 00 to 23
partitioner.granularity.hour=false
partitioner.granularity.minute=false
partitioner.granularity.date.prefix=dt=
partitioner.granularity.hour.prefix=hr=
partitioner.granularity.minute.prefix=min=
partitioner.granularity.date.format=yyyy-MM-dd
partitioner.granularity.hour.format=HH
partitioner.granularity.minute.format=mm
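# Example (illustrative, assuming both hourly and minute granularity are enabled
# with the default prefixes/formats above): output lands under paths such as
# s3n://.../topic/dt=2015-07-07/hr=02/min=30/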
# How many seconds the finalizer should wait before finalizing a partition
partitioner.finalizer.delay.seconds=3600
# During partition finalization, the finalizer will start from the last
# time partition (e.g. dt=2015-07-17) and traverse backwards for n
# partition periods (e.g. dt=2015-07-16, dt=2015-07-15 ...)
# This parameter controls how many partition periods to traverse back
# The default is 10
# secor.finalizer.lookback.periods=10
# If greater than 0, upon startup Secor will clean up directories and files under secor.local.path
# that are older than this value.
secor.local.log.delete.age.hours=-1
# Secor comes with a tool that adds Hive partitions for finalized topics. Currently, we support
# only Hive clusters accessible through Qubole. The token gives access to the Qubole API.
# It is available at https://api.qubole.com/users/edit
qubole.api.token=
# hive tables are generally named after the topics. For instance if the topic
# is request_log the hive table is also called request_log. If you want this
# to be pinlog_request_log you can set this config to "pinlog_". This affects
# all topics.
hive.table.prefix=
# You can also name your hive table directly if your hive table doesn't
# follow the pattern of <hive.table.prefix><topic>.
# E.g. hive.table.name.topic1=table1 indicates that the hive table for
# kafka topic topic1 will be named table1.
# Secor can export stats such as consumption lag (in seconds and offsets) per topic partition.
# Leave empty to disable this functionality.
tsdb.hostport=
# Regex of topics that are not exported to TSDB.
monitoring.blacklist.topics=
# Prefix of exported stats.
monitoring.prefix=secor
# Monitoring interval.
# Set to 0 to disable - the progress monitor will run once and exit.
monitoring.interval.seconds=0
# Secor can export stats to statsd such as consumption lag (in seconds and offsets) per topic partition.
# Leave empty to disable this functionality.
statsd.hostport=
# Thrift protocol class. It applies to timestamp extractor below and parquet output for thrift messages.
# TBinaryProtocol by default
secor.thrift.protocol.class=
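# Example (illustrative; only needed if your messages are not TBinaryProtocol):
# secor.thrift.protocol.class=org.apache.thrift.protocol.TCompactProtocol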
# Thrift message class. It applies to parquet output.
# If all Kafka topics transfer the same thrift message type, set secor.thrift.message.class.*=<your message class>
secor.thrift.message.class.*=
# If true, the consumer group will be the initial prefix of all
# exported metrics, before `monitoring.prefix` (if set).
#
# Setting this to false and using monitoring.prefix can lead to nicer metric paths.
# For example,
# secor.kafka.group = secor_hr_partition
# monitoring.prefix = secor.hr
# statsd.prefixWithConsumerGroup = false
# => secor.hr.lag.offsets.<topic>.<partition>
#
# secor.kafka.group = secor_hr_partition
# monitoring.prefix = secor
# statsd.prefixWithConsumerGroup = true
# => secor_hr_partition.secor.lag.offsets.<topic>.<partition>
statsd.prefixWithConsumerGroup=true
# Name of the field that contains the timestamp, for the JSON, MessagePack, or Thrift message parsers. (e.g. 1405970352123)
message.timestamp.name=timestamp
# Separator for defining message.timestamp.name in a nested structure. E.g.
# {"meta_data": {"created": "1405911096123", "last_modified": "1405912096123"}, "data": "test"}
# message.timestamp.name=meta_data.created
# message.timestamp.name.separator=.
message.timestamp.name.separator=
# Field ID of the field that contains timestamp for Thrift message parser.
# N.B. setting this past 1 will come with a performance penalty
message.timestamp.id=1
# Data type of the timestamp field for thrift message parser.
# Supports i64 and i32.
message.timestamp.type=i64
# Name of the field that contains a timestamp in a date format, for JSON. (e.g. 2014-08-07, Jul 23 02:16:57 2005, etc.)
# Should be used when there is no timestamp in a Long format. Time zones are ignored.
message.timestamp.input.pattern=
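# Example (illustrative; assumes a Java date-format pattern matching your data):
# message.timestamp.input.pattern=yyyy-MM-dd HH:mm:ss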
# Whether the timestamp field is required. It should always be required, but
# for historical reasons this check was not enforced, so there might be some
# installations with messages missing the timestamp field.
message.timestamp.required=true
# To enable compression, set this to a valid compression codec implementing
# org.apache.hadoop.io.compress.CompressionCodec interface, such as
# 'org.apache.hadoop.io.compress.GzipCodec'.
secor.compression.codec=
# To set a custom file extension set this to a valid file suffix, such as
# '.gz', '.part', etc.
secor.file.extension=
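# Example (illustrative): gzip-compress output and mark files with a .gz suffix:
# secor.compression.codec=org.apache.hadoop.io.compress.GzipCodec
# secor.file.extension=.gz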
# The secor file reader/writer factory used to read/write the data; by default we write sequence files
secor.file.reader.writer.factory=com.pinterest.secor.io.impl.SequenceFileReaderWriterFactory
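# Example (illustrative; assumes the delimited-text factory shipped with Secor fits
# your message format): write plain delimited text instead of sequence files:
# secor.file.reader.writer.factory=com.pinterest.secor.io.impl.DelimitedTextFileReaderWriterFactory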
# If left blank, defaults to \n
secor.file.reader.Delimiter=\n
# If left blank, no delimiter is added. Do not use \ as it needs to be escaped and
# is an escape character, not a delimiter.
secor.file.writer.Delimiter=\n
# Max message size in bytes to retrieve via KafkaClient. This is used by ProgressMonitor and PartitionFinalizer.
# This should be set large enough to accept the max message size configured in your kafka broker
# Default is 0.1 MB
secor.max.message.size.bytes=100000
# Class that will manage uploads. Default is to use the hadoop
# interface to S3.
secor.upload.manager.class=com.pinterest.secor.uploader.HadoopS3UploadManager
# Set the property below to your timezone, and partitions in s3 will be created according to the timezone provided
secor.parser.timezone=UTC
# Transformer class that transforms and filters messages accordingly.
secor.message.transformer.class=com.pinterest.secor.transformer.IdentityMessageTransformer
# Set the property below to true if you want the md5 hash appended to your s3 path.
# This helps with better partitioning of the data on s3, which gives better performance while reading and writing on s3.
secor.s3.prefix.md5hash=false
# After the given date, secor will upload files to the supplied s3 alternative path
secor.s3.alter.path.date=
# An alternative S3 path for secor to upload files to
secor.s3.alternative.path=
# If enabled, calls to add partitions will be made to qubole during finalization; otherwise, the qubole call is skipped
secor.enable.qubole=true
# Timeout value for qubole calls
secor.qubole.timeout.ms=300000
# Topics to upload at a fixed minute mark
secor.kafka.upload_at_minute_mark.topic_filter=
# The minute mark at which to upload. This isn't triggered unless the topic name matches the filter above.
secor.upload.minute_mark=0
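# Example (illustrative values): upload topics matching "billing_.*" at the
# configured minute mark:
# secor.kafka.upload_at_minute_mark.topic_filter=billing_.*
# secor.upload.minute_mark=5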
# File age per topic and per partition is checked against secor.max.file.age.seconds by looking at
# the youngest file when true or at the oldest file when false. Setting it to true ensures that files
# are uploaded when data stops coming and the size-based policy cannot trigger. Setting it to false
# ensures that files older than secor.max.file.age.seconds are uploaded immediately.
secor.file.age.youngest=true
# Class that manages metric collection.
# Sending metrics to Ostrich is the default implementation.
secor.monitoring.metrics.collector.class=com.pinterest.secor.monitoring.OstrichMetricCollector
# Row group size in bytes for Parquet writers. Specifies how much data will be buffered in memory before flushing a
# block to disk. Larger values allow for larger column chunks, which makes it possible to do larger sequential IO.
# Should be aligned with HDFS blocks. Defaults to 128MB in Parquet 1.9.
parquet.block.size=134217728
# Page group size in bytes for Parquet writers. Indivisible unit for columnar data. Smaller data pages allow for more
# fine grained reading but have higher space overhead. Defaults to 1MB in Parquet 1.9.
parquet.page.size=1048576
# Enable or disable dictionary encoding for Parquet writers. The dictionary encoding builds a dictionary of values
# encountered in a given column. Defaults to true in Parquet 1.9.
parquet.enable.dictionary=true
# Enable or disable validation for Parquet writers. Validates records written against the schema. Defaults to false in
# Parquet 1.9.
parquet.validation=false
# Users can configure an ORC schema for each Kafka topic. A common schema is also possible. This property is mandatory
# if DefaultORCSchemaProvider is used. The ORC schema for all topics should be defined like this:
secor.orc.message.schema.*=struct<a:int\,b:map<string\,string>\,f:array<int>\,g:int>
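# Example (illustrative schema for a hypothetical topic named "test"; assumes per-topic
# keys use the topic name in place of *, and commas in the schema are escaped with \):
# secor.orc.message.schema.test=struct<id:int\,name:string>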
# The config below defines the ORC schema provider class name. Users can plug in a custom ORC schema provider implementation.
secor.orc.schema.provider=com.pinterest.secor.util.orc.schema.DefaultORCSchemaProvider