#
# Licensed to the Apache Software Foundation (ASF) under one or more
# contributor license agreements. See the NOTICE file distributed with
# this work for additional information regarding copyright ownership.
# The ASF licenses this file to You under the Apache License, Version 2.0
# (the "License"); you may not use this file except in compliance with
# the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
######
###### This config file demonstrates batch processing in SeaTunnel
######
env {
  # You can set Spark configuration here
  # See the available properties defined by Spark: https://spark.apache.org/docs/latest/configuration.html#available-properties
  #job.mode = BATCH
  job.name = "SeaTunnel"
  spark.executor.instances = 1
  spark.executor.cores = 1
  spark.executor.memory = "1g"
  spark.master = local
}
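# The spark.* keys above map directly onto Spark's own configuration
# properties; as a rough illustration (plain Spark usage, not
# SeaTunnel-specific), the same settings could be passed to spark-submit:
#   spark-submit --master local \
#     --conf spark.executor.instances=1 \
#     --conf spark.executor.cores=1 \
#     --conf spark.executor.memory=1g \
#     ...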
source {
  # This is an example input plugin, intended **only for testing and demonstrating the input plugin feature**
  FakeSource {
    row.num = 16
    parallelism = 2
    schema = {
      fields {
        c_map = "map<string, string>"
        c_array = "array<int>"
        c_string = string
        c_boolean = boolean
        c_tinyint = tinyint
        c_smallint = smallint
        c_int = int
        c_bigint = bigint
        c_float = float
        c_double = double
        c_decimal = "decimal(30, 8)"
        c_null = "null"
        c_bytes = bytes
        c_date = date
        c_timestamp = timestamp
      }
    }
    result_table_name = "fake"
  }
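  # FakeSource can also emit a fixed set of rows instead of randomly
  # generated ones; a minimal sketch, assuming the `rows` option of the
  # FakeSource connector (verify option names against the connector docs
  # for your SeaTunnel version):
  # FakeSource {
  #   schema = { fields { name = string, age = int } }
  #   rows = [
  #     { kind = INSERT, fields = ["seatunnel", 10] }
  #   ]
  #   result_table_name = "fake_fixed"
  # }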
  # You can also use other input plugins, such as HDFS:
  # hdfs {
  #   result_table_name = "accesslog"
  #   path = "hdfs://hadoop-cluster-01/nginx/accesslog"
  #   format = "json"
  # }
  # For more information on configuring SeaTunnel and the full list of input plugins,
  # please go to https://seatunnel.apache.org/docs/category/source-v2
}
transform {
  # Use the SQL transform to select columns from the upstream "fake" table
  # You can also use other transform plugins, such as Split or Filter
  sql {
    source_table_name = "fake"
    query = "select c_map,c_array,c_string,c_boolean,c_tinyint,c_smallint,c_int,c_bigint,c_float,c_double,c_null,c_bytes,c_date,c_timestamp from fake"
    result_table_name = "sql"
  }
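  # A minimal sketch of chaining a second transform, assuming the Filter
  # transform's `fields` option (field names here come from the fake schema;
  # verify the option names against the transform-v2 docs):
  # Filter {
  #   source_table_name = "sql"
  #   result_table_name = "filtered"
  #   fields = [c_string, c_int]
  # }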
  # For more information on configuring SeaTunnel and the full list of transform plugins,
  # please go to https://seatunnel.apache.org/docs/category/transform-v2
}
sink {
  # Use the Console output plugin to print data to stdout
  Console {
    parallelism = 2
  }
  # You can also use other output plugins, such as HDFS:
  # hdfs {
  #   path = "hdfs://hadoop-cluster-01/nginx/accesslog_processed"
  #   save_mode = "append"
  # }
  # For more information on configuring SeaTunnel and the full list of output plugins,
  # please go to https://seatunnel.apache.org/docs/category/sink-v2
}
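######
###### To run this config with the Spark engine, submit it via the SeaTunnel
###### Spark starter script; a sketch (the script name and paths vary by
###### SeaTunnel release and Spark version, so check your distribution's bin/):
###### ./bin/start-seatunnel-spark-connector-v2.sh --master local --deploy-mode client --config ./config/spark.batch.conf
######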