org.apache.pulsar.io.flume.node.PropertiesFileConfigurationProvider Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.pulsar.io.flume.node;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.lang.reflect.InvocationTargetException;
import java.util.HashMap;
import java.util.Properties;
import org.apache.flume.conf.FlumeConfiguration;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
*
* A configuration provider that uses properties file for specifying
* configuration. The configuration files follow the Java properties file syntax
* rules specified at {@link java.util.Properties#load(java.io.Reader)}. Every
* configuration value specified in the properties file is prefixed by an
* Agent Name which helps isolate an individual agent's namespace.
*
*
* Valid configuration files must observe the following rules for every agent
* namespace.
*
* - For every <agent name> there must be three lists specified that
* include <agent name>.sources,
* <agent name>.sinks, and <agent name>.channels.
* Each of these lists must contain a space separated list of names
* corresponding to that particular entity.
* - For each source named in <agent name>.sources, there must
* be a non-empty type attribute specified from the valid set of source
* types. For example:
* <agent name>.sources.<source name>.type = event
* - For each source named in <agent name>.sources, there must
* be a space-separated list of channel names that the source will associate
* with during runtime. Each of these names must be contained in the channels
* list specified by <agent name>.channels. For example:
* <agent name>.sources.<source name>.channels =
* <channel-1 name> <channel-2 name>
* - For each source named in the <agent name>.sources, there
* must be a runner namespace of configuration that configures the
* associated source runner. For example:
* <agent name>.sources.<source name>.runner.type = avro.
* This namespace can also be used to configure other configuration of the
* source runner as needed. For example:
* <agent name>.sources.<source name>.runner.port = 10101
*
* - For each source named in <agent name>.sources there can
* be an optional selector.type specified that identifies the type
* of channel selector associated with the source. If not specified, the
* default replicating channel selector is used.
*
* - For each channel named in the <agent name>.channels,
* there must be a non-empty type attribute specified from the valid
* set of channel types. For example:
* <agent name>.channels.<channel name>.type = mem
* - For each sink named in the <agent name>.sinks, there must
* be a non-empty type attribute specified from the valid set of sink
* types. For example:
* <agent name>.sinks.<sink name>.type = hdfs
* - For each sink named in the <agent name>.sinks, there must
* be a non-empty single-valued channel name specified as the value of the
* channel attribute. This value must be contained in the channels list
* specified by <agent name>.channels. For example:
* <agent name>.sinks.<sink name>.channel =
* <channel name>
* - For each sink named in the <agent name>.sinks, there must
* be a runner namespace of configuration that configures the
* associated sink runner. For example:
* <agent name>.sinks.<sink name>.runner.type = polling.
* This namespace can also be used to configure other configuration of the sink
* runner as needed. For example:
* <agent name>.sinks.<sink name>.runner.polling.interval =
* 60
* - A fourth optional list <agent name>.sinkgroups
* may be added to each agent, consisting of unique space separated names
* for groups
* - Each sinkgroup must specify sinks, containing a list of all sinks
* belonging to it. These cannot be shared by multiple groups.
* Further, one can set a processor and behavioral parameters to determine
* how sink selection is made via <agent name>.sinkgroups.<group name>.processor.
* For further detail refer to individual processor
* documentation
* - Sinks not assigned to a group will be assigned to default single sink
* groups.
*
*
* Apart from the above required configuration values, each source, sink or
* channel can have its own set of arbitrary configuration as required by the
* implementation. Each of these configuration values is expressed by a fully
* namespace-qualified configuration key. For example, the configuration
* property called capacity for a channel called ch1 for the
* agent named host1 with value 1000 will be expressed as:
* host1.channels.ch1.capacity = 1000.
*
*
* Any information contained in the configuration file other than what pertains
* to the configured agents, sources, sinks and channels via the explicitly
* enumerated list of sources, sinks and channels per agent name is ignored by
* this provider. Moreover, if any of the required configuration values are not
* present in the configuration file for the configured entities, that entity
* and anything that depends upon it is considered invalid and consequently not
* configured. For example, if a channel is missing its type attribute,
* it is considered misconfigured. Also, any sources or sinks that depend upon
* this channel are also considered misconfigured and not initialized.
*
*
* Example configuration file:
*
*
* #
* # Flume Configuration
* # This file contains configuration for one Agent identified as host1.
* #
*
* host1.sources = avroSource thriftSource
* host1.channels = jdbcChannel
* host1.sinks = hdfsSink
*
* # avroSource configuration
* host1.sources.avroSource.type = org.apache.flume.source.AvroSource
* host1.sources.avroSource.runner.type = avro
* host1.sources.avroSource.runner.port = 11001
* host1.sources.avroSource.channels = jdbcChannel
* host1.sources.avroSource.selector.type = replicating
*
* # thriftSource configuration
* host1.sources.thriftSource.type = org.apache.flume.source.ThriftSource
* host1.sources.thriftSource.runner.type = thrift
* host1.sources.thriftSource.runner.port = 12001
* host1.sources.thriftSource.channels = jdbcChannel
*
* # jdbcChannel configuration
* host1.channels.jdbcChannel.type = jdbc
* host1.channels.jdbcChannel.jdbc.driver = com.mysql.jdbc.Driver
* host1.channels.jdbcChannel.jdbc.connect.url = http://localhost/flumedb
* host1.channels.jdbcChannel.jdbc.username = flume
* host1.channels.jdbcChannel.jdbc.password = flume
*
* # hdfsSink configuration
* host1.sinks.hdfsSink.type = hdfs
* host1.sinks.hdfsSink.hdfs.path = hdfs://localhost/
* host1.sinks.hdfsSink.batchsize = 1000
* host1.sinks.hdfsSink.runner.type = polling
* host1.sinks.hdfsSink.runner.polling.interval = 60
*
*
*
*
* @see java.util.Properties#load(java.io.Reader)
*/
public class PropertiesFileConfigurationProvider extends
AbstractConfigurationProvider {
private static final Logger LOGGER = LoggerFactory
.getLogger(PropertiesFileConfigurationProvider.class);
private static final String DEFAULT_PROPERTIES_IMPLEMENTATION = "java.util.Properties";
private final File file;
public PropertiesFileConfigurationProvider(String agentName, File file) {
super(agentName);
this.file = file;
}
@Override
public FlumeConfiguration getFlumeConfiguration() {
BufferedReader reader = null;
try {
reader = new BufferedReader(new FileReader(file));
String resolverClassName = System.getProperty("propertiesImplementation",
DEFAULT_PROPERTIES_IMPLEMENTATION);
Class extends Properties> propsclass = Class.forName(resolverClassName)
.asSubclass(Properties.class);
Properties properties = propsclass.getDeclaredConstructor().newInstance();
properties.load(reader);
return new FlumeConfiguration(toMap(properties));
} catch (IOException ex) {
LOGGER.error("Unable to load file:" + file
+ " (I/O failure) - Exception follows.", ex);
} catch (ClassNotFoundException | NoClassDefFoundError e) {
LOGGER.error("Configuration resolver class not found", e);
} catch (InstantiationException e) {
LOGGER.error("Instantiation exception", e);
} catch (IllegalAccessException e) {
LOGGER.error("Illegal access exception", e);
} catch (InvocationTargetException e) {
LOGGER.error("Invocation target exception", e);
} catch (NoSuchMethodException e) {
LOGGER.error("No such method exception", e);
} finally {
if (reader != null) {
try {
reader.close();
} catch (IOException ex) {
LOGGER.warn(
"Unable to close file reader for file: " + file, ex);
}
}
}
return new FlumeConfiguration(new HashMap());
}
}