All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flume.source.ExecSource Maven / Gradle / Ivy

There is a newer version: 1.11.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.flume.source;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;

import org.apache.flume.Channel;
import org.apache.flume.Context;
import org.apache.flume.Event;
import org.apache.flume.EventDrivenSource;
import org.apache.flume.Source;
import org.apache.flume.SystemClock;
import org.apache.flume.channel.ChannelProcessor;
import org.apache.flume.conf.Configurable;
import org.apache.flume.event.EventBuilder;
import org.apache.flume.instrumentation.SourceCounter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ThreadFactoryBuilder;

import java.nio.charset.Charset;

/**
 * 

* A {@link Source} implementation that executes a Unix process and turns each * line of text into an event. *

*

* This source runs a given Unix command on start up and expects that process to * continuously produce data on standard out (stderr ignored by default). Unless * told to restart, if the process exits for any reason, the source also exits and * will produce no further data. This means configurations such as cat [named pipe] * or tail -F [file] are going to produce the desired results where as * date will probably not - the former two commands produce streams of * data where as the latter produces a single event and exits. *

*

* The ExecSource is meant for situations where one must integrate with * existing systems without modifying code. It is a compatibility gateway built * to allow simple, stop-gap integration and doesn't necessarily offer all of * the benefits or guarantees of native integration with Flume. If one has the * option of using the AvroSource, for instance, that would be greatly * preferred to this source as it (and similarly implemented sources) can * maintain the transactional guarantees that exec can not. *

*

* Why doesn't ExecSource offer transactional guarantees? *

*

* The problem with ExecSource and other asynchronous sources is that * the source can not guarantee that if there is a failure to put the event into * the {@link Channel} the client knows about it. As a for instance, one of the * most commonly requested features is the tail -F [file]-like use case * where an application writes to a log file on disk and Flume tails the file, * sending each line as an event. While this is possible, there's an obvious * problem; what happens if the channel fills up and Flume can't send an event? * Flume has no way of indicating to the application writing the log file that * it needs to retain the log or that the event hasn't been sent, for some * reason. If this doesn't make sense, you need only know this: Your * application can never guarantee data has been received when using a * unidirectional asynchronous interface such as ExecSource! As an extension * of this warning - and to be completely clear - there is absolutely zero * guarantee of event delivery when using this source. You have been warned. *

*

* Configuration options *

* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
ParameterDescriptionUnit / TypeDefault
commandThe command to executeStringnone (required)
restartWhether to restart the command when it exitsBooleanfalse
restartThrottleHow long in milliseconds to wait before restarting the commandLong10000
logStderrWhether to log or discard the standard error stream of the commandBooleanfalse
batchSizeThe number of events to commit to channel at a time.integer20
batchTimeoutAmount of time (in milliseconds) to wait, if the buffer size was not reached, * before data is pushed downstream.long3000
*

* Metrics *

*

* TODO *

*/ public class ExecSource extends AbstractSource implements EventDrivenSource, Configurable { private static final Logger logger = LoggerFactory.getLogger(ExecSource.class); private String shell; private String command; private SourceCounter sourceCounter; private ExecutorService executor; private Future runnerFuture; private long restartThrottle; private boolean restart; private boolean logStderr; private Integer bufferCount; private long batchTimeout; private ExecRunnable runner; private Charset charset; @Override public void start() { logger.info("Exec source starting with command: {}", command); // Start the counter before starting any threads that may access it. sourceCounter.start(); executor = Executors.newSingleThreadExecutor(); runner = new ExecRunnable(shell, command, getChannelProcessor(), sourceCounter, restart, restartThrottle, logStderr, bufferCount, batchTimeout, charset); // Start the runner thread. runnerFuture = executor.submit(runner); // Mark the Source as RUNNING. super.start(); logger.debug("Exec source started"); } @Override public void stop() { logger.info("Stopping exec source with command: {}", command); if (runner != null) { runner.setRestart(false); runner.kill(); } if (runnerFuture != null) { logger.debug("Stopping exec runner"); runnerFuture.cancel(true); logger.debug("Exec runner stopped"); } executor.shutdown(); while (!executor.isTerminated()) { logger.debug("Waiting for exec executor service to stop"); try { executor.awaitTermination(500, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { logger.debug("Interrupted while waiting for exec executor service " + "to stop. Just exiting."); Thread.currentThread().interrupt(); } } sourceCounter.stop(); super.stop(); logger.debug("Exec source with command:{} stopped. Metrics:{}", command, sourceCounter); } @Override public void configure(Context context) { command = context.getString("command"); Preconditions.checkState(command != null, "The parameter command must be specified"); restartThrottle = context.getLong(ExecSourceConfigurationConstants.CONFIG_RESTART_THROTTLE, ExecSourceConfigurationConstants.DEFAULT_RESTART_THROTTLE); restart = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_RESTART, ExecSourceConfigurationConstants.DEFAULT_RESTART); logStderr = context.getBoolean(ExecSourceConfigurationConstants.CONFIG_LOG_STDERR, ExecSourceConfigurationConstants.DEFAULT_LOG_STDERR); bufferCount = context.getInteger(ExecSourceConfigurationConstants.CONFIG_BATCH_SIZE, ExecSourceConfigurationConstants.DEFAULT_BATCH_SIZE); batchTimeout = context.getLong(ExecSourceConfigurationConstants.CONFIG_BATCH_TIME_OUT, ExecSourceConfigurationConstants.DEFAULT_BATCH_TIME_OUT); charset = Charset.forName(context.getString(ExecSourceConfigurationConstants.CHARSET, ExecSourceConfigurationConstants.DEFAULT_CHARSET)); shell = context.getString(ExecSourceConfigurationConstants.CONFIG_SHELL, null); if (sourceCounter == null) { sourceCounter = new SourceCounter(getName()); } } private static class ExecRunnable implements Runnable { public ExecRunnable(String shell, String command, ChannelProcessor channelProcessor, SourceCounter sourceCounter, boolean restart, long restartThrottle, boolean logStderr, int bufferCount, long batchTimeout, Charset charset) { this.command = command; this.channelProcessor = channelProcessor; this.sourceCounter = sourceCounter; this.restartThrottle = restartThrottle; this.bufferCount = bufferCount; this.batchTimeout = batchTimeout; this.restart = restart; this.logStderr = logStderr; this.charset = charset; this.shell = shell; } private final String shell; private final String command; private final ChannelProcessor channelProcessor; private final SourceCounter sourceCounter; private volatile boolean restart; private final long restartThrottle; private final int bufferCount; private long batchTimeout; private final boolean logStderr; private final Charset charset; private Process process = null; private SystemClock systemClock = new SystemClock(); private Long lastPushToChannel = systemClock.currentTimeMillis(); ScheduledExecutorService timedFlushService; ScheduledFuture future; @Override public void run() { do { String exitCode = "unknown"; BufferedReader reader = null; String line = null; final List eventList = new ArrayList(); timedFlushService = Executors.newSingleThreadScheduledExecutor( new ThreadFactoryBuilder().setNameFormat( "timedFlushExecService" + Thread.currentThread().getId() + "-%d").build()); try { if (shell != null) { String[] commandArgs = formulateShellCommand(shell, command); process = Runtime.getRuntime().exec(commandArgs); } else { String[] commandArgs = command.split("\\s+"); process = new ProcessBuilder(commandArgs).start(); } reader = new BufferedReader( new InputStreamReader(process.getInputStream(), charset)); // StderrLogger dies as soon as the input stream is invalid StderrReader stderrReader = new StderrReader(new BufferedReader( new InputStreamReader(process.getErrorStream(), charset)), logStderr); stderrReader.setName("StderrReader-[" + command + "]"); stderrReader.setDaemon(true); stderrReader.start(); future = timedFlushService.scheduleWithFixedDelay(new Runnable() { @Override public void run() { try { synchronized (eventList) { if (!eventList.isEmpty() && timeout()) { flushEventBatch(eventList); } } } catch (Exception e) { logger.error("Exception occurred when processing event batch", e); if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } } } }, batchTimeout, batchTimeout, TimeUnit.MILLISECONDS); while ((line = reader.readLine()) != null) { sourceCounter.incrementEventReceivedCount(); synchronized (eventList) { eventList.add(EventBuilder.withBody(line.getBytes(charset))); if (eventList.size() >= bufferCount || timeout()) { flushEventBatch(eventList); } } } synchronized (eventList) { if (!eventList.isEmpty()) { flushEventBatch(eventList); } } } catch (Exception e) { logger.error("Failed while running command: " + command, e); if (e instanceof InterruptedException) { Thread.currentThread().interrupt(); } } finally { if (reader != null) { try { reader.close(); } catch (IOException ex) { logger.error("Failed to close reader for exec source", ex); } } exitCode = String.valueOf(kill()); } if (restart) { logger.info("Restarting in {}ms, exit code {}", restartThrottle, exitCode); try { Thread.sleep(restartThrottle); } catch (InterruptedException e) { Thread.currentThread().interrupt(); } } else { logger.info("Command [" + command + "] exited with " + exitCode); } } while (restart); } private void flushEventBatch(List eventList) { channelProcessor.processEventBatch(eventList); sourceCounter.addToEventAcceptedCount(eventList.size()); eventList.clear(); lastPushToChannel = systemClock.currentTimeMillis(); } private boolean timeout() { return (systemClock.currentTimeMillis() - lastPushToChannel) >= batchTimeout; } private static String[] formulateShellCommand(String shell, String command) { String[] shellArgs = shell.split("\\s+"); String[] result = new String[shellArgs.length + 1]; System.arraycopy(shellArgs, 0, result, 0, shellArgs.length); result[shellArgs.length] = command; return result; } public int kill() { if (process != null) { synchronized (process) { process.destroy(); try { int exitValue = process.waitFor(); // Stop the Thread that flushes periodically if (future != null) { future.cancel(true); } if (timedFlushService != null) { timedFlushService.shutdown(); while (!timedFlushService.isTerminated()) { try { timedFlushService.awaitTermination(500, TimeUnit.MILLISECONDS); } catch (InterruptedException e) { logger.debug("Interrupted while waiting for exec executor service " + "to stop. Just exiting."); Thread.currentThread().interrupt(); } } } return exitValue; } catch (InterruptedException ex) { Thread.currentThread().interrupt(); } } return Integer.MIN_VALUE; } return Integer.MIN_VALUE / 2; } public void setRestart(boolean restart) { this.restart = restart; } } private static class StderrReader extends Thread { private BufferedReader input; private boolean logStderr; protected StderrReader(BufferedReader input, boolean logStderr) { this.input = input; this.logStderr = logStderr; } @Override public void run() { try { int i = 0; String line = null; while ((line = input.readLine()) != null) { if (logStderr) { // There is no need to read 'line' with a charset // as we do not to propagate it. // It is in UTF-16 and would be printed in UTF-8 format. logger.info("StderrLogger[{}] = '{}'", ++i, line); } } } catch (IOException e) { logger.info("StderrLogger exiting", e); } finally { try { if (input != null) { input.close(); } } catch (IOException ex) { logger.error("Failed to close stderr reader for exec source", ex); } } } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy