All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.beam.runners.flink.FlinkPipelineExecutionEnvironment Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.beam.runners.flink;

import static com.google.common.base.Preconditions.checkNotNull;

import java.util.List;
import org.apache.beam.sdk.Pipeline;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.java.CollectionEnvironment;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.runtime.state.AbstractStateBackend;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Instantiates and manages the execution of a given job.
 *
 * <p>Depending on whether the job is a streaming or a batch one, this class creates the adequate
 * execution environment ({@link ExecutionEnvironment} or {@link StreamExecutionEnvironment}) and
 * the matching {@link FlinkPipelineTranslator} ({@link FlinkBatchPipelineTranslator} or
 * {@link FlinkStreamingPipelineTranslator}) to transform the Beam job into a Flink one, and
 * executes the (translated) job.
 *
 * <p>Typical usage is {@link #translate(FlinkRunner, Pipeline)} followed by
 * {@link #executePipeline()}.
 */
class FlinkPipelineExecutionEnvironment {

  private static final Logger LOG =
      LoggerFactory.getLogger(FlinkPipelineExecutionEnvironment.class);

  /**
   * Shape of a remote master address: any prefix, a colon, then a non-empty run of digits.
   * The port must be non-empty so that an address such as {@code "host:"} is reported as
   * unrecognized instead of failing while the port is being parsed.
   */
  private static final String REMOTE_MASTER_REGEX = ".*:\\d+";

  private final FlinkPipelineOptions options;

  /**
   * The Flink Batch execution environment. This is instantiated to either a
   * {@link org.apache.flink.api.java.CollectionEnvironment},
   * a {@link org.apache.flink.api.java.LocalEnvironment} or
   * a {@link org.apache.flink.api.java.RemoteEnvironment}, depending on the configuration
   * options. It is {@code null} until a batch pipeline has been translated.
   */
  private ExecutionEnvironment flinkBatchEnv;

  /**
   * The Flink Streaming execution environment. This is instantiated to either a
   * {@link org.apache.flink.streaming.api.environment.LocalStreamEnvironment} or
   * a {@link org.apache.flink.streaming.api.environment.RemoteStreamEnvironment}, depending
   * on the configuration options, and more specifically, the url of the master.
   * It is {@code null} until a streaming pipeline has been translated.
   */
  private StreamExecutionEnvironment flinkStreamEnv;

  /**
   * Creates a {@link FlinkPipelineExecutionEnvironment} with the user-specified parameters in the
   * provided {@link FlinkPipelineOptions}.
   *
   * @param options the user-defined pipeline options; must not be {@code null}.
   */
  FlinkPipelineExecutionEnvironment(FlinkPipelineOptions options) {
    this.options = checkNotNull(options);
  }

  /**
   * Depending on whether the job is a streaming or a batch one, this method creates
   * the necessary execution environment and pipeline translator, and translates
   * the {@link org.apache.beam.sdk.values.PCollection} program into
   * a {@link org.apache.flink.api.java.DataSet}
   * or {@link org.apache.flink.streaming.api.datastream.DataStream} one.
   *
   * <p>Any environment left over from a previous call is discarded first, so at most one of the
   * batch and streaming environments is set when this method returns.
   *
   * @param flinkRunner the runner handed to the streaming translator.
   * @param pipeline the pipeline to translate; its transforms are replaced in place with the
   *     Flink-specific overrides before translation.
   */
  public void translate(FlinkRunner flinkRunner, Pipeline pipeline) {
    this.flinkBatchEnv = null;
    this.flinkStreamEnv = null;

    pipeline.replaceAll(FlinkTransformOverrides.getDefaultOverrides(options.isStreaming()));

    // The optimizer starts from BATCH and reports the mode that should actually be used.
    PipelineTranslationOptimizer optimizer =
        new PipelineTranslationOptimizer(TranslationMode.BATCH, options);

    optimizer.translate(pipeline);
    TranslationMode translationMode = optimizer.getTranslationMode();

    FlinkPipelineTranslator translator;
    if (translationMode == TranslationMode.STREAMING) {
      this.flinkStreamEnv = createStreamExecutionEnvironment();
      translator = new FlinkStreamingPipelineTranslator(flinkRunner, flinkStreamEnv, options);
    } else {
      this.flinkBatchEnv = createBatchExecutionEnvironment();
      translator = new FlinkBatchPipelineTranslator(flinkBatchEnv, options);
    }

    translator.translate(pipeline);
  }

  /**
   * Launches the program execution on whichever environment {@link #translate} created, using
   * the job name from the pipeline options.
   *
   * @return the result reported by the Flink environment.
   * @throws IllegalStateException if no pipeline has been translated yet.
   * @throws Exception if the underlying Flink execution fails.
   */
  public JobExecutionResult executePipeline() throws Exception {
    final String jobName = options.getJobName();

    if (flinkBatchEnv != null) {
      return flinkBatchEnv.execute(jobName);
    } else if (flinkStreamEnv != null) {
      return flinkStreamEnv.execute(jobName);
    } else {
      throw new IllegalStateException("The Pipeline has not yet been translated.");
    }
  }

  /**
   * If the submitted job is a batch processing job, this method creates the adequate
   * Flink {@link org.apache.flink.api.java.ExecutionEnvironment} depending
   * on the user-specified options.
   *
   * <p>Recognized master values are {@code [local]}, {@code [collection]}, {@code [auto]} and
   * {@code host:port}; anything else is logged and treated like {@code [auto]}.
   *
   * <p>As a side effect, the parallelism of the created environment is written back into the
   * pipeline options.
   *
   * @return the configured batch execution environment.
   */
  private ExecutionEnvironment createBatchExecutionEnvironment() {

    LOG.info("Creating the required Batch Execution Environment.");

    final String masterUrl = options.getFlinkMaster();
    final ExecutionEnvironment batchEnv;

    // depending on the master, create the right environment.
    if (masterUrl.equals("[local]")) {
      batchEnv = ExecutionEnvironment.createLocalEnvironment();
    } else if (masterUrl.equals("[collection]")) {
      batchEnv = new CollectionEnvironment();
    } else if (masterUrl.equals("[auto]")) {
      batchEnv = ExecutionEnvironment.getExecutionEnvironment();
    } else if (masterUrl.matches(REMOTE_MASTER_REGEX)) {
      // Split at the LAST colon: the regex guarantees that only digits follow it, so the port
      // always parses as a number even when the host part itself contains colons.
      final int portSeparator = masterUrl.lastIndexOf(':');
      final List<String> stagingFiles = options.getFilesToStage();
      batchEnv = ExecutionEnvironment.createRemoteEnvironment(
          masterUrl.substring(0, portSeparator),
          Integer.parseInt(masterUrl.substring(portSeparator + 1)),
          stagingFiles.toArray(new String[stagingFiles.size()]));
    } else {
      LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
      batchEnv = ExecutionEnvironment.getExecutionEnvironment();
    }

    // set the correct parallelism; -1 keeps the environment's own value, and the
    // parallelism of a CollectionEnvironment is never overridden.
    if (options.getParallelism() != -1 && !(batchEnv instanceof CollectionEnvironment)) {
      batchEnv.setParallelism(options.getParallelism());
    }

    // set parallelism in the options (required by some execution code)
    options.setParallelism(batchEnv.getParallelism());

    if (options.getObjectReuse()) {
      batchEnv.getConfig().enableObjectReuse();
    } else {
      batchEnv.getConfig().disableObjectReuse();
    }

    return batchEnv;
  }

  /**
   * If the submitted job is a stream processing job, this method creates the adequate
   * Flink {@link org.apache.flink.streaming.api.environment.StreamExecutionEnvironment} depending
   * on the user-specified options.
   *
   * <p>Recognized master values are {@code [local]}, {@code [auto]} and {@code host:port};
   * anything else is logged and treated like {@code [auto]}.
   *
   * <p>As a side effect, the parallelism of the created environment is written back into the
   * pipeline options.
   *
   * @return the configured streaming execution environment.
   * @throws IllegalArgumentException if the checkpointing interval is neither -1 nor positive.
   */
  private StreamExecutionEnvironment createStreamExecutionEnvironment() {

    LOG.info("Creating the required Streaming Environment.");

    final String masterUrl = options.getFlinkMaster();
    final StreamExecutionEnvironment streamEnv;

    // depending on the master, create the right environment.
    if (masterUrl.equals("[local]")) {
      streamEnv = StreamExecutionEnvironment.createLocalEnvironment();
    } else if (masterUrl.equals("[auto]")) {
      streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    } else if (masterUrl.matches(REMOTE_MASTER_REGEX)) {
      // Split at the LAST colon: the regex guarantees that only digits follow it, so the port
      // always parses as a number even when the host part itself contains colons.
      final int portSeparator = masterUrl.lastIndexOf(':');
      final List<String> stagingFiles = options.getFilesToStage();
      streamEnv = StreamExecutionEnvironment.createRemoteEnvironment(
          masterUrl.substring(0, portSeparator),
          Integer.parseInt(masterUrl.substring(portSeparator + 1)),
          stagingFiles.toArray(new String[stagingFiles.size()]));
    } else {
      LOG.warn("Unrecognized Flink Master URL {}. Defaulting to [auto].", masterUrl);
      streamEnv = StreamExecutionEnvironment.getExecutionEnvironment();
    }

    // set the correct parallelism; -1 keeps the environment's own value.
    if (options.getParallelism() != -1) {
      streamEnv.setParallelism(options.getParallelism());
    }

    // set parallelism in the options (required by some execution code)
    options.setParallelism(streamEnv.getParallelism());

    if (options.getObjectReuse()) {
      streamEnv.getConfig().enableObjectReuse();
    } else {
      streamEnv.getConfig().disableObjectReuse();
    }

    // default to event time
    streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

    // for the following 2 parameters, a value of -1 means that Flink will use
    // the default values as specified in the configuration.
    final int numRetries = options.getNumberOfExecutionRetries();
    if (numRetries != -1) {
      streamEnv.setNumberOfExecutionRetries(numRetries);
    }
    final long retryDelay = options.getExecutionRetryDelay();
    if (retryDelay != -1) {
      streamEnv.getConfig().setExecutionRetryDelay(retryDelay);
    }

    // A value of -1 corresponds to disabled checkpointing (see CheckpointConfig in Flink).
    // If the value is not -1, then the validity checks are applied.
    // By default, checkpointing is disabled.
    final long checkpointInterval = options.getCheckpointingInterval();
    if (checkpointInterval != -1) {
      if (checkpointInterval < 1) {
        throw new IllegalArgumentException("The checkpoint interval must be positive");
      }
      streamEnv.enableCheckpointing(checkpointInterval, options.getCheckpointingMode());
      streamEnv.getCheckpointConfig().setCheckpointTimeout(
          options.getCheckpointTimeoutMillis());
      final boolean externalizedCheckpoint = options.isExternalizedCheckpointsEnabled();
      final boolean retainOnCancellation =
          options.getRetainExternalizedCheckpointsOnCancellation();
      // Externalized checkpoints are only configured when checkpointing itself is enabled.
      if (externalizedCheckpoint) {
        streamEnv.getCheckpointConfig().enableExternalizedCheckpoints(
            retainOnCancellation ? ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION
                : ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION);
      }
    }

    // State backend: only overridden when one is supplied in the options.
    final AbstractStateBackend stateBackend = options.getStateBackend();
    if (stateBackend != null) {
      streamEnv.setStateBackend(stateBackend);
    }

    return streamEnv;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy