All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.streaming.api.environment.RemoteStreamEnvironment Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.environment;

import org.apache.flink.annotation.Public;
import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.configuration.ConfigUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.DeploymentOptions;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.PipelineOptions;
import org.apache.flink.core.execution.DefaultExecutorServiceLoader;
import org.apache.flink.core.execution.PipelineExecutorServiceLoader;
import org.apache.flink.runtime.jobgraph.SavepointRestoreSettings;
import org.apache.flink.streaming.api.graph.StreamGraph;
import org.apache.flink.streaming.util.RemoteEnvironmentConfigUtils;

import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/** A {@link StreamExecutionEnvironment} for executing on a cluster. */
@Public
public class RemoteStreamEnvironment extends StreamExecutionEnvironment {

    /**
     * Creates a new RemoteStreamEnvironment that points to the master (JobManager) described by the
     * given host name and port.
     *
     * @param host The host name or address of the master (JobManager), where the program should be
     *     executed.
     * @param port The port of the master (JobManager), where the program should be executed.
     * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the
     *     program uses user-defined functions, user-defined input formats, or any libraries, those
     *     must be provided in the JAR files.
     */
    public RemoteStreamEnvironment(String host, int port, String... jarFiles) {
        this(host, port, null, jarFiles);
    }

    /**
     * Creates a new RemoteStreamEnvironment that points to the master (JobManager) described by the
     * given host name and port.
     *
     * @param host The host name or address of the master (JobManager), where the program should be
     *     executed.
     * @param port The port of the master (JobManager), where the program should be executed.
     * @param clientConfiguration The configuration used to parametrize the client that connects to
     *     the remote cluster.
     * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the
     *     program uses user-defined functions, user-defined input formats, or any libraries, those
     *     must be provided in the JAR files.
     */
    public RemoteStreamEnvironment(
            String host, int port, Configuration clientConfiguration, String... jarFiles) {
        this(host, port, clientConfiguration, jarFiles, null);
    }

    /**
     * Creates a new RemoteStreamEnvironment that points to the master (JobManager) described by the
     * given host name and port.
     *
     * @param host The host name or address of the master (JobManager), where the program should be
     *     executed.
     * @param port The port of the master (JobManager), where the program should be executed.
     * @param clientConfiguration The configuration used to parametrize the client that connects to
     *     the remote cluster.
     * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the
     *     program uses user-defined functions, user-defined input formats, or any libraries, those
     *     must be provided in the JAR files.
     * @param globalClasspaths The paths of directories and JAR files that are added to each user
     *     code classloader on all nodes in the cluster. Note that the paths must specify a protocol
     *     (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share). The
     *     protocol must be supported by the {@link java.net.URLClassLoader}.
     */
    public RemoteStreamEnvironment(
            String host,
            int port,
            Configuration clientConfiguration,
            String[] jarFiles,
            URL[] globalClasspaths) {
        this(host, port, clientConfiguration, jarFiles, globalClasspaths, null);
    }

    /**
     * Creates a new RemoteStreamEnvironment that points to the master (JobManager) described by the
     * given host name and port.
     *
     * @param host The host name or address of the master (JobManager), where the program should be
     *     executed.
     * @param port The port of the master (JobManager), where the program should be executed.
     * @param clientConfiguration The configuration used to parametrize the client that connects to
     *     the remote cluster.
     * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the
     *     program uses user-defined functions, user-defined input formats, or any libraries, those
     *     must be provided in the JAR files.
     * @param globalClasspaths The paths of directories and JAR files that are added to each user
     *     code classloader on all nodes in the cluster. Note that the paths must specify a protocol
     *     (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share). The
     *     protocol must be supported by the {@link java.net.URLClassLoader}.
     * @param savepointRestoreSettings Optional savepoint restore settings for job execution.
     */
    @PublicEvolving
    public RemoteStreamEnvironment(
            String host,
            int port,
            Configuration clientConfiguration,
            String[] jarFiles,
            URL[] globalClasspaths,
            SavepointRestoreSettings savepointRestoreSettings) {
        this(
                new DefaultExecutorServiceLoader(),
                host,
                port,
                clientConfiguration,
                jarFiles,
                globalClasspaths,
                savepointRestoreSettings);
    }

    @PublicEvolving
    public RemoteStreamEnvironment(
            final PipelineExecutorServiceLoader executorServiceLoader,
            final String host,
            final int port,
            final Configuration clientConfiguration,
            final String[] jarFiles,
            final URL[] globalClasspaths,
            final SavepointRestoreSettings savepointRestoreSettings) {
        super(
                executorServiceLoader,
                validateAndGetEffectiveConfiguration(
                        clientConfiguration,
                        host,
                        port,
                        jarFiles,
                        globalClasspaths,
                        savepointRestoreSettings),
                null);
    }

    private static Configuration getClientConfiguration(final Configuration configuration) {
        return configuration == null ? new Configuration() : configuration;
    }

    private static List getClasspathURLs(final URL[] classpaths) {
        return classpaths == null ? Collections.emptyList() : Arrays.asList(classpaths);
    }

    private static Configuration validateAndGetEffectiveConfiguration(
            final Configuration configuration,
            final String host,
            final int port,
            final String[] jarFiles,
            final URL[] classpaths,
            final SavepointRestoreSettings savepointRestoreSettings) {
        RemoteEnvironmentConfigUtils.validate(host, port);
        return getEffectiveConfiguration(
                getClientConfiguration(configuration),
                host,
                port,
                jarFiles,
                getClasspathURLs(classpaths),
                savepointRestoreSettings);
    }

    private static Configuration getEffectiveConfiguration(
            final Configuration baseConfiguration,
            final String host,
            final int port,
            final String[] jars,
            final List classpaths,
            final SavepointRestoreSettings savepointRestoreSettings) {

        final Configuration effectiveConfiguration = new Configuration(baseConfiguration);

        RemoteEnvironmentConfigUtils.setJobManagerAddressToConfig(
                host, port, effectiveConfiguration);
        RemoteEnvironmentConfigUtils.setJarURLsToConfig(jars, effectiveConfiguration);
        ConfigUtils.encodeCollectionToConfig(
                effectiveConfiguration, PipelineOptions.CLASSPATHS, classpaths, URL::toString);

        if (savepointRestoreSettings != null) {
            SavepointRestoreSettings.toConfiguration(
                    savepointRestoreSettings, effectiveConfiguration);
        } else {
            SavepointRestoreSettings.toConfiguration(
                    SavepointRestoreSettings.none(), effectiveConfiguration);
        }

        // these should be set in the end to overwrite any values from the client config provided in
        // the constructor.
        effectiveConfiguration.set(DeploymentOptions.TARGET, "remote");
        effectiveConfiguration.set(DeploymentOptions.ATTACHED, true);

        return effectiveConfiguration;
    }

    @Override
    public JobExecutionResult execute(StreamGraph streamGraph) throws Exception {
        return super.execute(streamGraph);
    }

    @Override
    public String toString() {
        final String host = configuration.get(JobManagerOptions.ADDRESS);
        final int port = configuration.get(JobManagerOptions.PORT);
        final String parallelism = (getParallelism() == -1 ? "default" : "" + getParallelism());

        return "Remote Environment ("
                + host
                + ":"
                + port
                + " - parallelism = "
                + parallelism
                + ").";
    }

    /**
     * Gets the hostname of the master (JobManager), where the program will be executed.
     *
     * @return The hostname of the master
     */
    public String getHost() {
        return configuration.get(JobManagerOptions.ADDRESS);
    }

    /**
     * Gets the port of the master (JobManager), where the program will be executed.
     *
     * @return The port of the master
     */
    public int getPort() {
        return configuration.get(JobManagerOptions.PORT);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy