All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.api.java.RemoteEnvironment Maven / Gradle / Ivy

There is a newer version: 1.20.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java;

import org.apache.flink.annotation.Public;
import org.apache.flink.configuration.ConfigUtils;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.DeploymentOptions;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.configuration.PipelineOptions;

import java.net.URL;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * An {@link ExecutionEnvironment} that sends programs to a cluster for execution. The environment
 * needs to be created with the address and port of the JobManager of the Flink cluster that should
 * execute the programs.
 *
 * 

Many programs executed via the remote environment depend on additional classes. Such classes * may be the classes of functions (transformation, aggregation, ...) or libraries. Those classes * must be attached to the remote environment as JAR files, to allow the environment to ship the * classes into the cluster for the distributed execution. */ @Public public class RemoteEnvironment extends ExecutionEnvironment { /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the given * host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be * executed. * @param port The port of the master (JobManager), where the program should be executed. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the * program uses user-defined functions, user-defined input formats, or any libraries, those * must be provided in the JAR files. */ public RemoteEnvironment(String host, int port, String... jarFiles) { this(host, port, new Configuration(), jarFiles, null); } /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the given * host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be * executed. * @param port The port of the master (JobManager), where the program should be executed. * @param clientConfig The configuration used by the client that connects to the cluster. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the * program uses user-defined functions, user-defined input formats, or any libraries, those * must be provided in the JAR files. */ public RemoteEnvironment(String host, int port, Configuration clientConfig, String[] jarFiles) { this(host, port, clientConfig, jarFiles, null); } /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the given * host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be * executed. * @param port The port of the master (JobManager), where the program should be executed. * @param clientConfig The configuration used by the client that connects to the cluster. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the * program uses user-defined functions, user-defined input formats, or any libraries, those * must be provided in the JAR files. * @param globalClasspaths The paths of directories and JAR files that are added to each user * code classloader on all nodes in the cluster. Note that the paths must specify a protocol * (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share). The * protocol must be supported by the {@link java.net.URLClassLoader}. */ public RemoteEnvironment( String host, int port, Configuration clientConfig, String[] jarFiles, URL[] globalClasspaths) { super( validateAndGetEffectiveConfiguration( clientConfig, host, port, jarFiles, globalClasspaths)); } private static Configuration validateAndGetEffectiveConfiguration( final Configuration configuration, final String host, final int port, final String[] jarFiles, final URL[] globalClasspaths) { RemoteEnvironmentConfigUtils.validate(host, port); return getEffectiveConfiguration( getClientConfiguration(configuration), host, port, jarFiles, getClasspathURLs(globalClasspaths)); } private static Configuration getClientConfiguration(final Configuration configuration) { return configuration == null ? new Configuration() : configuration; } private static List getClasspathURLs(final URL[] classpaths) { return classpaths == null ? 
Collections.emptyList() : Arrays.asList(classpaths); } private static Configuration getEffectiveConfiguration( final Configuration baseConfiguration, final String host, final int port, final String[] jars, final List classpaths) { final Configuration effectiveConfiguration = new Configuration(baseConfiguration); RemoteEnvironmentConfigUtils.setJobManagerAddressToConfig( host, port, effectiveConfiguration); RemoteEnvironmentConfigUtils.setJarURLsToConfig(jars, effectiveConfiguration); ConfigUtils.encodeCollectionToConfig( effectiveConfiguration, PipelineOptions.CLASSPATHS, classpaths, URL::toString); // these should be set in the end to overwrite any values from the client config provided in // the constructor. effectiveConfiguration.setString(DeploymentOptions.TARGET, "remote"); effectiveConfiguration.setBoolean(DeploymentOptions.ATTACHED, true); return effectiveConfiguration; } @Override public String toString() { final String host = getConfiguration().getString(JobManagerOptions.ADDRESS); final int port = getConfiguration().getInteger(JobManagerOptions.PORT); final String parallelism = (getParallelism() == -1 ? "default" : "" + getParallelism()); return "Remote Environment (" + host + ":" + port + " - parallelism = " + parallelism + ")."; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy