All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.streaming.api.environment.RemoteStreamEnvironment Maven / Gradle / Ivy

There is a newer version: 1.14.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.streaming.api.environment;

import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.client.program.ClusterClient;
import org.apache.flink.client.program.JobWithJars;
import org.apache.flink.client.program.ProgramInvocationException;
import org.apache.flink.client.program.StandaloneClusterClient;
import org.apache.flink.client.program.rest.RestClusterClient;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.CoreOptions;
import org.apache.flink.configuration.JobManagerOptions;
import org.apache.flink.streaming.api.graph.StreamGraph;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * A {@link StreamExecutionEnvironment} for executing on a cluster.
 *
 * <p>On {@link #execute(String)} the stream graph is handed to a {@link ClusterClient}
 * that is configured with the host and port given at construction time, together with
 * the JAR files and global classpaths that were registered with this environment.
 */
@Public
public class RemoteStreamEnvironment extends StreamExecutionEnvironment {

	private static final Logger LOG = LoggerFactory.getLogger(RemoteStreamEnvironment.class);

	/** The hostname of the JobManager. */
	private final String host;

	/** The port of the JobManager main actor system. */
	private final int port;

	/** The configuration used to parametrize the client that connects to the remote cluster. */
	private final Configuration clientConfiguration;

	/** The jar files that need to be attached to each job. */
	private final List<URL> jarFiles;

	/** The classpaths that need to be attached to each job. */
	private final List<URL> globalClasspaths;

	/**
	 * Creates a new RemoteStreamEnvironment that points to the master
	 * (JobManager) described by the given host name and port.
	 *
	 * @param host
	 *            The host name or address of the master (JobManager), where the
	 *            program should be executed.
	 * @param port
	 *            The port of the master (JobManager), where the program should
	 *            be executed.
	 * @param jarFiles
	 *            The JAR files with code that needs to be shipped to the
	 *            cluster. If the program uses user-defined functions,
	 *            user-defined input formats, or any libraries, those must be
	 *            provided in the JAR files.
	 */
	public RemoteStreamEnvironment(String host, int port, String... jarFiles) {
		this(host, port, null, jarFiles);
	}

	/**
	 * Creates a new RemoteStreamEnvironment that points to the master
	 * (JobManager) described by the given host name and port.
	 *
	 * @param host
	 *            The host name or address of the master (JobManager), where the
	 *            program should be executed.
	 * @param port
	 *            The port of the master (JobManager), where the program should
	 *            be executed.
	 * @param clientConfiguration
	 *            The configuration used to parametrize the client that connects to the
	 *            remote cluster.
	 * @param jarFiles
	 *            The JAR files with code that needs to be shipped to the
	 *            cluster. If the program uses user-defined functions,
	 *            user-defined input formats, or any libraries, those must be
	 *            provided in the JAR files.
	 */
	public RemoteStreamEnvironment(String host, int port, Configuration clientConfiguration, String... jarFiles) {
		this(host, port, clientConfiguration, jarFiles, null);
	}

	/**
	 * Creates a new RemoteStreamEnvironment that points to the master
	 * (JobManager) described by the given host name and port.
	 *
	 * @param host
	 *            The host name or address of the master (JobManager), where the
	 *            program should be executed.
	 * @param port
	 *            The port of the master (JobManager), where the program should
	 *            be executed. Must be in the range 1 to 65535 (inclusive).
	 * @param clientConfiguration
	 *            The configuration used to parametrize the client that connects to the
	 *            remote cluster. If {@code null}, an empty configuration is used.
	 * @param jarFiles
	 *            The JAR files with code that needs to be shipped to the
	 *            cluster. If the program uses user-defined functions,
	 *            user-defined input formats, or any libraries, those must be
	 *            provided in the JAR files. {@code null} is treated as "no JAR files".
	 * @param globalClasspaths
	 *            The paths of directories and JAR files that are added to each user code
	 *            classloader on all nodes in the cluster. Note that the paths must specify a
	 *            protocol (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share).
	 *            The protocol must be supported by the {@link java.net.URLClassLoader}.
	 *            {@code null} is treated as "no additional classpaths".
	 * @throws InvalidProgramException
	 *            If explicit environments are not allowed in the current context.
	 * @throws NullPointerException
	 *            If {@code host} is {@code null}.
	 * @throws IllegalArgumentException
	 *            If {@code port} is out of range, or a JAR file path cannot be turned into a URL.
	 * @throws RuntimeException
	 *            If checking one of the JAR files fails with an I/O error.
	 */
	public RemoteStreamEnvironment(String host, int port, Configuration clientConfiguration, String[] jarFiles, URL[] globalClasspaths) {
		if (!ExecutionEnvironment.areExplicitEnvironmentsAllowed()) {
			throw new InvalidProgramException(
					"The RemoteEnvironment cannot be used when submitting a program through a client, " +
							"or running in a TestEnvironment context.");
		}

		if (host == null) {
			throw new NullPointerException("Host must not be null.");
		}
		// 0xffff (65535) is itself a valid port, so only values above it are rejected.
		if (port < 1 || port > 0xffff) {
			throw new IllegalArgumentException("Port out of range");
		}

		this.host = host;
		this.port = port;
		this.clientConfiguration = clientConfiguration == null ? new Configuration() : clientConfiguration;

		// A null jar array is handled like a null classpath array: as "nothing to attach".
		final String[] jarPaths = jarFiles == null ? new String[0] : jarFiles;
		this.jarFiles = new ArrayList<>(jarPaths.length);
		for (String jarFile : jarPaths) {
			try {
				URL jarFileUrl = new File(jarFile).getAbsoluteFile().toURI().toURL();
				// Validate before storing; a failure aborts construction either way.
				JobWithJars.checkJarFile(jarFileUrl);
				this.jarFiles.add(jarFileUrl);
			} catch (MalformedURLException e) {
				throw new IllegalArgumentException("JAR file path is invalid '" + jarFile + "'", e);
			} catch (IOException e) {
				throw new RuntimeException("Problem with jar file " + jarFile, e);
			}
		}

		if (globalClasspaths == null) {
			this.globalClasspaths = Collections.emptyList();
		}
		else {
			// Copy instead of wrapping: Arrays.asList is a live view of the caller's array,
			// so later writes to that array would otherwise leak into this environment.
			this.globalClasspaths = new ArrayList<>(Arrays.asList(globalClasspaths));
		}
	}

	/**
	 * Builds the stream graph for the registered transformations, names it, clears the
	 * registered transformations and submits the graph for remote execution together with
	 * the JAR files of this environment.
	 *
	 * @param jobName
	 *            The name to set on the stream graph.
	 * @return The result of the job execution.
	 * @throws ProgramInvocationException
	 *            If the remote execution fails.
	 */
	@Override
	public JobExecutionResult execute(String jobName) throws ProgramInvocationException {
		StreamGraph streamGraph = getStreamGraph();
		streamGraph.setJobName(jobName);
		transformations.clear();
		return executeRemotely(streamGraph, jarFiles);
	}

	/**
	 * Executes the remote job.
	 *
	 * @param streamGraph
	 *            Stream Graph to execute
	 * @param jarFiles
	 * 			  List of jar file URLs to ship to the cluster
	 * @return The result of the job execution, containing elapsed time and accumulators.
	 * @throws ProgramInvocationException
	 *            If the cluster client cannot be created or the program execution fails.
	 */
	protected JobExecutionResult executeRemotely(StreamGraph streamGraph, List<URL> jarFiles) throws ProgramInvocationException {
		// Parameterized logging already skips formatting when INFO is disabled.
		LOG.info("Running remotely at {}:{}", host, port);

		ClassLoader usercodeClassLoader = JobWithJars.buildUserCodeClassLoader(jarFiles, globalClasspaths,
			getClass().getClassLoader());

		// Work on a copy so that host/port are not written into the user-supplied configuration.
		Configuration configuration = new Configuration();
		configuration.addAll(this.clientConfiguration);

		configuration.setString(JobManagerOptions.ADDRESS, host);
		configuration.setInteger(JobManagerOptions.PORT, port);

		final ClusterClient<?> client;
		try {
			if (CoreOptions.LEGACY_MODE.equals(configuration.getString(CoreOptions.MODE))) {
				client = new StandaloneClusterClient(configuration);
			} else {
				client = new RestClusterClient<>(configuration, "RemoteStreamEnvironment");
			}
		}
		catch (Exception e) {
			throw new ProgramInvocationException("Cannot establish connection to JobManager: " + e.getMessage(), e);
		}

		client.setPrintStatusDuringExecution(getConfig().isSysoutLoggingEnabled());

		try {
			return client.run(streamGraph, jarFiles, globalClasspaths, usercodeClassLoader).getJobExecutionResult();
		}
		catch (ProgramInvocationException e) {
			// Already the declared exception type; do not wrap it a second time.
			throw e;
		}
		catch (Exception e) {
			String term = e.getMessage() == null ? "." : (": " + e.getMessage());
			throw new ProgramInvocationException("The program execution failed" + term, e);
		}
		finally {
			// Best effort: a failing shutdown must not mask the result or the original exception.
			try {
				client.shutdown();
			} catch (Exception e) {
				LOG.warn("Could not properly shut down the cluster client.", e);
			}
		}
	}

	@Override
	public String toString() {
		return "Remote Environment (" + this.host + ":" + this.port + " - parallelism = "
				+ (getParallelism() == -1 ? "default" : getParallelism()) + ")";
	}

	/**
	 * Gets the hostname of the master (JobManager), where the
	 * program will be executed.
	 *
	 * @return The hostname of the master
	 */
	public String getHost() {
		return host;
	}

	/**
	 * Gets the port of the master (JobManager), where the
	 * program will be executed.
	 *
	 * @return The port of the master
	 */
	public int getPort() {
		return port;
	}

	/**
	 * Gets the configuration used to parametrize the client that connects to the
	 * remote cluster.
	 *
	 * @return The client configuration; never {@code null}. This is the instance held by
	 *         the environment, not a copy.
	 */
	public Configuration getClientConfiguration() {
		return clientConfiguration;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy