All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.api.java.RemoteEnvironment Maven / Gradle / Ivy

There is a newer version: 1.20.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.api.java;

import org.apache.flink.annotation.PublicEvolving;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.JobExecutionResult;
import org.apache.flink.api.common.JobID;
import org.apache.flink.api.common.Plan;
import org.apache.flink.api.common.PlanExecutor;
import org.apache.flink.configuration.Configuration;

import java.io.File;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

/**
 * An {@link ExecutionEnvironment} that sends programs to a cluster for execution. The environment
 * needs to be created with the address and port of the JobManager of the Flink cluster that
 * should execute the programs.
 * 
 * 

Many programs executed via the remote environment depend on additional classes. Such classes * may be the classes of functions (transformation, aggregation, ...) or libraries. Those classes * must be attached to the remote environment as JAR files, to allow the environment to ship the * classes into the cluster for the distributed execution. */ @Public public class RemoteEnvironment extends ExecutionEnvironment { /** The hostname of the JobManager */ protected final String host; /** The port of the JobManager main actor system */ protected final int port; /** The jar files that need to be attached to each job */ protected final List jarFiles; /** The configuration used by the client that connects to the cluster */ protected Configuration clientConfiguration; /** The remote executor lazily created upon first use */ protected PlanExecutor executor; /** Optional shutdown hook, used in session mode to eagerly terminate the last session */ private Thread shutdownHook; /** The classpaths that need to be attached to each job */ protected final List globalClasspaths; /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the * given host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be executed. * @param port The port of the master (JobManager), where the program should be executed. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses * user-defined functions, user-defined input formats, or any libraries, those must be * provided in the JAR files. */ public RemoteEnvironment(String host, int port, String... jarFiles) { this(host, port, null, jarFiles, null); } /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the * given host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be executed. * @param port The port of the master (JobManager), where the program should be executed. * @param clientConfig The configuration used by the client that connects to the cluster. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses * user-defined functions, user-defined input formats, or any libraries, those must be * provided in the JAR files. */ public RemoteEnvironment(String host, int port, Configuration clientConfig, String[] jarFiles) { this(host, port, clientConfig, jarFiles, null); } /** * Creates a new RemoteEnvironment that points to the master (JobManager) described by the * given host name and port. * *

Each program execution will have all the given JAR files in its classpath. * * @param host The host name or address of the master (JobManager), where the program should be executed. * @param port The port of the master (JobManager), where the program should be executed. * @param clientConfig The configuration used by the client that connects to the cluster. * @param jarFiles The JAR files with code that needs to be shipped to the cluster. If the program uses * user-defined functions, user-defined input formats, or any libraries, those must be * provided in the JAR files. * @param globalClasspaths The paths of directories and JAR files that are added to each user code * classloader on all nodes in the cluster. Note that the paths must specify a * protocol (e.g. file://) and be accessible on all nodes (e.g. by means of a NFS share). * The protocol must be supported by the {@link java.net.URLClassLoader}. */ public RemoteEnvironment(String host, int port, Configuration clientConfig, String[] jarFiles, URL[] globalClasspaths) { if (!ExecutionEnvironment.areExplicitEnvironmentsAllowed()) { throw new InvalidProgramException( "The RemoteEnvironment cannot be instantiated when running in a pre-defined context " + "(such as Command Line Client, Scala Shell, or TestEnvironment)"); } if (host == null) { throw new NullPointerException("Host must not be null."); } if (port < 1 || port >= 0xffff) { throw new IllegalArgumentException("Port out of range"); } this.host = host; this.port = port; this.clientConfiguration = clientConfig == null ? new Configuration() : clientConfig; if (jarFiles != null) { this.jarFiles = new ArrayList<>(jarFiles.length); for (String jarFile : jarFiles) { try { this.jarFiles.add(new File(jarFile).getAbsoluteFile().toURI().toURL()); } catch (MalformedURLException e) { throw new IllegalArgumentException("JAR file path invalid", e); } } } else { this.jarFiles = Collections.emptyList(); } if (globalClasspaths == null) { this.globalClasspaths = Collections.emptyList(); } else { this.globalClasspaths = Arrays.asList(globalClasspaths); } } // ------------------------------------------------------------------------ @Override public JobExecutionResult execute(String jobName) throws Exception { PlanExecutor executor = getExecutor(); Plan p = createProgramPlan(jobName); // Session management is disabled, revert this commit to enable //p.setJobId(jobID); //p.setSessionTimeout(sessionTimeout); JobExecutionResult result = executor.executePlan(p); this.lastJobExecutionResult = result; return result; } @Override public String getExecutionPlan() throws Exception { Plan p = createProgramPlan("plan", false); // make sure that we do not start an new executor here // if one runs, fine, of not, we create a local executor (lightweight) and let it // generate the plan if (executor != null) { return executor.getOptimizerPlanAsJSON(p); } else { PlanExecutor le = PlanExecutor.createLocalExecutor(null); String plan = le.getOptimizerPlanAsJSON(p); le.stop(); return plan; } } @Override @PublicEvolving public void startNewSession() throws Exception { dispose(); jobID = JobID.generate(); installShutdownHook(); } protected PlanExecutor getExecutor() throws Exception { if (executor == null) { executor = PlanExecutor.createRemoteExecutor(host, port, clientConfiguration, jarFiles, globalClasspaths); executor.setPrintStatusDuringExecution(getConfig().isSysoutLoggingEnabled()); } // if we are using sessions, we keep the executor running if (getSessionTimeout() > 0 && !executor.isRunning()) { executor.start(); installShutdownHook(); } return executor; } // ------------------------------------------------------------------------ // Dispose // ------------------------------------------------------------------------ protected void dispose() { // Remove shutdown hook to prevent resource leaks, unless this is invoked by the // shutdown hook itself if (shutdownHook != null && shutdownHook != Thread.currentThread()) { try { Runtime.getRuntime().removeShutdownHook(shutdownHook); } catch (IllegalStateException e) { // race, JVM is in shutdown already, we can safely ignore this } catch (Throwable t) { LOG.warn("Exception while unregistering the cleanup shutdown hook."); } } try { PlanExecutor executor = this.executor; if (executor != null) { executor.endSession(jobID); executor.stop(); } } catch (Exception e) { throw new RuntimeException("Failed to dispose the session shutdown hook."); } } @Override public String toString() { return "Remote Environment (" + this.host + ":" + this.port + " - parallelism = " + (getParallelism() == -1 ? "default" : getParallelism()) + ") : " + getIdString(); } // ------------------------------------------------------------------------ // Shutdown hooks and reapers // ------------------------------------------------------------------------ private void installShutdownHook() { if (shutdownHook == null) { Thread shutdownHook = new Thread(new Runnable() { @Override public void run() { try { dispose(); } catch (Throwable t) { LOG.error("Error in cleanup of RemoteEnvironment during JVM shutdown: " + t.getMessage(), t); } } }); try { // Add JVM shutdown hook to call shutdown of service Runtime.getRuntime().addShutdownHook(shutdownHook); this.shutdownHook = shutdownHook; } catch (IllegalStateException e) { // JVM is already shutting down. no need or a shutdown hook } catch (Throwable t) { LOG.error("Cannot register shutdown hook that cleanly terminates the BLOB service."); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy