// All downloads are free. The search and download functionality uses the official Maven repository.
//
// org.apache.flink.client.python.PythonDriver Maven / Gradle / Ivy
//
// There is a newer version: 2.0-preview1
// Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.client.python;

import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.client.program.ProgramAbortException;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.entrypoint.parser.CommandLineParser;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import py4j.GatewayServer;

import java.io.BufferedReader;
import java.io.File;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.UUID;

/**
 * A main class used to launch Python applications. It executes python as a subprocess and then has
 * it connect back to the JVM (through a Py4J {@link GatewayServer}) to access system properties,
 * etc.
 *
 * <p>The output of the Python process is echoed to stdout line by line and additionally written to
 * the log once the output stream has been fully consumed.
 */
public final class PythonDriver {
    private static final Logger LOG = LoggerFactory.getLogger(PythonDriver.class);

    /**
     * Parses the command line, starts the gateway server, launches the Python process and waits
     * for it to finish while forwarding its output.
     *
     * @param args command line arguments; at least two are required, e.g. {@code py a.py [user
     *     args]} or {@code pym a.b [user args]}
     * @throws Throwable {@code PythonEnvUtils.capturedJavaException} if it has been set, otherwise
     *     a {@link ProgramAbortException} wrapping the failure (including a non-zero exit code of
     *     the Python process)
     */
    public static void main(String[] args) throws Throwable {
        // The python job needs at least 2 args.
        // e.g. py a.py [user args]
        // e.g. pym a.b [user args]
        if (args.length < 2) {
            LOG.error(
                    "Required at least two arguments, only python file or python module is available.");
            System.exit(1);
        }

        // parse args
        final CommandLineParser<PythonDriverOptions> commandLineParser =
                new CommandLineParser<>(new PythonDriverOptionsParserFactory());
        PythonDriverOptions pythonDriverOptions = null;
        try {
            pythonDriverOptions = commandLineParser.parse(args);
        } catch (Exception e) {
            LOG.error("Could not parse command line arguments {}.", args, e);
            commandLineParser.printHelp(PythonDriver.class.getSimpleName());
            System.exit(1);
        }

        // Get configuration from ContextEnvironment/OptimizerPlanEnvironment. As the
        // configurations of streaming and batch environments are always set at the same time,
        // for streaming jobs we can also get its configuration from batch environments.
        Configuration config = ExecutionEnvironment.getExecutionEnvironment().getConfiguration();

        // start gateway server
        GatewayServer gatewayServer = PythonEnvUtils.startGatewayServer();
        PythonEnvUtils.setGatewayServer(gatewayServer);

        // Stays null until the Python process has been launched, so that the finally block only
        // runs the cleanup when there is something to clean up.
        PythonEnvUtils.PythonProcessShutdownHook shutdownHook = null;

        // commands which will be executed in the python process.
        final List<String> commands = constructPythonCommands(pythonDriverOptions);
        try {
            // prepare the exec environment of the python process: a unique directory name below
            // <java.io.tmpdir>/pyflink.
            String tmpDir =
                    System.getProperty("java.io.tmpdir")
                            + File.separator
                            + "pyflink"
                            + File.separator
                            + UUID.randomUUID();
            // start the python process.
            Process pythonProcess =
                    PythonEnvUtils.launchPy4jPythonClient(
                            gatewayServer,
                            config,
                            commands,
                            pythonDriverOptions.getEntryPointScript().orElse(null),
                            tmpDir,
                            true);
            shutdownHook =
                    new PythonEnvUtils.PythonProcessShutdownHook(
                            pythonProcess, gatewayServer, tmpDir);
            Runtime.getRuntime().addShutdownHook(shutdownHook);

            LOG.info(
                    "--------------------------- Python Process Started --------------------------");
            // print the python process output to stdout and log file
            final StringBuilder sb = new StringBuilder();
            // try-with-resources makes sure the reader (and the underlying process stream) is
            // closed once the output has been consumed or reading has failed.
            try (BufferedReader in =
                    new BufferedReader(
                            new InputStreamReader(
                                    pythonProcess.getInputStream(), StandardCharsets.UTF_8))) {
                String line;
                while ((line = in.readLine()) != null) {
                    System.out.println(line);
                    sb.append(line);
                    sb.append("\n");
                }
            } finally {
                // Log whatever has been collected, even if reading was aborted by an exception.
                LOG.info(sb.toString());
            }
            int exitCode = pythonProcess.waitFor();
            LOG.info(
                    "--------------------------- Python Process Exited ---------------------------");
            if (exitCode != 0) {
                throw new RuntimeException("Python process exits with code: " + exitCode);
            }
        } catch (Throwable e) {
            LOG.error("Run python process failed", e);

            if (PythonEnvUtils.capturedJavaException != null) {
                throw PythonEnvUtils.capturedJavaException;
            } else {
                // throw ProgramAbortException if the caller is interested in the program plan,
                // there is no harm to throw ProgramAbortException even if it is not the case.
                throw new ProgramAbortException(e);
            }
        } finally {
            PythonEnvUtils.setGatewayServer(null);
            // removeShutdownHook returns true only if the hook was still registered; in that case
            // run it right away instead of waiting for JVM shutdown.
            if (shutdownHook != null && Runtime.getRuntime().removeShutdownHook(shutdownHook)) {
                shutdownHook.run();
            }
        }
    }

    /**
     * Constructs the Python commands which will be executed in python process.
     *
     * <p>The result always starts with {@code -m}, followed by the module to run (derived from the
     * entry point script if one is given, otherwise the configured entry point module) and the
     * user program arguments.
     *
     * @param pythonDriverOptions parsed Python command options
     * @return the list of command line arguments for the python process
     */
    static List<String> constructPythonCommands(final PythonDriverOptions pythonDriverOptions) {
        final List<String> commands = new ArrayList<>();
        commands.add("-m");
        if (pythonDriverOptions.getEntryPointScript().isPresent()) {
            commands.add(toModuleName(pythonDriverOptions.getEntryPointScript().get()));
        } else {
            commands.add(pythonDriverOptions.getEntryPointModule());
        }
        commands.addAll(pythonDriverOptions.getProgramArgs());
        return commands;
    }

    /**
     * Derives the module name from the path of a python file: the directory part is dropped and
     * the file name is cut at its last occurrence of {@code .py}.
     *
     * <p>The suffix is only looked up within the file name. A path without {@code .py} in its file
     * name (or with {@code .py} only in a directory component) yields the plain file name instead
     * of failing with a {@link StringIndexOutOfBoundsException}.
     *
     * @param pythonFileName path of the entry point script
     * @return the file name without directory and without the {@code .py} suffix
     */
    private static String toModuleName(final String pythonFileName) {
        final String fileName =
                pythonFileName.substring(pythonFileName.lastIndexOf(File.separator) + 1);
        final int suffixIndex = fileName.lastIndexOf(".py");
        return suffixIndex >= 0 ? fileName.substring(0, suffixIndex) : fileName;
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy