All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.datavec.python.PythonExecutioner Maven / Gradle / Ivy

The newest version!

/*******************************************************************************
 * Copyright (c) 2019 Konduit K.K.
 *
 * This program and the accompanying materials are made available under the
 * terms of the Apache License, Version 2.0 which is available at
 * https://www.apache.org/licenses/LICENSE-2.0.
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 *
 * SPDX-License-Identifier: Apache-2.0
 ******************************************************************************/


package org.datavec.python;

import lombok.extern.slf4j.Slf4j;
import org.apache.commons.io.IOUtils;
import org.bytedeco.cpython.global.python;
import org.bytedeco.numpy.global.numpy;
import org.nd4j.common.io.ClassPathResource;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.bytedeco.cpython.global.python.*;
import static org.datavec.python.Python.*;

/**
 *  Allows execution of python scripts managed by
 *  an internal interpreter.
 *  An end user may specify a python script to run
 *  via any of the execution methods available in this class.
 *
 *  At static initialization time (when the class is first initialized)
 *  a number of components are set up:
 *  1. The python path. A user may override this with the system property {@link #DEFAULT_PYTHON_PATH_PROPERTY}
 *
 *  2. Since this executioner uses javacpp to manage and run python interpreters under the covers,
 *  a user may also set the system property {@link #JAVACPP_PYTHON_APPEND_TYPE} to one of the {@link JavaCppPathType}
 *  values. This allows the user to determine whether the javacpp default python path is used at all, and if so
 *  whether it is prepended or appended to the user supplied path. This behavior is useful when you need to use an external
 *  python distribution such as anaconda.
 *
 *  3. A proper numpy import for use with javacpp: We call numpy import ourselves to ensure proper loading of
 *  native libraries needed by numpy are allowed to load in the proper order. If we don't do this,
 *  it causes a variety of issues with running numpy. (User must still include "import numpy as np" in their scripts).
 *
 *  4. Various python scripts predefined on the classpath, shipped alongside the java code.
 *  These are auxiliary python scripts used for loading classes, predefining certain kinds of behavior
 *  in order for us to manipulate values within the python memory, as well as pulling them out of memory
 *  for integration within the internal python executioner.
 *
 *  For more information on how this works, please take a look at the {@link #init()}
 *  method.
 *
 *  Generally, a user defining a python script for use by the python executioner
 *  will have a set of defined target input values and output values.
 *  These values should not be present when actually running the script, but just referenced.
 *  In order to test your python script for execution outside the engine,
 *  we recommend commenting out a few default values as dummy input values.
 *  This will allow an end user to test their script before trying to use the server.
 *
 *  In order to get output values out of a python script, all a user has to do
 *  is define the output variables they want being used in the final output in the actual pipeline.
 *  For example, if a user wants to return a dictionary, they just have to create a dictionary with that name
 *  and based on the configured {@link PythonVariables} passed as outputs
 *  to one of the execution methods, we can pull the values out automatically.
 *
 *  For input definitions, it is similar. You just define the values you want used in
 *  {@link PythonVariables} and we will automatically generate code for defining those values
 *  as desired for running. This allows the user to customize values dynamically
 *  at runtime but reference them by name in a python script.
 *
 *
 *  @author Fariz Rahman
 *  @author Adam Gibson
 */


@Slf4j
public class PythonExecutioner {


    /** Set to {@code true} by {@code init()} once the python path has been configured; checked by {@code initPythonPath()}. */
    private static final AtomicBoolean init = new AtomicBoolean(false);

    /** System property read by {@code initPythonPath()} to obtain a user supplied python path. */
    public static final String DEFAULT_PYTHON_PATH_PROPERTY = "org.datavec.python.path";

    /** System property naming the {@link JavaCppPathType} to apply when a user supplied python path is present. */
    public static final String JAVACPP_PYTHON_APPEND_TYPE = "org.datavec.python.javacpp.path.append";

    /** Fallback for {@link #JAVACPP_PYTHON_APPEND_TYPE}; upper-cased it resolves to {@link JavaCppPathType#BEFORE}. */
    public static final String DEFAULT_APPEND_TYPE = "before";

    /** Name of the python global that {@code throwIfExecutionFailed()} inspects after a script has run. */
    private static final String PYTHON_EXCEPTION_KEY = "__python_exception__";

    // Start the embedded interpreter as soon as this class is initialized.
    static {
        init();
    }


    /**
     * Performs the one-time start-up of the embedded python interpreter.
     * Does nothing if the {@code init} flag is already set. Otherwise it
     * configures the python path, marks the class as initialized, and then
     * runs the CPython start-up calls followed by the numpy array import.
     */
    private static synchronized void init() {
        if (init.get()) {
            return;
        }
        // Must run while the flag is still false: initPythonPath() throws once it is set.
        initPythonPath();
        // Flag is flipped before the interpreter is started, so later initPythonPath() calls are rejected.
        init.set(true);
        log.info("CPython: PyEval_InitThreads()");
        PyEval_InitThreads();
        log.info("CPython: Py_InitializeEx()");
        // NOTE(review): per the CPython docs an argument of 0 skips signal handler registration - confirm this is intended.
        Py_InitializeEx(0);
        // Import numpy's C array API ourselves (see the class documentation, item 3).
        numpy._import_array();
    }

    /**
     * Runs the given python source through {@code PyRun_SimpleStringFlags}.
     * The code is logged at debug level before it is executed.
     *
     * @param code python source to execute
     * @throws PythonException if the interpreter reports a non-zero status
     */
    private static synchronized void simpleExec(String code) throws PythonException {
        log.debug(code);
        log.info("CPython: PyRun_SimpleStringFlag()");

        final int status = PyRun_SimpleStringFlags(code, null);
        if (status == 0) {
            return;
        }
        throw new PythonException("Execution failed, unable to retrieve python exception.");
    }

    /**
     * Checks whether a string can be used as the name of a python variable.
     * <p>
     * The first character must be an underscore or a Unicode identifier start
     * character; every following character must be a Unicode identifier part.
     * Unlike Java identifiers, currency symbols such as {@code $} and
     * "ignorable" control characters are rejected because python does not
     * accept them. Python keywords are not checked.
     *
     * @param s candidate variable name, may be {@code null}
     * @return {@code true} if {@code s} is non-empty and well formed, {@code false} otherwise
     *         (including for {@code null})
     */
    public static boolean validateVariableName(String s) {
        if (s == null || s.isEmpty()) {
            return false;
        }
        final char first = s.charAt(0);
        // isUnicodeIdentifierStart does not cover '_', which python allows as a leading character.
        if (first != '_' && !Character.isUnicodeIdentifierStart(first)) {
            return false;
        }
        for (int i = 1; i < s.length(); i++) {
            final char c = s.charAt(i);
            // isUnicodeIdentifierPart also accepts ignorable control characters; python does not.
            if (Character.isIdentifierIgnorable(c) || !Character.isUnicodeIdentifierPart(c)) {
                return false;
            }
        }
        return true;
    }


    /**
     * Binds a value to a name in the globals of the current context (see PythonContextManager).
     * The effect matches running {@code a = b} in python, with {@code a} being
     * the variable name and {@code b} the value.
     *
     * @param varName name of the python variable to set; must pass {@link #validateVariableName(String)}
     * @param pythonObject value to bind to the variable
     * @throws PythonException if {@code varName} is not an acceptable variable name
     */
    public static void setVariable(String varName, PythonObject pythonObject) throws PythonException {
        final boolean nameOk = validateVariableName(varName);
        if (!nameOk) {
            throw new PythonException("Invalid variable name: " + varName);
        }
        final PythonObject globals = Python.globals();
        globals.set(new PythonObject(varName), pythonObject);
    }

    /**
     * Converts a java value according to the given python type and binds it
     * to a global python variable.
     *
     * @param varName name of the python variable to set
     * @param varType python type that selects the conversion applied to {@code value}
     * @param value java value to convert
     * @throws PythonException if the type is not supported or the variable name is rejected
     */
    public static void setVariable(String varName, PythonType varType, Object value) throws PythonException {
        // Conversion runs first, then the name is validated by the two-argument overload.
        setVariable(varName, convertToPythonObject(varType, value));
    }

    /** Wraps {@code value} in a {@link PythonObject} using the converter that matches the type's name. */
    private static PythonObject convertToPythonObject(PythonType varType, Object value) throws PythonException {
        switch (varType.getName()) {
            case STR:
                return new PythonObject(PythonType.STR.convert(value));
            case INT:
                return new PythonObject(PythonType.INT.convert(value));
            case FLOAT:
                return new PythonObject(PythonType.FLOAT.convert(value));
            case BOOL:
                return new PythonObject(PythonType.BOOL.convert(value));
            case NDARRAY:
                return new PythonObject(PythonType.NDARRAY.convert(value));
            case LIST:
                return new PythonObject(PythonType.LIST.convert(value));
            case DICT:
                return new PythonObject(PythonType.DICT.convert(value));
            case BYTES:
                return new PythonObject(PythonType.BYTES.convert(value));
            default:
                throw new PythonException("Unsupported type: " + varType);
        }
    }

    /**
     * Pushes every variable held by {@code pyVars} into the python globals.
     *
     * @param pyVars variables to set; {@code null} is treated as "nothing to set"
     * @throws PythonException if any single variable cannot be set
     */
    public static void setVariables(PythonVariables pyVars) throws PythonException {
        if (pyVars != null) {
            for (String name : pyVars.getVariables()) {
                final PythonType type = pyVars.getType(name);
                final Object value = pyVars.getValue(name);
                setVariable(name, type, value);
            }
        }
    }

    /**
     * Looks up a variable in the python globals by calling the globals' {@code get} attribute.
     *
     * @param varName name of the python variable to read
     * @return the object returned by {@code globals.get(varName)}
     */
    public static PythonObject getVariable(String varName) {
        final PythonObject globals = Python.globals();
        final PythonObject getter = globals.attr("get");
        return getter.call(varName);
    }

    /**
     * Reads a python global and converts it to its java representation.
     *
     * @param varName name of the python variable to read
     * @param varType python type whose {@code toJava} conversion is applied to the value
     * @param <T> java type produced by {@code varType}
     * @return the converted java value
     * @throws PythonException if the conversion fails
     */
    public static <T> T getVariable(String varName, PythonType<T> varType) throws PythonException {
        PythonObject pythonObject = getVariable(varName);
        return varType.toJava(pythonObject);
    }

    /**
     * Fills {@code pyVars} with the current values of the python globals it names.
     * Each value is converted with the type registered for that variable.
     *
     * @param pyVars variables to populate; {@code null} is treated as "nothing to read"
     * @throws PythonException if a value cannot be converted
     */
    public static void getVariables(PythonVariables pyVars) throws PythonException {
        if (pyVars != null) {
            for (String name : pyVars.getVariables()) {
                final Object javaValue = getVariable(name, pyVars.getType(name));
                pyVars.setValue(name, javaValue);
            }
        }
    }


    /**
     * Embeds user code into the {@code pythonexec/pythonexec.py} classpath template.
     * Every line of {@code code} is indented by four spaces and the result
     * replaces the {@code "    pass"} placeholder of the template.
     *
     * @param code user supplied python source
     * @return the template with the indented user code substituted in
     * @throws IllegalStateException if the template cannot be read from the classpath
     */
    private static String getWrappedCode(String code) {
        try (InputStream is = new ClassPathResource("pythonexec/pythonexec.py").getInputStream()) {
            // The template ships with the library, so decode it with a fixed charset
            // instead of whatever the platform default happens to be.
            String base = IOUtils.toString(is, StandardCharsets.UTF_8);
            StringBuilder indentedCode = new StringBuilder();
            for (String line : code.split("\n")) {
                indentedCode.append("    ").append(line).append('\n');
            }
            return base.replace("    pass", indentedCode);
        } catch (IOException e) {
            throw new IllegalStateException("Unable to read python code!", e);
        }
    }

    /**
     * Surfaces a failure recorded in the {@code __python_exception__} global.
     * If that global is present, not {@code None} and not the empty string,
     * it is reset to {@code ""} and its previous value is thrown as a
     * {@link PythonException}.
     *
     * @throws PythonException if a failure was recorded
     */
    private static void throwIfExecutionFailed() throws PythonException {
        final PythonObject pending = getVariable(PYTHON_EXCEPTION_KEY);
        if (pending == null || pending.isNone() || pending.toString().isEmpty()) {
            return;
        }
        // Clear the marker so the next execution starts from a clean state.
        setVariable(PYTHON_EXCEPTION_KEY, new PythonObject(""));
        throw new PythonException(pending);
    }

    /**
     * Executes a python script inside the wrapper template.
     *
     * @param code python source to run
     * @throws PythonException if the interpreter reports a failure or the script recorded an exception
     */
    public static void exec(String code) throws PythonException {
        final String wrapped = getWrappedCode(code);
        simpleExec(wrapped);
        throwIfExecutionFailed();
    }

    /**
     * Executes a python script with the given inputs and collects the requested outputs.
     * Inputs are set as python globals before the script runs; afterwards
     * {@code outputVariables} is filled from the globals of the same names.
     *
     * @param code python source to run
     * @param inputVariables variables to define before execution; may be {@code null}
     * @param outputVariables variables to read back after execution; may be {@code null}
     * @throws PythonException if setting inputs, running the script or reading outputs fails
     */
    public static void exec(String code, PythonVariables inputVariables, PythonVariables outputVariables) throws PythonException {
        setVariables(inputVariables);
        final String wrapped = getWrappedCode(code);
        simpleExec(wrapped);
        throwIfExecutionFailed();
        getVariables(outputVariables);
    }

    /**
     * Executes a python script and returns every supported global variable
     * that is visible afterwards.
     * <p>
     * Collection is delegated to {@link #getAllVariables()}, so both methods
     * report the same set of types (including numpy arrays). The previous
     * inline copy passed the unbound {@code keys} attribute to {@code list}
     * instead of calling it.
     *
     * @param code python source to run
     * @return the globals whose names do not start with an underscore and whose type is supported
     * @throws PythonException if the interpreter reports a failure or the script recorded an exception
     */
    public static PythonVariables execAndReturnAllVariables(String code) throws PythonException {
        simpleExec(getWrappedCode(code));
        throwIfExecutionFailed();
        return getAllVariables();
    }

    /**
     * Collects the python globals into a {@link PythonVariables} instance.
     * Names starting with an underscore are skipped. Values are recorded when
     * they are a bool, int, float, str, list, dict, or a numpy
     * {@code ndarray}/{@code generic}; values of any other type are ignored.
     *
     * @return the supported global variables with their java values
     * @throws PythonException if a value cannot be read or converted
     */
    public static PythonVariables getAllVariables() throws PythonException {
        PythonVariables out = new PythonVariables();
        PythonObject globals = Python.globals();
        PythonObject keysList = Python.list(globals.attr("keys").call());
        int numKeys = Python.len(keysList).toInt();
        for (int i = 0; i < numKeys; i++) {
            PythonObject key = keysList.get(i);
            String keyStr = key.toString();
            if (keyStr.startsWith("_")) {
                continue;
            }
            PythonObject val = globals.get(key);
            // bool has to be tested before int: python's bool is a subclass of int, so an
            // int-first check (assuming Python.isinstance mirrors python's isinstance)
            // would report every boolean as an int and never reach the bool branch.
            if (Python.isinstance(val, boolType())) {
                out.addBool(keyStr, val.toBoolean());
            } else if (Python.isinstance(val, intType())) {
                out.addInt(keyStr, val.toInt());
            } else if (Python.isinstance(val, floatType())) {
                out.addFloat(keyStr, val.toDouble());
            } else if (Python.isinstance(val, strType())) {
                out.addStr(keyStr, val.toString());
            } else if (Python.isinstance(val, listType())) {
                out.addList(keyStr, val.toList().toArray(new Object[0]));
            } else if (Python.isinstance(val, dictType())) {
                out.addDict(keyStr, val.toMap());
            } else {
                PythonObject np = importModule("numpy");
                if (Python.isinstance(val, np.attr("ndarray"), np.attr("generic"))) {
                    out.addNDArray(keyStr, val.toNumpy());
                }
            }
        }
        return out;
    }

    /**
     * Executes a python script with the given inputs and returns every
     * supported global variable that is visible afterwards.
     * <p>
     * Like the other exec methods, this checks the {@code __python_exception__}
     * global after the run, so a failure recorded there is thrown instead of
     * being silently ignored.
     *
     * @param code python source to run
     * @param inputs variables to define before execution; may be {@code null}
     * @return the globals collected by {@link #getAllVariables()}
     * @throws Exception if setting inputs, running the script or reading the globals fails;
     *         failures raised here are {@link PythonException}s
     */
    public static PythonVariables execAndReturnAllVariables(String code, PythonVariables inputs) throws Exception {
        setVariables(inputs);
        simpleExec(getWrappedCode(code));
        throwIfExecutionFailed();
        return getAllVariables();
    }

    /**
     * One of a few desired values
     * for how we should handle
     * using javacpp's python path.
     * BEFORE: Prepend the python path alongside a defined one
     * AFTER: Append the javacpp python path alongside the defined one
     * NONE: Don't use javacpp's python path at all
     */
    public enum JavaCppPathType {
        BEFORE, AFTER, NONE
    }

    /**
     * Sets the python path of the embedded interpreter.
     * Generally you can just use the PYTHONPATH environment variable,
     * but if you need to set it from code, this can work as well.
     * <p>
     * Without the {@link #DEFAULT_PYTHON_PATH_PROPERTY} system property the
     * path consists of javacpp's cached numpy packages plus the
     * {@code site-packages} directory of the cached python package. With the
     * property set, the user path is combined with the javacpp packages as
     * selected by {@link #JAVACPP_PYTHON_APPEND_TYPE}.
     * <p>
     * An {@link IOException} raised while resolving or setting the path is
     * logged and not rethrown.
     *
     * @throws IllegalStateException if the executioner has already been initialized
     * @throws IllegalArgumentException if {@link #JAVACPP_PYTHON_APPEND_TYPE} does not name a {@link JavaCppPathType}
     */
    public static synchronized void initPythonPath() {
        if (init.get()) {
            throw new IllegalStateException("Unable to reset python path. Already initialized.");
        }
        try {
            String path = System.getProperty(DEFAULT_PYTHON_PATH_PROPERTY);
            if (path == null) {
                log.info("Setting python default path");
                File[] packages = numpy.cachePackages();

                //// TODO: fix in javacpp
                // The cached python package's site-packages directory is added by hand.
                File sitePackagesWindows = new File(python.cachePackage(), "site-packages");
                File[] withSitePackages = new File[packages.length + 1];
                System.arraycopy(packages, 0, withSitePackages, 0, packages.length);
                withSitePackages[packages.length] = sitePackagesWindows;

                Py_SetPath(withSitePackages);
            } else {
                log.info("Setting python path {}", path);
                File[] packages = numpy.cachePackages();
                JavaCppPathType pathAppendValue = JavaCppPathType.valueOf(
                        System.getProperty(JAVACPP_PYTHON_APPEND_TYPE, DEFAULT_APPEND_TYPE).toUpperCase());

                String pythonPath = buildPythonPath(path, packages, pathAppendValue);
                log.info("Final python path: {}", pythonPath);

                Py_SetPath(pythonPath);
            }
        } catch (IOException e) {
            log.error("Failed to set python path.", e);
        }
    }

    /** Joins the user path and the javacpp package directories in the order requested by {@code appendType}. */
    private static String buildPythonPath(String path, File[] packages, JavaCppPathType appendType) {
        StringBuilder sb = new StringBuilder();
        switch (appendType) {
            case BEFORE:
                for (File cacheDir : packages) {
                    sb.append(cacheDir).append(File.pathSeparator);
                }
                sb.append(path);
                log.info("Prepending javacpp python path: {}", sb);
                break;
            case AFTER:
                sb.append(path);
                // Separator goes in front of each directory; otherwise the user path and the
                // first javacpp directory are fused into a single, non-existent entry.
                for (File cacheDir : packages) {
                    sb.append(File.pathSeparator).append(cacheDir);
                }
                log.info("Appending javacpp python path {}", sb);
                break;
            case NONE:
                log.info("Not appending javacpp path");
                sb.append(path);
                break;
        }
        return sb.toString();
    }
    
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy