All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.zeppelin.python.PythonInterpreter Maven / Gradle / Ivy

There is a newer version: 0.8.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zeppelin.python;

import com.google.gson.Gson;
import org.apache.commons.exec.CommandLine;
import org.apache.commons.exec.ExecuteException;
import org.apache.commons.exec.environment.EnvironmentUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.interpreter.ZeppelinContext;
import org.apache.zeppelin.interpreter.Interpreter;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.interpreter.InterpreterException;
import org.apache.zeppelin.interpreter.InterpreterGroup;
import org.apache.zeppelin.interpreter.InterpreterHookRegistry.HookType;
import org.apache.zeppelin.interpreter.InterpreterResult;
import org.apache.zeppelin.interpreter.InterpreterResult.Code;
import org.apache.zeppelin.interpreter.InvalidHookException;
import org.apache.zeppelin.interpreter.remote.RemoteInterpreterUtils;
import org.apache.zeppelin.interpreter.thrift.InterpreterCompletion;
import org.apache.zeppelin.interpreter.util.InterpreterOutputStream;
import org.apache.zeppelin.interpreter.util.ProcessLauncher;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import py4j.GatewayServer;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * Interpreter for Python. It is the first implementation of an interpreter for Python, so it has
 * fewer features than IPythonInterpreter, but it also requires fewer prerequisites than
 * IPythonInterpreter: only a python installation is required.
 */
public class PythonInterpreter extends Interpreter {
  // logger shared by this class and its inner PythonProcessLauncher
  private static final Logger LOGGER = LoggerFactory.getLogger(PythonInterpreter.class);
  // timeout in seconds, used when waiting for the python process to become ready and when
  // waiting for a completion response
  private static final int MAX_TIMEOUT_SEC = 30;

  // py4j gateway server started on the JVM side by createGatewayServerAndStartScript()
  private GatewayServer gatewayServer;
  // launcher of the python process; created by createGatewayServerAndStartScript()
  protected PythonProcessLauncher pythonProcessLauncher;
  // temporary directory the bundled python scripts are copied into
  private File pythonWorkDir;
  // when true, the bundled py4j zip is appended to PYTHONPATH in setupPythonEnv()
  protected boolean useBuiltinPy4j = true;

  // used to forward output from python process to InterpreterOutput
  private InterpreterOutputStream outputStream;
  // pid reported by the python process through onPythonScriptInitialized(); -1 until then
  private long pythonPid = -1;
  // non-null only while IPythonInterpreter is used as the delegate (see open())
  private IPythonInterpreter iPythonInterpreter;
  // created lazily by getZeppelinContext()
  private ZeppelinContext zeppelinContext;
  // set by PythonCondaInterpreter
  private String condaPythonExec;

  /**
   * Creates the interpreter, passing the given properties on to the {@link Interpreter}
   * base class.
   *
   * @param property interpreter properties
   */
  public PythonInterpreter(Properties property) {
    super(property);
  }

  /**
   * Opens this interpreter.
   *
   * <p>When {@code zeppelin.python.useIPython} is true (the default) and an IPythonInterpreter is
   * available in the same session whose kernel prerequisites are met, that interpreter is opened
   * and used as the delegate. Otherwise the native implementation is started: the matplotlib
   * display hook is registered and the gateway server plus the python process are launched.
   *
   * @throws InterpreterException if the hook cannot be registered or the python process cannot
   *     be started
   */
  @Override
  public void open() throws InterpreterException {
    // try IPythonInterpreter first
    iPythonInterpreter = getIPythonInterpreter();
    boolean useIPython = Boolean.parseBoolean(getProperty("zeppelin.python.useIPython", "true"));

    LOGGER.info("zeppelin.python.useIPython: {}", useIPython);
    if (useIPython && iPythonInterpreter == null) {
      // the session lookup may yield nothing; fall back to the native implementation instead of
      // failing with a NullPointerException
      LOGGER.warn("zeppelin.python.useIPython is enabled but no IPythonInterpreter was found "
              + "in this session");
    } else if (useIPython) {
      String checkKernelPrerequisiteResult = iPythonInterpreter.checkKernelPrerequisite(
              getPythonExec());
      if (StringUtils.isEmpty(checkKernelPrerequisiteResult)) {
        try {
          iPythonInterpreter.open();
          LOGGER.info("IPython is available, Use IPythonInterpreter to replace PythonInterpreter");
          return;
        } catch (Exception e) {
          // best effort: any failure here just means we fall back to the native implementation
          LOGGER.warn("Fail to open IPythonInterpreter", e);
        }
      } else {
        LOGGER.info("IPython requirement is not met, checkKernelPrerequisiteResult: {}",
                checkKernelPrerequisiteResult);
      }
    }

    // reset iPythonInterpreter to null as it is not available
    iPythonInterpreter = null;
    LOGGER.info("IPython is not available, use the native PythonInterpreter");
    // Add matplotlib display hook
    InterpreterGroup intpGroup = getInterpreterGroup();
    if (intpGroup != null && intpGroup.getInterpreterHookRegistry() != null) {
      try {
        // just for unit test I believe (zjffdu)
        registerHook(HookType.POST_EXEC_DEV.getName(), "__zeppelin__._displayhook()");
      } catch (InvalidHookException e) {
        throw new InterpreterException(e);
      }
    }

    try {
      createGatewayServerAndStartScript();
    } catch (IOException e) {
      LOGGER.error("Fail to open PythonInterpreter", e);
      throw new InterpreterException("Fail to open PythonInterpreter", e);
    }
  }

  /**
   * Starts the py4j gateway server on the JVM side, then launches the python process that
   * connects back to it and waits until that process reports itself as ready.
   *
   * @throws IOException if the python process is not running after the wait, either because the
   *     launch timed out or because it failed
   */
  private void createGatewayServerAndStartScript() throws IOException {
    // JVM side: gateway server on a random free port. The address is resolved through
    // PythonUtils.getLocalIP instead of being fixed to 127.0.0.1 so that a python process
    // running in a docker container can also connect to this gateway server.
    final int gatewayPort = RemoteInterpreterUtils.findRandomAvailablePortOnAllLocalInterfaces();
    final String gatewayAddress = PythonUtils.getLocalIP(properties);
    final String gatewaySecret = PythonUtils.createSecret(256);
    gatewayServer =
        PythonUtils.createGatewayServer(this, gatewayAddress, gatewayPort, gatewaySecret);
    gatewayServer.start();

    // python side: build the command line of the process that connects to the gateway server
    createPythonScript();
    final String executable = getPythonExec();
    final CommandLine commandLine = CommandLine.parse(executable);
    boolean execAlreadyNamesScript = executable.endsWith(".py");
    if (!execAlreadyNamesScript) {
      // PythonDockerInterpreter set pythonExec with script
      commandLine.addArgument(pythonWorkDir + "/zeppelin_python.py", false);
    }
    commandLine.addArgument(gatewayAddress, false);
    commandLine.addArgument(Integer.toString(gatewayPort), false);

    outputStream = new InterpreterOutputStream(LOGGER);
    Map processEnv = setupPythonEnv();
    processEnv.put("PY4J_GATEWAY_SECRET", gatewaySecret);
    if (LOGGER.isInfoEnabled()) {
      LOGGER.info("Launching Python Process Command: {} {}",
          commandLine.getExecutable(), StringUtils.join(commandLine.getArguments(), " "));
    }

    pythonProcessLauncher = new PythonProcessLauncher(commandLine, processEnv);
    pythonProcessLauncher.launch();
    pythonProcessLauncher.waitForReady(MAX_TIMEOUT_SEC * 1000);

    if (pythonProcessLauncher.isRunning()) {
      return;
    }
    String failureReason = pythonProcessLauncher.isLaunchTimeout()
        ? "Launch python process is time out.\n"
        : "Fail to launch python process.\n";
    throw new IOException(failureReason + pythonProcessLauncher.getErrorMessage());
  }

  /**
   * @return the launcher of the python process, or null if no process has been launched yet
   */
  public PythonProcessLauncher getPythonProcessLauncher() {
    return pythonProcessLauncher;
  }

  /**
   * Creates the temporary python working directory and copies the bundled python resources
   * (scripts and the py4j source zip) into it.
   *
   * @throws IOException if the directory cannot be created or a resource cannot be copied
   */
  private void createPythonScript() throws IOException {
    // set java.io.tmpdir to /tmp on MacOS, because docker can not share the /var folder which will
    // cause PythonDockerInterpreter fails.
    // https://stackoverflow.com/questions/45122459/docker-mounts-denied-the-paths-are-not-shared-
    // from-os-x-and-are-not-known
    boolean runningOnMac = System.getProperty("os.name", "").contains("Mac");
    if (runningOnMac) {
      System.setProperty("java.io.tmpdir", "/tmp");
    }

    pythonWorkDir = Files.createTempDirectory("python").toFile();
    pythonWorkDir.deleteOnExit();
    LOGGER.info("Create Python working dir: {}", pythonWorkDir.getAbsolutePath());

    // every file lives under "python/" on the classpath and keeps its name in the work dir
    String[] bundledFiles = {
      "zeppelin_python.py",
      "zeppelin_context.py",
      "backend_zinline.py",
      "mpl_config.py",
      "py4j-src-0.10.9.7.zip"
    };
    for (String fileName : bundledFiles) {
      copyResourceToPythonWorkDir("python/" + fileName, fileName);
    }
  }

  /**
   * @return true if an IPythonInterpreter is currently set as the delegate of this interpreter
   */
  protected boolean useIPython() {
    return this.iPythonInterpreter != null;
  }

  /**
   * Copies a classpath resource into the python working directory.
   *
   * @param srcResourceName name of the resource, resolved through this class' class loader
   * @param dstFileName name of the file to create inside the python working directory
   * @throws IOException if the resource does not exist or the copy fails
   */
  private void copyResourceToPythonWorkDir(String srcResourceName,
                                           String dstFileName) throws IOException {
    // both streams are closed by try-with-resources, including on failure
    try (InputStream in = getClass().getClassLoader().getResourceAsStream(srcResourceName)) {
      if (in == null) {
        // getResourceAsStream signals a missing resource with null rather than an exception
        throw new IOException("Resource not found on classpath: " + srcResourceName);
      }
      try (FileOutputStream out = new FileOutputStream(new File(pythonWorkDir, dstFileName))) {
        IOUtils.copy(in, out);
      }
    }
  }

  /**
   * Builds the environment for the python process: the environment of the current process with
   * the python working directory (and, when {@code useBuiltinPy4j} is set, the bundled py4j zip)
   * appended to {@code PYTHONPATH}.
   *
   * @return the environment variables to launch the python process with
   * @throws IOException if the current process environment cannot be read
   */
  protected Map<String, String> setupPythonEnv() throws IOException {
    Map<String, String> env = EnvironmentUtils.getProcEnvironment();
    appendToPythonPath(env, pythonWorkDir.getAbsolutePath());
    if (useBuiltinPy4j) {
      appendToPythonPath(env, pythonWorkDir.getAbsolutePath() + "/py4j-src-0.10.9.7.zip");
    }
    LOGGER.info("PYTHONPATH: {}", env.get("PYTHONPATH"));
    return env;
  }

  /**
   * Appends an entry to the {@code PYTHONPATH} variable of the given environment, creating the
   * variable when it is not present yet.
   *
   * @param env environment variables, modified in place
   * @param path entry to append
   */
  private void appendToPythonPath(Map<String, String> env, String path) {
    String updated = env.containsKey("PYTHONPATH")
        ? env.get("PYTHONPATH") + File.pathSeparator + path
        : path;
    env.put("PYTHONPATH", updated);
  }

  /**
   * Resolves the python executable used to run the python script. Candidates are tried in the
   * order {conda.env.name}/bin/python, then condaPythonExec, then the zeppelin.python property
   * (which defaults to "python").
   *
   * @return the python executable to launch
   */
  protected String getPythonExec() {
    String condaEnvName = getProperty("zeppelin.interpreter.conda.env.name");
    if (StringUtils.isNotBlank(condaEnvName)) {
      return condaEnvName + "/bin/python";
    }
    if (condaPythonExec != null) {
      return condaPythonExec;
    }
    return getProperty("zeppelin.python", "python");
  }

  /**
   * @return the temporary directory holding the bundled python scripts, or null if it has not
   *     been created yet
   */
  public File getPythonWorkDir() {
    return pythonWorkDir;
  }

  /**
   * Closes this interpreter. When IPythonInterpreter is the delegate only that interpreter is
   * closed; otherwise the python process is stopped (if it is running), the gateway server is
   * shut down, and any pending request or leftover result is discarded so that a re-opened
   * interpreter does not start from stale state.
   */
  @Override
  public void close() throws InterpreterException {
    if (iPythonInterpreter != null) {
      iPythonInterpreter.close();
      return;
    }
    if (pythonProcessLauncher != null && pythonProcessLauncher.isRunning()) {
      LOGGER.info("Kill python process");
      pythonProcessLauncher.stop();
    }
    if (gatewayServer != null) {
      gatewayServer.shutdown();
    }

    // The monitors themselves are never replaced (swapping a lock object while other threads may
    // be waiting on it is unsafe); only the state they guard is reset.
    synchronized (statementSetNotifier) {
      pythonInterpretRequest = null;
    }
    synchronized (statementFinishedNotifier) {
      statementOutput = null;
      statementError = false;
    }
  }

  // request waiting to be picked up by the python process through getStatements()
  private PythonInterpretRequest pythonInterpretRequest = null;
  // Dedicated monitor objects. They must not be boxed Integers: Integer.valueOf(0) returns a
  // cached instance, so both monitors would be the very same object, shared with every other
  // user of that cached value in the JVM.
  // guards pythonInterpretRequest; notified when a new request is set
  private final Object statementSetNotifier = new Object();
  // guards statementOutput/statementError; notified when the python process reports a result
  private final Object statementFinishedNotifier = new Object();
  // output of the last finished statement, null while a statement is still pending
  private String statementOutput = null;
  // whether the last finished statement was reported as failed
  private boolean statementError = false;

  /**
   * Sets the python executable that getPythonExec() returns when no conda environment name is
   * configured.
   *
   * @param pythonExec python command to use
   */
  public void setPythonExec(String pythonExec) {
    LOGGER.info("Set Python Command : {}", pythonExec);
    this.condaPythonExec = pythonExec;
  }

  /**
   * Request sent to the Python daemon: the statements to run plus flags describing how to run
   * them.
   */
  public class PythonInterpretRequest {
    // python code to execute
    public String statements;
    // true when the request is a code-completion request (see completion())
    public boolean isForCompletion;
    // flag handed to the python side; presumably controls whether registered hooks are run
    // around the statements -- confirm against zeppelin_python.py
    public boolean isCallHooks;

    /**
     * Creates a request with {@code isCallHooks} set to true.
     */
    public PythonInterpretRequest(String statements, boolean isForCompletion) {
      this(statements, isForCompletion, true);
    }

    /**
     * Creates a request with all three values given explicitly.
     */
    public PythonInterpretRequest(String statements, boolean isForCompletion, boolean isCallHooks) {
      this.statements = statements;
      this.isForCompletion = isForCompletion;
      this.isCallHooks = isCallHooks;
    }

    /** @return the python code to execute */
    public String statements() {
      return statements;
    }

    /** @return the completion flag of this request */
    public boolean isForCompletion() {
      return isForCompletion;
    }

    /** @return the call-hooks flag of this request */
    public boolean isCallHooks() {
      return isCallHooks;
    }
  }

  /**
   * Called by the python process. Blocks until a request has been set, then hands it out and
   * clears the pending slot.
   *
   * @return the next request to execute, never null
   */
  public PythonInterpretRequest getStatements() {
    synchronized (statementSetNotifier) {
      while (pythonInterpretRequest == null) {
        try {
          statementSetNotifier.wait(1000);
        } catch (InterruptedException e) {
          // Keep waiting: this method has to return a request. The interrupt flag is
          // deliberately not restored, otherwise every following wait() would throw at once and
          // this loop would spin.
          LOGGER.warn("Interrupted while waiting for the next python statement", e);
        }
      }
      PythonInterpretRequest req = pythonInterpretRequest;
      pythonInterpretRequest = null;
      return req;
    }
  }

  /**
   * Called by the python process to report the outcome of the last request. Stores the outcome
   * and wakes up a thread waiting on the finished-notifier.
   *
   * @param out output of the executed statements
   * @param error true if the execution failed
   */
  public void setStatementsFinished(String out, boolean error) {
    synchronized (statementFinishedNotifier) {
      LOGGER.debug("Setting python statement output: {}, error: {}", out, error);
      statementOutput = out;
      statementError = error;
      statementFinishedNotifier.notify();
    }
  }

  /**
   * Called by the python process once its script is initialized. Records the pid (used later by
   * interrupt()) and marks the launcher as initialized.
   *
   * @param pid process id reported by the python process
   */
  public void onPythonScriptInitialized(long pid) {
    pythonPid = pid;
    synchronized (pythonProcessLauncher) {
      LOGGER.debug("onPythonScriptInitialized is called");
      pythonProcessLauncher.initialized();
    }
  }

  /**
   * Called by the python process to write a piece of output to the interpreter output that is
   * currently attached to the output stream.
   *
   * @param message output text
   * @throws IOException if writing to the interpreter output fails
   */
  public void appendOutput(String message) throws IOException {
    LOGGER.debug("Output from python process: {}", message);
    outputStream.getInterpreterOutput().write(message);
  }

  /**
   * Hook invoked by interpret() right before the code is sent to the python process. Used by
   * subclasses such as PySparkInterpreter to set the JobGroup before executing spark code. The
   * default implementation does nothing.
   *
   * @param context context of the paragraph about to be executed
   */
  protected void preCallPython(InterpreterContext context) {

  }

  /**
   * Blocking call: publishes the request for the python process and waits until the process has
   * reported a result or is no longer running.
   *
   * @param request request to execute
   */
  protected void callPython(PythonInterpretRequest request) {
    synchronized (statementSetNotifier) {
      this.pythonInterpretRequest = request;
      statementOutput = null;
      // Clear the outcome of the previous request as well. If the python process dies before it
      // answers, a stale error flag would otherwise make this request look like a failed
      // statement with no output.
      statementError = false;
      statementSetNotifier.notify();
    }

    synchronized (statementFinishedNotifier) {
      while (statementOutput == null && pythonProcessLauncher.isRunning()) {
        try {
          statementFinishedNotifier.wait(1000);
        } catch (InterruptedException e) {
          // ignore this exception: the loop ends only when a result arrives or the python
          // process is gone
        }
      }
    }
  }

  /**
   * Executes the given code. Delegates to IPythonInterpreter when it is in use; otherwise binds
   * the paragraph context, sends the code to the python process and waits for its result.
   *
   * @param st python code to run
   * @param context context of the paragraph being run
   * @return ERROR with the reported output when the statement failed, ERROR with a fixed message
   *     when the python process is no longer running, SUCCESS otherwise
   * @throws InterpreterException if flushing the paragraph output fails
   */
  @Override
  public InterpreterResult interpret(String st, InterpreterContext context)
      throws InterpreterException {
    if (iPythonInterpreter != null) {
      return iPythonInterpreter.interpret(st, context);
    }

    // bind the output and the ZeppelinContext to the paragraph that is about to run
    outputStream.setInterpreterOutput(context.out);
    ZeppelinContext zContext = getZeppelinContext();
    zContext.setInterpreterContext(context);
    zContext.setGui(context.getGui());
    zContext.setNoteGui(context.getNoteGui());
    InterpreterContext.set(context);

    preCallPython(context);
    callPython(new PythonInterpretRequest(st, false));

    if (statementError) {
      return new InterpreterResult(Code.ERROR, statementOutput);
    }

    try {
      context.out.flush();
    } catch (IOException e) {
      throw new InterpreterException(e);
    }

    if (!pythonProcessLauncher.isRunning()) {
      return new InterpreterResult(Code.ERROR,
              "Python process is abnormally exited, please check your code and log.");
    }
    return new InterpreterResult(Code.SUCCESS);
  }

  /**
   * Interrupts the running python code. When the python process has reported its pid, SIGINT is
   * sent to it through the {@code kill} command; otherwise the interpreter is closed.
   *
   * @throws IOException if the {@code kill} command cannot be started
   * @throws InterpreterException if closing the interpreter fails
   */
  public void interrupt() throws IOException, InterpreterException {
    if (pythonPid <= -1) {
      LOGGER.warn("Non UNIX/Linux system, close the interpreter");
      close();
      return;
    }
    LOGGER.info("Sending SIGINT signal to PID : {}", pythonPid);
    String[] killCommand = {"kill", "-SIGINT", Long.toString(pythonPid)};
    Runtime.getRuntime().exec(killCommand);
  }

  /**
   * Cancels the running paragraph: delegates to IPythonInterpreter when it is in use, otherwise
   * interrupts the python process. An IOException raised while interrupting is logged and not
   * propagated.
   *
   * @param context context of the paragraph to cancel
   */
  @Override
  public void cancel(InterpreterContext context) throws InterpreterException {
    if (iPythonInterpreter == null) {
      try {
        interrupt();
      } catch (IOException e) {
        LOGGER.error("Error", e);
      }
    } else {
      iPythonInterpreter.cancel(context);
    }
  }

  /**
   * @return always {@code FormType.NATIVE}
   */
  @Override
  public FormType getFormType() {
    return FormType.NATIVE;
  }

  /**
   * Reports the progress of the running paragraph.
   *
   * @param context context of the paragraph
   * @return the progress reported by IPythonInterpreter when it is in use, otherwise always 0
   */
  @Override
  public int getProgress(InterpreterContext context) throws InterpreterException {
    return iPythonInterpreter == null ? 0 : iPythonInterpreter.getProgress(context);
  }


  /**
   * Computes code completions for the text in front of the cursor. Delegates to
   * IPythonInterpreter when it is in use; otherwise asks the python process and waits for its
   * answer for at most {@code MAX_TIMEOUT_SEC} seconds.
   *
   * @param buf text of the paragraph
   * @param cursor cursor position inside {@code buf}; values past the end are clamped
   * @param interpreterContext context of the paragraph
   * @return the completion candidates; empty when nothing is found, the request fails, times
   *     out or is interrupted
   */
  @Override
  public List<InterpreterCompletion> completion(String buf, int cursor,
                                                InterpreterContext interpreterContext)
      throws InterpreterException {
    if (iPythonInterpreter != null) {
      return iPythonInterpreter.completion(buf, cursor, interpreterContext);
    }
    if (buf.length() < cursor) {
      cursor = buf.length();
    }
    String completionString = getCompletionTargetString(buf, cursor);
    if (completionString == null) {
      // the target could not be extracted; do not ask python to complete the text "null"
      return new LinkedList<>();
    }
    String completionCommand = "__zeppelin_completion__.getCompletion('"
        + escapeForPythonLiteral(completionString) + "')";
    LOGGER.debug("completionCommand: {}", completionCommand);

    // publish the request under the same lock getStatements() reads it under
    synchronized (statementSetNotifier) {
      pythonInterpretRequest = new PythonInterpretRequest(completionCommand, true);
      statementOutput = null;
      statementError = false;
      statementSetNotifier.notify();
    }

    String[] completionList = null;
    synchronized (statementFinishedNotifier) {
      long startTime = System.currentTimeMillis();
      while (statementOutput == null
          && pythonProcessLauncher.isRunning()) {
        try {
          if (System.currentTimeMillis() - startTime > MAX_TIMEOUT_SEC * 1000) {
            LOGGER.error("Python completion didn't have response for {}sec.", MAX_TIMEOUT_SEC);
            break;
          }
          statementFinishedNotifier.wait(1000);
        } catch (InterruptedException e) {
          // give up on this completion, but keep the interrupt visible to the caller
          Thread.currentThread().interrupt();
          LOGGER.info("wait drop");
          return new LinkedList<>();
        }
      }
      if (statementError || statementOutput == null) {
        // failed, timed out, or the python process is gone
        return new LinkedList<>();
      }
      Gson gson = new Gson();
      completionList = gson.fromJson(statementOutput, String[].class);
    }
    //end code for completion
    if (completionList == null) {
      return new LinkedList<>();
    }

    List<InterpreterCompletion> results = new LinkedList<>();
    for (String name : completionList) {
      results.add(new InterpreterCompletion(name, name, StringUtils.EMPTY));
    }
    return results;
  }

  // Escapes a value so that it can be embedded in a single-quoted python string literal.
  private static String escapeForPythonLiteral(String value) {
    // backslashes first, so the escapes added afterwards are not escaped again
    return value.replace("\\", "\\\\")
        .replace("'", "\\'")
        .replace("\n", "\\n")
        .replace("\r", "\\r");
  }

  /**
   * Extracts the piece of text in front of the cursor that should be completed: everything after
   * the last space, newline or tab that is not itself the final character before the cursor. A
   * separator kind whose last occurrence is that final character is not considered at all. When
   * no separator qualifies, the whole text before the cursor is returned.
   *
   * @param text full text
   * @param cursor cursor position inside {@code text}
   * @return the completion target, or null when the text before the cursor cannot be extracted
   */
  private String getCompletionTargetString(String text, int cursor) {
    String textBeforeCursor;
    try {
      textBeforeCursor = text.substring(0, cursor);
    } catch (Exception e) {
      LOGGER.error(e.toString());
      return null;
    }

    int finalIndex = textBeforeCursor.length() - 1;
    int separatorIndex = -1;
    for (char separator : new char[] {' ', '\n', '\t'}) {
      int candidate = textBeforeCursor.lastIndexOf(separator);
      // keep the separator closest to the cursor, skipping one that sits right at the cursor
      if (candidate != finalIndex && candidate > separatorIndex) {
        separatorIndex = candidate;
      }
    }
    return textBeforeCursor.substring(separatorIndex + 1);
  }

  /**
   * Looks up the IPythonInterpreter of the same session by class name; the {@code false}
   * argument presumably means the interpreter is not opened by the lookup -- confirm against
   * Interpreter.getInterpreterInTheSameSessionByClassName.
   *
   * @return the result of the lookup
   */
  protected IPythonInterpreter getIPythonInterpreter() throws InterpreterException {
    return getInterpreterInTheSameSessionByClassName(IPythonInterpreter.class, false);
  }

  /**
   * Creates the ZeppelinContext for this interpreter: a PythonZeppelinContext built from the
   * hook registry of the interpreter group and the {@code zeppelin.python.maxResult} property
   * (default 1000).
   *
   * @return a new ZeppelinContext
   */
  protected ZeppelinContext createZeppelinContext() {
    return new PythonZeppelinContext(
        getInterpreterGroup().getInterpreterHookRegistry(),
        Integer.parseInt(getProperty("zeppelin.python.maxResult", "1000")));
  }

  /**
   * Returns the ZeppelinContext of this interpreter, creating it through
   * createZeppelinContext() on first use.
   *
   * @return the ZeppelinContext, never null
   */
  public ZeppelinContext getZeppelinContext() {
    if (zeppelinContext == null) {
      zeppelinContext = createZeppelinContext();
    }
    return zeppelinContext;
  }

  /**
   * Runs a bootstrap script, loaded from the classpath, in the python process (or in
   * IPythonInterpreter when it is in use).
   *
   * @param resourceName classpath name of the bootstrap script
   * @throws IOException if the script cannot be found or read, or if running it does not end
   *     with SUCCESS
   */
  protected void bootstrapInterpreter(String resourceName) throws IOException {
    LOGGER.info("Bootstrap interpreter via {}", resourceName);
    String bootstrapCode;
    try (InputStream in = getClass().getClassLoader().getResourceAsStream(resourceName)) {
      if (in == null) {
        // getResourceAsStream signals a missing resource with null rather than an exception
        throw new IOException("Bootstrap resource not found on classpath: " + resourceName);
      }
      // read with an explicit charset instead of the platform default
      bootstrapCode = IOUtils.toString(in, StandardCharsets.UTF_8);
    }

    InterpreterResult result;
    try {
      if (iPythonInterpreter != null) {
        result = iPythonInterpreter.interpret(bootstrapCode, InterpreterContext.get());
      } else {
        // Add hook explicitly, otherwise python will fail to execute the statement
        result = interpret(bootstrapCode + "\n" + "__zeppelin__._displayhook()",
                InterpreterContext.get());
      }
    } catch (InterpreterException e) {
      throw new IOException(e);
    }

    if (result.code() != Code.SUCCESS) {
      throw new IOException("Fail to run bootstrap script: " + resourceName + "\n" + result);
    }
    LOGGER.debug("Bootstrap python successfully.");
  }

  /**
   * Called by the python process, used for debugging purposes: writes the message to the debug
   * log.
   *
   * @param message text to log
   */
  public void logPythonOutput(String message) {
    LOGGER.debug("Python Process Output: {}", message);
  }

  /**
   * Launcher of the python process. On top of ProcessLauncher it waits for the python side to
   * report that it is initialized, and it wakes up threads waiting for a statement result when
   * the process ends.
   */
  public class PythonProcessLauncher extends ProcessLauncher {

    PythonProcessLauncher(CommandLine commandLine, Map<String, String> envs) {
      super(commandLine, envs);
    }

    /**
     * Waits until the process leaves the LAUNCHED state or the timeout elapses; in the latter
     * case {@code onTimeout()} is invoked.
     *
     * @param timeout maximum time to wait, in milliseconds
     */
    @Override
    public void waitForReady(int timeout) {
      long startTime = System.currentTimeMillis();
      synchronized (this) {
        if (state == State.LAUNCHED) {
          // logged once, not on every 100 ms poll
          LOGGER.info("Waiting for python process initialized");
        }
        while (state == State.LAUNCHED) {
          try {
            wait(100);
          } catch (InterruptedException e) {
            // keep the interrupt visible to callers before giving up
            Thread.currentThread().interrupt();
            throw new RuntimeException(e);
          }
          if ((System.currentTimeMillis() - startTime) > timeout) {
            onTimeout();
            break;
          }
        }
      }
    }

    /**
     * Marks the process as RUNNING and wakes up a thread blocked in waitForReady().
     */
    public void initialized() {
      synchronized (this) {
        this.state = State.RUNNING;
        notify();
      }
    }

    @Override
    public void onProcessFailed(ExecuteException e) {
      super.onProcessFailed(e);
      // wake up a thread waiting for a statement result so it can notice the process is gone
      synchronized (statementFinishedNotifier) {
        statementFinishedNotifier.notify();
      }
    }

    @Override
    public void onProcessComplete(int exitValue) {
      super.onProcessComplete(exitValue);
      // wake up a thread waiting for a statement result so it can notice the process is gone
      synchronized (statementFinishedNotifier) {
        statementFinishedNotifier.notify();
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy