Please wait. This can take a few minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.apache.zeppelin.submarine.job.SubmarineJob Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.submarine.job;
import com.google.common.annotations.VisibleForTesting;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.fs.Path;
import org.apache.zeppelin.interpreter.InterpreterContext;
import org.apache.zeppelin.submarine.hadoop.HdfsClient;
import org.apache.zeppelin.submarine.job.thread.JobRunThread;
import org.apache.zeppelin.submarine.commons.SubmarineCommand;
import org.apache.zeppelin.submarine.commons.SubmarineConstants;
import org.apache.zeppelin.submarine.commons.SubmarineUI;
import org.apache.zeppelin.submarine.commons.SubmarineUtils;
import org.apache.zeppelin.submarine.job.thread.TensorboardRunThread;
import org.apache.zeppelin.submarine.hadoop.FinalApplicationStatus;
import org.apache.zeppelin.submarine.hadoop.YarnApplicationState;
import org.apache.zeppelin.submarine.hadoop.YarnClient;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.File;
import java.io.IOException;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.atomic.AtomicBoolean;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.JOB_STATUS;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.TENSORBOARD_URL;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.TF_TENSORBOARD_ENABLE;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APPLICATION_FINAL_STATUS;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APPLICATION_ID;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APPLICATION_NAME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APPLICATION_STATUS;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APPLICATION_URL;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_ELAPSED_TIME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_FINAL_STATUS_NAME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_FINISHED_TIME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_LAUNCHTIME_NAME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_LAUNCH_TIME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_STARTEDTIME_NAME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_STARTED_TIME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_APP_STATE_NAME;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_TENSORBOARD_URL;
import static org.apache.zeppelin.submarine.commons.SubmarineConstants.YARN_WEB_HTTP_ADDRESS;
import static org.apache.zeppelin.submarine.job.SubmarineJobStatus.EXECUTE_SUBMARINE;
/**
 * Tracks the Submarine job and the TensorBoard service that belong to one note.
 *
 * <p>The instance is itself a thread: the constructor starts it, and {@link #run()} then polls
 * YARN every {@link #SYNC_SUBMARINE_RUNTIME_CYCLE} milliseconds, publishing what it finds through
 * {@code SubmarineUtils.setAgulObjValue} / {@code removeAgulObjValue} on the current
 * {@link InterpreterContext}. The remaining public methods are the commands a caller can issue
 * (run job, delete job, run TensorBoard, show dashboard/usage, clean caches).
 */
public class SubmarineJob extends Thread {
  private static final Logger LOGGER = LoggerFactory.getLogger(SubmarineJob.class);

  /** Pause between two polling passes of {@link #run()}, in milliseconds. */
  private static final long SYNC_SUBMARINE_RUNTIME_CYCLE = 3000;

  /** Container port whose export entry is treated as the TensorBoard endpoint. */
  private static final String TENSORBOARD_CONTAINER_PORT = "6006";

  // Must be declared before `shell`, whose initializer reads it.
  private static final boolean isWindows = System.getProperty("os.name").startsWith("Windows");

  public static final String DIRECTORY_USER_HOME = "shell.working.directory.userName.home";
  public static final String shell = isWindows ? "cmd /c" : "bash -c";
  public static final String TIMEOUT_PROPERTY = "submarine.command.timeout.millisecond";
  public static final String defaultTimeout = "100000";
  public static final String SUBMARINE_JOBRUN_TF_JINJA
      = "jinja_templates/submarine-job-run-tf.jinja";
  public static final String SUBMARINE_COMMAND_JINJA
      = "jinja_templates/submarine-command.jinja";
  public static final String SUBMARINE_TENSORBOARD_JINJA
      = "jinja_templates/submarine-tensorboard.jinja";

  /** Polling flag: {@link #run()} keeps looping while this is {@code true}. */
  private final AtomicBoolean running = new AtomicBoolean(true);

  private final YarnClient yarnClient;
  private final HdfsClient hdfsClient;
  private final Properties properties;
  private final String noteId;
  private final String noteName;
  private final String userName;

  // Both are replaced together by setIntpContext().
  private SubmarineUI submarineUI;
  private InterpreterContext intpContext;

  private File pythonWorkDir = null;

  // The fields below are declared but never read inside this class. They are kept so the
  // class layout (and the imports that back their types) stays unchanged.
  private String applicationId = null;
  private YarnApplicationState yarnApplicationState = null;
  private FinalApplicationStatus finalApplicationStatus = null;
  private long startTime = 0;
  private long launchTime = 0;
  private long finishTime = 0;
  private float progress = 0; // [0 ~ 100]
  private SubmarineJobStatus currentJobStatus = EXECUTE_SUBMARINE;

  // Worker threads created on demand by runJob() / runTensorBoard().
  JobRunThread jobRunThread = null;
  TensorboardRunThread tensorboardRunThread = null;

  /**
   * Creates the job tracker for the note referenced by {@code context} and immediately starts
   * the polling thread.
   *
   * @param context    interpreter context supplying note id, note name and user
   * @param properties interpreter properties; also handed to the YARN and HDFS clients
   */
  public SubmarineJob(InterpreterContext context, Properties properties) {
    this.intpContext = context;
    this.properties = properties;
    this.noteId = context.getNoteId();
    this.noteName = context.getNoteName();
    this.userName = context.getAuthenticationInfo().getUser();
    this.yarnClient = new YarnClient(properties);
    this.hdfsClient = new HdfsClient(properties);
    this.submarineUI = new SubmarineUI(intpContext);
    // NOTE(review): starting the thread here publishes `this` before construction completes
    // for any subclass; kept because callers rely on the constructor starting the poller.
    this.start();
  }

  /**
   * Polling loop: refreshes the job state and the TensorBoard state, then sleeps for
   * {@link #SYNC_SUBMARINE_RUNTIME_CYCLE} ms, until {@link #stopRunning()} is called or the
   * thread is interrupted.
   */
  @Override
  public void run() {
    while (running.get()) {
      String jobName = SubmarineUtils.getJobName(userName, noteId);
      updateJobStateByYarn(jobName);
      getTensorboardStatus();
      try {
        Thread.sleep(SYNC_SUBMARINE_RUNTIME_CYCLE);
      } catch (InterruptedException e) {
        LOGGER.error(e.getMessage(), e);
        // Honour the interruption: restore the flag for whoever inspects this thread and
        // leave the loop instead of silently continuing to poll.
        Thread.currentThread().interrupt();
        running.set(false);
        return;
      }
    }
  }

  /**
   * @return {@code true} while the polling loop has not been asked to stop
   */
  @VisibleForTesting
  public boolean getRunning() {
    return running.get();
  }

  /**
   * Asks the polling loop to end and stops the job and TensorBoard worker threads if they are
   * still alive.
   */
  public void stopRunning() {
    running.set(false);

    if (null != jobRunThread && jobRunThread.isAlive()) {
      jobRunThread.stopRunning();
    }

    if (null != tensorboardRunThread && tensorboardRunThread.isAlive()) {
      tensorboardRunThread.stopRunning();
    }
  }

  /**
   * @return the value of the {@code TF_CHECKPOINT_PATH} property, or an empty string when unset
   */
  public String getUserTensorboardPath() {
    return properties.getProperty(SubmarineConstants.TF_CHECKPOINT_PATH, "");
  }

  /**
   * @return the checkpoint directory of this note: the user TensorBoard path followed by
   *     {@code "/"} and the note id
   */
  public String getJobDefaultCheckpointPath() {
    return getUserTensorboardPath() + "/" + noteId;
  }

  /**
   * Deletes this note's checkpoint directory through the HDFS client.
   *
   * <p>Paths with a depth of 3 or less are refused (an error is written to the UI log) so that
   * a missing or too-short configuration cannot trigger deletion of a high-level directory.
   * An {@link IOException} raised by the delete is logged and not propagated.
   */
  public void cleanJobDefaultCheckpointPath() {
    String jobCheckpointPath = getJobDefaultCheckpointPath();
    Path notePath = new Path(jobCheckpointPath);
    if (notePath.depth() <= 3) {
      submarineUI.outputLog("ERROR", "Checkpoint path depth must be greater than 3");
      return;
    }

    try {
      String message = "Clean up the checkpoint directory: " + jobCheckpointPath;
      submarineUI.outputLog("", message);
      hdfsClient.delete(notePath);
    } catch (IOException e) {
      LOGGER.error(e.getMessage(), e);
    }
  }

  public Properties getProperties() {
    return properties;
  }

  public HdfsClient getHdfsClient() {
    return hdfsClient;
  }

  public SubmarineUI getSubmarineUI() {
    return submarineUI;
  }

  public void setPythonWorkDir(File pythonWorkDir) {
    this.pythonWorkDir = pythonWorkDir;
  }

  public File getPythonWorkDir() {
    return this.pythonWorkDir;
  }

  /** Renders the dashboard UI. */
  public void onDashboard() {
    submarineUI.createSubmarineUI(SubmarineCommand.DASHBOARD);
  }

  /**
   * Renders the job-run UI and starts a {@link JobRunThread} unless YARN already reports the
   * job or a previous start-up thread is still alive; in those cases an INFO line is written
   * to the UI log instead.
   */
  public void runJob() {
    // Need to display the UI when the page is reloaded, don't create it in the thread
    submarineUI.createSubmarineUI(SubmarineCommand.JOB_RUN);
    submarineUI.createLogHeadUI();

    String jobName = SubmarineUtils.getJobName(userName, noteId);
    Map<String, Object> mapAppStatus = getJobStateByYarn(jobName);
    if (!mapAppStatus.isEmpty()) {
      submarineUI.outputLog("INFO", "JOB " + jobName + " already running.");
      return;
    }
    if (null != jobRunThread && jobRunThread.isAlive()) {
      submarineUI.outputLog("INFO", "JOB " + jobName + " being start up.");
      return;
    }
    jobRunThread = new JobRunThread(this);
    jobRunThread.start();
  }

  /**
   * Renders the job-stop UI and asks the YARN client to delete the named service.
   *
   * @param serviceName name of the service to delete
   */
  public void deleteJob(String serviceName) {
    submarineUI.createSubmarineUI(SubmarineCommand.JOB_STOP);
    yarnClient.deleteService(serviceName);
  }

  /**
   * Renders the TensorBoard-run UI and starts a {@link TensorboardRunThread} unless YARN
   * already reports the TensorBoard service or a previous start-up thread is still alive; in
   * those cases an INFO line is written to the UI log instead.
   */
  public void runTensorBoard() {
    submarineUI.createSubmarineUI(SubmarineCommand.TENSORBOARD_RUN);
    submarineUI.createLogHeadUI();

    String tensorboardName = SubmarineUtils.getTensorboardName(userName);
    Map<String, Object> mapAppStatus = getJobStateByYarn(tensorboardName);
    if (!mapAppStatus.isEmpty()) {
      submarineUI.outputLog("INFO", "Tensorboard already running.");
      return;
    }
    if (null != tensorboardRunThread && tensorboardRunThread.isAlive()) {
      submarineUI.outputLog("INFO", "Tensorboard being start up.");
      return;
    }
    tensorboardRunThread = new TensorboardRunThread(this);
    tensorboardRunThread.start();
  }

  /**
   * Checks whether the user's TensorBoard service is reachable and keeps the published
   * {@code YARN_TENSORBOARD_URL} / {@code TENSORBOARD_URL} values in sync with that.
   *
   * <p>When the {@code TF_TENSORBOARD_ENABLE} property is not {@code "true"} nothing is
   * published or removed.
   *
   * @return {@code true} when an exported port mapped to container port 6006 was found
   */
  public boolean getTensorboardStatus() {
    String enableTensorboard = properties.getProperty(TF_TENSORBOARD_ENABLE, "false");
    if (!StringUtils.equals(enableTensorboard, "true")) {
      return false;
    }

    String tensorboardName = SubmarineUtils.getTensorboardName(userName);
    Map<String, Object> mapAppStatus = getJobStateByYarn(tensorboardName);

    boolean tensorboardExist = false;
    if (mapAppStatus.containsKey(YARN_APPLICATION_ID)) {
      // create tensorboard link of YARN
      String appId = mapAppStatus.get(YARN_APPLICATION_ID).toString();
      SubmarineUtils.setAgulObjValue(intpContext, YARN_TENSORBOARD_URL,
          buildYarnAppUrl(appId, tensorboardName));
      tensorboardExist = publishTensorboardUrl(tensorboardName);
    } else {
      SubmarineUtils.removeAgulObjValue(intpContext, YARN_TENSORBOARD_URL);
    }

    if (!tensorboardExist) {
      SubmarineUtils.removeAgulObjValue(intpContext, TENSORBOARD_URL);
    }
    return tensorboardExist;
  }

  /**
   * Scans the ports exported by the given service and, for the first entry mapped to container
   * port 6006, publishes {@code http://<hostIp>:<hostPort>} as {@code TENSORBOARD_URL}.
   *
   * @return {@code true} when such an entry was found and published
   */
  private boolean publishTensorboardUrl(String tensorboardName) {
    List<Map<String, Object>> listExportPorts = yarnClient.getAppExportPorts(tensorboardName);
    for (Map<String, Object> exportPorts : listExportPorts) {
      boolean complete = exportPorts.containsKey(YarnClient.HOST_IP)
          && exportPorts.containsKey(YarnClient.HOST_PORT)
          && exportPorts.containsKey(YarnClient.CONTAINER_PORT);
      if (!complete) {
        continue;
      }

      String intpAppHostIp = (String) exportPorts.get(YarnClient.HOST_IP);
      String intpAppHostPort = (String) exportPorts.get(YarnClient.HOST_PORT);
      String intpAppContainerPort = (String) exportPorts.get(YarnClient.CONTAINER_PORT);
      if (StringUtils.equals(TENSORBOARD_CONTAINER_PORT, intpAppContainerPort)) {
        LOGGER.debug("Detection tensorboard Container hostIp:{}, hostPort:{}, "
            + "containerPort:{}.", intpAppHostIp, intpAppHostPort, intpAppContainerPort);

        // show tensorboard link button
        String tensorboardUrl = "http://" + intpAppHostIp + ":" + intpAppHostPort;
        SubmarineUtils.setAgulObjValue(intpContext, TENSORBOARD_URL, tensorboardUrl);
        return true;
      }
    }
    return false;
  }

  /**
   * Builds the YARN UI link for a service:
   * {@code <YARN_WEB_HTTP_ADDRESS>/ui2/#/yarn-app/<appId>/components?service=<serviceName>}.
   */
  private String buildYarnAppUrl(String appId, String serviceName) {
    String yarnBaseUrl = properties.getProperty(YARN_WEB_HTTP_ADDRESS, "");
    return yarnBaseUrl + "/ui2/#/yarn-app/" + appId + "/components?service=" + serviceName;
  }

  /** Renders the usage UI. */
  public void showUsage() {
    submarineUI.createSubmarineUI(SubmarineCommand.USAGE);
  }

  /**
   * Removes every angular object registered for this note and the current paragraph, then
   * renders the dashboard UI again.
   */
  public void cleanRuntimeCache() {
    intpContext.getAngularObjectRegistry().removeAll(noteId, intpContext.getParagraphId());
    submarineUI.createSubmarineUI(SubmarineCommand.DASHBOARD);
  }

  public String getNoteId() {
    return noteId;
  }

  public String getUserName() {
    return this.userName;
  }

  /**
   * Publishes {@code toStatus} as {@code JOB_STATUS} and records it as the current status.
   *
   * @param toStatus the status to switch to
   */
  public void setCurrentJobState(SubmarineJobStatus toStatus) {
    SubmarineUtils.setAgulObjValue(intpContext, JOB_STATUS, toStatus.getStatus());
    currentJobStatus = toStatus;
  }

  /**
   * Looks up a service by name and, when the service entry carries both an application id and
   * an application name, merges it over the cluster-application entry for that id.
   *
   * @param jobName service name to look up
   * @return the merged status map, or an empty map when the service entry lacks the id or name
   */
  public Map<String, Object> getJobStateByYarn(String jobName) {
    Map<String, Object> mapStatus = yarnClient.getAppServices(jobName);
    boolean identified = mapStatus.containsKey(YARN_APPLICATION_ID)
        && mapStatus.containsKey(YARN_APPLICATION_NAME);
    if (!identified) {
      return new HashMap<>();
    }

    String appId = mapStatus.get(YARN_APPLICATION_ID).toString();
    Map<String, Object> mapAppStatus = yarnClient.getClusterApps(appId);
    // Service-level values win over cluster-level values with the same key.
    mapAppStatus.putAll(mapStatus);
    return mapAppStatus;
  }

  /**
   * Refreshes every published value that describes the named application: id, state, final
   * status, job status, start/launch/finish/elapsed times and the YARN UI link. When YARN
   * reports nothing for the name, the published values are removed instead.
   *
   * @param appName name of the application/service to refresh
   */
  public void updateJobStateByYarn(String appName) {
    Map<String, Object> mapAppStatus = getJobStateByYarn(appName);
    if (mapAppStatus.isEmpty()) {
      clearPublishedJobState();
      return;
    }

    String state = "";
    String finalStatus = "";
    String appId = "";
    if (mapAppStatus.containsKey(YARN_APPLICATION_ID)) {
      appId = mapAppStatus.get(YARN_APPLICATION_ID).toString();
    }
    if (mapAppStatus.containsKey(YARN_APP_STATE_NAME)) {
      state = mapAppStatus.get(YARN_APP_STATE_NAME).toString();
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APPLICATION_STATUS, state);
    }
    if (mapAppStatus.containsKey(YARN_APP_FINAL_STATUS_NAME)) {
      finalStatus = mapAppStatus.get(YARN_APP_FINAL_STATUS_NAME).toString();
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APPLICATION_FINAL_STATUS, finalStatus);
    }
    setCurrentJobState(convertYarnState(state, finalStatus));

    // Each value is parsed on its own so one malformed entry cannot hide the others.
    long startedTime = readPositiveLong(mapAppStatus, YARN_APP_STARTEDTIME_NAME);
    if (startedTime > 0) {
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APP_STARTED_TIME,
          new Date(startedTime).toString());
    }
    long launchedTime = readPositiveLong(mapAppStatus, YARN_APP_LAUNCHTIME_NAME);
    if (launchedTime > 0) {
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APP_LAUNCH_TIME,
          new Date(launchedTime).toString());
    }
    long finishedTime = readPositiveLong(mapAppStatus, "finishedTime");
    if (finishedTime > 0) {
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APP_FINISHED_TIME,
          new Date(finishedTime).toString());
    }
    long elapsedTime = readPositiveLong(mapAppStatus, "elapsedTime");
    if (elapsedTime > 0) {
      SubmarineUtils.setAgulObjValue(intpContext, YARN_APP_ELAPSED_TIME,
          org.apache.hadoop.util.StringUtils.formatTime(elapsedTime));
    }

    // create YARN UI link
    SubmarineUtils.setAgulObjValue(intpContext, YARN_APPLICATION_ID, appId);
    SubmarineUtils.setAgulObjValue(intpContext, YARN_APPLICATION_URL,
        buildYarnAppUrl(appId, appName));
  }

  /** Removes every published value that describes the job's YARN application. */
  private void clearPublishedJobState() {
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APPLICATION_ID);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APPLICATION_STATUS);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APPLICATION_URL);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APP_STARTED_TIME);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APP_LAUNCH_TIME);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APP_FINISHED_TIME);
    SubmarineUtils.removeAgulObjValue(intpContext, YARN_APP_ELAPSED_TIME);
    // TODO(Xun Liu) Not wait job run ???
    SubmarineUtils.removeAgulObjValue(intpContext, JOB_STATUS);
  }

  /**
   * Reads {@code key} from the status map as a {@code long}.
   *
   * @return the parsed value, or {@code -1} when the key is absent, maps to {@code null}, or
   *     does not hold a parseable number (the latter is logged)
   */
  private static long readPositiveLong(Map<String, Object> appStatus, String key) {
    Object raw = appStatus.get(key);
    if (raw == null) {
      return -1;
    }
    try {
      return Long.parseLong(raw.toString());
    } catch (NumberFormatException e) {
      LOGGER.error("Invalid numeric value for '{}': {}", key, raw, e);
      return -1;
    }
  }

  /**
   * Maps the YARN state and final status strings to a {@link SubmarineJobStatus}.
   *
   * <p>The state is mapped first; a recognised {@code finalStatus} then overrides it. When
   * neither is recognised the result is {@link SubmarineJobStatus#UNKNOWN}.
   *
   * <p>NOTE(review): both strings are matched against the same set of state names. YARN's
   * final-status values appear to be UNDEFINED/SUCCEEDED/FAILED/KILLED/ENDED, so only FAILED
   * and KILLED would ever override here - confirm whether SUCCEEDED needs its own mapping.
   */
  private SubmarineJobStatus convertYarnState(String status, String finalStatus) {
    SubmarineJobStatus fromState = mapYarnStateName(status, SubmarineJobStatus.UNKNOWN);
    return mapYarnStateName(finalStatus, fromState);
  }

  /** Returns the status matching a YARN state name, or {@code fallback} when none matches. */
  private static SubmarineJobStatus mapYarnStateName(String name, SubmarineJobStatus fallback) {
    switch (name) {
      case "NEW":
        return SubmarineJobStatus.YARN_NEW;
      case "NEW_SAVING":
        return SubmarineJobStatus.YARN_NEW_SAVING;
      case "SUBMITTED":
        return SubmarineJobStatus.YARN_SUBMITTED;
      case "ACCEPTED":
        return SubmarineJobStatus.YARN_ACCEPTED;
      case "RUNNING":
        return SubmarineJobStatus.YARN_RUNNING;
      case "FINISHED":
        return SubmarineJobStatus.YARN_FINISHED;
      case "FAILED":
        return SubmarineJobStatus.YARN_FAILED;
      case "KILLED":
        return SubmarineJobStatus.YARN_KILLED;
      case "STOPPED":
        return SubmarineJobStatus.YARN_STOPPED;
      default:
        return fallback;
    }
  }

  public InterpreterContext getIntpContext() {
    return intpContext;
  }

  /**
   * Switches to a new interpreter context and rebuilds the UI helper on top of it.
   *
   * @param intpContext the context to use from now on
   */
  public void setIntpContext(InterpreterContext intpContext) {
    this.intpContext = intpContext;
    this.submarineUI = new SubmarineUI(intpContext);
  }
}