All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.zeppelin.jdbc.hive.HiveUtils Maven / Gradle / Ivy

There is a newer version: 0.11.2
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more contributor license
 * agreements. See the NOTICE file distributed with this work for additional information regarding
 * copyright ownership. The ASF licenses this file to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance with the License. You may obtain a
 * copy of the License at
 * 

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software distributed under the License * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express * or implied. See the License for the specific language governing permissions and limitations under * the License. */ package org.apache.zeppelin.jdbc.hive; import org.apache.commons.dbcp2.DelegatingStatement; import org.apache.commons.lang3.StringUtils; import org.apache.hive.common.util.HiveVersionInfo; import org.apache.hive.jdbc.HiveStatement; import org.apache.zeppelin.interpreter.InterpreterContext; import org.apache.zeppelin.jdbc.JDBCInterpreter; import org.slf4j.Logger; import org.slf4j.LoggerFactory; import java.sql.Statement; import java.util.HashMap; import java.util.List; import java.util.Map; import java.util.Optional; import java.util.regex.Matcher; import java.util.regex.Pattern; /** * This class include hive specific stuff. * e.g. Display hive job execution info. * */ public class HiveUtils { private static final Logger LOGGER = LoggerFactory.getLogger(HiveUtils.class); private static final int DEFAULT_QUERY_PROGRESS_INTERVAL = 1000; private static final Pattern JOBURL_PATTERN = Pattern.compile(".*Tracking URL = (\\S*).*", Pattern.DOTALL); private static final Pattern APPID_PATTERN = Pattern.compile(".*with App id (\\S*)\\).*", Pattern.DOTALL); /** * Display hive job execution info, and progress info for hive >= 2.3 * * @param stmt * @param context * @param displayLog */ public static void startHiveMonitorThread(Statement stmt, InterpreterContext context, boolean displayLog, JDBCInterpreter jdbcInterpreter) { HiveStatement hiveStmt = (HiveStatement) ((DelegatingStatement) ((DelegatingStatement) stmt).getDelegate()).getDelegate(); String hiveVersion = HiveVersionInfo.getVersion(); ProgressBar progressBarTemp = null; if (isProgressBarSupported(hiveVersion)) { LOGGER.debug("ProgressBar is supported for hive version: {}", hiveVersion); progressBarTemp = new ProgressBar(); } else { LOGGER.debug("ProgressBar is not supported for hive version: {}", hiveVersion); } // need to use final variable progressBar in thread, so need progressBarTemp here. final ProgressBar progressBar = progressBarTemp; final long queryInterval = Long.parseLong( jdbcInterpreter.getProperty("zeppelin.jdbc.hive.monitor.query_interval", DEFAULT_QUERY_PROGRESS_INTERVAL + "")); Thread thread = new Thread(() -> { String jobUrlTemplate = jdbcInterpreter.getProperty("zeppelin.jdbc.hive.jobUrl.template"); boolean jobUrlExtracted = false; try { while (hiveStmt.hasMoreLogs() && !hiveStmt.isClosed() && !Thread.interrupted()) { Thread.sleep(queryInterval); List logs = hiveStmt.getQueryLog(); String logsOutput = StringUtils.join(logs, System.lineSeparator()); LOGGER.debug("Hive job output: {}", logsOutput); boolean displayLogProperty = context.getBooleanLocalProperty("displayLog", displayLog); if (!StringUtils.isBlank(logsOutput) && displayLogProperty) { context.out.write(logsOutput + "\n"); context.out.flush(); } if (!StringUtils.isBlank(logsOutput) && progressBar != null && displayLogProperty) { progressBar.operationLogShowedToUser(); } if (!jobUrlExtracted) { jobUrlExtracted = extractJobURL(logsOutput, jobUrlTemplate, context); } } } catch (InterruptedException e) { LOGGER.warn("Hive monitor thread is interrupted", e); Thread.currentThread().interrupt(); } catch (Exception e) { LOGGER.warn("Fail to monitor hive statement", e); } LOGGER.info("HiveMonitor-Thread is finished"); }); thread.setName("HiveMonitor-Thread"); thread.setDaemon(true); thread.start(); LOGGER.info("Start HiveMonitor-Thread for sql: {}", hiveStmt); if (progressBar != null) { // old: hiveStmt.setInPlaceUpdateStream(progressBar.getInPlaceUpdateStream(context.out)); // Move codes into ProgressBar to delay NoClassDefFoundError of InPlaceUpdateStream // until ProgressBar instanced. // When hive < 2.3, ProgressBar will not be instanced, so it works well. progressBar.setInPlaceUpdateStream(hiveStmt, context.out); } } /** * Extract hive job url in the following order: * 1. Extract job url from hive logs when hive use mr engine * 2. Extract job url from hive logs when hive use tez engine * 3. Extract job url via yarn tags when the above 2 methods doesn't work * @param logsOutput * @param jobUrlTemplate * @param context * @return */ private static boolean extractJobURL(String logsOutput, String jobUrlTemplate, InterpreterContext context) { String jobUrl = null; Optional mrJobURLOption = extractMRJobURL(logsOutput); Optional tezAppIdOption = extractTezAppId(logsOutput); if (mrJobURLOption.isPresent()) { jobUrl = mrJobURLOption.get(); LOGGER.info("Extract MR jobUrl: {} from logs", mrJobURLOption.get()); if (StringUtils.isNotBlank(jobUrlTemplate)) { Optional yarnAppId = extractYarnAppId(jobUrl); if (yarnAppId.isPresent()) { LOGGER.info("Extract yarn app id: {} from MR jobUrl", yarnAppId); jobUrl = jobUrlTemplate.replace("{{applicationId}}", yarnAppId.get()); } else { LOGGER.warn("Unable to extract yarn App Id from jobURL: {}", jobUrl); } } } else if (tezAppIdOption.isPresent()) { String yarnAppId = tezAppIdOption.get(); LOGGER.info("Extract Tez job yarn appId: {} from logs", yarnAppId); if (StringUtils.isNotBlank(jobUrlTemplate)) { jobUrl = jobUrlTemplate.replace("{{applicationId}}", yarnAppId); } else { LOGGER.warn("Unable to set JobUrl because zeppelin.jdbc.hive.jobUrl.template is not set"); jobUrl = yarnAppId; } } else { if (isHadoopJarAvailable()) { String yarnAppId = YarnUtil.getYarnAppIdByTag(context.getParagraphId()); if (StringUtils.isNotBlank(yarnAppId)) { LOGGER.info("Extract yarn appId: {} by tag", yarnAppId); if (StringUtils.isNotBlank(jobUrlTemplate)) { jobUrl = jobUrlTemplate.replace("{{applicationId}}", yarnAppId); } else { jobUrl = yarnAppId; } } } else { LOGGER.warn("Hadoop jar is not available, unable to use tags to fetch yarn app url"); } } if (jobUrl != null) { LOGGER.info("Detected hive jobUrl: {}", jobUrl); Map infos = new HashMap<>(); infos.put("jobUrl", jobUrl); infos.put("label", "HIVE JOB"); infos.put("tooltip", "View in YARN WEB UI"); infos.put("noteId", context.getNoteId()); infos.put("paraId", context.getParagraphId()); context.getIntpEventClient().onParaInfosReceived(infos); return true; } else { return false; } } private static boolean isHadoopJarAvailable() { try { Class.forName("org.apache.hadoop.conf.Configuration"); return true; } catch (ClassNotFoundException e) { return false; } } // Hive progress bar is supported from hive 2.3 (HIVE-16045) private static boolean isProgressBarSupported(String hiveVersion) { String[] tokens = hiveVersion.split("\\."); int majorVersion = Integer.parseInt(tokens[0]); int minorVersion = Integer.parseInt(tokens[1]); return majorVersion > 2 || ((majorVersion == 2) && minorVersion >= 3); } // extract hive job url from logs, it only works for MR engine. static Optional extractMRJobURL(String log) { Matcher matcher = JOBURL_PATTERN.matcher(log); if (matcher.matches()) { String jobURL = matcher.group(1); return Optional.of(jobURL); } return Optional.empty(); } // extract yarn appId from logs, it only works for Tez engine. static Optional extractTezAppId(String log) { Matcher matcher = APPID_PATTERN.matcher(log); if (matcher.matches()) { String appId = matcher.group(1); return Optional.of(appId); } return Optional.empty(); } // extract yarn appId from jobURL static Optional extractYarnAppId(String jobURL) { int pos = jobURL.indexOf("application_"); if (pos != -1) { return Optional.of(jobURL.substring(pos)); } return Optional.empty(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy