org.apache.kylin.tool.MrJobInfoExtractor

/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kylin.tool;

import java.io.File;
import java.io.IOException;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.Set;

import org.apache.commons.cli.Option;
import org.apache.commons.cli.OptionBuilder;
import org.apache.commons.io.FileUtils;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.http.HttpResponse;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.apache.http.util.EntityUtils;
import org.apache.kylin.common.util.HadoopUtil;
import org.apache.kylin.common.util.OptionsHelper;
import org.apache.kylin.tool.common.HadoopConfExtractor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.google.common.base.Preconditions;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
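
/**
 * Diagnostic extractor that pulls information about a single MapReduce job from the
 * YARN ResourceManager and MapReduce Job History Server REST APIs: job stats,
 * configuration, counters, and (optionally) details and logs of selected tasks.
 */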
public class MrJobInfoExtractor extends AbstractInfoExtractor {
    private static final Logger logger = LoggerFactory.getLogger(MrJobInfoExtractor.class);

    @SuppressWarnings("static-access")
    private static final Option OPTION_INCLUDE_DETAILS = OptionBuilder.withArgName("includeTasks").hasArg().isRequired(false).withDescription("Specify whether to include mr task details to extract. Default true.").create("includeTasks");

    @SuppressWarnings("static-access")
    private static final Option OPTION_MR_JOB_ID = OptionBuilder.withArgName("mrJobId").hasArg().isRequired(false).withDescription("Specify MR Job Id").create("mrJobId");

    private static final int HTTP_RETRY = 3;

    private Map<String, String> nodeInfoMap = Maps.newHashMap();

    private String jobHistoryUrlBase;
    private String yarnMasterUrlBase;

    public MrJobInfoExtractor() {
        packageType = "MR";

        options.addOption(OPTION_INCLUDE_DETAILS);
        options.addOption(OPTION_MR_JOB_ID);
    }

    public static void main(String[] args) {
        MrJobInfoExtractor extractor = new MrJobInfoExtractor();
        extractor.execute(args);
    }
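
    // Resolves the YARN ResourceManager web URL and the MapReduce Job History Server URL
    // from the current Hadoop configuration.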
    private void extractRestCheckUrl() {
        Configuration conf = HadoopUtil.getCurrentConfiguration();
        yarnMasterUrlBase = HadoopConfExtractor.extractYarnMasterUrl(conf);
        jobHistoryUrlBase = HadoopConfExtractor.extractJobHistoryUrl(yarnMasterUrlBase, conf);
        logger.info("job history url base: " + jobHistoryUrlBase);
    }
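
    // Performs an HTTP GET for the given URL (requesting JSON) and returns the response body,
    // retrying up to HTTP_RETRY times; returns null if every attempt fails.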
    private String getHttpResponse(String url) {
        DefaultHttpClient client = new DefaultHttpClient();
        String msg = null;
        int retry_times = 0;
        while (msg == null && retry_times < HTTP_RETRY) {
            retry_times++;

            HttpGet request = new HttpGet(url);
            try {
                request.addHeader("accept", "application/json");
                HttpResponse response = client.execute(request);
                msg = EntityUtils.toString(response.getEntity());
            } catch (Exception e) {
                logger.warn("Failed to fetch http response. Retry={}", retry_times, e);
            } finally {
                request.releaseConnection();
            }
        }
        return msg;
    }
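
    // Saves a single task's basic info, successful-attempt info, counters and syslog
    // into a sub-directory of exportDir named after the task id.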
    private void extractTaskDetail(String taskId, String user, File exportDir, String taskUrl, String urlBase) throws IOException {
        try {
            if (StringUtils.isEmpty(taskId)) {
                return;
            }

            String taskUrlBase = taskUrl + taskId;
            File destDir = new File(exportDir, taskId);

            // get task basic info
            String taskInfo = saveHttpResponseQuietly(new File(destDir, "task.json"), taskUrlBase);
            JsonNode taskAttempt = new ObjectMapper().readTree(taskInfo).path("task").path("successfulAttempt");
            String succAttemptId = taskAttempt.textValue();

            String attemptInfo = saveHttpResponseQuietly(new File(destDir, "task_attempts.json"), taskUrlBase + "/attempts/" + succAttemptId);
            JsonNode attemptAttempt = new ObjectMapper().readTree(attemptInfo).path("taskAttempt");
            String containerId = attemptAttempt.get("assignedContainerId").textValue();
            String nodeId = nodeInfoMap.get(attemptAttempt.get("nodeHttpAddress").textValue());

            // save task counters
            saveHttpResponseQuietly(new File(destDir, "task_counters.json"), taskUrlBase + "/counters");

            // save task logs
            String logUrl = urlBase + "/jobhistory/logs/" + nodeId + "/" + containerId + "/" + succAttemptId + "/" + user + "/syslog/?start=0";
            logger.debug("Fetch task log from url: " + logUrl);
            saveHttpResponseQuietly(new File(destDir, "task_log.txt"), logUrl);
        } catch (Exception e) {
            logger.warn("Failed to get task counters rest response.", e);
        }
    }
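
    // Fetches the given URL and writes the response body to dest, creating parent directories
    // as needed; failures are logged and the (possibly null) response is returned.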
    private String saveHttpResponseQuietly(File dest, String url) {
        String response = null;
        try {
            response = getHttpResponse(url);
            FileUtils.forceMkdir(dest.getParentFile());
            FileUtils.writeStringToFile(dest, response, Charset.defaultCharset());
            return response;
        } catch (Exception e) {
            logger.warn("Failed to get http response from {}.", url, e);
        }
        return response;
    }
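
    // Collects the cluster node list (to map node HTTP addresses to node ids), then saves the
    // job's stats, configuration and counters, and optionally the details of selected tasks.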
    @Override
    protected void executeExtract(OptionsHelper optionsHelper, File exportDir) throws Exception {
        try {
            boolean includeTaskDetails = optionsHelper.hasOption(OPTION_INCLUDE_DETAILS) ? Boolean.valueOf(optionsHelper.getOptionValue(OPTION_INCLUDE_DETAILS)) : true;
            String mrJobId = optionsHelper.getOptionValue(OPTION_MR_JOB_ID);

            extractRestCheckUrl();

            Preconditions.checkNotNull(jobHistoryUrlBase);
            Preconditions.checkNotNull(yarnMasterUrlBase);

            String jobUrlPrefix = jobHistoryUrlBase + "/ws/v1/history/mapreduce/jobs/" + mrJobId;

            // cache node info
            String nodeUrl = yarnMasterUrlBase + "/ws/v1/cluster/nodes";
            String nodeResponse = getHttpResponse(nodeUrl);
            JsonNode nodes = new ObjectMapper().readTree(nodeResponse).path("nodes").path("node");
            for (JsonNode node : nodes) {
                nodeInfoMap.put(node.path("nodeHTTPAddress").textValue(), node.path("id").textValue());
            }

            // save mr job stats
            String jobResponse = saveHttpResponseQuietly(new File(exportDir, "job.json"), jobUrlPrefix);
            String user = new ObjectMapper().readTree(jobResponse).path("job").path("user").textValue();

            // save mr job conf
            saveHttpResponseQuietly(new File(exportDir, "job_conf.json"), jobUrlPrefix + "/conf");

            // save mr job counters
            saveHttpResponseQuietly(new File(exportDir, "job_counters.json"), jobUrlPrefix + "/counters");

            // save task details
            if (includeTaskDetails) {
                extractTaskDetails(exportDir, jobUrlPrefix, jobHistoryUrlBase, user);
            }
        } catch (Exception e) {
            logger.warn("Failed to get mr tasks rest response.", e);
        }
    }
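
    // Scans all tasks of the job and picks representative ones per task type (first/last to start,
    // first/last to finish, shortest/longest elapsed time), then extracts full details for each.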
    private void extractTaskDetails(File exportDir, String jobUrlPrefix, String jobUrlBase, String user) {
        try {
            String tasksUrl = jobUrlPrefix + "/tasks/";
            String tasksResponse = saveHttpResponseQuietly(new File(exportDir, "job_tasks.json"), tasksUrl);
            JsonNode tasks = new ObjectMapper().readTree(tasksResponse).path("tasks").path("task");

            // find the first start map and reduce
            String firstStartMapId = null;
            String firstStartReduceId = null;
            long firstStartMapTime = Long.MAX_VALUE;
            long firstStartReduceTime = Long.MAX_VALUE;

            // find the first end map and reduce
            String firstEndMapId = null;
            String firstEndReduceId = null;
            long firstEndMapTime = Long.MAX_VALUE;
            long firstEndReduceTime = Long.MAX_VALUE;

            // find the last start map and reduce
            String lastStartMapId = null;
            String lastStartReduceId = null;
            long lastStartMapTime = 0L;
            long lastStartReduceTime = 0L;

            // find the last end map and reduce
            String lastEndMapId = null;
            String lastEndReduceId = null;
            long lastEndMapTime = 0L;
            long lastEndReduceTime = 0L;

            // find the max map and reduce duration
            String maxReduceId = null;
            String maxMapId = null;
            long maxMapElapsedTime = 0L;
            long maxReduceElapsedTime = 0L;

            // find the min map and reduce duration
            String minReduceId = null;
            String minMapId = null;
            long minMapElapsedTime = Long.MAX_VALUE;
            long minReduceElapsedTime = Long.MAX_VALUE;

            Set<String> selectedTaskIds = Sets.newHashSet();

            for (JsonNode node : tasks) {
                if (node.get("type").textValue().equals("MAP")) {
                    if (node.get("elapsedTime").longValue() >= maxMapElapsedTime) {
                        maxMapElapsedTime = node.get("elapsedTime").longValue();
                        maxMapId = node.get("id").textValue();
                    }
                    if (node.get("elapsedTime").longValue() <= minMapElapsedTime) {
                        minMapElapsedTime = node.get("elapsedTime").longValue();
                        minMapId = node.get("id").textValue();
                    }
                    if (node.get("startTime").longValue() <= firstStartMapTime) {
                        firstStartMapTime = node.get("startTime").longValue();
                        firstStartMapId = node.get("id").textValue();
                    }
                    if (node.get("startTime").longValue() >= lastStartMapTime) {
                        lastStartMapTime = node.get("startTime").longValue();
                        lastStartMapId = node.get("id").textValue();
                    }
                    if (node.get("finishTime").longValue() <= firstEndMapTime) {
                        firstEndMapTime = node.get("finishTime").longValue();
                        firstEndMapId = node.get("id").textValue();
                    }
                    if (node.get("finishTime").longValue() >= lastEndMapTime) {
                        lastEndMapTime = node.get("finishTime").longValue();
                        lastEndMapId = node.get("id").textValue();
                    }
                }

                if (node.get("type").textValue().equals("REDUCE")) {
                    if (node.get("elapsedTime").longValue() >= maxReduceElapsedTime) {
                        maxReduceElapsedTime = node.get("elapsedTime").longValue();
                        maxReduceId = node.get("id").textValue();
                    }
                    if (node.get("elapsedTime").longValue() <= minReduceElapsedTime) {
                        minReduceElapsedTime = node.get("elapsedTime").longValue();
                        minReduceId = node.get("id").textValue();
                    }
                    if (node.get("startTime").longValue() <= firstStartReduceTime) {
                        firstStartReduceTime = node.get("startTime").longValue();
                        firstStartReduceId = node.get("id").textValue();
                    }
                    if (node.get("startTime").longValue() >= lastStartReduceTime) {
                        lastStartReduceTime = node.get("startTime").longValue();
                        lastStartReduceId = node.get("id").textValue();
                    }
                    if (node.get("finishTime").longValue() <= firstEndReduceTime) {
                        firstEndReduceTime = node.get("finishTime").longValue();
                        firstEndReduceId = node.get("id").textValue();
                    }
                    if (node.get("finishTime").longValue() >= lastEndReduceTime) {
                        lastEndReduceTime = node.get("finishTime").longValue();
                        lastEndReduceId = node.get("id").textValue();
                    }
                }
            }
            selectedTaskIds.add(maxMapId);
            selectedTaskIds.add(maxReduceId);
            selectedTaskIds.add(minMapId);
            selectedTaskIds.add(minReduceId);
            selectedTaskIds.add(firstStartMapId);
            selectedTaskIds.add(firstStartReduceId);
            selectedTaskIds.add(lastStartMapId);
            selectedTaskIds.add(lastStartReduceId);
            selectedTaskIds.add(firstEndMapId);
            selectedTaskIds.add(firstEndReduceId);
            selectedTaskIds.add(lastEndMapId);
            selectedTaskIds.add(lastEndReduceId);

            File tasksDir = new File(exportDir, "tasks");
            FileUtils.forceMkdir(tasksDir);

            for (String taskId : selectedTaskIds) {
                extractTaskDetail(taskId, user, tasksDir, tasksUrl, jobUrlBase);
            }
        } catch (Exception e) {
            logger.warn("Failed to get mr tasks rest response.", e);
        }
    }
}