/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.elasticsearch.hadoop.mr;

import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.unit.TimeValue;

/**
 * Utility class handling the various Hadoop configuration properties, aware of the property
 * renames between Hadoop 1 and YARN (Hadoop 2): getters look up the Hadoop 2 name first and
 * fall back to the Hadoop 1 name, while setters write both.
 */
public abstract class HadoopCfgUtils {
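
    /*
     * Minimal usage sketch (illustrative values, not part of this class):
     *
     *   Configuration cfg = new Configuration();
     *   HadoopCfgUtils.setFileSystem(cfg, "hdfs://namenode:8020"); // writes fs.defaultFS and fs.default.name
     *   String fs = HadoopCfgUtils.getFileSystem(cfg);             // reads fs.defaultFS, else fs.default.name
     *   boolean local = HadoopCfgUtils.isLocal(cfg);
     */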

    public static boolean isLocal(Configuration cfg) {
        return "local".equals(cfg.get("mapreduce.framework.name")) || "local".equals(getJobTracker(cfg));
    }

    public static String getFileSystem(Configuration cfg) {
        return get(cfg, "fs.defaultFS", "fs.default.name");
    }

    public static void setFileSystem(Configuration cfg, String value) {
        set(cfg, value, "fs.defaultFS", "fs.default.name");
    }

    public static String getJobTracker(Configuration cfg) {
        return get(cfg, "mapreduce.jobtracker.address", "mapred.job.tracker");
    }

    public static void setJobTracker(Configuration cfg, String value) {
        set(cfg, value, "mapreduce.jobtracker.address", "mapred.job.tracker");
    }

    public static String getFileOutputFormatDir(Configuration cfg) {
        return get(cfg, "mapreduce.output.fileoutputformat.outputdir", "mapred.output.dir");
    }

    public static void setFileOutputFormatDir(Configuration cfg, String value) {
        set(cfg, value, "mapreduce.output.fileoutputformat.outputdir", "mapred.output.dir");
    }

    public static String getOutputCommitterClass(Configuration cfg) {
        return get(cfg, "mapred.output.committer.class", null);
    }

    public static void setOutputCommitterClass(Configuration cfg, String value) {
        set(cfg, value, "mapred.output.committer.class", null);
    }

    public static String getTaskAttemptId(Configuration cfg) {
        return get(cfg, "mapreduce.task.attempt.id", "mapred.task.id");
    }

    public static String getTaskId(Configuration cfg) {
        return get(cfg, "mapreduce.task.id", "mapred.tip.id");
    }

    public static String getReduceTasks(Configuration cfg) {
        return get(cfg, "mapreduce.job.reduces", "mapred.reduce.tasks", "1");
    }

    public static boolean getSpeculativeReduce(Configuration cfg) {
        return get(cfg, "mapreduce.reduce.speculative", "mapred.reduce.tasks.speculative.execution", true);
    }

    public static boolean getSpeculativeMap(Configuration cfg) {
        return get(cfg, "mapreduce.map.speculative", "mapred.map.tasks.speculative.execution", true);
    }

    public static void setGenericOptions(Configuration cfg) {
        set(cfg, "true", "mapreduce.client.genericoptionsparser.used", "mapred.used.genericoptionsparser");
    }

    public static TimeValue getTaskTimeout(Configuration cfg) {
        return TimeValue.parseTimeValue(get(cfg, "mapreduce.task.timeout", "mapred.task.timeout", "600s"));
    }

    public static Properties asProperties(Configuration cfg) {
        Properties props = new Properties();
        if (cfg != null) {
            // Configuration iterates over its entries as Map.Entry<String, String>
            for (Map.Entry<String, String> entry : cfg) {
                props.setProperty(entry.getKey(), entry.getValue());
            }
        }
        return props;
    }
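
    // illustrative use: Properties props = HadoopCfgUtils.asProperties(job.getConfiguration());
    // flattens every Configuration entry into a standalone java.util.Properties copy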

    private static String get(Configuration cfg, String hadoop2, String hadoop1) {
        return get(cfg, hadoop2, hadoop1, null);
    }

    // looks up the Hadoop 2 property first, then the Hadoop 1 name (when given), then the default
    private static String get(Configuration cfg, String hadoop2, String hadoop1, String defaultValue) {
        String prop = cfg.get(hadoop2);
        return (prop != null ? prop : (hadoop1 != null ? cfg.get(hadoop1) : defaultValue));
    }

    private static boolean get(Configuration cfg, String hadoop2, String hadoop1, boolean defaultValue) {
        String result = get(cfg, hadoop2, hadoop1);
        if ("true".equals(result))
            return true;
        else if ("false".equals(result))
            return false;
        else
            return defaultValue;
    }

    // writes the value under both the Hadoop 2 and (when given) the Hadoop 1 property name
    private static void set(Configuration cfg, String value, String hadoop2, String hadoop1) {
        cfg.set(hadoop2, value);
        if (hadoop1 != null) {
            cfg.set(hadoop1, value);
        }
    }
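
    /*
     * Fallback sketch (illustrative values):
     *   cfg.set("mapred.reduce.tasks.speculative.execution", "false");
     *   get(cfg, "mapreduce.reduce.speculative", "mapred.reduce.tasks.speculative.execution", true) // -> false
     * An unset value, or one that is neither "true" nor "false", yields the supplied default.
     */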

    public static JobConf asJobConf(Configuration cfg) {
        return (cfg instanceof JobConf ? (JobConf) cfg : new JobConf(cfg));
    }

    public static String getMapValueClass(Configuration cfg) {
        // Hadoop 2 name first, Hadoop 1 name as fallback, matching the get() contract
        return get(cfg, "mapreduce.map.output.value.class", "mapred.mapoutput.value.class");
    }

    public static TaskID getTaskID(Configuration cfg) {
        // first try the attempt id since some Hadoop versions mix the two
        String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
        if (StringUtils.hasText(taskAttemptId)) {
            try {
                return TaskAttemptID.forName(taskAttemptId).getTaskID();
            } catch (IllegalArgumentException ex) {
                // the task attempt id is invalid (Tez in particular uses a malformed string - see #346);
                // fall back to parsing the task id out of it
                return parseTaskIdFromTaskAttemptId(taskAttemptId);
            }
        }
        String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
        // double-check the task id to work around a bug in Hadoop 2.5.x
        if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
            return TaskID.forName(taskIdProp);
        }
        return null;
    }
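
    /*
     * Resolution order sketch: a well-formed attempt id such as "attempt_200707121733_0003_m_000005_0"
     * (the standard Hadoop layout, shown here purely as an illustration) resolves directly through
     * TaskAttemptID.forName(); a malformed one drops into parseTaskIdFromTaskAttemptId(); failing
     * both, the plain task id property is consulted.
     */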

    private static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
        // Tez in particular uses an incorrect string (task1244XXX instead of task_1244), which makes the parsing fail;
        // this method tries to cope with such ids by falling back to the numeric components where possible
        if (taskAttemptId.startsWith("task")) {
            taskAttemptId = taskAttemptId.substring(4);
        }
        if (taskAttemptId.startsWith("_")) {
            taskAttemptId = taskAttemptId.substring(1);
        }
        List<String> tokenize = StringUtils.tokenize(taskAttemptId, "_");
        // need at least 4 entries, as in 123123123123_0001_r_0000_4
        if (tokenize.size() < 4) {
            LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt (too few tokens) " + taskAttemptId);
            return null;
        }
        // parse straight away - an exception here most likely means an unknown, newer format, so simply bail out
        try {
            return new TaskID(tokenize.get(0), Integer.parseInt(tokenize.get(1)), tokenize.get(2).startsWith("m"), Integer.parseInt(tokenize.get(3)));
        } catch (Exception ex) {
            LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt " + taskAttemptId);
            return null;
        }
    }
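
    /*
     * Illustrative trace using the sample id from the comment above:
     *   "task_123123123123_0001_r_0000_4" -> strip "task" and "_" -> tokens [123123123123, 0001, r, 0000, 4]
     *   -> new TaskID("123123123123", 1, false, 0)   // "r" does not start with "m", so not a map task;
     *                                                // the trailing attempt number "4" is ignored
     */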
}