org.elasticsearch.hadoop.mr.HadoopCfgUtils (Elasticsearch Hadoop Cascading)
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.mr;

import java.util.List;
import java.util.Map;
import java.util.Properties;

import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.TaskAttemptID;
import org.apache.hadoop.mapred.TaskID;

import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.unit.TimeValue;
/**
 * Utility class for handling the various Hadoop configuration properties, aware of the
 * key renames between Hadoop 1 and YARN (Hadoop 2): each accessor reads the Hadoop 2
 * property name first and falls back to its Hadoop 1 equivalent.
 */
public abstract class HadoopCfgUtils {
public static boolean isLocal(Configuration cfg) {
return "local".equals(cfg.get("mapreduce.framework.name")) || "local".equals(getJobTracker(cfg));
}
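// Illustrative usage (hypothetical caller, not part of this class):
//   Configuration cfg = new Configuration();
//   cfg.set("mapreduce.framework.name", "local");
//   HadoopCfgUtils.isLocal(cfg); // true - local job runner on Hadoop 2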
public static String getFileSystem(Configuration cfg) {
return get(cfg, "fs.defaultFS", "fs.default.name");
}
public static void setFileSystem(Configuration cfg, String value) {
set(cfg, value, "fs.defaultFS", "fs.default.name");
}
public static String getJobTracker(Configuration cfg) {
return get(cfg, "mapreduce.jobtracker.address", "mapred.job.tracker");
}
public static void setJobTracker(Configuration cfg, String value) {
set(cfg, value, "mapreduce.jobtracker.address", "mapred.job.tracker");
}
public static String getFileOutputFormatDir(Configuration cfg) {
return get(cfg, "mapreduce.output.fileoutputformat.outputdir", "mapred.output.dir");
}
public static void setFileOutputFormatDir(Configuration cfg, String value) {
set(cfg, value, "mapreduce.output.fileoutputformat.outputdir", "mapred.output.dir");
}
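// only one key exists for the (old-API) output committer; hence the null Hadoop 1 fallback below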
public static String getOutputCommitterClass(Configuration cfg) {
return get(cfg, "mapred.output.committer.class", null);
}
public static void setOutputCommitterClass(Configuration cfg, String value) {
set(cfg, value, "mapred.output.committer.class", null);
}
public static String getTaskAttemptId(Configuration cfg) {
return get(cfg, "mapreduce.task.attempt.id", "mapred.task.id");
}
public static String getTaskId(Configuration cfg) {
return get(cfg, "mapreduce.task.id", "mapred.tip.id");
}
public static String getReduceTasks(Configuration cfg) {
return get(cfg, "mapreduce.job.reduces", "mapred.reduce.tasks", "1");
}
public static boolean getSpeculativeReduce(Configuration cfg) {
return get(cfg, "mapreduce.reduce.speculative", "mapred.reduce.tasks.speculative.execution", true);
}
public static boolean getSpeculativeMap(Configuration cfg) {
return get(cfg, "mapreduce.map.speculative", "mapred.map.tasks.speculative.execution", true);
}
public static void setGenericOptions(Configuration cfg) {
set(cfg, "true", "mapreduce.client.genericoptionsparser.used", "mapred.used.genericoptionsparser");
}
public static TimeValue getTaskTimeout(Configuration cfg) {
return TimeValue.parseTimeValue(get(cfg, "mapreduce.task.timeout", "mapred.task.timeout", "600s"));
}
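// Illustrative sketch, assuming this TimeValue follows Elasticsearch's parsing rules
// (bare numbers are treated as milliseconds):
//   cfg.set("mapreduce.task.timeout", "600000"); // as Hadoop itself sets it
//   HadoopCfgUtils.getTaskTimeout(cfg);          // ~10 minutes
// The "600s" default above mirrors Hadoop's own 600000 ms task timeout.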
public static Properties asProperties(Configuration cfg) {
Properties props = new Properties();
if (cfg != null) {
for (Map.Entry<String, String> entry : cfg) {
props.setProperty(entry.getKey(), entry.getValue());
}
}
return props;
}
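// Illustrative usage (hypothetical caller): snapshot a Configuration for APIs
// that expect java.util.Properties:
//   Properties props = HadoopCfgUtils.asProperties(jobConf);
//   props.getProperty("fs.defaultFS");
// Note the copy is one-way: later changes to the Configuration are not reflected.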
private static String get(Configuration cfg, String hadoop2, String hadoop1) {
return get(cfg, hadoop2, hadoop1, null);
}
private static String get(Configuration cfg, String hadoop2, String hadoop1, String defaultValue) {
String prop = cfg.get(hadoop2);
return (prop != null ? prop : (hadoop1 != null ? cfg.get(hadoop1) : defaultValue));
}
private static boolean get(Configuration cfg, String hadoop2, String hadoop1, boolean defaultValue) {
String result = get(cfg, hadoop2, hadoop1);
if ("true".equals(result))
return true;
else if ("false".equals(result))
return false;
else
return defaultValue;
}
private static void set(Configuration cfg, String value, String hadoop2, String hadoop1) {
cfg.set(hadoop2, value);
if (hadoop1 != null) {
cfg.set(hadoop1, value);
}
}
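// The get/set pair above is the version-bridging pattern used throughout this class:
// reads prefer the Hadoop 2 key and fall back to the Hadoop 1 key, while writes set
// both so either stack picks the value up. For example, with the filesystem keys:
//   set(cfg, "hdfs://nn:8020", "fs.defaultFS", "fs.default.name");
//   get(cfg, "fs.defaultFS", "fs.default.name", null); // "hdfs://nn:8020" via either key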
public static JobConf asJobConf(Configuration cfg) {
return (cfg instanceof JobConf ? (JobConf) cfg : new JobConf(cfg));
}
public static String getMapValueClass(Configuration cfg) {
return get(cfg, "mapreduce.map.output.value.class", "mapred.mapoutput.value.class");
}
public static TaskID getTaskID(Configuration cfg) {
// first try with the attempt since some Hadoop versions mix the two
String taskAttemptId = HadoopCfgUtils.getTaskAttemptId(cfg);
if (StringUtils.hasText(taskAttemptId)) {
try {
return TaskAttemptID.forName(taskAttemptId).getTaskID();
} catch (IllegalArgumentException ex) {
// the task attempt is invalid (Tez in particular uses the wrong string - see #346)
// try to fallback to task id
return parseTaskIdFromTaskAttemptId(taskAttemptId);
}
}
String taskIdProp = HadoopCfgUtils.getTaskId(cfg);
// guard against the Hadoop 2.5.x bug where the task id property actually holds an attempt id
if (StringUtils.hasText(taskIdProp) && !taskIdProp.contains("attempt")) {
return TaskID.forName(taskIdProp);
}
return null;
}
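// Illustrative values: a well-formed attempt id resolves through the Hadoop parser:
//   cfg.set("mapreduce.task.attempt.id", "attempt_201401011200_0001_m_000003_0");
//   HadoopCfgUtils.getTaskID(cfg); // task_201401011200_0001_m_000003
// Malformed ids (e.g. from Tez, see #346) fall through to the manual parser below.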
private static TaskID parseTaskIdFromTaskAttemptId(String taskAttemptId) {
// Tez in particular uses an incorrect String (task1244XXX instead of task_1244) which makes the parsing fail;
// this method tries to cope with such issues and falls back to parsing the numbers directly where possible
if (taskAttemptId.startsWith("task")) {
taskAttemptId = taskAttemptId.substring(4);
}
if (taskAttemptId.startsWith("_")) {
taskAttemptId = taskAttemptId.substring(1);
}
List<String> tokenize = StringUtils.tokenize(taskAttemptId, "_");
// need at least 4 tokens, e.g. 123123123123_0001_r_0000_4
if (tokenize.size() < 4) {
LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt (too few tokens) " + taskAttemptId);
return null;
}
// parse straight away; if a newer/unexpected format slips through, the catch below handles it
try {
return new TaskID(tokenize.get(0), Integer.parseInt(tokenize.get(1)), tokenize.get(2).startsWith("m"), Integer.parseInt(tokenize.get(3)));
} catch (Exception ex) {
LogFactory.getLog(HadoopCfgUtils.class).warn("Cannot parse task attempt " + taskAttemptId);
return null;
}
}
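// Illustrative walk-through with a made-up id: "task_123123123123_0001_r_000004" is
// stripped to "123123123123_0001_r_000004", tokenized on '_' into
// [123123123123, 0001, r, 000004], and rebuilt as new TaskID("123123123123", 1, false, 4).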
}