All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.mapreduce.util.MRJobConfUtil Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.util;

import java.io.File;
import java.util.concurrent.TimeUnit;

import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;

import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.fs.Path;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.MRJobConfig;

/**
 * A class that contains utility methods for MR Job configuration.
 *
 * <p>All members are static; the class cannot be instantiated.
 */
public final class MRJobConfUtil {
  private static final Logger LOG =
      LoggerFactory.getLogger(MRJobConfUtil.class);

  /** Value written over every property that gets redacted. */
  public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";

  /**
   * Factor applied to the task timeout to derive the default task progress
   * report interval.
   */
  public static final float TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO = 0.01f;

  /**
   * Configurations to control the frequency of logging of task Attempt.
   * Progress values in the range 0.0 to 1.0 are scaled by this factor.
   */
  public static final double PROGRESS_MIN_DELTA_FACTOR = 100.0;

  /** Scaled min progress delta; {@code null} until loaded from a conf. */
  private static volatile Double progressMinDeltaThreshold = null;

  /** Max wait between log messages in ms; {@code null} until loaded. */
  private static volatile Long progressMaxWaitDeltaTimeThreshold = null;

  /**
   * For unit tests, use urandom to avoid the YarnChild  process from hanging
   * on low entropy systems.
   */
  private static final String TEST_JVM_SECURITY_EGD_OPT =
      "-Djava.security.egd=file:/dev/./urandom";

  /**
   * There is no reason to instantiate this utility class.
   */
  private MRJobConfUtil() {
  }

  /**
   * Redact job configuration properties. Every property named in
   * {@link MRJobConfig#MR_JOB_REDACTED_PROPERTIES} is overwritten in place
   * with {@link #REDACTION_REPLACEMENT_VAL}.
   * @param conf the job configuration to redact
   */
  public static void redact(final Configuration conf) {
    for (String prop : conf.getTrimmedStringCollection(
        MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
      conf.set(prop, REDACTION_REPLACEMENT_VAL);
    }
  }

  /**
   * Get the progress heartbeat interval configuration for mapreduce tasks.
   * By default, the value of progress heartbeat interval is a proportion of
   * that of task timeout.
   * @param conf  the job configuration to read from
   * @return the value of task progress report interval
   */
  public static long getTaskProgressReportInterval(final Configuration conf) {
    long taskHeartbeatTimeOut = conf.getLong(
        MRJobConfig.TASK_TIMEOUT, MRJobConfig.DEFAULT_TASK_TIMEOUT_MILLIS);
    return conf.getLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL,
        (long) (TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO * taskHeartbeatTimeOut));
  }

  /**
   * Load the values defined from a configuration file including the delta
   * progress and the maximum time between each log message.
   *
   * <p>Each threshold is only assigned while it is still unset, so a later
   * call does not overwrite a value that was already loaded. The
   * check-and-assign is not atomic.
   * @param conf the configuration to read the thresholds from
   */
  public static void setTaskLogProgressDeltaThresholds(
      final Configuration conf) {
    if (progressMinDeltaThreshold == null) {
      // Double.valueOf replaces the deprecated Double(double) constructor.
      progressMinDeltaThreshold =
          Double.valueOf(PROGRESS_MIN_DELTA_FACTOR
              * conf.getDouble(MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD,
              MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT));
    }

    if (progressMaxWaitDeltaTimeThreshold == null) {
      progressMaxWaitDeltaTimeThreshold =
          TimeUnit.SECONDS.toMillis(conf
              .getLong(
                  MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS,
                  MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT));
    }
  }

  /**
   * Retrieves the min delta progress required to log the task attempt current
   * progress.
   * @return the defined threshold in the conf.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static double getTaskProgressMinDeltaThreshold() {
    // Read the volatile field once so the null check and the unboxing see
    // the same value.
    final Double threshold = progressMinDeltaThreshold;
    if (threshold == null) {
      return PROGRESS_MIN_DELTA_FACTOR
          * MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT;
    }
    return threshold.doubleValue();
  }

  /**
   * Retrieves the min time required to log the task attempt current
   * progress.
   * @return the defined threshold in the conf, in milliseconds.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static long getTaskProgressWaitDeltaTimeThreshold() {
    // Single volatile read, see getTaskProgressMinDeltaThreshold().
    final Long threshold = progressMaxWaitDeltaTimeThreshold;
    if (threshold == null) {
      return TimeUnit.SECONDS.toMillis(
          MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT);
    }
    return threshold.longValue();
  }

  /**
   * Converts a progress between 0.0 to 1.0 to double format used to log the
   * task attempt.
   * @param progress of the task which is a value between 0.0 and 1.0.
   * @return the double value that is less than or equal to the argument
   *          multiplied by {@link #PROGRESS_MIN_DELTA_FACTOR}.
   */
  public static double convertTaskProgressToFactor(final float progress) {
    return Math.floor(progress * PROGRESS_MIN_DELTA_FACTOR);
  }

  /**
   * Prepares a configuration for tests that run with encrypted intermediate
   * data: prepends the urandom JVM option to the child JVM options, sets it
   * as the AM admin command options and enables
   * {@code mapreduce.job.encrypted-intermediate-data}.
   * @param conf the configuration to update; a new one is created when
   *             {@code null}
   * @return the updated configuration (the same instance as {@code conf}
   *         when it is not {@code null})
   */
  public static Configuration initEncryptedIntermediateConfigsForTesting(
      Configuration conf) {
    Configuration config =
        (conf == null) ? new Configuration(): conf;
    final String childJVMOpts =
        TEST_JVM_SECURITY_EGD_OPT.concat(" ")
            .concat(config.get("mapred.child.java.opts", " "));
    // Set the jvm arguments.
    // The key must be spelled "admin-command-opts"; the previous literal was
    // corrupted by package relocation and was never read by anything.
    config.set("yarn.app.mapreduce.am.admin-command-opts",
        TEST_JVM_SECURITY_EGD_OPT);
    config.set("mapred.child.java.opts", childJVMOpts);
    config.setBoolean("mapreduce.job.encrypted-intermediate-data", true);
    return config;
  }

  /**
   * Set local directories so that the generated folders is subdirectory of the
   * test directories. A {@code hadoop-dir} folder is created under
   * {@code testRootDir} and the staging, system, temp and local directory
   * settings are pointed inside it.
   * @param conf the configuration to update; a new one is created when
   *             {@code null}
   * @param testRootDir the root directory of the test
   * @return the updated configuration (the same instance as {@code conf}
   *         when it is not {@code null})
   */
  public static Configuration setLocalDirectoriesConfigForTesting(
      Configuration conf, File testRootDir) {
    Configuration config =
        (conf == null) ? new Configuration(): conf;
    final File hadoopLocalDir = new File(testRootDir, "hadoop-dir");
    // create the directory
    final File absoluteLocalDir = hadoopLocalDir.getAbsoluteFile();
    if (!absoluteLocalDir.mkdirs()) {
      // mkdirs() also returns false on a real failure, so only report
      // "already exists" when the directory is actually there.
      if (absoluteLocalDir.isDirectory()) {
        LOG.info("{} directory already exists", hadoopLocalDir.getPath());
      } else {
        LOG.warn("Failed to create directory {}", hadoopLocalDir.getPath());
      }
    }
    Path mapredHadoopTempDir = new Path(hadoopLocalDir.getPath());
    Path mapredSystemDir = new Path(mapredHadoopTempDir, "system");
    Path stagingDir = new Path(mapredHadoopTempDir, "tmp/staging");
    // Set the temp directories a subdir of the test directory.
    config.set("mapreduce.jobtracker.staging.root.dir", stagingDir.toString());
    config.set("mapreduce.jobtracker.system.dir", mapredSystemDir.toString());
    config.set("mapreduce.cluster.temp.dir", mapredHadoopTempDir.toString());
    config.set("mapreduce.cluster.local.dir",
        new Path(mapredHadoopTempDir, "local").toString());
    return config;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy