All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.mapreduce.util.MRJobConfUtil Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.mapreduce.util;

import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;

/**
 * A class that contains utility methods for MR Job configuration.
 */
public final class MRJobConfUtil {
  /** Value written over every property redacted by {@link #redact}. */
  public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";

  /**
   * Ratio applied to the task timeout to derive the default progress report
   * interval returned by {@link #getTaskProgressReportInterval}.
   */
  public static final float TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO = 0.01f;

  /**
   * Factor by which progress values and the configured delta threshold are
   * multiplied before they are used to control the frequency of logging of
   * task attempts.
   */
  public static final double PROGRESS_MIN_DELTA_FACTOR = 100.0;

  // Both thresholds stay null until setTaskLogProgressDeltaThresholds assigns
  // them; the getters fall back to the MRJobConfig defaults while they are
  // null.
  private static volatile Double progressMinDeltaThreshold = null;
  private static volatile Long progressMaxWaitDeltaTimeThreshold = null;

  /**
   * There is no reason to instantiate this utility class.
   */
  private MRJobConfUtil() {
  }

  /**
   * Redact job configuration properties.
   * Every property named in {@link MRJobConfig#MR_JOB_REDACTED_PROPERTIES}
   * has its value overwritten with {@link #REDACTION_REPLACEMENT_VAL}.
   * @param conf the job configuration to redact
   */
  public static void redact(final Configuration conf) {
    for (String prop : conf.getTrimmedStringCollection(
        MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
      conf.set(prop, REDACTION_REPLACEMENT_VAL);
    }
  }

  /**
   * Get the progress heartbeat interval configuration for mapreduce tasks.
   * By default, the value of progress heartbeat interval is a proportion
   * ({@link #TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO}) of that of task timeout.
   * @param conf  the job configuration to read from
   * @return the value of task progress report interval
   */
  public static long getTaskProgressReportInterval(final Configuration conf) {
    long taskHeartbeatTimeOut = conf.getLong(
        MRJobConfig.TASK_TIMEOUT, MRJobConfig.DEFAULT_TASK_TIMEOUT_MILLIS);
    // Used only when TASK_PROGRESS_REPORT_INTERVAL is not configured.
    long defaultInterval =
        (long) (TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO * taskHeartbeatTimeOut);
    return conf.getLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL,
        defaultInterval);
  }

  /**
   * Load the values defined from a configuration file including the delta
   * progress and the maximum time between each log message.
   * A threshold that has already been set is left unchanged by later calls.
   * The check-then-set on each field is not atomic.
   * @param conf the job configuration to read the thresholds from
   */
  public static void setTaskLogProgressDeltaThresholds(
      final Configuration conf) {
    if (progressMinDeltaThreshold == null) {
      // Double.valueOf replaces the deprecated Double(double) constructor.
      progressMinDeltaThreshold = Double.valueOf(PROGRESS_MIN_DELTA_FACTOR
          * conf.getDouble(MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD,
          MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT));
    }

    if (progressMaxWaitDeltaTimeThreshold == null) {
      // The interval is configured in seconds and stored in milliseconds.
      progressMaxWaitDeltaTimeThreshold = Long.valueOf(
          TimeUnit.SECONDS.toMillis(conf.getLong(
              MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS,
              MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT)));
    }
  }

  /**
   * Retrieves the min delta progress required to log the task attempt current
   * progress.
   * @return the defined threshold in the conf, already multiplied by
   *         {@link #PROGRESS_MIN_DELTA_FACTOR}.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static double getTaskProgressMinDeltaThreshold() {
    // Read the volatile field once so the null check and the use agree.
    final Double threshold = progressMinDeltaThreshold;
    if (threshold == null) {
      return PROGRESS_MIN_DELTA_FACTOR
          * MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT;
    }
    return threshold.doubleValue();
  }

  /**
   * Retrieves the min time required to log the task attempt current
   * progress.
   * @return the defined threshold in the conf, in milliseconds.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static long getTaskProgressWaitDeltaTimeThreshold() {
    // Read the volatile field once so the null check and the use agree.
    final Long threshold = progressMaxWaitDeltaTimeThreshold;
    if (threshold == null) {
      return TimeUnit.SECONDS.toMillis(
          MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT);
    }
    return threshold.longValue();
  }

  /**
   * Converts a progress between 0.0 to 1.0 to double format used to log the
   * task attempt.
   * @param progress of the task which is a value between 0.0 and 1.0.
   * @return the largest integral double value that is less than or equal to
   *          the argument multiplied by {@link #PROGRESS_MIN_DELTA_FACTOR}.
   */
  public static double convertTaskProgressToFactor(final float progress) {
    return Math.floor(progress * MRJobConfUtil.PROGRESS_MIN_DELTA_FACTOR);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy