// org.apache.hadoop.mapreduce.util.MRJobConfUtil Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapreduce.util;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.MRJobConfig;
/**
* A class that contains utility methods for MR Job configuration.
*/
public final class MRJobConfUtil {

  /** Value written over every redacted configuration property. */
  public static final String REDACTION_REPLACEMENT_VAL = "*********(redacted)";

  /**
   * Fraction of the task timeout used as the default progress report
   * interval when {@link MRJobConfig#TASK_PROGRESS_REPORT_INTERVAL} is not
   * set in the configuration.
   */
  public static final float TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO = 0.01f;

  /**
   * Multiplier applied to progress values and to the configured delta
   * threshold when deciding how often task attempt progress is logged.
   */
  public static final double PROGRESS_MIN_DELTA_FACTOR = 100.0;

  /**
   * Cached, already scaled minimum progress delta; {@code null} until
   * {@link #setTaskLogProgressDeltaThresholds} has loaded it.
   */
  private static volatile Double progressMinDeltaThreshold = null;

  /**
   * Cached wait interval in milliseconds; {@code null} until
   * {@link #setTaskLogProgressDeltaThresholds} has loaded it.
   */
  private static volatile Long progressMaxWaitDeltaTimeThreshold = null;

  /**
   * There is no reason to instantiate this utility class.
   */
  private MRJobConfUtil() {
  }

  /**
   * Redact job configuration properties.
   * Every property named in {@link MRJobConfig#MR_JOB_REDACTED_PROPERTIES}
   * has its value overwritten in place with
   * {@link #REDACTION_REPLACEMENT_VAL}.
   * @param conf the job configuration to redact
   */
  public static void redact(final Configuration conf) {
    for (String prop : conf.getTrimmedStringCollection(
        MRJobConfig.MR_JOB_REDACTED_PROPERTIES)) {
      conf.set(prop, REDACTION_REPLACEMENT_VAL);
    }
  }

  /**
   * Get the progress heartbeat interval configuration for mapreduce tasks.
   * By default, the value of progress heartbeat interval is a proportion
   * ({@link #TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO}) of that of task timeout,
   * truncated to a {@code long}.
   * @param conf the job configuration to read from
   * @return the value of task progress report interval
   */
  public static long getTaskProgressReportInterval(final Configuration conf) {
    long taskHeartbeatTimeOut = conf.getLong(
        MRJobConfig.TASK_TIMEOUT, MRJobConfig.DEFAULT_TASK_TIMEOUT_MILLIS);
    return conf.getLong(MRJobConfig.TASK_PROGRESS_REPORT_INTERVAL,
        (long) (TASK_REPORT_INTERVAL_TO_TIMEOUT_RATIO * taskHeartbeatTimeOut));
  }

  /**
   * Load the values defined from a configuration file including the delta
   * progress and the maximum time between each log message.
   * Each threshold is loaded only while it is still unset, so a later call
   * does not overwrite a value that was already loaded. The check and the
   * assignment are not performed atomically.
   * @param conf the job configuration to read the thresholds from
   */
  public static void setTaskLogProgressDeltaThresholds(
      final Configuration conf) {
    if (progressMinDeltaThreshold == null) {
      // Double.valueOf replaces the deprecated Double(double) constructor.
      progressMinDeltaThreshold = Double.valueOf(
          PROGRESS_MIN_DELTA_FACTOR
              * conf.getDouble(MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD,
                  MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT));
    }
    if (progressMaxWaitDeltaTimeThreshold == null) {
      // The configured interval is in seconds; it is cached in milliseconds.
      progressMaxWaitDeltaTimeThreshold = Long.valueOf(
          TimeUnit.SECONDS.toMillis(conf.getLong(
              MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS,
              MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT)));
    }
  }

  /**
   * Retrieves the min delta progress required to log the task attempt current
   * progress.
   * @return the defined threshold in the conf, scaled by
   *         {@link #PROGRESS_MIN_DELTA_FACTOR}.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static double getTaskProgressMinDeltaThreshold() {
    // Read the volatile field once so the null check and the unboxing
    // operate on the same value.
    final Double threshold = progressMinDeltaThreshold;
    if (threshold == null) {
      return PROGRESS_MIN_DELTA_FACTOR
          * MRJobConfig.TASK_LOG_PROGRESS_DELTA_THRESHOLD_DEFAULT;
    }
    return threshold.doubleValue();
  }

  /**
   * Retrieves the wait time, in milliseconds, used when logging the task
   * attempt current progress.
   * @return the defined threshold in the conf converted to milliseconds.
   *         returns the default value if
   *         {@link #setTaskLogProgressDeltaThresholds} has not been called.
   */
  public static long getTaskProgressWaitDeltaTimeThreshold() {
    // Read the volatile field once so the null check and the unboxing
    // operate on the same value.
    final Long threshold = progressMaxWaitDeltaTimeThreshold;
    if (threshold == null) {
      return TimeUnit.SECONDS.toMillis(
          MRJobConfig.TASK_LOG_PROGRESS_WAIT_INTERVAL_SECONDS_DEFAULT);
    }
    return threshold.longValue();
  }

  /**
   * Converts a progress between 0.0 to 1.0 to double format used to log the
   * task attempt.
   * @param progress of the task which is a value between 0.0 and 1.0.
   * @return the largest integral double value that is less than or equal to
   *         the argument multiplied by {@link #PROGRESS_MIN_DELTA_FACTOR}.
   */
  public static double convertTaskProgressToFactor(final float progress) {
    return Math.floor(progress * MRJobConfUtil.PROGRESS_MIN_DELTA_FACTOR);
  }
}