org.apache.hadoop.mapred.PeriodicStatsAccumulator Maven / Gradle / Ivy
Show all versions of hadoop-apache Show documentation
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.mapred;
import org.apache.hadoop.classification.InterfaceAudience.Private;
import org.apache.hadoop.classification.InterfaceStability.Unstable;
/**
*
* This abstract class that represents a bucketed series of
* measurements of a quantity being measured in a running task
* attempt.
*
* The sole constructor is called with a count, which is the
* number of buckets into which we evenly divide the spectrum of
* progress from 0.0D to 1.0D . In the future we may provide for
* custom split points that don't have to be uniform.
*
*
A subclass determines how we fold readings for portions of a
* bucket and how we interpret the readings by overriding
* {@code extendInternal(...)} and {@code initializeInterval()}
*/
@Private
@Unstable
public abstract class PeriodicStatsAccumulator {
// The range of progress from 0.0D through 1.0D is divided into
// count "progress segments". This object accumulates an
// estimate of the effective value of a time-varying value during
// the zero-based i'th progress segment, ranging from i/count
// through (i+1)/count .
// This is an abstract class. We have two implementations: one
// for monotonically increasing time-dependent variables
// [currently, CPU time in milliseconds and wallclock time in
// milliseconds] and one for quantities that can vary arbitrarily
// over time, currently virtual and physical memory used, in
// kilobytes.
// We carry int's here. This saves a lot of JVM heap space in the
// job tracker per running task attempt [200 bytes per] but it
// has a small downside.
// No task attempt can run for more than 57 days nor occupy more
// than two terabytes of virtual memory.
protected final int count;
protected final int[] values;
static class StatsetState {
int oldValue = 0;
double oldProgress = 0.0D;
double currentAccumulation = 0.0D;
}
// We provide this level of indirection to reduce the memory
// footprint of done task attempts. When a task's progress
// reaches 1.0D, we delete this objecte StatsetState.
StatsetState state = new StatsetState();
PeriodicStatsAccumulator(int count) {
this.count = count;
this.values = new int[count];
for (int i = 0; i < count; ++i) {
values[i] = -1;
}
}
protected int[] getValues() {
return values;
}
// The concrete implementation of this abstract function
// accumulates more data into the current progress segment.
// newProgress [from the call] and oldProgress [from the object]
// must be in [or at the border of] a single progress segment.
/**
*
* adds a new reading to the current bucket.
*
* @param newProgress the endpoint of the interval this new
* reading covers
* @param newValue the value of the reading at {@code newProgress}
*
* The class has three instance variables, {@code oldProgress} and
* {@code oldValue} and {@code currentAccumulation}.
*
* {@code extendInternal} can count on three things:
*
* 1: The first time it's called in a particular instance, both
* oldXXX's will be zero.
*
* 2: oldXXX for a later call is the value of newXXX of the
* previous call. This ensures continuity in accumulation from
* one call to the next.
*
* 3: {@code currentAccumulation} is owned by
* {@code initializeInterval} and {@code extendInternal}.
*/
protected abstract void extendInternal(double newProgress, int newValue);
// What has to be done when you open a new interval
/**
* initializes the state variables to be ready for a new interval
*/
protected void initializeInterval() {
state.currentAccumulation = 0.0D;
}
// called for each new reading
/**
* This method calls {@code extendInternal} at least once. It
* divides the current progress interval [from the last call's
* {@code newProgress} to this call's {@code newProgress} ]
* into one or more subintervals by splitting at any point which
* is an interval boundary if there are any such points. It
* then calls {@code extendInternal} for each subinterval, or the
* whole interval if there are no splitting points.
*
*
For example, if the value was {@code 300} last time with
* {@code 0.3} progress, and count is {@code 5}, and you get a
* new reading with the variable at {@code 700} and progress at
* {@code 0.7}, you get three calls to {@code extendInternal}:
* one extending from progress {@code 0.3} to {@code 0.4} [the
* next boundary] with a value of {@code 400}, the next one
* through {@code 0.6} with a value of {@code 600}, and finally
* one at {@code 700} with a progress of {@code 0.7} .
*
* @param newProgress the endpoint of the progress range this new
* reading covers
* @param newValue the value of the reading at {@code newProgress}
*/
protected void extend(double newProgress, int newValue) {
if (state == null || newProgress < state.oldProgress) {
return;
}
// This correctness of this code depends on 100% * count = count.
int oldIndex = (int)(state.oldProgress * count);
int newIndex = (int)(newProgress * count);
int originalOldValue = state.oldValue;
double fullValueDistance = (double)newValue - state.oldValue;
double fullProgressDistance = newProgress - state.oldProgress;
double originalOldProgress = state.oldProgress;
// In this loop we detect each subinterval boundary within the
// range from the old progress to the new one. Then we
// interpolate the value from the old value to the new one to
// infer what its value might have been at each such boundary.
// Lastly we make the necessary calls to extendInternal to fold
// in the data for each trapazoid where no such trapazoid
// crosses a boundary.
for (int closee = oldIndex; closee < newIndex; ++closee) {
double interpolationProgress = (double)(closee + 1) / count;
// In floats, x * y / y might not equal y.
interpolationProgress = Math.min(interpolationProgress, newProgress);
double progressLength = (interpolationProgress - originalOldProgress);
double interpolationProportion = progressLength / fullProgressDistance;
double interpolationValueDistance
= fullValueDistance * interpolationProportion;
// estimates the value at the next [interpolated] subsegment boundary
int interpolationValue
= (int)interpolationValueDistance + originalOldValue;
extendInternal(interpolationProgress, interpolationValue);
advanceState(interpolationProgress, interpolationValue);
values[closee] = (int)state.currentAccumulation;
initializeInterval();
}
extendInternal(newProgress, newValue);
advanceState(newProgress, newValue);
if (newIndex == count) {
state = null;
}
}
protected void advanceState(double newProgress, int newValue) {
state.oldValue = newValue;
state.oldProgress = newProgress;
}
int getCount() {
return count;
}
int get(int index) {
return values[index];
}
}