org.apache.hadoop.hbase.regionserver.DefaultHeapMemoryTuner Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-server Show documentation
Show all versions of hbase-server Show documentation
Server functionality for HBase
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.regionserver;
import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY;
import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MAX_RANGE_KEY;
import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.BLOCK_CACHE_SIZE_MIN_RANGE_KEY;
import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MAX_RANGE_KEY;
import static org.apache.hadoop.hbase.regionserver.HeapMemoryManager.MEMSTORE_SIZE_MIN_RANGE_KEY;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.io.util.MemorySizeUtil;
import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerContext;
import org.apache.hadoop.hbase.regionserver.HeapMemoryManager.TunerResult;
import org.apache.hadoop.hbase.util.RollingStatCalculator;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * The default implementation for the HeapMemoryTuner. This will do statistical checks on number of
 * evictions, cache misses and flushes to decide whether there should be changes in the heap size of
 * memstore/block cache. During each tuner operation the tuner takes a step which can be either
 * INCREASE_BLOCK_CACHE_SIZE (increase block cache size) or INCREASE_MEMSTORE_SIZE (increase
 * memstore size); by default it is NEUTRAL (no change). We say block cache is sufficient when there
 * is no block cache eviction at all or a major amount of the memory allocated to block cache is
 * empty; similarly we say memory allocated for memstore is sufficient when there are no memstore
 * flushes because of heap pressure or a major amount of the memory allocated to memstore is empty.
 * If both are sufficient we do nothing; if exactly one of them is found to be sufficient we
 * decrease its size by step and increase the other by the same amount. If none of them is
 * sufficient we do statistical analysis on number of cache misses and flushes to determine tuner
 * direction. If we are not confident about which step direction to take we do nothing and wait for
 * the next iteration. On expectation we will be tuning for at least 10% of tuner calls. The number
 * of past periods to consider for statistics calculation can be specified in config by
 * hbase.regionserver.heapmemory.autotuner.lookup.periods. Also these many initial calls to
 * tuner will be ignored (cache is warming up and we leave the system to reach steady state) unless
 * hbase.regionserver.heapmemory.autotuner.ignored.periods says otherwise. After the tuner
 * takes a step, in the next call we ensure that the last call was indeed helpful and did not do us
 * any harm. If not then we revert the previous step. The step size is dynamic and it changes based
 * on current and past few tuning directions and their step sizes. We maintain a parameter
 * decayingTunerStepSizeSum which is the sum of past tuner steps with sign (positive for
 * increase in memstore and negative for increase in block cache). But rather than a simple sum it
 * is calculated by giving more priority to the recent tuning steps. When the last few tuner steps
 * were NEUTRAL then we assume we are restarting the tuning process and the step size is updated to
 * the maximum allowed size which can be specified in config by
 * hbase.regionserver.heapmemory.autotuner.step.max. If in a particular tuning operation the
 * step direction is opposite to what is indicated by decayingTunerStepSizeSum we decrease the
 * step size by half. Step size does not change in other tuning operations. When step size gets
 * below a certain threshold then the following tuner operations are considered to be neutral. The
 * minimum step size can be specified in config by
 * hbase.regionserver.heapmemory.autotuner.step.min.
 * <p>
 * NOTE(review): an instance keeps mutable tuning state and hands out the same TunerResult object
 * on every non-neutral call; presumably it is only driven from a single tuner thread -- confirm
 * against the caller.
 */
@InterfaceAudience.Private
class DefaultHeapMemoryTuner implements HeapMemoryTuner {
  public static final String MAX_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.max";
  public static final String MIN_STEP_KEY = "hbase.regionserver.heapmemory.autotuner.step.min";
  public static final String SUFFICIENT_MEMORY_LEVEL_KEY =
    "hbase.regionserver.heapmemory.autotuner.sufficient.memory.level";
  public static final String LOOKUP_PERIODS_KEY =
    "hbase.regionserver.heapmemory.autotuner.lookup.periods";
  public static final String NUM_PERIODS_TO_IGNORE =
    "hbase.regionserver.heapmemory.autotuner.ignored.periods";
  // Maximum step size that the tuner can take
  public static final float DEFAULT_MAX_STEP_VALUE = 0.04f; // 4%
  // Minimum step size that the tuner can take
  public static final float DEFAULT_MIN_STEP_VALUE = 0.00125f; // 0.125%
  // If current block cache size or memstore size in use is below this level relative to memory
  // provided to it then corresponding component will be considered to have sufficient memory
  public static final float DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE = 0.5f; // 50%
  // Number of tuner periods that will be considered while calculating mean and deviation
  // If set to zero, all stats will be calculated from the start
  public static final int DEFAULT_LOOKUP_PERIODS = 60;
  public static final int DEFAULT_NUM_PERIODS_IGNORED = 60;

  private static final TunerResult NO_OP_TUNER_RESULT = new TunerResult(false);
  // If deviation of tuner step size gets below this value then it means past few periods were
  // NEUTRAL (given that last tuner period was also NEUTRAL).
  private static final double TUNER_STEP_EPS = 1e-6;
  // A sample within mean +- deviation * this factor is treated as "normal"; anything outside that
  // band is treated as low / high when choosing a direction from statistics.
  private static final double NORMAL_RANGE_DEVIATION_FACTOR = 0.80;

  private static final Logger LOG = LoggerFactory.getLogger(DefaultHeapMemoryTuner.class);

  // Result object that is filled in and returned by every tune() call that changes sizes.
  private final TunerResult tunerResult = new TunerResult(true);

  private Configuration conf;
  private float sufficientMemoryLevel = DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE;
  private float maximumStepSize = DEFAULT_MAX_STEP_VALUE;
  private float minimumStepSize = DEFAULT_MIN_STEP_VALUE;
  private int tunerLookupPeriods = DEFAULT_LOOKUP_PERIODS;
  private int numPeriodsToIgnore = DEFAULT_NUM_PERIODS_IGNORED;
  // Counter to ignore few initial periods while cache is still warming up.
  // Memory tuner will do no operation for the first "numPeriodsToIgnore" calls.
  private int ignoreInitialPeriods = 0;

  private float globalMemStorePercentMinRange;
  private float globalMemStorePercentMaxRange;
  private float blockCachePercentMinRange;
  private float blockCachePercentMaxRange;
  private float globalMemStoreLimitLowMarkPercent;

  // Store statistics about the corresponding parameters for memory tuning.
  // These are created in setConf(); tune() dereferences them unconditionally.
  private RollingStatCalculator rollingStatsForCacheMisses;
  private RollingStatCalculator rollingStatsForFlushes;
  private RollingStatCalculator rollingStatsForEvictions;
  private RollingStatCalculator rollingStatsForTunerSteps;

  // Set step size to max value for tuning, this step size will adjust dynamically while tuning
  private float step = DEFAULT_MAX_STEP_VALUE;
  private StepDirection prevTuneDirection = StepDirection.NEUTRAL;
  // positive means memstore's size was increased
  // It is not just arithmetic sum of past tuner periods. More priority is given to recent
  // tuning steps.
  private double decayingTunerStepSizeSum = 0;

  /**
   * Runs one tuning period: records the period's counters, decides a direction, adapts the step
   * size and computes the new memstore / block cache sizes.
   * @param context The tuner context holding current sizes and the counters of this period.
   * @return a no-op result while initial periods are being ignored or when the decision is
   *         NEUTRAL; otherwise the shared result object carrying the new sizes.
   */
  @Override
  public TunerResult tune(TunerContext context) {
    float curMemstoreSize = context.getCurMemStoreSize();
    float curBlockCacheSize = context.getCurBlockCacheSize();
    addToRollingStats(context);

    if (ignoreInitialPeriods < numPeriodsToIgnore) {
      // Ignoring the first few tuner periods
      ignoreInitialPeriods++;
      rollingStatsForTunerSteps.insertDataValue(0);
      LOG.info("Ignoring initial tuning periods: {} so far, {} to ignore", ignoreInitialPeriods,
        numPeriodsToIgnore);
      return NO_OP_TUNER_RESULT;
    }

    StepDirection newTuneDirection = getTuneDirection(context);

    long blockedFlushCount = context.getBlockedFlushCount();
    long unblockedFlushCount = context.getUnblockedFlushCount();
    long totalOnheapFlushCount = blockedFlushCount + unblockedFlushCount;
    boolean offheapMemstore = context.isOffheapMemStore();
    float newMemstoreSize;
    float newBlockCacheSize;

    // Adjusting step size for tuning to get to steady state or restart from steady state.
    // Even if the step size was 4% and 32 GB memory size, we will be shifting about 1 GB back and
    // forth per tuner operation and it can affect the performance of cluster so we keep on
    // decreasing step size until everything settles.
    if (
      prevTuneDirection == StepDirection.NEUTRAL && newTuneDirection != StepDirection.NEUTRAL
        && rollingStatsForTunerSteps.getDeviation() < TUNER_STEP_EPS
    ) {
      // Restarting the tuning from steady state and setting step size to maximum.
      // The deviation cannot be that low if last period was neutral and some recent periods were
      // not neutral.
      step = maximumStepSize;
    } else if (
      (newTuneDirection == StepDirection.INCREASE_MEMSTORE_SIZE && decayingTunerStepSizeSum < 0)
        || (newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE
          && decayingTunerStepSizeSum > 0)
    ) {
      // Current step is opposite of past tuner actions so decrease the step size to reach steady
      // state.
      if (!offheapMemstore && step != minimumStepSize) {
        // we leave the step to be at minimumStepSize for offheap memstore
        step = step / 2.00f;
      }
    }
    if (step < minimumStepSize) {
      // If step size is too small then we do nothing.
      LOG.debug("Tuner step size is too low; we will not perform any tuning this time.");
      step = 0.0f;
      newTuneDirection = StepDirection.NEUTRAL;
    }
    // There are no flushes due to onheap pressure and
    // we have an offheap memstore and we are in need of more block_cache size.
    if (
      totalOnheapFlushCount == 0 && offheapMemstore
        && newTuneDirection == StepDirection.INCREASE_BLOCK_CACHE_SIZE
    ) {
      // we are sure that there are flushes only due to offheap pressure
      // So don't do the memstore decrease equal to the step size. Instead do minimum stepSize
      // decrease. But even if we have some flushes due to heap then it is better we tune
      // the existing way.
      step = minimumStepSize;
    }

    // Increase / decrease the memstore / block cache sizes depending on new tuner step.
    // We don't want to exert immediate pressure on memstore. So, we decrease its size gracefully;
    // we set a minimum bar in the middle of the total memstore size and the lower limit.
    float minMemstoreSize = ((globalMemStoreLimitLowMarkPercent + 1) * curMemstoreSize) / 2.00f;
    switch (newTuneDirection) {
      case INCREASE_BLOCK_CACHE_SIZE:
        if (curMemstoreSize - step < minMemstoreSize) {
          // Shrink the step so the memstore never drops below the graceful minimum.
          step = curMemstoreSize - minMemstoreSize;
        }
        newMemstoreSize = curMemstoreSize - step;
        newBlockCacheSize = curBlockCacheSize + step;
        // Steps are recorded as signed, scaled integers; negative means block cache grew.
        rollingStatsForTunerSteps.insertDataValue(-(int) (step * 100000));
        decayingTunerStepSizeSum = (decayingTunerStepSizeSum - step) / 2.00f;
        break;
      case INCREASE_MEMSTORE_SIZE:
        newBlockCacheSize = curBlockCacheSize - step;
        newMemstoreSize = curMemstoreSize + step;
        rollingStatsForTunerSteps.insertDataValue((int) (step * 100000));
        decayingTunerStepSizeSum = (decayingTunerStepSizeSum + step) / 2.00f;
        break;
      default:
        prevTuneDirection = StepDirection.NEUTRAL;
        rollingStatsForTunerSteps.insertDataValue(0);
        decayingTunerStepSizeSum = (decayingTunerStepSizeSum) / 2.00f;
        return NO_OP_TUNER_RESULT;
    }

    // Check we are within max/min bounds.
    newMemstoreSize =
      clamp(newMemstoreSize, globalMemStorePercentMinRange, globalMemStorePercentMaxRange);
    newBlockCacheSize =
      clamp(newBlockCacheSize, blockCachePercentMinRange, blockCachePercentMaxRange);

    tunerResult.setBlockCacheSize(newBlockCacheSize);
    tunerResult.setMemStoreSize(newMemstoreSize);
    prevTuneDirection = newTuneDirection;
    return tunerResult;
  }

  /**
   * Restricts a value to the given range. The upper bound is checked first, so it wins if the
   * range happens to be inverted.
   * @param value the value to restrict.
   * @param min   the lower bound.
   * @param max   the upper bound.
   * @return {@code max} if value is above it, else {@code min} if value is below it, else value.
   */
  private static float clamp(float value, float min, float max) {
    if (value > max) {
      return max;
    }
    if (value < min) {
      return min;
    }
    return value;
  }

  /**
   * Determine best direction of tuning base on given context.
   * @param context The tuner context.
   * @return tuning direction.
   */
  private StepDirection getTuneDirection(TunerContext context) {
    long blockedFlushCount = context.getBlockedFlushCount();
    long unblockedFlushCount = context.getUnblockedFlushCount();
    long evictCount = context.getEvictCount();
    long cacheMissCount = context.getCacheMissCount();
    long totalFlushCount = blockedFlushCount + unblockedFlushCount;
    float curMemstoreSize = context.getCurMemStoreSize();
    float curBlockCacheSize = context.getCurBlockCacheSize();
    StringBuilder tunerLog = new StringBuilder();
    StepDirection newTuneDirection;

    // We can consider memstore or block cache to be sufficient if
    // we are using only a minor fraction of what have been already provided to it.
    boolean earlyMemstoreSufficientCheck = totalFlushCount == 0
      || context.getCurMemStoreUsed() < curMemstoreSize * sufficientMemoryLevel;
    boolean earlyBlockCacheSufficientCheck =
      evictCount == 0 || context.getCurBlockCacheUsed() < curBlockCacheSize * sufficientMemoryLevel;

    if (earlyMemstoreSufficientCheck && earlyBlockCacheSufficientCheck) {
      // Both memstore and block cache memory seems to be sufficient. No operation required.
      newTuneDirection = StepDirection.NEUTRAL;
      tunerLog.append("Going to do nothing because no changes are needed.");
    } else if (earlyMemstoreSufficientCheck) {
      // Increase the block cache size and corresponding decrease in memstore size.
      newTuneDirection = StepDirection.INCREASE_BLOCK_CACHE_SIZE;
      tunerLog.append("Going to increase the block cache size.");
    } else if (earlyBlockCacheSufficientCheck) {
      // Increase the memstore size and corresponding decrease in block cache size.
      newTuneDirection = StepDirection.INCREASE_MEMSTORE_SIZE;
      tunerLog.append("Going to increase the memstore size.");
    } else {
      // Early checks for sufficient memory failed. Tuning memory based on past statistics.
      // First see whether the previous step has to be undone; a revert is never NEUTRAL.
      newTuneDirection = getRevertDirection(evictCount, totalFlushCount, tunerLog);
      if (newTuneDirection == StepDirection.NEUTRAL) {
        // If we are not reverting, we try to tune memory sizes by looking at cache misses /
        // flushes.
        newTuneDirection =
          getDirectionFromStats(cacheMissCount, totalFlushCount, blockedFlushCount, tunerLog);
      }
    }

    // Log NEUTRAL decisions at DEBUG, because they are the most frequent and not that interesting.
    // Log other decisions at INFO because they are making meaningful operational changes.
    if (newTuneDirection == StepDirection.NEUTRAL) {
      LOG.debug("{}", tunerLog);
    } else {
      LOG.info("{}", tunerLog);
    }
    return newTuneDirection;
  }

  /**
   * Checks whether the previous non-neutral step should be reverted, i.e. whether it failed to
   * reduce the counter it targeted or noticeably increased the other one.
   * @param evictCount      evictions in the current period.
   * @param totalFlushCount blocked plus unblocked flushes in the current period.
   * @param tunerLog        buffer that receives the reason when a revert is chosen.
   * @return the direction opposite to the previous step when a revert is needed, otherwise
   *         NEUTRAL (also when the previous step was NEUTRAL, since there is nothing to revert).
   */
  private StepDirection getRevertDirection(long evictCount, long totalFlushCount,
    StringBuilder tunerLog) {
    switch (prevTuneDirection) {
      // Here we are using number of evictions rather than cache misses because it is more
      // strong indicator for deficient cache size. Improving caching is what we
      // would like to optimize for in steady state.
      case INCREASE_BLOCK_CACHE_SIZE: {
        boolean evictionsNotReduced = (double) evictCount > rollingStatsForEvictions.getMean();
        boolean flushesRose = (double) totalFlushCount
            > rollingStatsForFlushes.getMean() + rollingStatsForFlushes.getDeviation() / 2.00;
        if (evictionsNotReduced || flushesRose) {
          // Reverting previous step as it was not useful.
          // Tuning failed to decrease evictions or tuning resulted in large number of flushes.
          tunerLog.append("We will revert previous tuning");
          if (evictionsNotReduced) {
            tunerLog.append(" because we could not decrease evictions sufficiently.");
          } else {
            tunerLog.append(" because the number of flushes rose significantly.");
          }
          return StepDirection.INCREASE_MEMSTORE_SIZE;
        }
        return StepDirection.NEUTRAL;
      }
      case INCREASE_MEMSTORE_SIZE: {
        boolean flushesNotReduced = (double) totalFlushCount > rollingStatsForFlushes.getMean();
        boolean evictionsRose = (double) evictCount
            > rollingStatsForEvictions.getMean() + rollingStatsForEvictions.getDeviation() / 2.00;
        if (flushesNotReduced || evictionsRose) {
          // Reverting previous step as it was not useful.
          // Tuning failed to decrease flushes or tuning resulted in large number of evictions.
          tunerLog.append("We will revert previous tuning");
          if (flushesNotReduced) {
            tunerLog.append(" because we could not decrease flushes sufficiently.");
          } else {
            tunerLog.append(" because number of evictions rose significantly.");
          }
          return StepDirection.INCREASE_BLOCK_CACHE_SIZE;
        }
        return StepDirection.NEUTRAL;
      }
      default:
        // Last step was neutral, revert does not apply here.
        return StepDirection.NEUTRAL;
    }
  }

  /**
   * Chooses a direction by comparing this period's cache misses and flushes with their rolling
   * statistics.
   * <p>
   * mean +- deviation*0.8 is considered to be normal; below it is considered low and above it is
   * considered high. Assuming the number of cache misses and flushes are roughly normally
   * distributed over past periods, the classes normal, high and low occur with probability of
   * about 56%, 22%, 22% respectively, hence there is at least ~10% probability that we will not
   * fall in the NEUTRAL step. This optimization is feedback based and we revert when we don't
   * find our steps helpful. Hence we want to do tuning only when we have clear indications because
   * too many unnecessary tunings may affect the performance of the cluster.
   * @param cacheMissCount    cache misses in the current period.
   * @param totalFlushCount   blocked plus unblocked flushes in the current period.
   * @param blockedFlushCount blocked flushes in the current period.
   * @param tunerLog          buffer that receives the reason for the decision.
   * @return the chosen direction; NEUTRAL when there is no clear indication.
   */
  private StepDirection getDirectionFromStats(long cacheMissCount, long totalFlushCount,
    long blockedFlushCount, StringBuilder tunerLog) {
    boolean missesLow = isBelowNormal(cacheMissCount, rollingStatsForCacheMisses);
    boolean missesHigh = isAboveNormal(cacheMissCount, rollingStatsForCacheMisses);
    boolean flushesLow = isBelowNormal(totalFlushCount, rollingStatsForFlushes);
    boolean flushesHigh = isAboveNormal(totalFlushCount, rollingStatsForFlushes);

    if (missesLow && flushesLow) {
      // Everything is fine no tuning required
      tunerLog.append("Going to do nothing because cache misses and flushes are both low.");
      return StepDirection.NEUTRAL;
    }
    if (missesHigh && flushesLow) {
      // more misses , increasing cache size
      tunerLog
        .append("Going to increase block cache size due to increase in number of cache misses.");
      return StepDirection.INCREASE_BLOCK_CACHE_SIZE;
    }
    if (missesLow && flushesHigh) {
      // more flushes , increasing memstore size
      tunerLog.append("Going to increase memstore size due to increase in number of flushes.");
      return StepDirection.INCREASE_MEMSTORE_SIZE;
    }
    if (blockedFlushCount > 0 && prevTuneDirection == StepDirection.NEUTRAL) {
      // we do not want blocked flushes
      tunerLog.append("Going to increase memstore size due to ").append(blockedFlushCount)
        .append(" blocked flushes.");
      return StepDirection.INCREASE_MEMSTORE_SIZE;
    }
    // Default. Not enough facts to do tuning.
    tunerLog.append("Going to do nothing because we could not determine best tuning direction");
    return StepDirection.NEUTRAL;
  }

  /**
   * @param value the current sample.
   * @param stats rolling statistics of past samples.
   * @return true if value is strictly above mean + deviation * NORMAL_RANGE_DEVIATION_FACTOR.
   */
  private static boolean isAboveNormal(long value, RollingStatCalculator stats) {
    return (double) value
        > stats.getMean() + stats.getDeviation() * NORMAL_RANGE_DEVIATION_FACTOR;
  }

  /**
   * @param value the current sample.
   * @param stats rolling statistics of past samples.
   * @return true if value is strictly below mean - deviation * NORMAL_RANGE_DEVIATION_FACTOR.
   */
  private static boolean isBelowNormal(long value, RollingStatCalculator stats) {
    return (double) value
        < stats.getMean() - stats.getDeviation() * NORMAL_RANGE_DEVIATION_FACTOR;
  }

  /**
   * Add the given context to the rolling tuner stats.
   * @param context The tuner context.
   */
  private void addToRollingStats(TunerContext context) {
    rollingStatsForCacheMisses.insertDataValue(context.getCacheMissCount());
    rollingStatsForFlushes
      .insertDataValue(context.getBlockedFlushCount() + context.getUnblockedFlushCount());
    rollingStatsForEvictions.insertDataValue(context.getEvictCount());
  }

  /**
   * @return the configuration last passed to {@link #setConf(Configuration)}, or null if none.
   */
  @Override
  public Configuration getConf() {
    return this.conf;
  }

  /**
   * Reads all tuner settings from the given configuration, resets the step size to the configured
   * maximum and creates fresh rolling statistics.
   * @param conf the configuration to read the tuner settings from.
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
    this.maximumStepSize = conf.getFloat(MAX_STEP_KEY, DEFAULT_MAX_STEP_VALUE);
    this.minimumStepSize = conf.getFloat(MIN_STEP_KEY, DEFAULT_MIN_STEP_VALUE);
    this.step = this.maximumStepSize;
    this.sufficientMemoryLevel =
      conf.getFloat(SUFFICIENT_MEMORY_LEVEL_KEY, DEFAULT_SUFFICIENT_MEMORY_LEVEL_VALUE);
    this.tunerLookupPeriods = conf.getInt(LOOKUP_PERIODS_KEY, DEFAULT_LOOKUP_PERIODS);
    // Both block cache range bounds fall back to the configured block cache size.
    this.blockCachePercentMinRange = conf.getFloat(BLOCK_CACHE_SIZE_MIN_RANGE_KEY,
      conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT));
    this.blockCachePercentMaxRange = conf.getFloat(BLOCK_CACHE_SIZE_MAX_RANGE_KEY,
      conf.getFloat(HFILE_BLOCK_CACHE_SIZE_KEY, HConstants.HFILE_BLOCK_CACHE_SIZE_DEFAULT));
    // Both memstore range bounds fall back to the configured global memstore heap percent.
    this.globalMemStorePercentMinRange = conf.getFloat(MEMSTORE_SIZE_MIN_RANGE_KEY,
      MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false));
    this.globalMemStorePercentMaxRange = conf.getFloat(MEMSTORE_SIZE_MAX_RANGE_KEY,
      MemorySizeUtil.getGlobalMemStoreHeapPercent(conf, false));
    this.globalMemStoreLimitLowMarkPercent =
      MemorySizeUtil.getGlobalMemStoreHeapLowerMark(conf, true);
    // Default value of periods to ignore is number of lookup periods
    this.numPeriodsToIgnore = conf.getInt(NUM_PERIODS_TO_IGNORE, this.tunerLookupPeriods);
    this.rollingStatsForCacheMisses = new RollingStatCalculator(this.tunerLookupPeriods);
    this.rollingStatsForFlushes = new RollingStatCalculator(this.tunerLookupPeriods);
    this.rollingStatsForEvictions = new RollingStatCalculator(this.tunerLookupPeriods);
    this.rollingStatsForTunerSteps = new RollingStatCalculator(this.tunerLookupPeriods);
  }

  /** The step taken (or to be taken) in one tuner period. */
  private enum StepDirection {
    // block cache size was increased
    INCREASE_BLOCK_CACHE_SIZE,
    // memstore size was increased
    INCREASE_MEMSTORE_SIZE,
    // no operation was performed
    NEUTRAL
  }
}