// org.apache.hadoop.hbase.io.hfile.LruBlockCache (from the hbase-server module)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.io.hfile;
import static java.util.Objects.requireNonNull;
import java.lang.ref.WeakReference;
import java.util.EnumMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.LongAdder;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.lang3.mutable.MutableBoolean;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.io.HeapSize;
import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding;
import org.apache.hadoop.hbase.util.ClassSize;
import org.apache.hadoop.util.StringUtils;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.base.MoreObjects;
import org.apache.hbase.thirdparty.com.google.common.base.Objects;
import org.apache.hbase.thirdparty.com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* A block cache implementation that is memory-aware using {@link HeapSize}, memory-bound using an
* LRU eviction algorithm, and concurrent: backed by a {@link ConcurrentHashMap} and with a
* non-blocking eviction thread giving constant-time {@link #cacheBlock} and {@link #getBlock}
* operations.
*
* Contains three levels of block priority to allow for scan-resistance and in-memory families
* {@link org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder#setInMemory(boolean)} (An
* in-memory column family is a column family that should be served from memory if possible):
* single-access, multiple-accesses, and in-memory priority. A block is added with an in-memory
* priority flag if {@link org.apache.hadoop.hbase.client.ColumnFamilyDescriptor#isInMemory()},
* otherwise a block becomes a single access priority the first time it is read into this block
* cache. If a block is accessed again while in cache, it is marked as a multiple access priority
* block. This delineation of blocks is used to prevent scans from thrashing the cache adding a
* least-frequently-used element to the eviction algorithm.
*
* Each priority is given its own chunk of the total cache to ensure fairness during eviction. Each
* priority will retain close to its maximum size, however, if any priority is not using its entire
* chunk the others are able to grow beyond their chunk size.
*
* Instantiated at a minimum with the total size and average block size. All sizes are in bytes. The
* block size is not especially important as this cache is fully dynamic in its sizing of blocks. It
* is only used for pre-allocating data structures and in initial heap estimation of the map.
*
* The detailed constructor defines the sizes for the three priorities (they should total to the
 * maximum size defined). It also sets the levels that trigger and control the eviction
* thread.
*
 * The acceptable size is the cache size level which triggers the eviction process to
* start. It evicts enough blocks to get the size below the minimum size specified.
*
* Eviction happens in a separate thread and involves a single full-scan of the map. It determines
* how many bytes must be freed to reach the minimum size, and then while scanning determines the
* fewest least-recently-used blocks necessary from each of the three priorities (would be 3 times
* bytes to free). It then uses the priority chunk sizes to evict fairly according to the relative
* sizes and usage.
*/
@InterfaceAudience.Private
public class LruBlockCache implements FirstLevelBlockCache {
private static final Logger LOG = LoggerFactory.getLogger(LruBlockCache.class);
/**
* Percentage of total size that eviction will evict until; e.g. if set to .8, then we will keep
* evicting during an eviction run till the cache size is down to 80% of the total.
*/
private static final String LRU_MIN_FACTOR_CONFIG_NAME = "hbase.lru.blockcache.min.factor";
/**
* Acceptable size of cache (no evictions if size < acceptable)
*/
private static final String LRU_ACCEPTABLE_FACTOR_CONFIG_NAME =
"hbase.lru.blockcache.acceptable.factor";
/**
* Hard capacity limit of cache, will reject any put if size > this * acceptable
*/
static final String LRU_HARD_CAPACITY_LIMIT_FACTOR_CONFIG_NAME =
"hbase.lru.blockcache.hard.capacity.limit.factor";
private static final String LRU_SINGLE_PERCENTAGE_CONFIG_NAME =
"hbase.lru.blockcache.single.percentage";
private static final String LRU_MULTI_PERCENTAGE_CONFIG_NAME =
"hbase.lru.blockcache.multi.percentage";
private static final String LRU_MEMORY_PERCENTAGE_CONFIG_NAME =
"hbase.lru.blockcache.memory.percentage";
/**
* Configuration key to force data-block always (except in-memory are too much) cached in memory
* for in-memory hfile, unlike inMemory, which is a column-family configuration, inMemoryForceMode
* is a cluster-wide configuration
*/
private static final String LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME =
"hbase.lru.rs.inmemoryforcemode";
/* Default Configuration Parameters */
/* Backing Concurrent Map Configuration */
static final float DEFAULT_LOAD_FACTOR = 0.75f;
static final int DEFAULT_CONCURRENCY_LEVEL = 16;
/* Eviction thresholds */
private static final float DEFAULT_MIN_FACTOR = 0.95f;
static final float DEFAULT_ACCEPTABLE_FACTOR = 0.99f;
/* Priority buckets */
private static final float DEFAULT_SINGLE_FACTOR = 0.25f;
private static final float DEFAULT_MULTI_FACTOR = 0.50f;
private static final float DEFAULT_MEMORY_FACTOR = 0.25f;
private static final float DEFAULT_HARD_CAPACITY_LIMIT_FACTOR = 1.2f;
private static final boolean DEFAULT_IN_MEMORY_FORCE_MODE = false;
/* Statistics thread */
private static final int STAT_THREAD_PERIOD = 60 * 5;
private static final String LRU_MAX_BLOCK_SIZE = "hbase.lru.max.block.size";
private static final long DEFAULT_MAX_BLOCK_SIZE = 16L * 1024L * 1024L;
/**
* Defined the cache map as {@link ConcurrentHashMap} here, because in
* {@link LruBlockCache#getBlock}, we need to guarantee the atomicity of map#k (key, func).
* Besides, the func method must execute exactly once only when the key is present and under the
* lock context, otherwise the reference count will be messed up. Notice that the
* {@link java.util.concurrent.ConcurrentSkipListMap} can not guarantee that. Some code using
* #computeIfPresent also expects the supplier to be executed only once. ConcurrentHashMap can
* guarantee that. Other types may not.
*/
private transient final ConcurrentHashMap map;
/** Eviction lock (locked when eviction in process) */
private transient final ReentrantLock evictionLock = new ReentrantLock(true);
private final long maxBlockSize;
/** Volatile boolean to track if we are in an eviction process or not */
private volatile boolean evictionInProgress = false;
/** Eviction thread */
private transient final EvictionThread evictionThread;
/** Statistics thread schedule pool (for heavy debugging, could remove) */
private transient final ScheduledExecutorService scheduleThreadPool =
Executors.newScheduledThreadPool(1, new ThreadFactoryBuilder()
.setNameFormat("LruBlockCacheStatsExecutor").setDaemon(true).build());
/** Current size of cache */
private final AtomicLong size;
/** Current size of data blocks */
private final LongAdder dataBlockSize = new LongAdder();
/** Current size of index blocks */
private final LongAdder indexBlockSize = new LongAdder();
/** Current size of bloom blocks */
private final LongAdder bloomBlockSize = new LongAdder();
/** Current number of cached elements */
private final AtomicLong elements;
/** Current number of cached data block elements */
private final LongAdder dataBlockElements = new LongAdder();
/** Current number of cached index block elements */
private final LongAdder indexBlockElements = new LongAdder();
/** Current number of cached bloom block elements */
private final LongAdder bloomBlockElements = new LongAdder();
/** Cache access count (sequential ID) */
private final AtomicLong count;
/** hard capacity limit */
private float hardCapacityLimitFactor;
/** Cache statistics */
private final CacheStats stats;
/** Maximum allowable size of cache (block put if size > max, evict) */
private long maxSize;
/** Approximate block size */
private long blockSize;
/** Acceptable size of cache (no evictions if size < acceptable) */
private float acceptableFactor;
/** Minimum threshold of cache (when evicting, evict until size < min) */
private float minFactor;
/** Single access bucket size */
private float singleFactor;
/** Multiple access bucket size */
private float multiFactor;
/** In-memory bucket size */
private float memoryFactor;
/** Overhead of the structure itself */
private long overhead;
/** Whether in-memory hfile's data block has higher priority when evicting */
private boolean forceInMemory;
/**
* Where to send victims (blocks evicted/missing from the cache). This is used only when we use an
* external cache as L2. Note: See org.apache.hadoop.hbase.io.hfile.MemcachedBlockCache
*/
private transient BlockCache victimHandler = null;
/**
* Default constructor. Specify maximum size and expected average block size (approximation is
* fine).
*
* All other factors will be calculated based on defaults specified in this class.
* @param maxSize maximum size of cache, in bytes
* @param blockSize approximate size of each block, in bytes
*/
public LruBlockCache(long maxSize, long blockSize) {
this(maxSize, blockSize, true);
}
/**
* Constructor used for testing. Allows disabling of the eviction thread.
*/
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread) {
this(maxSize, blockSize, evictionThread, (int) Math.ceil(1.2 * maxSize / blockSize),
DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL, DEFAULT_MIN_FACTOR, DEFAULT_ACCEPTABLE_FACTOR,
DEFAULT_SINGLE_FACTOR, DEFAULT_MULTI_FACTOR, DEFAULT_MEMORY_FACTOR,
DEFAULT_HARD_CAPACITY_LIMIT_FACTOR, false, DEFAULT_MAX_BLOCK_SIZE);
}
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, Configuration conf) {
this(maxSize, blockSize, evictionThread, (int) Math.ceil(1.2 * maxSize / blockSize),
DEFAULT_LOAD_FACTOR, DEFAULT_CONCURRENCY_LEVEL,
conf.getFloat(LRU_MIN_FACTOR_CONFIG_NAME, DEFAULT_MIN_FACTOR),
conf.getFloat(LRU_ACCEPTABLE_FACTOR_CONFIG_NAME, DEFAULT_ACCEPTABLE_FACTOR),
conf.getFloat(LRU_SINGLE_PERCENTAGE_CONFIG_NAME, DEFAULT_SINGLE_FACTOR),
conf.getFloat(LRU_MULTI_PERCENTAGE_CONFIG_NAME, DEFAULT_MULTI_FACTOR),
conf.getFloat(LRU_MEMORY_PERCENTAGE_CONFIG_NAME, DEFAULT_MEMORY_FACTOR),
conf.getFloat(LRU_HARD_CAPACITY_LIMIT_FACTOR_CONFIG_NAME, DEFAULT_HARD_CAPACITY_LIMIT_FACTOR),
conf.getBoolean(LRU_IN_MEMORY_FORCE_MODE_CONFIG_NAME, DEFAULT_IN_MEMORY_FORCE_MODE),
conf.getLong(LRU_MAX_BLOCK_SIZE, DEFAULT_MAX_BLOCK_SIZE));
}
public LruBlockCache(long maxSize, long blockSize, Configuration conf) {
this(maxSize, blockSize, true, conf);
}
/**
* Configurable constructor. Use this constructor if not using defaults.
* @param maxSize maximum size of this cache, in bytes
* @param blockSize expected average size of blocks, in bytes
* @param evictionThread whether to run evictions in a bg thread or not
* @param mapInitialSize initial size of backing ConcurrentHashMap
* @param mapLoadFactor initial load factor of backing ConcurrentHashMap
* @param mapConcurrencyLevel initial concurrency factor for backing CHM
* @param minFactor percentage of total size that eviction will evict until
* @param acceptableFactor percentage of total size that triggers eviction
* @param singleFactor percentage of total size for single-access blocks
* @param multiFactor percentage of total size for multiple-access blocks
* @param memoryFactor percentage of total size for in-memory blocks
*/
public LruBlockCache(long maxSize, long blockSize, boolean evictionThread, int mapInitialSize,
float mapLoadFactor, int mapConcurrencyLevel, float minFactor, float acceptableFactor,
float singleFactor, float multiFactor, float memoryFactor, float hardLimitFactor,
boolean forceInMemory, long maxBlockSize) {
this.maxBlockSize = maxBlockSize;
if (
singleFactor + multiFactor + memoryFactor != 1 || singleFactor < 0 || multiFactor < 0
|| memoryFactor < 0
) {
throw new IllegalArgumentException(
"Single, multi, and memory factors " + " should be non-negative and total 1.0");
}
if (minFactor >= acceptableFactor) {
throw new IllegalArgumentException("minFactor must be smaller than acceptableFactor");
}
if (minFactor >= 1.0f || acceptableFactor >= 1.0f) {
throw new IllegalArgumentException("all factors must be < 1");
}
this.maxSize = maxSize;
this.blockSize = blockSize;
this.forceInMemory = forceInMemory;
map = new ConcurrentHashMap<>(mapInitialSize, mapLoadFactor, mapConcurrencyLevel);
this.minFactor = minFactor;
this.acceptableFactor = acceptableFactor;
this.singleFactor = singleFactor;
this.multiFactor = multiFactor;
this.memoryFactor = memoryFactor;
this.stats = new CacheStats(this.getClass().getSimpleName());
this.count = new AtomicLong(0);
this.elements = new AtomicLong(0);
this.overhead = calculateOverhead(maxSize, blockSize, mapConcurrencyLevel);
this.size = new AtomicLong(this.overhead);
this.hardCapacityLimitFactor = hardLimitFactor;
if (evictionThread) {
this.evictionThread = new EvictionThread(this);
this.evictionThread.start(); // FindBugs SC_START_IN_CTOR
} else {
this.evictionThread = null;
}
// TODO: Add means of turning this off. Bit obnoxious running thread just to make a log
// every five minutes.
this.scheduleThreadPool.scheduleAtFixedRate(new StatisticsThread(this), STAT_THREAD_PERIOD,
STAT_THREAD_PERIOD, TimeUnit.SECONDS);
}
@Override
public void setVictimCache(BlockCache victimCache) {
if (victimHandler != null) {
throw new IllegalArgumentException("The victim cache has already been set");
}
victimHandler = requireNonNull(victimCache);
}
@Override
public void setMaxSize(long maxSize) {
this.maxSize = maxSize;
if (this.size.get() > acceptableSize() && !evictionInProgress) {
runEviction();
}
}
/**
* The block cached in LRUBlockCache will always be an heap block: on the one side, the heap
* access will be more faster then off-heap, the small index block or meta block cached in
* CombinedBlockCache will benefit a lot. on other side, the LRUBlockCache size is always
* calculated based on the total heap size, if caching an off-heap block in LRUBlockCache, the
* heap size will be messed up. Here we will clone the block into an heap block if it's an
* off-heap block, otherwise just use the original block. The key point is maintain the refCnt of
* the block (HBASE-22127):
* 1. if cache the cloned heap block, its refCnt is an totally new one, it's easy to handle;
* 2. if cache the original heap block, we're sure that it won't be tracked in ByteBuffAllocator's
* reservoir, if both RPC and LRUBlockCache release the block, then it can be garbage collected by
* JVM, so need a retain here.
* @param buf the original block
* @return an block with an heap memory backend.
*/
private Cacheable asReferencedHeapBlock(Cacheable buf) {
if (buf instanceof HFileBlock) {
HFileBlock blk = ((HFileBlock) buf);
if (blk.isSharedMem()) {
return HFileBlock.deepCloneOnHeap(blk);
}
}
// The block will be referenced by this LRUBlockCache, so should increase its refCnt here.
return buf.retain();
}
// BlockCache implementation
/**
* Cache the block with the specified name and buffer.
*
* It is assumed this will NOT be called on an already cached block. In rare cases (HBASE-8547)
* this can happen, for which we compare the buffer contents.
* @param cacheKey block's cache key
* @param buf block buffer
* @param inMemory if block is in-memory
*/
@Override
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf, boolean inMemory) {
if (buf.heapSize() > maxBlockSize) {
// If there are a lot of blocks that are too
// big this can make the logs way too noisy.
// So we log 2%
if (stats.failInsert() % 50 == 0) {
LOG.warn("Trying to cache too large a block " + cacheKey.getHfileName() + " @ "
+ cacheKey.getOffset() + " is " + buf.heapSize() + " which is larger than "
+ maxBlockSize);
}
return;
}
LruCachedBlock cb = map.get(cacheKey);
if (cb != null && !BlockCacheUtil.shouldReplaceExistingCacheBlock(this, cacheKey, buf)) {
return;
}
long currentSize = size.get();
long currentAcceptableSize = acceptableSize();
long hardLimitSize = (long) (hardCapacityLimitFactor * currentAcceptableSize);
if (currentSize >= hardLimitSize) {
stats.failInsert();
if (LOG.isTraceEnabled()) {
LOG.trace("LruBlockCache current size " + StringUtils.byteDesc(currentSize)
+ " has exceeded acceptable size " + StringUtils.byteDesc(currentAcceptableSize) + "."
+ " The hard limit size is " + StringUtils.byteDesc(hardLimitSize)
+ ", failed to put cacheKey:" + cacheKey + " into LruBlockCache.");
}
if (!evictionInProgress) {
runEviction();
}
return;
}
// Ensure that the block is an heap one.
buf = asReferencedHeapBlock(buf);
cb = new LruCachedBlock(cacheKey, buf, count.incrementAndGet(), inMemory);
long newSize = updateSizeMetrics(cb, false);
map.put(cacheKey, cb);
long val = elements.incrementAndGet();
if (buf.getBlockType().isBloom()) {
bloomBlockElements.increment();
} else if (buf.getBlockType().isIndex()) {
indexBlockElements.increment();
} else if (buf.getBlockType().isData()) {
dataBlockElements.increment();
}
if (LOG.isTraceEnabled()) {
long size = map.size();
assertCounterSanity(size, val);
}
if (newSize > currentAcceptableSize && !evictionInProgress) {
runEviction();
}
}
/**
* Sanity-checking for parity between actual block cache content and metrics. Intended only for
* use with TRACE level logging and -ea JVM.
*/
private static void assertCounterSanity(long mapSize, long counterVal) {
if (counterVal < 0) {
LOG.trace("counterVal overflow. Assertions unreliable. counterVal=" + counterVal
+ ", mapSize=" + mapSize);
return;
}
if (mapSize < Integer.MAX_VALUE) {
double pct_diff = Math.abs((((double) counterVal) / ((double) mapSize)) - 1.);
if (pct_diff > 0.05) {
LOG.trace("delta between reported and actual size > 5%. counterVal=" + counterVal
+ ", mapSize=" + mapSize);
}
}
}
/**
* Cache the block with the specified name and buffer.
*
* TODO after HBASE-22005, we may cache an block which allocated from off-heap, but our LRU cache
* sizing is based on heap size, so we should handle this in HBASE-22127. It will introduce an
* switch whether make the LRU on-heap or not, if so we may need copy the memory to on-heap,
* otherwise the caching size is based on off-heap.
* @param cacheKey block's cache key
* @param buf block buffer
*/
@Override
public void cacheBlock(BlockCacheKey cacheKey, Cacheable buf) {
cacheBlock(cacheKey, buf, false);
}
/**
* Helper function that updates the local size counter and also updates any per-cf or
* per-blocktype metrics it can discern from given {@link LruCachedBlock}
*/
private long updateSizeMetrics(LruCachedBlock cb, boolean evict) {
long heapsize = cb.heapSize();
BlockType bt = cb.getBuffer().getBlockType();
if (evict) {
heapsize *= -1;
}
if (bt != null) {
if (bt.isBloom()) {
bloomBlockSize.add(heapsize);
} else if (bt.isIndex()) {
indexBlockSize.add(heapsize);
} else if (bt.isData()) {
dataBlockSize.add(heapsize);
}
}
return size.addAndGet(heapsize);
}
/**
* Get the buffer of the block with the specified name.
* @param cacheKey block's cache key
* @param caching true if the caller caches blocks on cache misses
* @param repeat Whether this is a repeat lookup for the same block (used to avoid
* double counting cache misses when doing double-check locking)
* @param updateCacheMetrics Whether to update cache metrics or not
* @return buffer of specified cache key, or null if not in cache
*/
@Override
public Cacheable getBlock(BlockCacheKey cacheKey, boolean caching, boolean repeat,
boolean updateCacheMetrics) {
// Note: 'map' must be a ConcurrentHashMap or the supplier may be invoked more than once.
LruCachedBlock cb = map.computeIfPresent(cacheKey, (key, val) -> {
// It will be referenced by RPC path, so increase here. NOTICE: Must do the retain inside
// this block. because if retain outside the map#computeIfPresent, the evictBlock may remove
// the block and release, then we're retaining a block with refCnt=0 which is disallowed.
// see HBASE-22422.
val.getBuffer().retain();
return val;
});
if (cb == null) {
if (!repeat && updateCacheMetrics) {
stats.miss(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
}
// If there is another block cache then try and read there.
// However if this is a retry ( second time in double checked locking )
// And it's already a miss then the l2 will also be a miss.
if (victimHandler != null && !repeat) {
// The handler will increase result's refCnt for RPC, so need no extra retain.
Cacheable result = victimHandler.getBlock(cacheKey, caching, repeat, updateCacheMetrics);
// Promote this to L1.
if (result != null) {
if (caching) {
cacheBlock(cacheKey, result, /* inMemory = */ false);
}
}
return result;
}
return null;
}
if (updateCacheMetrics) {
stats.hit(caching, cacheKey.isPrimary(), cacheKey.getBlockType());
}
cb.access(count.incrementAndGet());
return cb.getBuffer();
}
/**
* Whether the cache contains block with specified cacheKey
* @return true if contains the block
*/
@Override
public boolean containsBlock(BlockCacheKey cacheKey) {
return map.containsKey(cacheKey);
}
@Override
public boolean evictBlock(BlockCacheKey cacheKey) {
LruCachedBlock cb = map.get(cacheKey);
return cb != null && evictBlock(cb, false) > 0;
}
/**
* Evicts all blocks for a specific HFile. This is an expensive operation implemented as a
* linear-time search through all blocks in the cache. Ideally this should be a search in a
* log-access-time map.
*
* This is used for evict-on-close to remove all blocks of a specific HFile.
* @return the number of blocks evicted
*/
@Override
public int evictBlocksByHfileName(String hfileName) {
int numEvicted = 0;
for (BlockCacheKey key : map.keySet()) {
if (key.getHfileName().equals(hfileName)) {
if (evictBlock(key)) {
++numEvicted;
}
}
}
if (victimHandler != null) {
numEvicted += victimHandler.evictBlocksByHfileName(hfileName);
}
return numEvicted;
}
/**
* Evict the block, and it will be cached by the victim handler if exists && block may be
* read again later
* @param evictedByEvictionProcess true if the given block is evicted by EvictionThread
* @return the heap size of evicted block
*/
protected long evictBlock(LruCachedBlock block, boolean evictedByEvictionProcess) {
final MutableBoolean evicted = new MutableBoolean(false);
// Note: 'map' must be a ConcurrentHashMap or the supplier may be invoked more than once.
map.computeIfPresent(block.getCacheKey(), (k, v) -> {
// Run the victim handler before we remove the mapping in the L1 map. It must complete
// quickly because other removal or insertion operations can be blocked in the meantime.
if (evictedByEvictionProcess && victimHandler != null) {
victimHandler.cacheBlock(k, v.getBuffer());
}
// Decrease the block's reference count, and if refCount is 0, then it'll auto-deallocate. DO
// NOT move this up because if we do that then the victimHandler may access the buffer with
// refCnt = 0 which is disallowed.
v.getBuffer().release();
evicted.setTrue();
// By returning null from the supplier we remove the mapping from the L1 map.
return null;
});
// If we didn't find anything to evict there is nothing more to do here.
if (evicted.isFalse()) {
return 0;
}
// We evicted the block so update L1 statistics.
updateSizeMetrics(block, true);
long val = elements.decrementAndGet();
if (LOG.isTraceEnabled()) {
long size = map.size();
assertCounterSanity(size, val);
}
BlockType bt = block.getBuffer().getBlockType();
if (bt.isBloom()) {
bloomBlockElements.decrement();
} else if (bt.isIndex()) {
indexBlockElements.decrement();
} else if (bt.isData()) {
dataBlockElements.decrement();
}
if (evictedByEvictionProcess) {
// When the eviction of the block happened because of invalidation of HFiles, no need to
// update the stats counter.
stats.evicted(block.getCachedTime(), block.getCacheKey().isPrimary());
}
return block.heapSize();
}
/**
* Multi-threaded call to run the eviction process.
*/
private void runEviction() {
if (evictionThread == null || !evictionThread.isGo()) {
evict();
} else {
evictionThread.evict();
}
}
boolean isEvictionInProgress() {
return evictionInProgress;
}
long getOverhead() {
return overhead;
}
/**
* Eviction method.
*/
void evict() {
// Ensure only one eviction at a time
if (!evictionLock.tryLock()) {
return;
}
try {
evictionInProgress = true;
long currentSize = this.size.get();
long bytesToFree = currentSize - minSize();
if (LOG.isTraceEnabled()) {
LOG.trace("Block cache LRU eviction started; Attempting to free "
+ StringUtils.byteDesc(bytesToFree) + " of total=" + StringUtils.byteDesc(currentSize));
}
if (bytesToFree <= 0) {
return;
}
// Instantiate priority buckets
BlockBucket bucketSingle = new BlockBucket("single", bytesToFree, blockSize, singleSize());
BlockBucket bucketMulti = new BlockBucket("multi", bytesToFree, blockSize, multiSize());
BlockBucket bucketMemory = new BlockBucket("memory", bytesToFree, blockSize, memorySize());
// Scan entire map putting into appropriate buckets
for (LruCachedBlock cachedBlock : map.values()) {
switch (cachedBlock.getPriority()) {
case SINGLE: {
bucketSingle.add(cachedBlock);
break;
}
case MULTI: {
bucketMulti.add(cachedBlock);
break;
}
case MEMORY: {
bucketMemory.add(cachedBlock);
break;
}
}
}
long bytesFreed = 0;
if (forceInMemory || memoryFactor > 0.999f) {
long s = bucketSingle.totalSize();
long m = bucketMulti.totalSize();
if (bytesToFree > (s + m)) {
// this means we need to evict blocks in memory bucket to make room,
// so the single and multi buckets will be emptied
bytesFreed = bucketSingle.free(s);
bytesFreed += bucketMulti.free(m);
if (LOG.isTraceEnabled()) {
LOG.trace(
"freed " + StringUtils.byteDesc(bytesFreed) + " from single and multi buckets");
}
bytesFreed += bucketMemory.free(bytesToFree - bytesFreed);
if (LOG.isTraceEnabled()) {
LOG.trace(
"freed " + StringUtils.byteDesc(bytesFreed) + " total from all three buckets ");
}
} else {
// this means no need to evict block in memory bucket,
// and we try best to make the ratio between single-bucket and
// multi-bucket is 1:2
long bytesRemain = s + m - bytesToFree;
if (3 * s <= bytesRemain) {
// single-bucket is small enough that no eviction happens for it
// hence all eviction goes from multi-bucket
bytesFreed = bucketMulti.free(bytesToFree);
} else if (3 * m <= 2 * bytesRemain) {
// multi-bucket is small enough that no eviction happens for it
// hence all eviction goes from single-bucket
bytesFreed = bucketSingle.free(bytesToFree);
} else {
// both buckets need to evict some blocks
bytesFreed = bucketSingle.free(s - bytesRemain / 3);
if (bytesFreed < bytesToFree) {
bytesFreed += bucketMulti.free(bytesToFree - bytesFreed);
}
}
}
} else {
PriorityQueue bucketQueue = new PriorityQueue<>(3);
bucketQueue.add(bucketSingle);
bucketQueue.add(bucketMulti);
bucketQueue.add(bucketMemory);
int remainingBuckets = bucketQueue.size();
BlockBucket bucket;
while ((bucket = bucketQueue.poll()) != null) {
long overflow = bucket.overflow();
if (overflow > 0) {
long bucketBytesToFree =
Math.min(overflow, (bytesToFree - bytesFreed) / remainingBuckets);
bytesFreed += bucket.free(bucketBytesToFree);
}
remainingBuckets--;
}
}
if (LOG.isTraceEnabled()) {
long single = bucketSingle.totalSize();
long multi = bucketMulti.totalSize();
long memory = bucketMemory.totalSize();
LOG.trace(
"Block cache LRU eviction completed; " + "freed=" + StringUtils.byteDesc(bytesFreed)
+ ", " + "total=" + StringUtils.byteDesc(this.size.get()) + ", " + "single="
+ StringUtils.byteDesc(single) + ", " + "multi=" + StringUtils.byteDesc(multi) + ", "
+ "memory=" + StringUtils.byteDesc(memory));
}
} finally {
stats.evict();
evictionInProgress = false;
evictionLock.unlock();
}
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this).add("blockCount", getBlockCount())
.add("currentSize", StringUtils.byteDesc(getCurrentSize()))
.add("freeSize", StringUtils.byteDesc(getFreeSize()))
.add("maxSize", StringUtils.byteDesc(getMaxSize()))
.add("heapSize", StringUtils.byteDesc(heapSize()))
.add("minSize", StringUtils.byteDesc(minSize())).add("minFactor", minFactor)
.add("multiSize", StringUtils.byteDesc(multiSize())).add("multiFactor", multiFactor)
.add("singleSize", StringUtils.byteDesc(singleSize())).add("singleFactor", singleFactor)
.toString();
}
/**
* Used to group blocks into priority buckets. There will be a BlockBucket for each priority
* (single, multi, memory). Once bucketed, the eviction algorithm takes the appropriate number of
* elements out of each according to configuration parameters and their relatives sizes.
*/
private class BlockBucket implements Comparable {
private final String name;
private LruCachedBlockQueue queue;
private long totalSize = 0;
private long bucketSize;
public BlockBucket(String name, long bytesToFree, long blockSize, long bucketSize) {
this.name = name;
this.bucketSize = bucketSize;
queue = new LruCachedBlockQueue(bytesToFree, blockSize);
totalSize = 0;
}
public void add(LruCachedBlock block) {
totalSize += block.heapSize();
queue.add(block);
}
public long free(long toFree) {
if (LOG.isTraceEnabled()) {
LOG.trace("freeing " + StringUtils.byteDesc(toFree) + " from " + this);
}
LruCachedBlock cb;
long freedBytes = 0;
while ((cb = queue.pollLast()) != null) {
freedBytes += evictBlock(cb, true);
if (freedBytes >= toFree) {
return freedBytes;
}
}
if (LOG.isTraceEnabled()) {
LOG.trace("freed " + StringUtils.byteDesc(freedBytes) + " from " + this);
}
return freedBytes;
}
public long overflow() {
return totalSize - bucketSize;
}
public long totalSize() {
return totalSize;
}
@Override
public int compareTo(BlockBucket that) {
return Long.compare(this.overflow(), that.overflow());
}
@Override
public boolean equals(Object that) {
if (that == null || !(that instanceof BlockBucket)) {
return false;
}
return compareTo((BlockBucket) that) == 0;
}
@Override
public int hashCode() {
return Objects.hashCode(name, bucketSize, queue, totalSize);
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this).add("name", name)
.add("totalSize", StringUtils.byteDesc(totalSize))
.add("bucketSize", StringUtils.byteDesc(bucketSize)).toString();
}
}
/**
* Get the maximum size of this cache.
* @return max size in bytes
*/
@Override
public long getMaxSize() {
return this.maxSize;
}
@Override
public long getCurrentSize() {
return this.size.get();
}
@Override
public long getCurrentDataSize() {
return this.dataBlockSize.sum();
}
public long getCurrentIndexSize() {
return this.indexBlockSize.sum();
}
public long getCurrentBloomSize() {
return this.bloomBlockSize.sum();
}
@Override
public long getFreeSize() {
return getMaxSize() - getCurrentSize();
}
@Override
public long size() {
return getMaxSize();
}
@Override
public long getBlockCount() {
return this.elements.get();
}
/** @return number of DATA blocks currently in the cache */
@Override
public long getDataBlockCount() {
  return this.dataBlockElements.sum();
}
/** @return number of index blocks currently in the cache */
public long getIndexBlockCount() {
  return this.indexBlockElements.sum();
}
/** @return number of bloom blocks currently in the cache */
public long getBloomBlockCount() {
  return this.bloomBlockElements.sum();
}
// Package-private accessor, used by tests and internal callers.
EvictionThread getEvictionThread() {
  return this.evictionThread;
}
/**
 * Eviction thread. Sits in waiting state until an eviction is triggered when the cache size grows
 * above the acceptable level. Thread is triggered into action by
 * {@link LruBlockCache#runEviction()}.
 */
static class EvictionThread extends Thread {
  // Weak reference so this daemon thread does not keep a discarded cache alive;
  // when the cache is collected the run loop notices and exits.
  private WeakReference<LruBlockCache> cache;
  private volatile boolean go = true;
  // flag set after enter the run method, used for test
  private boolean enteringRun = false;

  public EvictionThread(LruBlockCache cache) {
    super(Thread.currentThread().getName() + ".LruBlockCache.EvictionThread");
    setDaemon(true);
    this.cache = new WeakReference<>(cache);
  }

  @Override
  public void run() {
    enteringRun = true;
    while (this.go) {
      synchronized (this) {
        try {
          this.wait(1000 * 10/* Don't wait for ever */);
        } catch (InterruptedException e) {
          LOG.warn("Interrupted eviction thread ", e);
          Thread.currentThread().interrupt();
        }
      }
      LruBlockCache cache = this.cache.get();
      if (cache == null) {
        // Cache was garbage-collected; stop the loop.
        this.go = false;
        break;
      }
      cache.evict();
    }
  }

  @edu.umd.cs.findbugs.annotations.SuppressWarnings(value = "NN_NAKED_NOTIFY",
      justification = "This is what we want")
  public void evict() {
    // Wake the run loop so it performs an eviction pass now instead of
    // waiting out the 10s timeout.
    synchronized (this) {
      this.notifyAll();
    }
  }

  synchronized void shutdown() {
    this.go = false;
    this.notifyAll();
  }

  public boolean isGo() {
    return go;
  }

  /**
   * Used for the test.
   */
  boolean isEnteringRun() {
    return this.enteringRun;
  }
}
/*
 * Statistics thread. Periodically prints the cache statistics to the log.
 */
static class StatisticsThread extends Thread {
  // Cache whose stats are logged; a strong reference is fine because this
  // thread runs once per scheduled invocation and exits.
  private final LruBlockCache lru;

  public StatisticsThread(LruBlockCache lru) {
    super("LruBlockCacheStats");
    setDaemon(true);
    this.lru = lru;
  }

  @Override
  public void run() {
    lru.logStats();
  }
}
/**
 * Logs a one-line summary of cache size and hit/eviction statistics.
 * <p>
 * Fixes the original message formatting, which emitted duplicated separators
 * ("hitRatio=x%, , cachingAccesses=") and ran "0," directly into the next key
 * ("cachingHitsRatio=0,evictions=") — now every field is followed by a single
 * ", " separator.
 */
public void logStats() {
  // Log size
  long totalSize = heapSize();
  long freeSize = maxSize - totalSize;
  LruBlockCache.LOG.info("totalSize=" + StringUtils.byteDesc(totalSize) + ", "
    + "freeSize=" + StringUtils.byteDesc(freeSize) + ", "
    + "max=" + StringUtils.byteDesc(this.maxSize) + ", "
    + "blockCount=" + getBlockCount() + ", "
    + "accesses=" + stats.getRequestCount() + ", "
    + "hits=" + stats.getHitCount() + ", "
    + "hitRatio="
    + (stats.getHitCount() == 0 ? "0" : StringUtils.formatPercent(stats.getHitRatio(), 2)) + ", "
    + "cachingAccesses=" + stats.getRequestCachingCount() + ", "
    + "cachingHits=" + stats.getHitCachingCount() + ", "
    + "cachingHitsRatio="
    + (stats.getHitCachingCount() == 0
      ? "0"
      : StringUtils.formatPercent(stats.getHitCachingRatio(), 2))
    + ", "
    + "evictions=" + stats.getEvictionCount() + ", "
    + "evicted=" + stats.getEvictedCount() + ", "
    + "evictedPerRun=" + stats.evictedPerEviction());
}
/**
 * Get counter statistics for this cache.
 *
 * Includes: total accesses, hits, misses, evicted blocks, and runs of the eviction processes.
 */
@Override
public CacheStats getStats() {
  return this.stats;
}
// Fixed heap overhead of an LruBlockCache instance itself, estimated once at
// class-load time; used by calculateOverhead() below.
public final static long CACHE_FIXED_OVERHEAD =
  ClassSize.estimateBase(LruBlockCache.class, false);
// HeapSize contract: the cache's heap footprint is its current occupied size.
@Override
public long heapSize() {
  return getCurrentSize();
}
/**
 * Estimates the bookkeeping heap overhead of the cache: the fixed instance
 * overhead plus the backing ConcurrentHashMap, its entries (sized for 120% of
 * maxSize/blockSize blocks), and its segments.
 */
private static long calculateOverhead(long maxSize, long blockSize, int concurrency) {
  // FindBugs ICAST_INTEGER_MULTIPLY_CAST_TO_LONG
  long estimatedEntries = (long) Math.ceil(maxSize * 1.2 / blockSize);
  long entriesOverhead = estimatedEntries * ClassSize.CONCURRENT_HASHMAP_ENTRY;
  long segmentsOverhead = (long) concurrency * ClassSize.CONCURRENT_HASHMAP_SEGMENT;
  return CACHE_FIXED_OVERHEAD + ClassSize.CONCURRENT_HASHMAP + entriesOverhead + segmentsOverhead;
}
/**
 * Returns a read-only iterator over snapshot views of the cached blocks.
 * Restores the generic type parameters (raw {@code Iterator} in the extracted
 * source); {@link Iterator#remove()} is unsupported.
 */
@Override
public Iterator<CachedBlock> iterator() {
  final Iterator<LruCachedBlock> iterator = map.values().iterator();
  return new Iterator<CachedBlock>() {
    // Snapshot of "now" so all blocks report age relative to the same instant.
    private final long now = System.nanoTime();

    @Override
    public boolean hasNext() {
      return iterator.hasNext();
    }

    @Override
    public CachedBlock next() {
      final LruCachedBlock b = iterator.next();
      return new CachedBlock() {
        @Override
        public String toString() {
          return BlockCacheUtil.toString(this, now);
        }

        @Override
        public BlockPriority getBlockPriority() {
          return b.getPriority();
        }

        @Override
        public BlockType getBlockType() {
          return b.getBuffer().getBlockType();
        }

        @Override
        public long getOffset() {
          return b.getCacheKey().getOffset();
        }

        @Override
        public long getSize() {
          return b.getBuffer().heapSize();
        }

        @Override
        public long getCachedTime() {
          return b.getCachedTime();
        }

        @Override
        public String getFilename() {
          return b.getCacheKey().getHfileName();
        }

        // Orders by filename, then offset, then most-recently-cached first.
        @Override
        public int compareTo(CachedBlock other) {
          int diff = this.getFilename().compareTo(other.getFilename());
          if (diff != 0) {
            return diff;
          }
          diff = Long.compare(this.getOffset(), other.getOffset());
          if (diff != 0) {
            return diff;
          }
          if (other.getCachedTime() < 0 || this.getCachedTime() < 0) {
            throw new IllegalStateException(this.getCachedTime() + ", " + other.getCachedTime());
          }
          // Reversed operands: newer (larger) cached time sorts first.
          return Long.compare(other.getCachedTime(), this.getCachedTime());
        }

        @Override
        public int hashCode() {
          return b.hashCode();
        }

        @Override
        public boolean equals(Object obj) {
          if (obj instanceof CachedBlock) {
            CachedBlock cb = (CachedBlock) obj;
            return compareTo(cb) == 0;
          } else {
            return false;
          }
        }
      };
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }
  };
}
// Simple calculators of sizes given factors and maxSize
// Size above which eviction is triggered.
long acceptableSize() {
  return (long) Math.floor(this.maxSize * this.acceptableFactor);
}
// Size that eviction shrinks the cache down to.
private long minSize() {
  return (long) Math.floor(this.maxSize * this.minFactor);
}
// Target size of the single-access priority bucket after eviction.
private long singleSize() {
  return (long) Math.floor(this.maxSize * this.singleFactor * this.minFactor);
}
// Target size of the multi-access priority bucket after eviction.
private long multiSize() {
  return (long) Math.floor(this.maxSize * this.multiFactor * this.minFactor);
}
// Target size of the in-memory priority bucket after eviction.
private long memorySize() {
  return (long) Math.floor(this.maxSize * this.memoryFactor * this.minFactor);
}
/**
 * Shuts down the victim cache (if any), the stats executor, and the eviction
 * thread.
 * <p>
 * Fixes the original wait loop, which polled {@code isShutdown()}:
 * {@code ExecutorService.isShutdown()} returns {@code true} immediately after
 * {@code shutdown()} is invoked, so the loop never slept and
 * {@code shutdownNow()} was never reached. We now use
 * {@code awaitTermination} to give in-flight tasks up to 100ms to finish
 * before forcing termination.
 */
@Override
public void shutdown() {
  if (victimHandler != null) {
    victimHandler.shutdown();
  }
  this.scheduleThreadPool.shutdown();
  try {
    if (!this.scheduleThreadPool.awaitTermination(100, TimeUnit.MILLISECONDS)) {
      List<Runnable> runnables = this.scheduleThreadPool.shutdownNow();
      LOG.debug("Still running " + runnables);
    }
  } catch (InterruptedException e) {
    LOG.warn("Interrupted while sleeping");
    Thread.currentThread().interrupt();
    this.scheduleThreadPool.shutdownNow();
  }
  this.evictionThread.shutdown();
}
/** Clears the cache. Used in tests. */
public void clearCache() {
  // NOTE(review): only the map and the element counter are reset; the per-type
  // size/count adders (dataBlockSize etc.) are left as-is — TODO confirm this
  // is intentional for the tests that use it.
  this.map.clear();
  this.elements.set(0);
}
/**
 * Used in testing. May be very inefficient.
 * Restores the generic type parameters (raw {@code SortedSet} in the
 * extracted source).
 * @return the set of cached file names
 */
SortedSet<String> getCachedFileNamesForTest() {
  SortedSet<String> fileNames = new TreeSet<>();
  for (BlockCacheKey cacheKey : map.keySet()) {
    fileNames.add(cacheKey.getHfileName());
  }
  return fileNames;
}
/**
 * Counts cached data blocks per data-block encoding. Used in testing.
 * Restores the generic type parameters (raw {@code Map} in the extracted
 * source) and replaces the get/null-check/put dance with {@link Map#merge}.
 * @return per-encoding block counts
 */
public Map<DataBlockEncoding, Integer> getEncodingCountsForTest() {
  Map<DataBlockEncoding, Integer> counts = new EnumMap<>(DataBlockEncoding.class);
  for (LruCachedBlock block : map.values()) {
    DataBlockEncoding encoding = ((HFileBlock) block.getBuffer()).getDataBlockEncoding();
    counts.merge(encoding, 1, Integer::sum);
  }
  return counts;
}
// Exposes the backing map for tests; restores the generic type parameters
// (raw Map in the extracted source).
Map<BlockCacheKey, LruCachedBlock> getMapForTests() {
  return map;
}
/**
 * @return this cache and its victim cache as a two-element array, or
 *         {@code null} when no victim cache is configured
 */
@Override
public BlockCache[] getBlockCaches() {
  return victimHandler == null ? null : new BlockCache[] { this, this.victimHandler };
}
}