/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.llap.cache;

import com.google.common.annotations.VisibleForTesting;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.ReentrantLock;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.llap.DebugUtils;
import org.apache.hadoop.hive.llap.cache.LowLevelCache.Priority;
import org.apache.hadoop.hive.llap.io.api.impl.LlapIoImpl;

/**
 * Implementation of the algorithm from "On the Existence of a Spectrum of Policies
 * that Subsumes the Least Recently Used (LRU) and Least Frequently Used (LFU) Policies".
 * Additionally, buffer locking has to be handled (locked buffer cannot be evicted).
 */
public class LowLevelLrfuCachePolicy implements LowLevelCachePolicy {
  private final double lambda;
  private final double f(long x) {
    return Math.pow(0.5, lambda * x);
  }
  private static final double F0 = 1; // f(0) is always 1
  private final double touchPriority(long time, long lastAccess, double previous) {
    return F0 + f(time - lastAccess) * previous;
  }
  private final double expirePriority(long time, long lastAccess, double previous) {
    return f(time - lastAccess) * previous;
  }
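  // A worked illustration of the two functions above (numbers only; this is not new logic):
  // touching a buffer last seen one tick ago yields F0 + 0.5^lambda * previous. With lambda = 0
  // the decay factor is always 1, so the priority is simply the access count - pure LFU. With
  // lambda = 1 the history is halved every tick, so back-to-back touches converge to
  // 1 + 0.5 + 0.25 + ... = 2 and recency dominates - effectively LRU. Intermediate lambdas
  // interpolate between the two, which is the "spectrum" the paper refers to.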

  private final AtomicLong timer = new AtomicLong(0);
  /**
   * The heap and list. Currently synchronized on the object, which is not good. If this becomes
   * a problem (which it probably will), we can partition the cache policy, or use some better
   * structure. Heap should not be locked while holding the lock on list.
   * As of now, eviction in most cases only needs the list; locking does little work (at most an
   * opportunistic list removal), and unlocking is what actually places an item into the
   * evictable part of the cache. Since unlocking happens after processing, this most expensive
   * step (and, in most cases, the only access to the heap) does not block processing.
   * Perhaps we should use ConcurrentDoubleLinkedList (in public domain).
   * ONLY LIST REMOVAL is allowed under list lock.
   */
  private final LlapCacheableBuffer[] heap;
  private final ReentrantLock listLock = new ReentrantLock();
  private LlapCacheableBuffer listHead, listTail;
  /** Number of elements. */
  private int heapSize = 0;
  private EvictionListener evictionListener;
  private LlapOomDebugDump parentDebugDump;
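  // Summary of buffer states as used below (descriptive only): buffer.indexInHeap is >= 0 when
  // the buffer occupies that slot in the heap array, LlapCacheableBuffer.IN_LIST when it sits
  // on the doubly-linked eviction list, and LlapCacheableBuffer.NOT_IN_CACHE when it is in
  // neither structure (e.g. still locked, already evicted, or not yet added).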

  public LowLevelLrfuCachePolicy(Configuration conf) {
    this((int)HiveConf.getSizeVar(conf, ConfVars.LLAP_ALLOCATOR_MIN_ALLOC),
        HiveConf.getSizeVar(conf, ConfVars.LLAP_IO_MEMORY_MAX_SIZE), conf);
  }

  @VisibleForTesting
  public LowLevelLrfuCachePolicy(int minBufferSize, long maxSize, Configuration conf) {
    lambda = HiveConf.getFloatVar(conf, HiveConf.ConfVars.LLAP_LRFU_LAMBDA);
    int maxBuffers = (int)Math.ceil((maxSize * 1.0) / minBufferSize);
    int maxHeapSize = -1;
    if (lambda == 0) {
      maxHeapSize = maxBuffers; // lrfuThreshold is +inf in this case
    } else {
      int lrfuThreshold = (int)((Math.log(1 - Math.pow(0.5, lambda)) / Math.log(0.5)) / lambda);
      maxHeapSize = Math.min(lrfuThreshold, maxBuffers);
    }
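    // Illustrative arithmetic only: with lambda = 0.01 the formula above yields roughly 700
    // heap slots; larger lambdas decay history faster and shrink the heap, while lambda = 0
    // (pure LFU) needs a heap slot for every possible buffer. Roughly speaking, per the paper,
    // buffers unaccessed for longer than this threshold can be ordered by recency alone, which
    // is why the rest of the policy keeps them on the linked list.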
    LlapIoImpl.LOG.info("LRFU cache policy with min buffer size {} and lambda {} (heap size {})",
        minBufferSize, lambda, maxHeapSize);

    heap = new LlapCacheableBuffer[maxHeapSize];
    listHead = listTail = null;
  }

  @Override
  public void cache(LlapCacheableBuffer buffer, Priority priority) {
    // LRFU cache policy doesn't store locked blocks. When we cache, the block is locked, so
    // we simply do nothing here. The fact that it was never updated will allow us to add it
    // properly on the first notifyUnlock.
    // All we do here is set the priority, to account for the inbound access. No lock is
    // needed - the buffer is not in the heap yet.
    assert buffer.lastUpdate == -1;
    long time = timer.incrementAndGet();
    buffer.priority = F0;
    buffer.lastUpdate = time;
    if (priority == Priority.HIGH) {
      // This is arbitrary. Note that metadata may come from a big scan and nuke all the data
      // from some small frequently accessed tables, because it gets such a large priority boost
      // to start with. Think of the multiplier as the number of accesses after which the data
      // becomes more important than some random read-once metadata, in a pure-LFU scheme.
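      // For instance (just restating the arithmetic): with F0 = 1 and a multiplier of 3, a
      // HIGH-priority buffer starts where a NORMAL buffer would be after roughly three rapid
      // accesses, since each touch adds at most F0 = 1 to the priority.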
      buffer.priority *= 3;
    } else {
      assert priority == Priority.NORMAL;
    }
  }

  @Override
  public void notifyLock(LlapCacheableBuffer buffer) {
    // We do not proactively remove locked items from the heap, and opportunistically try to
    // remove from the list (since eviction is mostly from the list). If eviction stumbles upon
    // a locked item in either, it will remove it from cache; when we unlock, we are going to
    // put it back or update it, depending on whether this has happened. This should cause
    // most of the expensive cache update work to happen in unlock, not blocking processing.
    if (buffer.indexInHeap != LlapCacheableBuffer.IN_LIST) return;
    if (!listLock.tryLock()) return;
    removeFromListAndUnlock(buffer);
  }

  @Override
  public void notifyUnlock(LlapCacheableBuffer buffer) {
    long time = timer.incrementAndGet();
    if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
      LlapIoImpl.CACHE_LOGGER.trace("Touching {} at {}", buffer, time);
    }
    synchronized (heap) {
      // First, update buffer priority - we have just been using it.
      buffer.priority = (buffer.lastUpdate == -1) ? F0
          : touchPriority(time, buffer.lastUpdate, buffer.priority);
      buffer.lastUpdate = time;
      // Then, if the buffer was in the list, remove it.
      if (buffer.indexInHeap == LlapCacheableBuffer.IN_LIST) {
        listLock.lock();
        removeFromListAndUnlock(buffer);
      }
      // The only concurrent change that can happen when we hold the heap lock is list removal;
      // we have just ensured the item is not in the list, so we have a definite state now.
      if (buffer.indexInHeap >= 0) {
        // The buffer has lived in the heap all along. Restore heap property.
        heapifyDownUnderLock(buffer, time);
      } else if (heapSize == heap.length) {
        // The buffer is not in the (full) heap. Demote the top item of the heap into the list.
        LlapCacheableBuffer demoted = heap[0];
        listLock.lock();
        try {
          assert demoted.indexInHeap == 0; // No one could have moved it; we hold the heap lock.
          demoted.indexInHeap = LlapCacheableBuffer.IN_LIST;
          demoted.prev = null;
          if (listHead != null) {
            demoted.next = listHead;
            listHead.prev = demoted;
            listHead = demoted;
          } else {
            listHead = listTail = demoted;
            demoted.next = null;
          }
        } finally {
          listLock.unlock();
        }
        // Now insert the buffer in its place and restore heap property.
        buffer.indexInHeap = 0;
        heapifyDownUnderLock(buffer, time);
      } else {
        // Heap is not full, add the buffer to the heap and restore heap property up.
        assert heapSize < heap.length : heap.length + " < " + heapSize;
        buffer.indexInHeap = heapSize;
        heapifyUpUnderLock(buffer, time);
        ++heapSize;
      }
    }
  }

  @Override
  public void setEvictionListener(EvictionListener listener) {
    this.evictionListener = listener;
  }

  @Override
  public void setParentDebugDumper(LlapOomDebugDump dumper) {
    this.parentDebugDump = dumper;
  }


  @Override
  public long evictSomeBlocks(long memoryToReserve) {
    // In normal case, we evict the items from the list.
    long evicted = evictFromList(memoryToReserve);
    if (evicted >= memoryToReserve) return evicted;
    // This should not happen unless we are evicting a lot at once, or buffers are large (so
    // there's a small number of buffers and they all live in the heap).
    long time = timer.get();
    while (evicted < memoryToReserve) {
      LlapCacheableBuffer buffer = null;
      synchronized (heap) {
        buffer = evictFromHeapUnderLock(time);
      }
      if (buffer == null) return evicted;
      evicted += buffer.getMemoryUsage();
      evictionListener.notifyEvicted(buffer);
    }
    return evicted;
  }

  @Override
  public int tryEvictContiguousData(int allocationSize, int count) {
    int evicted = evictDataFromList(allocationSize, count);
    count -= evicted;
    if (count > 0) {
      evicted += evictDataFromHeap(timer.get(), count, allocationSize);
    }
    return evicted;
  }

  private long evictFromList(long memoryToReserve) {
    long evicted = 0;
    LlapCacheableBuffer nextCandidate = null, firstCandidate = null;
    listLock.lock();
    // We assume that there are no locked blocks in the list; if there are, they can simply be dropped.
    // Therefore we always evict one contiguous sequence from the tail. We can find it in one pass,
    // splice it out and then finalize the eviction outside of the list lock.
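    // Illustrative walk-through with hypothetical buffers: given a list H <-> A <-> B <-> T
    // (T is the tail) where B plus T cover memoryToReserve, the loop below ends with
    // firstCandidate == T and nextCandidate == A; the splice then removes B..T in one shot,
    // and the notify loop after the unlock visits T, then B, walking prev until it reaches
    // nextCandidate.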
    try {
      nextCandidate = firstCandidate = listTail;
      while (evicted < memoryToReserve && nextCandidate != null) {
        if (!nextCandidate.invalidate()) {
          // Locked buffer was in the list - just drop it; will be re-added on unlock.
          LlapCacheableBuffer lockedBuffer = nextCandidate;
          if (firstCandidate == nextCandidate) {
            firstCandidate = nextCandidate.prev;
          }
          nextCandidate = nextCandidate.prev;
          removeFromListUnderLock(lockedBuffer);
          continue;
        }
        // Update the state to removed-from-list, so that parallel notifyUnlock doesn't modify us.
        nextCandidate.indexInHeap = LlapCacheableBuffer.NOT_IN_CACHE;
        evicted += nextCandidate.getMemoryUsage();
        nextCandidate = nextCandidate.prev;
      }
      if (firstCandidate != nextCandidate) {
        if (nextCandidate == null) {
          listHead = listTail = null; // We have evicted the entire list.
        } else {
          // Splice the section that we have evicted out of the list.
          // We have already updated the state above so no need to do that again.
          removeFromListUnderLockNoStateUpdate(nextCandidate.next, firstCandidate);
        }
      }
    } finally {
      listLock.unlock();
    }
    while (firstCandidate != nextCandidate) {
      evictionListener.notifyEvicted(firstCandidate);
      firstCandidate = firstCandidate.prev;
    }
    return evicted;
  }


  private int evictDataFromList(int minSize, int count) {
    int evictedCount = 0;
    // Unlike the normal evictFromList, we don't necessarily evict a sequence of blocks. We won't
    // bother with optimization here and will just evict blocks one by one.
    List<LlapCacheableBuffer> evictedBuffers = new ArrayList<>(count);
    listLock.lock();
    try {
      LlapCacheableBuffer candidate = listTail;
      while (evictedCount < count && candidate != null) {
        LlapCacheableBuffer current = candidate;
        candidate = candidate.prev;
        long memUsage = current.getMemoryUsage();
        if (memUsage < minSize || !(current instanceof LlapDataBuffer)) continue;
        if (!current.invalidate()) {
          // Locked buffer was in the list - just drop it; will be re-added on unlock.
          removeFromListUnderLock(current);
          continue;
        }
        // Remove the buffer from the list.
        removeFromListUnderLock(current);
        // This makes granularity assumptions.
        assert memUsage % minSize == 0;
        evictedCount += (memUsage / minSize);
        evictedBuffers.add(current);
      }
    } finally {
      listLock.unlock();
    }
    for (LlapCacheableBuffer buffer : evictedBuffers) {
      evictionListener.notifyEvicted(buffer);
    }
    return evictedCount;
  }


  // Note: rarely called (unless buffers are very large or we evict a lot, or in LFU case).
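  // The retry loop below exists because evictHeapElementUnderLock returns null when the root
  // turns out to be locked (invalidate fails); that element has already been taken out of the
  // heap, so the next iteration simply tries the new root.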
  private LlapCacheableBuffer evictFromHeapUnderLock(long time) {
    while (true) {
      if (heapSize == 0) return null;
      LlapCacheableBuffer result = evictHeapElementUnderLock(time, 0);
      if (result != null) return result;
    }
  }

  // Note: almost never called (unless buffers are very large or we evict a lot, or LFU).
  private int evictDataFromHeap(long time, int count, int minSize) {
    LlapCacheableBuffer evicted = null;
    int evictedCount = 0;
    synchronized (heap) {
      // Priorities go out of the window here.
      int index = 0;
      while (index < heapSize && evictedCount < count) {
        LlapCacheableBuffer buffer = heap[index];
        long memUsage = buffer.getMemoryUsage();
        if (memUsage < minSize || !(buffer instanceof LlapDataBuffer)) {
          ++index;
          continue;
        }
        LlapCacheableBuffer result = evictHeapElementUnderLock(time, index);
        // Don't advance the index - the buffer has been removed either way.
        if (result != null) {
          assert memUsage % minSize == 0;
          evictedCount += (memUsage / minSize);
          if (evicted != null) {
            evictionListener.notifyEvicted(evicted);
          }
          evicted = result;
        }
      }
    }
    if (evicted != null) {
      evictionListener.notifyEvicted(evicted);
    }
    return evictedCount;
  }
  
  private void heapifyUpUnderLock(LlapCacheableBuffer buffer, long time) {
    // See heapifyDown comment.
    int ix = buffer.indexInHeap;
    double priority = buffer.priority;
    while (true) {
      if (ix == 0) break; // Buffer is at the top of the heap.
      int parentIx = (ix - 1) >>> 1;
      LlapCacheableBuffer parent = heap[parentIx];
      double parentPri = getHeapifyPriority(parent, time);
      if (priority >= parentPri) break;
      heap[ix] = parent;
      parent.indexInHeap = ix;
      ix = parentIx;
    }
    buffer.indexInHeap = ix;
    heap[ix] = buffer;
  }

  private LlapCacheableBuffer evictHeapElementUnderLock(long time, int ix) {
    LlapCacheableBuffer result = heap[ix];
    if (LlapIoImpl.CACHE_LOGGER.isTraceEnabled()) {
      LlapIoImpl.CACHE_LOGGER.trace("Evicting {} at {}", result, time);
    }
    result.indexInHeap = LlapCacheableBuffer.NOT_IN_CACHE;
    --heapSize;
    boolean canEvict = result.invalidate();
    if (heapSize > 0) {
      LlapCacheableBuffer newRoot = heap[heapSize];
      newRoot.indexInHeap = ix;
      if (newRoot.lastUpdate != time) {
        newRoot.priority = expirePriority(time, newRoot.lastUpdate, newRoot.priority);
        newRoot.lastUpdate = time;
      }
      heapifyDownUnderLock(newRoot, time);
    }
    // Otherwise we just removed a locked item from heap; unlock will re-add it, we continue.
    return canEvict ? result : null;
  }

  private void heapifyDownUnderLock(LlapCacheableBuffer buffer, long time) {
    // Relative positions of the blocks don't change over time; priorities we expire can only
    // decrease; we only have one block that could have broken heap rule and we always move it
    // down; therefore, we can update priorities of other blocks as we go for part of the heap -
    // we correct any discrepancy w/the parent after expiring priority, and any block we expire
    // the priority for already has lower priority than that of its children.
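    // Put differently (a restatement of the above): getHeapifyPriority brings both children to
    // the same clock value before comparing, and once two buffers have been expired to a common
    // time, any further decay multiplies both by the same factor, so the comparisons made here
    // remain valid; only the buffer being pushed down needs to be re-ordered.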
    int ix = buffer.indexInHeap;
    double priority = buffer.priority;
    while (true) {
      int newIx = moveMinChildUp(ix, time, priority);
      if (newIx == -1) break;
      ix = newIx;
    }
    buffer.indexInHeap = ix;
    heap[ix] = buffer;
  }

  /**
   * Moves the minimum-priority child of the targetPos block up to targetPos; optionally compares
   * priorities first and does nothing if the targetPos element is no greater than both of
   * its children.
   * @return the index of the child that was moved up; -1 if nothing was moved because there
   *         are no children, or because the priority check failed.
   */
  private int moveMinChildUp(int targetPos, long time, double comparePri) {
    int leftIx = (targetPos << 1) + 1, rightIx = leftIx + 1;
    if (leftIx >= heapSize) return -1; // Buffer is at the leaf node.
    LlapCacheableBuffer left = heap[leftIx], right = null;
    if (rightIx < heapSize) {
      right = heap[rightIx];
    }
    double leftPri = getHeapifyPriority(left, time), rightPri = getHeapifyPriority(right, time);
    if (comparePri >= 0 && comparePri <= leftPri && comparePri <= rightPri) {
      return -1;
    }
    if (leftPri <= rightPri) { // prefer left, because right might be missing
      heap[targetPos] = left;
      left.indexInHeap = targetPos;
      return leftIx;
    } else {
      heap[targetPos] = right;
      right.indexInHeap = targetPos;
      return rightIx;
    }
  }

  private double getHeapifyPriority(LlapCacheableBuffer buf, long time) {
    if (buf == null) return Double.MAX_VALUE;
    if (buf.lastUpdate != time && time >= 0) {
      buf.priority = expirePriority(time, buf.lastUpdate, buf.priority);
      buf.lastUpdate = time;
    }
    return buf.priority;
  }

  private void removeFromListAndUnlock(LlapCacheableBuffer buffer) {
    try {
      if (buffer.indexInHeap != LlapCacheableBuffer.IN_LIST) return;
      removeFromListUnderLock(buffer);
    } finally {
      listLock.unlock();
    }
  }

  private void removeFromListUnderLock(LlapCacheableBuffer buffer) {
    buffer.indexInHeap = LlapCacheableBuffer.NOT_IN_CACHE;
    boolean isTail = buffer == listTail, isHead = buffer == listHead;
    if ((isTail != (buffer.next == null)) || (isHead != (buffer.prev == null))) {
      debugDumpListOnError(buffer);
      throw new AssertionError("LRFU list is corrupted.");
    }
    if (isTail) {
      listTail = buffer.prev;
    } else {
      buffer.next.prev = buffer.prev;
    }
    if (isHead) {
      listHead = buffer.next;
    } else {
      buffer.prev.next = buffer.next;
    }
  }

  private void removeFromListUnderLockNoStateUpdate(
      LlapCacheableBuffer from, LlapCacheableBuffer to) {
    boolean isToTail = to == listTail, isFromHead = from == listHead;
    if ((isToTail != (to.next == null)) || (isFromHead != (from.prev == null))) {
      debugDumpListOnError(from, to);
      throw new AssertionError("LRFU list is corrupted.");
    }
    if (isToTail) {
      listTail = from.prev;
    } else {
      to.next.prev = from.prev;
    }
    if (isFromHead) {
      listHead = to.next;
    } else {
      from.prev.next = to.next;
    }
  }

  private void debugDumpListOnError(LlapCacheableBuffer... buffers) {
    // Hopefully this will be helpful in case of NPEs.
    StringBuilder listDump = new StringBuilder("Invalid list removal. List: ");
    try {
      dumpList(listDump, listHead, listTail);
      int i = 0;
      for (LlapCacheableBuffer buffer : buffers) {
        listDump.append("; list from the buffer #").append(i).append(" being removed: ");
        dumpList(listDump, buffer, null);
        ++i; // Advance the counter so each dumped buffer gets its own number.
      }
    } catch (Throwable t) {
      LlapIoImpl.LOG.error("Error dumping the lists on error", t);
    }
    LlapIoImpl.LOG.error(listDump.toString());
  }

  public String debugDumpHeap() {
    StringBuilder result = new StringBuilder("List: ");
    dumpList(result, listHead, listTail);
    result.append("\nHeap:");
    if (heapSize == 0) {
      result.append(" \n");
      return result.toString();
    }
    result.append("\n");
    int levels = 32 - Integer.numberOfLeadingZeros(heapSize);
    int ix = 0;
    int spacesCount = heap[0].toStringForCache().length() + 3;
    String full = StringUtils.repeat(" ", spacesCount),
        half = StringUtils.repeat(" ", spacesCount / 2);
    int maxWidth = 1 << (levels - 1);
    for (int i = 0; i < levels; ++i) {
      int width = 1 << i;
      int middleGap = (maxWidth - width) / width;
      for (int j = 0; j < (middleGap >>> 1); ++j) {
        result.append(full);
      }
      if ((middleGap & 1) == 1) {
        result.append(half);
      }
      for (int j = 0; j < width && ix < heapSize; ++j, ++ix) {
        if (j != 0) {
          for (int k = 0; k < middleGap; ++k) {
            result.append(full);
          }
          if (middleGap == 0) {
            result.append(" ");
          }
        }
        if ((j & 1) == 0) {
          result.append("(");
        }
        result.append(heap[ix].toStringForCache());
        if ((j & 1) == 1) {
          result.append(")");
        }
      }
      result.append("\n");
    }
    return result.toString();
  }

  private static void dumpList(StringBuilder result,
      LlapCacheableBuffer listHeadLocal, LlapCacheableBuffer listTailLocal) {
    if (listHeadLocal == null) {
      result.append("");
      return;
    }
    LlapCacheableBuffer listItem = listHeadLocal;
    while (listItem.prev != null) {
      listItem = listItem.prev;  // To detect incorrect lists.
    }
    while (listItem != null) {
      result.append(listItem.toStringForCache());
      if (listItem == listTailLocal) {
        result.append("(tail)"); // To detect incorrect lists.
      }
      if (listItem == listHeadLocal) {
        result.append("(head)"); // To detect incorrect lists.
      }
      result.append(" -> ");
      listItem = listItem.next;
    }
  }

  @Override
  public String debugDumpForOom() {
    String result = debugDumpHeap();
    if (parentDebugDump != null) {
      result += "\n" + parentDebugDump.debugDumpForOom();
    }
    return result;
  }
}
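
// Illustrative usage sketch (hypothetical wiring - the real call sequence lives in the LLAP IO
// layer and may differ; "myListener" and "buffer" are made-up names for the example):
//
//   Configuration conf = new Configuration();
//   LowLevelLrfuCachePolicy policy = new LowLevelLrfuCachePolicy(conf);
//   policy.setEvictionListener(myListener);         // receives notifyEvicted() callbacks
//   policy.cache(buffer, Priority.NORMAL);          // buffer is cached while still locked
//   policy.notifyUnlock(buffer);                    // the first unlock inserts it into the heap
//   policy.notifyLock(buffer);                      // later lock/unlock pairs re-prioritize it
//   policy.notifyUnlock(buffer);
//   long freed = policy.evictSomeBlocks(16 * 1024); // ask the policy to free ~16KB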