All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.metastore.AggregateStatsCache Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hadoop.hive.metastore;

import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
import org.apache.hive.common.util.BloomFilter;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock;

public class AggregateStatsCache {

  private static final Log LOG = LogFactory.getLog(AggregateStatsCache.class.getName());
  // Singleton instance, created on the first getInstance() call
  private static AggregateStatsCache self = null;

  // Backing store for this cache: one list of cached nodes per (db, table, column) key
  private final ConcurrentHashMap<Key, AggrColStatsList> cacheStore;
  // Cache size
  private final int maxCacheNodes;
  // Current nodes in the cache
  private final AtomicInteger currentNodes = new AtomicInteger(0);
  // Run the cleaner thread when the occupancy ratio (current nodes / max nodes) exceeds this
  private final float maxFull;
  // Keep the cleaner thread running until the occupancy ratio is no longer above this
  private final float cleanUntil;
  // Nodes go stale this many milliseconds after their last access
  private final long timeToLiveMs;
  // Max time (ms) when waiting for write locks on node list
  private final long maxWriterWaitTime;
  // Max time (ms) when waiting for read locks on node list
  private final long maxReaderWaitTime;
  // Maximum number of partitions aggregated per cache node
  private final int maxPartsPerCacheNode;
  // Bloom filter false positive probability
  private final float falsePositiveProbability;
  // Max tolerable variance for matches
  private final float maxVariance;
  // Used to determine if cleaner thread is already running; volatile so that a write made by
  // the cleaner thread is visible to the threads that check it
  private volatile boolean isCleaning = false;
  private final AtomicLong cacheHits = new AtomicLong(0);
  private final AtomicLong cacheMisses = new AtomicLong(0);
  // To track cleaner metrics; reset at the start of every cleaner run
  int numRemovedTTL = 0, numRemovedLRU = 0;

  /**
   * Creates the cache with the given tuning parameters. Private: instances are obtained
   * through {@link #getInstance(Configuration)}.
   *
   * @param maxCacheNodes maximum number of nodes the cache is sized for
   * @param maxPartsPerCacheNode maximum number of partitions aggregated per cache node
   * @param timeToLiveMs milliseconds after the last access at which a node is expired
   * @param falsePositiveProbability bloom filter false positive probability
   * @param maxVariance max tolerable variance for matches
   * @param maxWriterWaitTime max wait (ms) for a write lock on a node list
   * @param maxReaderWaitTime max wait (ms) for a read lock on a node list
   * @param maxFull occupancy ratio above which the cleaner is started
   * @param cleanUntil occupancy ratio the cleaner evicts down to
   */
  private AggregateStatsCache(int maxCacheNodes, int maxPartsPerCacheNode, long timeToLiveMs,
      float falsePositiveProbability, float maxVariance, long maxWriterWaitTime,
      long maxReaderWaitTime, float maxFull, float cleanUntil) {
    this.maxCacheNodes = maxCacheNodes;
    this.maxPartsPerCacheNode = maxPartsPerCacheNode;
    this.timeToLiveMs = timeToLiveMs;
    this.falsePositiveProbability = falsePositiveProbability;
    this.maxVariance = maxVariance;
    this.maxWriterWaitTime = maxWriterWaitTime;
    this.maxReaderWaitTime = maxReaderWaitTime;
    this.maxFull = maxFull;
    this.cleanUntil = cleanUntil;
    // Parameterized rather than raw, so lookups need no casts
    this.cacheStore = new ConcurrentHashMap<Key, AggrColStatsList>();
  }

  /**
   * Returns the shared cache instance, building it from {@code conf} on the first call.
   * Once an instance exists it is returned as-is and {@code conf} is not consulted again.
   *
   * @param conf configuration the cache settings are read from on first use
   * @return the singleton cache
   */
  public static synchronized AggregateStatsCache getInstance(Configuration conf) {
    if (self != null) {
      return self;
    }
    int cacheSize =
        HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_SIZE);
    // Upper bound on partitions aggregated into one node; requests for more partitions
    // than this are expected to go straight to the Metastore
    int partsPerNode =
        HiveConf.getIntVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_PARTITIONS);
    // The TTL is read in whole seconds and converted to milliseconds
    long ttlSeconds =
        HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_TTL,
            TimeUnit.SECONDS);
    long ttlMs = ttlSeconds * 1000;
    // Tolerated false positive probability of the per-node bloom filter
    float fpp = HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_FPP);
    // Tolerated difference in partition count between a cached node and a request
    float variance =
        HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_VARIANCE);
    long writerWaitMs =
        HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_WRITER_WAIT,
            TimeUnit.MILLISECONDS);
    long readerWaitMs =
        HiveConf.getTimeVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_READER_WAIT,
            TimeUnit.MILLISECONDS);
    float fullThreshold =
        HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_MAX_FULL);
    float cleanThreshold =
        HiveConf.getFloatVar(conf, HiveConf.ConfVars.METASTORE_AGGREGATE_STATS_CACHE_CLEAN_UNTIL);
    self = new AggregateStatsCache(cacheSize, partsPerNode, ttlMs, fpp, variance, writerWaitMs,
        readerWaitMs, fullThreshold, cleanThreshold);
    return self;
  }

  /**
   * @return the configured maximum number of nodes for this cache
   */
  public int getMaxCacheNodes() {
    return this.maxCacheNodes;
  }

  /**
   * @return the number of nodes currently counted as being in the cache
   */
  public int getCurrentNodes() {
    return currentNodes.get();
  }

  /**
   * @return the current node count as a percentage of {@code maxCacheNodes}
   */
  public float getFullPercent() {
    float occupied = currentNodes.get();
    return (occupied / maxCacheNodes) * 100;
  }

  /**
   * @return the configured maximum number of partitions aggregated per cache node
   */
  public int getMaxPartsPerCacheNode() {
    return this.maxPartsPerCacheNode;
  }

  /**
   * @return the configured bloom filter false positive probability
   */
  public float getFalsePositiveProbability() {
    return this.falsePositiveProbability;
  }

  /**
   * Computes the fraction of counted lookups that were hits.
   *
   * @return hits / (hits + misses), or {@code null} when no hit or miss has been counted yet
   */
  public Float getHitRatio() {
    // Read each counter exactly once so numerator and denominator use the same hit count
    long hits = cacheHits.get();
    long total = hits + cacheMisses.get();
    if (total > 0) {
      return (float) hits / total;
    }
    return null;
  }

  /**
   * Return aggregate stats for a column from the cache or null.
   * While reading from the nodelist for a key, we wait maxReaderWaitTime to acquire the lock,
   * failing which we return a cache miss (i.e. null).
   *
   * @param dbName database name; must not be null
   * @param tblName table name; must not be null
   * @param colName column name; must not be null
   * @param partNames names of the partitions the caller wants aggregate stats for
   * @return the best matching cached node, or null if nothing is cached for the key, no node
   *         matches, the read lock could not be acquired in time, or the thread was interrupted
   * @throws IllegalArgumentException if dbName, tblName or colName is null
   */
  public AggrColStats get(String dbName, String tblName, String colName, List<String> partNames) {
    // Cache key
    Key key = new Key(dbName, tblName, colName);
    AggrColStatsList candidateList = cacheStore.get(key);
    // No key, or no nodes in candidate list
    if ((candidateList == null) || candidateList.nodes.isEmpty()) {
      LOG.info("No aggregate stats cached for " + key.toString());
      return null;
    }
    AggrColStats match = null;
    boolean isLocked = false;
    try {
      // Try to readlock the candidateList; timeout after maxReaderWaitTime
      isLocked = candidateList.readLock.tryLock(maxReaderWaitTime, TimeUnit.MILLISECONDS);
      if (isLocked) {
        match = findBestMatch(partNames, candidateList.nodes);
      }
      if (match != null) {
        // Ok to not lock the list for this and use a volatile lastAccessTime instead
        candidateList.updateLastAccessTime();
        cacheHits.incrementAndGet();
        LOG.info("Returning aggregate stats from the cache; total hits: " + cacheHits.longValue()
            + ", total misses: " + cacheMisses.longValue() + ", hit ratio: " + getHitRatio());
      } else {
        // A lock timeout is counted as a miss as well
        cacheMisses.incrementAndGet();
      }
    } catch (InterruptedException e) {
      LOG.debug(e);
      // Restore the interrupt status so the caller can still observe the interruption
      Thread.currentThread().interrupt();
    } finally {
      if (isLocked) {
        candidateList.readLock.unlock();
      }
    }
    return match;
  }

  /**
   * Find the best match using the configurable error tolerance and time to live value.
   * A candidate is eligible when its cached partition count is within maxVariance of the
   * requested count, it has not expired, and its bloom filter rejects at most
   * maxVariance * (number of requested partitions) of the requested names. Among eligible
   * candidates the one with the most bloom filter hits wins; its last access time is refreshed.
   *
   * @param partNames names of the requested partitions
   * @param candidates cached nodes for the key; the caller is expected to hold the list's lock
   * @return best matched node or null
   */
  private AggrColStats findBestMatch(List<String> partNames, List<AggrColStats> candidates) {
    int numPartsRequested = partNames.size();
    // Nothing requested: nothing can match, and the variance ratio would be undefined
    if (numPartsRequested == 0) {
      return null;
    }
    // MatchStats for each candidate that is still in the running
    Map<AggrColStats, MatchStats> candidateMatchStats = new HashMap<AggrColStats, MatchStats>();
    // 1st pass at discarding invalid candidates: checks based on variance and TTL
    // Note: we're not creating a copy of the list for saving memory
    for (AggrColStats candidate : candidates) {
      // Divide as float: integer division would truncate every variance below 100% to zero
      float variance =
          Math.abs(candidate.getNumPartsCached() - numPartsRequested) / (float) numPartsRequested;
      if (variance > maxVariance) {
        continue;
      }
      if (isExpired(candidate)) {
        continue;
      }
      candidateMatchStats.put(candidate, new MatchStats(0, 0));
    }
    // Multiply before truncating; casting maxVariance alone would yield 0 for any value < 1
    int maxMisses = (int) (maxVariance * numPartsRequested);
    // 2nd pass: count hits and misses, dropping candidates that exceed the tolerable misses
    for (String partName : partNames) {
      if (candidateMatchStats.isEmpty()) {
        break;
      }
      // Same default-charset encoding as before, computed once per partition name
      byte[] partNameBytes = partName.getBytes();
      Iterator<Map.Entry<AggrColStats, MatchStats>> it = candidateMatchStats.entrySet().iterator();
      while (it.hasNext()) {
        Map.Entry<AggrColStats, MatchStats> entry = it.next();
        MatchStats matchStats = entry.getValue();
        if (entry.getKey().getBloomFilter().test(partNameBytes)) {
          ++matchStats.hits;
        } else {
          ++matchStats.misses;
        }
        if (matchStats.misses > maxMisses) {
          // Remove through the iterator; Map.remove here would break the iteration
          it.remove();
        }
      }
    }
    // Pick the winner only among candidates that survived every partition name
    AggrColStats bestMatch = null;
    int bestMatchHits = 0;
    for (Map.Entry<AggrColStats, MatchStats> entry : candidateMatchStats.entrySet()) {
      int hits = entry.getValue().hits;
      if (hits > bestMatchHits) {
        bestMatchHits = hits;
        bestMatch = entry.getKey();
      }
    }
    if (bestMatch != null) {
      // Update the last access time for this node
      bestMatch.updateLastAccessTime();
    }
    return bestMatch;
  }

  /**
   * Add a new node to the cache; may trigger the cleaner thread if the cache is near full capacity.
   * We'll however add the node even if we temporarily exceed maxCacheNodes, because the cleaner
   * will eventually create space from expired nodes or by removing LRU nodes.
   * If the node list's write lock cannot be acquired within maxWriterWaitTime, or the thread is
   * interrupted while waiting, the node is not added.
   *
   * @param dbName database name; must not be null
   * @param tblName table name; must not be null
   * @param colName column name; must not be null
   * @param numPartsCached number of partitions aggregated into {@code colStats}
   * @param colStats the aggregated column statistics to cache
   * @param bloomFilter bloom filter tested against requested partition names on lookup
   * @throws IllegalArgumentException if dbName, tblName or colName is null
   */
  // TODO: make add asynchronous: add shouldn't block the higher level calls
  public void add(String dbName, String tblName, String colName, long numPartsCached,
      ColumnStatisticsObj colStats, BloomFilter bloomFilter) {
    // Compare the occupancy as a float ratio: integer division yields 0 until the cache is
    // completely full, which would delay the cleaner until then
    if ((float) getCurrentNodes() / maxCacheNodes > maxFull) {
      spawnCleaner();
    }
    // Cache key
    Key key = new Key(dbName, tblName, colName);
    // Add new node to the cache
    AggrColStats node = new AggrColStats(numPartsCached, bloomFilter, colStats);
    AggrColStatsList newNodeList = new AggrColStatsList();
    // Reuse the list already registered for this key, if any
    AggrColStatsList nodeList = cacheStore.putIfAbsent(key, newNodeList);
    if (nodeList == null) {
      nodeList = newNodeList;
    }
    boolean isLocked = false;
    try {
      isLocked = nodeList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
      if (isLocked) {
        nodeList.nodes.add(node);
        node.updateLastAccessTime();
        nodeList.updateLastAccessTime();
        currentNodes.getAndIncrement();
      }
    } catch (InterruptedException e) {
      LOG.debug(e);
      // Restore the interrupt status so the caller can still observe the interruption
      Thread.currentThread().interrupt();
    } finally {
      if (isLocked) {
        nodeList.writeLock.unlock();
      }
    }
  }

  /**
   * Cleans the expired nodes or removes LRU nodes of the cache,
   * until the occupancy ratio (current nodes / maxCacheNodes) is no longer above cleanUntil.
   * The work runs on a separate minimum-priority daemon thread; if a cleaner is already
   * running this call returns without starting another one.
   */
  private void spawnCleaner() {
    // Only one cleaner thread should be running at any point.
    synchronized (this) {
      if (isCleaning) {
        return;
      }
      isCleaning = true;
    }
    Thread cleaner = new Thread("AggregateStatsCache-CleanerThread") {
      @Override
      public void run() {
        numRemovedTTL = 0;
        numRemovedLRU = 0;
        long cleanerStartTime = System.currentTimeMillis();
        LOG.info("AggregateStatsCache is " + getFullPercent() + "% full, with "
            + getCurrentNodes() + " nodes; starting cleaner thread");
        try {
          Iterator<Map.Entry<Key, AggrColStatsList>> mapIterator =
              cacheStore.entrySet().iterator();
          while (mapIterator.hasNext()) {
            Map.Entry<Key, AggrColStatsList> pair = mapIterator.next();
            AggrColStatsList candidateList = pair.getValue();
            List<AggrColStats> nodes = candidateList.nodes;
            // Drop keys whose node list is already empty
            if (nodes.size() == 0) {
              mapIterator.remove();
              continue;
            }
            boolean isLocked = false;
            try {
              isLocked = candidateList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
              if (isLocked) {
                for (Iterator<AggrColStats> listIterator = nodes.iterator();
                    listIterator.hasNext();) {
                  AggrColStats node = listIterator.next();
                  // Remove the node if it has expired
                  if (isExpired(node)) {
                    listIterator.remove();
                    numRemovedTTL++;
                    currentNodes.getAndDecrement();
                  }
                }
              }
            } catch (InterruptedException e) {
              LOG.debug(e);
              // Honor the interruption: restore the flag and stop this run
              Thread.currentThread().interrupt();
              return;
            } finally {
              if (isLocked) {
                candidateList.writeLock.unlock();
              }
            }
            // We want to make sure this runs at a low priority in the background
            Thread.yield();
          }
          // If removing the expired nodes did not bring the occupancy ratio down to cleanUntil,
          // start removing LRU nodes. The ratio is computed as a float: integer division would
          // only be non-zero once the cache is completely full.
          while ((float) getCurrentNodes() / maxCacheNodes > cleanUntil) {
            evictOneNode();
          }
        } finally {
          // Clear the flag under the same monitor that guards the check-and-set above
          synchronized (AggregateStatsCache.this) {
            isCleaning = false;
          }
          LOG.info("Stopping cleaner thread; AggregateStatsCache is now " + getFullPercent()
              + "% full, with " + getCurrentNodes() + " nodes");
          LOG.info("Number of expired nodes removed: " + numRemovedTTL);
          LOG.info("Number of LRU nodes removed: " + numRemovedLRU);
          LOG.info("Cleaner ran for: " + (System.currentTimeMillis() - cleanerStartTime) + "ms");
        }
      }
    };
    cleaner.setPriority(Thread.MIN_PRIORITY);
    cleaner.setDaemon(true);
    cleaner.start();
  }

  /**
   * Evict an LRU node or expired node whichever we find first.
   * Picks the non-empty node list with the oldest last access time, then removes from it the
   * first expired node encountered or, failing that, the node with the oldest last access time.
   * Returns without evicting if the list's write lock cannot be acquired in time.
   *
   * @throws IllegalStateException if no key in the cache has any node to evict
   */
  private void evictOneNode() {
    // Find the least recently used list that actually has something to evict
    AggrColStatsList lruList = null;
    for (Map.Entry<Key, AggrColStatsList> entry : cacheStore.entrySet()) {
      AggrColStatsList value = entry.getValue();
      if (value.nodes.isEmpty()) {
        continue;
      }
      if ((lruList == null) || (value.lastAccessTime < lruList.lastAccessTime)) {
        lruList = value;
      }
    }
    if (lruList == null) {
      // Fail fast with a clear message: the unguarded lookup used to end in a
      // NullPointerException here, and returning silently could leave a caller that loops
      // until occupancy drops spinning forever
      throw new IllegalStateException("No evictable node found in AggregateStatsCache");
    }
    // Now delete a node from this list (used directly; re-reading the map could return null)
    boolean isLocked = false;
    try {
      isLocked = lruList.writeLock.tryLock(maxWriterWaitTime, TimeUnit.MILLISECONDS);
      if (!isLocked) {
        return;
      }
      int deleteIndex = -1;
      long oldestAccessTime = Long.MAX_VALUE;
      int currentIndex = 0;
      for (Iterator<AggrColStats> iterator = lruList.nodes.iterator(); iterator.hasNext();
          ++currentIndex) {
        AggrColStats candidate = iterator.next();
        // Since we have to create space for 1, if we find an expired node we will remove it &
        // return
        if (isExpired(candidate)) {
          iterator.remove();
          currentNodes.getAndDecrement();
          numRemovedTTL++;
          return;
        }
        // Track the position of the least recently used node seen so far
        if ((deleteIndex < 0) || (candidate.lastAccessTime < oldestAccessTime)) {
          oldestAccessTime = candidate.lastAccessTime;
          deleteIndex = currentIndex;
        }
      }
      // deleteIndex stays negative only if the list was emptied before we got the lock
      if (deleteIndex >= 0) {
        lruList.nodes.remove(deleteIndex);
        currentNodes.getAndDecrement();
        numRemovedLRU++;
      }
    } catch (InterruptedException e) {
      // Deliberately not re-interrupting: the caller invokes this in a loop, and a restored
      // interrupt flag would make every subsequent tryLock fail immediately
      LOG.debug(e);
    } finally {
      if (isLocked) {
        lruList.writeLock.unlock();
      }
    }
  }

  /**
   * Tells whether a node has outlived the configured time to live.
   *
   * @param aggrColStats the node to check
   * @return true if more than timeToLiveMs milliseconds have passed since the node's last access
   */
  private boolean isExpired(AggrColStats aggrColStats) {
    long idleMs = System.currentTimeMillis() - aggrColStats.lastAccessTime;
    return idleMs > timeToLiveMs;
  }

  /**
   * Key object for the stats cache hashtable: an immutable (database, table, column) triple.
   * Two keys are equal when all three names are equal.
   */
  static class Key {
    private final String dbName;
    private final String tblName;
    private final String colName;

    /**
     * @param db database name
     * @param table table name
     * @param col column name
     * @throws IllegalArgumentException if any argument is null
     */
    Key(String db, String table, String col) {
      // Don't construct an illegal cache key
      if ((db == null) || (table == null) || (col == null)) {
        throw new IllegalArgumentException("dbName, tblName, colName can't be null");
      }
      dbName = db;
      tblName = table;
      colName = col;
    }

    @Override
    public boolean equals(Object other) {
      // instanceof is false for null, so no separate null check is needed
      if (!(other instanceof Key)) {
        return false;
      }
      Key that = (Key) other;
      return dbName.equals(that.dbName) && tblName.equals(that.tblName)
          && colName.equals(that.colName);
    }

    @Override
    public int hashCode() {
      // Chain the multiplier so each field gets a distinct weight; weighting dbName and
      // tblName equally would make keys with the two names swapped collide
      int result = dbName.hashCode();
      result = 31 * result + tblName.hashCode();
      result = 31 * result + colName.hashCode();
      return result;
    }

    @Override
    public String toString() {
      return "database:" + dbName + ", table:" + tblName + ", column:" + colName;
    }

  }

  static class AggrColStatsList {
    // TODO: figure out a better data structure for node list(?)
    private List nodes = new ArrayList();
    private ReadWriteLock lock = new ReentrantReadWriteLock();
    // Read lock for get operation
    private Lock readLock = lock.readLock();
    // Write lock for add, evict and clean operation
    private Lock writeLock = lock.writeLock();
    // Using volatile instead of locking updates to this variable,
    // since we can rely on approx lastAccessTime but don't want a performance hit
    private volatile long lastAccessTime = 0;

    List getNodes() {
      return nodes;
    }

    void updateLastAccessTime() {
      this.lastAccessTime = System.currentTimeMillis();
    }
  }

  /**
   * A single cache node: aggregated column statistics, the number of partitions they were
   * aggregated over, the bloom filter associated with the node, and the time the node was
   * last accessed.
   */
  public static class AggrColStats {
    private final long numPartsCached;
    private final BloomFilter bloomFilter;
    private final ColumnStatisticsObj colStats;
    // Volatile: refreshed and read without holding a lock
    private volatile long lastAccessTime;

    /**
     * Creates a node whose last access time is the moment of construction.
     *
     * @param numPartsCached number of partitions the stats were aggregated over
     * @param bloomFilter bloom filter associated with this node
     * @param colStats the aggregated column statistics
     */
    public AggrColStats(long numPartsCached, BloomFilter bloomFilter,
        ColumnStatisticsObj colStats) {
      this.lastAccessTime = System.currentTimeMillis();
      this.colStats = colStats;
      this.bloomFilter = bloomFilter;
      this.numPartsCached = numPartsCached;
    }

    /** @return the bloom filter associated with this node */
    public BloomFilter getBloomFilter() {
      return this.bloomFilter;
    }

    /** @return the aggregated column statistics held by this node */
    public ColumnStatisticsObj getColStats() {
      return this.colStats;
    }

    /** @return the number of partitions the stats were aggregated over */
    public long getNumPartsCached() {
      return this.numPartsCached;
    }

    /** Records the current time as this node's last access time. */
    void updateLastAccessTime() {
      lastAccessTime = System.currentTimeMillis();
    }
  }

  /**
   * Intermediate object, used to collect hits and misses for each cache node that is
   * evaluated for an incoming request.
   */
  private static class MatchStats {
    // Both counters are always set by the constructor
    private int hits;
    private int misses;

    /**
     * @param hits initial hit count
     * @param misses initial miss count
     */
    MatchStats(int hits, int misses) {
      this.misses = misses;
      this.hits = hits;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy