All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.ql.exec.TopNHash Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.util.Arrays;
import java.util.Comparator;
import java.util.TreeMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.llap.LlapDaemonInfo;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;

import com.facebook.presto.hive.$internal.com.google.common.collect.MinMaxPriorityQueue;

/**
 * Keeps binary key/value pairs ordered by key so that only the top-n pairs are retained.
 * TODO: rename to TopNHeap?
 */
public class TopNHash {
  private static final Logger LOG = LoggerFactory.getLogger(TopNHash.class);

  /**
   * Callback through which the top-n hash hands rows back to the owning operator.
   * At present it is only used to forward the key/value pairs held in the hash.
   */
  public static interface BinaryCollector {
    /**
     * Receives one stored row.
     * @param key serialized key bytes.
     * @param value serialized value bytes.
     * @param hash hash code recorded for the key.
     * @throws IOException if the receiver fails to process the row.
     */
    void collect(byte[] key, byte[] value, int hash) throws IOException;
  }

  // Sentinel results handed out instead of a storage index; storage indexes are >= 0.
  public static final int FORWARD = -1; // Forward the row to reducer as is.
  public static final int EXCLUDE = -2; // Discard the row.
  // Vectorized path only: a batch result <= MAY_FORWARD encodes (MAY_FORWARD - index of the
  // colliding key) and is reported as FORWARD by getVectorizedBatchResult.
  private static final int MAY_FORWARD = -3; // Vectorized - may forward the row, not sure yet.

  // Receiver for rows emitted by flushInternal().
  protected BinaryCollector collector;
  // Number of rows to keep; 0 means every row is excluded.
  protected int topN;

  protected long threshold;   // budget that 'usage' is compared against before a flush is triggered
  protected long usage;       // running sum of the byte lengths of the stored keys and values

  // binary keys, values and hashCodes of rows, lined up by index
  private byte[][] keys;
  private byte[][] values;
  private int[] hashes;
  // number of leading key bytes used by the comparator C, lined up by index like the arrays above
  private int[] distKeyLengths;
  private IndexStore indexes; // The heap over the keys, storing indexes in the array.

  private int evicted; // recently evicted index (used for next key/value)
  private int excluded; // count of rows excluded since the last flush (flushInternal resets it)

  // temporary single-batch context used for vectorization
  private int batchNumForwards = 0; // number of rows in the current batch marked as MAY_FORWARD
  private int[] indexToBatchIndex; // mapping of index (lined up w/keys) to index in the batch
  protected int[] batchIndexToResult; // mapping of index in the batch (linear) to hash result
  protected int batchSize; // Size of the current batch.

  // When false, tryStoreKey / startVectorizedBatch short-circuit to FORWARD.
  protected boolean isEnabled = false;

  /**
   * Orders storage indexes by the keys stored at those indexes. Only the first
   * {@code distKeyLengths[i]} bytes of {@code keys[i]} take part in the comparison, which is
   * delegated to {@link WritableComparator#compareBytes}.
   * The type argument is required: with a raw {@code Comparator} the
   * {@code compare(Integer, Integer)} method below would not implement the interface method.
   */
  private final Comparator<Integer> C = new Comparator<Integer>() {
    @Override
    public int compare(Integer o1, Integer o2) {
      byte[] key1 = keys[o1];
      byte[] key2 = keys[o2];
      int length1 = distKeyLengths[o1];
      int length2 = distKeyLengths[o2];
      return WritableComparator.compareBytes(key1, 0, length1, key2, 0, length2);
    }
  };

  /**
   * Prepares the hash for use. Must be called while the hash is still disabled.
   * <p>
   * With {@code topN == 0} the hash is enabled without allocating anything, because every row
   * is going to be excluded. Otherwise a memory budget is derived from the free memory; if the
   * budget comes out negative the hash stays disabled and all rows will be forwarded.
   *
   * @param topN number of rows to keep; must be non-negative.
   * @param memUsage fraction of the free memory the hash may use; must be positive.
   * @param isMapGroupBy selects {@code HashForGroup} (true) or {@code HashForRow} (false).
   * @param collector receiver of the rows emitted when the hash is flushed.
   * @param conf operator descriptor; its max available memory is used on the tez engine.
   * @param hconf configuration from which the execution engine is read.
   */
  public void initialize(
    int topN, float memUsage, boolean isMapGroupBy, BinaryCollector collector, final OperatorDesc conf,
    final Configuration hconf) {
    assert topN >= 0 && memUsage > 0;
    assert !this.isEnabled;
    this.isEnabled = false;
    this.topN = topN;
    this.collector = collector;
    if (topN == 0) {
      // Nothing to allocate: callers short-circuit on topN == 0.
      isEnabled = true;
      return;
    }

    final String engine = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE);
    final boolean isTez = engine.equals("tez");
    final boolean isLlap = LlapDaemonInfo.INSTANCE.isLlap();
    final int numExecutors = isLlap ? LlapDaemonInfo.INSTANCE.getNumExecutors() : 1;

    // used = totalMemory() - freeMemory(); free = maxMemory() - used
    final Runtime runtime = Runtime.getRuntime();
    long totalFreeMemory = runtime.maxMemory() - runtime.totalMemory() + runtime.freeMemory();

    if (isTez) {
      // TODO: For LLAP, assumption is off-heap cache.
      final long heapUsed = ManagementFactory.getMemoryMXBean().getHeapMemoryUsage().getUsed();
      final long memoryUsedPerExecutor = heapUsed / numExecutors;
      // free memory available per executor in case of LLAP
      totalFreeMemory = conf.getMaxMemoryAvailable() - memoryUsedPerExecutor;
    }

    // topN * 64 bytes are set aside to compensate for the key/value/hashcode arrays
    final long budget = (long) (memUsage * totalFreeMemory);
    this.threshold = budget - topN * 64L;
    if (threshold < 0) {
      return;
    }

    // One extra slot holds the incoming key while it competes with the current top-n.
    final int capacity = topN + 1;
    this.indexes = isMapGroupBy ? new HashForGroup() : new HashForRow();
    this.keys = new byte[capacity][];
    this.values = new byte[capacity][];
    this.hashes = new int[capacity];
    this.distKeyLengths = new int[capacity];
    this.evicted = topN;
    this.isEnabled = true;
  }

  /**
   * Offers a non-vectorized key to the heap.
   * @param key Serialized key.
   * @param partColsIsNull not used by this implementation.
   * @return {@link #FORWARD} if the row should be forwarded;
   *         {@link #EXCLUDE} if the row should be discarded;
   *         otherwise the non-negative index under which the key was stored, which the caller
   *         should pass on to storeValue.
   * @throws HiveException if the heap reports a result that is neither an index nor one of
   *         the two sentinels above.
   */
  public int tryStoreKey(HiveKey key, boolean partColsIsNull) throws HiveException, IOException {
    if (!isEnabled) {
      return FORWARD; // disabled - forward all rows
    }
    if (topN == 0) {
      return EXCLUDE; // limit 0 - eat all rows
    }
    final int result = insertKeyIntoHeap(key);
    if (result >= 0) {
      // The key was stored; account for its bytes.
      usage += key.getLength();
      return result;
    }
    // A negative result is a sentinel from the IndexStore.
    if (result == FORWARD || result == EXCLUDE) {
      return result;
    }
    assert false;
    throw new HiveException("Invalid result trying to store the key: " + result);
  }


  /**
   * Runs the basic checks and resets the per-batch state for a new vectorized row batch.
   * @param size batch size
   * @return {@link #FORWARD} if all rows should be forwarded without consulting the hash;
   *         {@link #EXCLUDE} if all rows should be discarded without consulting the hash;
   *         any other result means the batch has been started.
   */
  public int startVectorizedBatch(int size) throws IOException, HiveException {
    if (!isEnabled) {
      return FORWARD; // disabled - forward all rows
    }
    if (topN == 0) {
      return EXCLUDE; // limit 0 - eat all rows
    }
    // Memory is only checked here, before the batch: the whole batch is already in memory,
    // so the per-key checks are bypassed while it is processed.
    if (usage > threshold) {
      // Read the counter first; flushInternal() resets it.
      final boolean nothingExcluded = (this.excluded == 0);
      LOG.info("Top-N hash is flushing rows");
      flushInternal();
      if (nothingExcluded) {
        // The hash filtered nothing since the previous flush, so it is not paying off.
        LOG.info("Top-N hash has been disabled");
        isEnabled = false;
        return FORWARD;
      }
    }
    // Batch accepted; set up its bookkeeping.
    batchSize = size;
    final boolean resultArrayTooSmall =
        batchIndexToResult == null || batchIndexToResult.length < batchSize;
    if (resultArrayTooSmall) {
      batchIndexToResult = new int[Math.max(batchSize, VectorizedRowBatch.DEFAULT_SIZE)];
    }
    if (indexToBatchIndex == null) {
      indexToBatchIndex = new int[topN + 1];
    }
    Arrays.fill(indexToBatchIndex, -1);
    batchNumForwards = 0;
    return 0;
  }

  /**
   * Try to put the key from the current vectorized batch into the heap.
   * The outcome is not returned; it is recorded in batchIndexToResult[batchIndex] as a
   * storage index, EXCLUDE, or an encoded MAY_FORWARD value, and may still be rewritten to
   * EXCLUDE by a later call in the same batch if the corresponding key gets evicted.
   * @param key the key.
   * @param partColsIsNull not used by this implementation.
   * @param batchIndex The index of the key in the vectorized batch (sequential, not .selected).
   */
  public void tryStoreVectorizedKey(HiveKey key, boolean partColsIsNull, int batchIndex)
      throws HiveException, IOException {
    // Assumption - batchIndex is increasing; startVectorizedBatch was called
    int size = indexes.size();
    // Use the next free slot while the heap is not full, otherwise reuse the evicted slot.
    int index = size < topN ? size : evicted;
    keys[index] = Arrays.copyOf(key.getBytes(), key.getLength());
    distKeyLengths[index] = key.getDistKeyLength();
    hashes[index] = key.hashCode();
    Integer collisionIndex = indexes.store(index);
    if (null != collisionIndex) {
      /*
       * There is a collision, so 'index' will be reused for the next key;
       * have the map point back to the original index.
       */
      if ( indexes instanceof HashForGroup ) {
        indexes.store(collisionIndex);
      }
      // forward conditional on the survival of the corresponding key currently in indexes.
      ++batchNumForwards;
      // Encode the colliding index so the row can be excluded later if that key is evicted.
      batchIndexToResult[batchIndex] = MAY_FORWARD - collisionIndex;
      return;
    }
    indexToBatchIndex[index] = batchIndex;
    batchIndexToResult[batchIndex] = index;
    // The heap held fewer than topN keys before this one, so nothing has to be evicted.
    if (size != topN) return;
    evicted = indexes.removeBiggest();  // remove the biggest key
    if (index == evicted) {
      excluded++;
      batchIndexToResult[batchIndex] = EXCLUDE;
      indexToBatchIndex[index] = -1;
      return; // input key is bigger than any of keys in hash
    }
    // An older key was evicted: release its key/value and the memory accounted for them.
    removed(evicted);
    int evictedBatchIndex = indexToBatchIndex[evicted];
    if (evictedBatchIndex >= 0) {
      // reset the result for the evicted index
      batchIndexToResult[evictedBatchIndex] = EXCLUDE;
      indexToBatchIndex[evicted] = -1;
    }
    // Evict all results grouped with this index; it cannot be any key further in the batch.
    // If we evict a key from this batch, the keys grouped with it cannot be earlier than that key.
    // If we evict a key that is not from this batch, initial i = (-1) + 1 = 0, as intended.
    int evictedForward = (MAY_FORWARD - evicted);
    // The loop stops early once no conditionally-forwarded rows remain in the batch.
    for (int i = evictedBatchIndex + 1; i < batchIndex && (batchNumForwards > 0); ++i) {
      if (batchIndexToResult[i] == evictedForward) {
        batchIndexToResult[i] = EXCLUDE;
        --batchNumForwards;
      }
    }
  }

  /**
   * Returns the result recorded for one row of the current vectorized batch.
   * Encoded "may forward" entries (values at or below MAY_FORWARD) are reported as
   * {@link #FORWARD}.
   * @param batchIndex index of the key in the batch.
   * @return the result, with the same meaning as the return value of
   *         {@link #tryStoreKey(HiveKey, boolean)}
   */
  public int getVectorizedBatchResult(int batchIndex) {
    final int recorded = batchIndexToResult[batchIndex];
    if (recorded > MAY_FORWARD) {
      return recorded;
    }
    return FORWARD;
  }

  /**
   * After the vectorized batch has been processed, rebuilds the key that caused a row to be
   * forwarded. A row is only marked for forwarding because it has the same key as a row
   * already in the heap (for GBY), so the key kept in the heap is used to emit the row.
   * @param batchIndex index of the key in the batch.
   * @return a new HiveKey carrying the bytes, hash code and distribution key length stored
   *         for the colliding heap entry.
   */
  public HiveKey getVectorizedKeyToForward(int batchIndex) {
    // Undo the MAY_FORWARD encoding to recover the storage index of the colliding key.
    final int slot = MAY_FORWARD - batchIndexToResult[batchIndex];
    final HiveKey forwarded = new HiveKey();
    final byte[] storedKey = keys[slot];
    forwarded.set(storedKey, 0, storedKey.length);
    forwarded.setHashCode(hashes[slot]);
    forwarded.setDistKeyLength(distKeyLengths[slot]);
    return forwarded;
  }

  /**
   * After the vectorized batch has been processed, returns the distribution key length of
   * the key stored for a row. The recorded batch result is used directly as the storage index.
   * @param batchIndex index of the key in the batch.
   * @return The distribution length corresponding to the key.
   */
  public int getVectorizedKeyDistLength(int batchIndex) {
    final int slot = batchIndexToResult[batchIndex];
    return distKeyLengths[slot];
  }

  /**
   * After the vectorized batch has been processed, returns the hash code of the key stored
   * for a row. The recorded batch result is used directly as the storage index.
   * @param batchIndex index of the key in the batch.
   * @return The hashCode corresponding to the key.
   */
  public int getVectorizedKeyHashCode(int batchIndex) {
    final int slot = batchIndexToResult[batchIndex];
    return hashes[slot];
  }
  
  /**
   * Stores a copy of the value next to the key held at the given index and updates the
   * memory accounting.
   * @param index The index, either from tryStoreKey or from tryStoreVectorizedKey result.
   * @param hashCode hashCode of key; not used by this implementation (used by ptfTopNHash).
   * @param value The value to store.
   * @param vectorized Whether the result is coming from a vectorized batch.
   */
  public void storeValue(int index, int hashCode, BytesWritable value, boolean vectorized) {
    final byte[] copy = Arrays.copyOf(value.getBytes(), value.getLength());
    values[index] = copy;
    int added = copy.length;
    if (vectorized) {
      // The vectorized path does not account for key bytes while processing the batch.
      added += keys[index].length;
    }
    usage += added;
  }

  /**
   * Emits every row cached in the heap to the collector. Does nothing when the hash is
   * disabled or when topN is 0.
   * @throws HiveException wrapping any IOException raised while emitting the rows.
   */
  public void flush() throws HiveException {
    if (isEnabled && topN != 0) {
      try {
        flushInternal();
      } catch (IOException ex) {
        throw new HiveException(ex);
      }
    }
  }

  /**
   * returns index for key/value/hashcode if it's acceptable.
   * -1, -2, -3, -4 can be returned for other actions.
   * 

* -1 for FORWARD : should be forwarded to output collector (for GBY) * -2 for EXCLUDED : not in top-k. ignore it */ private int insertKeyIntoHeap(HiveKey key) throws IOException, HiveException { if (usage > threshold) { flushInternal(); if (excluded == 0) { LOG.info("Top-N hash is disabled"); isEnabled = false; } // we can now retry adding key/value into hash, which is flushed. // but for simplicity, just forward them return FORWARD; } int size = indexes.size(); int index = size < topN ? size : evicted; keys[index] = Arrays.copyOf(key.getBytes(), key.getLength()); distKeyLengths[index] = key.getDistKeyLength(); hashes[index] = key.hashCode(); if (null != indexes.store(index)) { // it's only for GBY which should forward all values associated with the key in the range // of limit. new value should be attatched with the key but in current implementation, // only one values is allowed. with map-aggreagtion which is true by default, // this is not common case, so just forward new key/value and forget that (todo) return FORWARD; } if (size == topN) { evicted = indexes.removeBiggest(); // remove the biggest key if (index == evicted) { excluded++; return EXCLUDE; // input key is bigger than any of keys in hash } removed(evicted); } return index; } // key/value of the index is removed. 
retrieve memory usage private void removed(int index) { usage -= keys[index].length; keys[index] = null; if (values[index] != null) { usage -= values[index].length; values[index] = null; } hashes[index] = -1; distKeyLengths[index] = -1; } private void flushInternal() throws IOException, HiveException { for (int index : indexes.indexes()) { if (index != evicted && values[index] != null) { collector.collect(keys[index], values[index], hashes[index]); usage -= values[index].length; values[index] = null; hashes[index] = -1; } } excluded = 0; } private interface IndexStore { int size(); /** * @return the index which caused the item to be rejected; or null if accepted */ Integer store(int index); int removeBiggest(); Iterable indexes(); } /** * for order by, same keys are counted (For 1-2-2-3-4, limit 3 is 1-2-2) * MinMaxPriorityQueue is used because it alows duplication and fast access to biggest one */ private class HashForRow implements IndexStore { private final MinMaxPriorityQueue indexes = MinMaxPriorityQueue.orderedBy(C).create(); public int size() { return indexes.size(); } // returns null always public Integer store(int index) { boolean result = indexes.add(index); assert result; return null; } public int removeBiggest() { return indexes.removeLast(); } public Iterable indexes() { Integer[] array = indexes.toArray(new Integer[indexes.size()]); Arrays.sort(array, 0, array.length, C); return Arrays.asList(array); } } /** * for group by, same keys are not counted (For 1-2-2-3-4, limit 3 is 1-2-(2)-3) * simple TreeMap is used because group by does not need keep duplicated keys */ private class HashForGroup implements IndexStore { // TreeSet anyway uses TreeMap; so use plain TreeMap to be able to get value in collisions. 
private final TreeMap indexes = new TreeMap(C); public int size() { return indexes.size(); } // returns false if index already exists in map public Integer store(int index) { return indexes.put(index, index); } public int removeBiggest() { Integer last = indexes.lastKey(); indexes.remove(last); return last; } public Iterable indexes() { return indexes.keySet(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy