// org.apache.hadoop.hive.ql.exec.TopNHash Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import java.io.IOException;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.util.Arrays;
import java.util.Comparator;
import java.util.TreeMap;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.WritableComparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.MinMaxPriorityQueue;
/**
 * Stores binary key/value in sorted manner to get top-n key/value.
 * <p>
 * Serialized keys, values, hash codes and distribution-key lengths are kept in parallel
 * arrays addressed by a slot index. An index store orders those slot indexes by a byte-wise
 * comparison of the distribution-key prefix of each key, and the biggest key is evicted
 * once more than topN keys are held.
 * TODO: rename to TopNHeap?
 */
public class TopNHash {
private static final Logger LOG = LoggerFactory.getLogger(TopNHash.class);
/**
 * Callback through which the top-n hash hands rows back to the operator that owns it.
 * At present it is only used to forward the key/value pairs stored in the hash.
 */
public static interface BinaryCollector {
  /**
   * Receives one row held by the hash.
   *
   * @param key serialized key bytes
   * @param value serialized value bytes
   * @param hash hash code recorded for the key
   * @throws IOException if the receiver cannot consume the row
   */
  void collect(byte[] key, byte[] value, int hash) throws IOException;
}
public static final int FORWARD = -1; // Forward the row to reducer as is.
public static final int EXCLUDE = -2; // Discard the row.
// Base of the vectorized "may forward" encoding: a batch result of (MAY_FORWARD - i) marks a
// row that is forwarded unless the key held in slot i is evicted later in the same batch.
private static final int MAY_FORWARD = -3; // Vectorized - may forward the row, not sure yet.
protected BinaryCollector collector;
protected int topN;
protected long threshold; // usage level above which the stored rows are flushed
protected long usage; // running total of the byte lengths of stored keys and values
// binary keys, values and hashCodes of rows, lined up by index
private byte[][] keys;
private byte[][] values;
private int[] hashes;
private int[] distKeyLengths;
private IndexStore indexes; // The heap over the keys, storing indexes in the array.
private int evicted; // recently evicted index (used for next key/value)
private int excluded; // count of rows excluded since the previous flush
// temporary single-batch context used for vectorization
private int batchNumForwards = 0; // number of rows in the current batch marked as may-forward
private int[] indexToBatchIndex; // mapping of index (lined up w/keys) to index in the batch
protected int[] batchIndexToResult; // mapping of index in the batch (linear) to hash result
protected int batchSize; // Size of the current batch.
protected boolean isEnabled = false; // false means every row is forwarded untouched
/**
 * Orders slot indexes by the keys they refer to. Only the distribution-key prefix of each
 * serialized key (the first distKeyLengths[i] bytes) takes part in the byte-wise comparison.
 * The type argument is required: a raw Comparator would not be implemented by
 * compare(Integer, Integer).
 */
private final Comparator<Integer> C = new Comparator<Integer>() {
  @Override
  public int compare(Integer o1, Integer o2) {
    byte[] key1 = keys[o1];
    byte[] key2 = keys[o2];
    int length1 = distKeyLengths[o1];
    int length2 = distKeyLengths[o2];
    return WritableComparator.compareBytes(key1, 0, length1, key2, 0, length2);
  }
};
/**
 * Configures the hash for the given limit and memory budget.
 * <p>
 * With a limit of zero the hash is enabled without allocating anything. Otherwise the
 * flush threshold is derived from the free memory estimate; if that threshold is negative
 * the hash stays disabled.
 *
 * @param topN number of keys to retain; must not be negative
 * @param memUsage fraction of the estimated free memory the hash may use; must be positive
 * @param isMapGroupBy true to use the group-by index store, false for the row index store
 * @param collector receiver of the rows emitted when the hash is flushed
 * @param conf operator descriptor, consulted for the available memory when running on Tez
 * @param hconf configuration providing the execution engine, mode and executor count
 */
public void initialize(
    int topN, float memUsage, boolean isMapGroupBy, BinaryCollector collector, final OperatorDesc conf,
    final Configuration hconf) {
  assert topN >= 0 && memUsage > 0;
  assert !this.isEnabled;
  this.isEnabled = false;
  this.topN = topN;
  this.collector = collector;
  if (topN == 0) {
    // A zero limit is handled by the short-circuit paths, so no storage is needed.
    this.isEnabled = true;
    return;
  }

  final boolean onTez =
      HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("tez");
  final boolean onLlap =
      onTez && HiveConf.getVar(hconf, HiveConf.ConfVars.HIVE_EXECUTION_MODE).equals("llap");

  final long freeBytes;
  if (onTez) {
    int executors = 1;
    if (onLlap) {
      executors = HiveConf.getIntVar(hconf, HiveConf.ConfVars.LLAP_DAEMON_NUM_EXECUTORS);
    }
    // TODO: For LLAP, assumption is off-heap cache.
    final MemoryMXBean heapBean = ManagementFactory.getMemoryMXBean();
    final long usedPerExecutor = heapBean.getHeapMemoryUsage().getUsed() / executors;
    // Free memory available to a single executor.
    freeBytes = conf.getMaxMemoryAvailable() - usedPerExecutor;
  } else {
    // used = totalMemory() - freeMemory(); free = maxMemory() - used
    final Runtime runtime = Runtime.getRuntime();
    freeBytes = runtime.maxMemory() - runtime.totalMemory() + runtime.freeMemory();
  }

  // topN * 64 compensates for the key/value/hashcode arrays themselves.
  this.threshold = (long) (memUsage * freeBytes) - topN * 64L;
  if (this.threshold < 0) {
    return;
  }

  // One extra slot beyond topN holds the incoming key before the biggest one is evicted.
  final int slots = topN + 1;
  this.indexes = isMapGroupBy ? new HashForGroup() : new HashForRow();
  this.keys = new byte[slots][];
  this.values = new byte[slots][];
  this.hashes = new int[slots];
  this.distKeyLengths = new int[slots];
  this.evicted = topN;
  this.isEnabled = true;
}
/**
 * Tries to store a non-vectorized key.
 *
 * @param key Serialized key.
 * @param partColsIsNull not read by this implementation.
 * @return TopNHash.FORWARD if the row should be forwarded;
 *         TopNHash.EXCLUDE if the row should be discarded;
 *         any other number is the slot the key was stored in and should be passed to storeValue.
 * @throws HiveException if the heap reports a negative result other than FORWARD or EXCLUDE
 * @throws IOException if flushing stored rows to the collector fails
 */
public int tryStoreKey(HiveKey key, boolean partColsIsNull) throws HiveException, IOException {
  if (!isEnabled) {
    return FORWARD; // disabled: every row passes straight through
  }
  if (topN == 0) {
    return EXCLUDE; // zero limit: every row is dropped
  }

  final int slot = insertKeyIntoHeap(key);
  if (slot >= 0) {
    usage += key.getLength();
    return slot;
  }

  // A negative result is a directive from the heap rather than a slot.
  if (slot == FORWARD || slot == EXCLUDE) {
    return slot;
  }
  assert false;
  throw new HiveException("Invalid result trying to store the key: " + slot);
}
/**
 * Runs the basic checks and prepares the per-batch state for a new vectorized row batch.
 *
 * @param size batch size
 * @return TopNHash.FORWARD if all rows should be forwarded w/o trying to call TopN;
 *         TopNHash.EXCLUDE if all rows should be discarded w/o trying to call TopN;
 *         any other result means the batch has been started.
 * @throws IOException if flushing stored rows to the collector fails
 * @throws HiveException declared for the flush performed when memory usage is too high
 */
public int startVectorizedBatch(int size) throws IOException, HiveException {
  if (!isEnabled) {
    return FORWARD; // disabled: forward the whole batch
  }
  if (topN == 0) {
    return EXCLUDE; // zero limit: drop the whole batch
  }

  // Memory is only checked here; the batch itself is already in memory, so the per-row
  // checks are skipped while it is being processed.
  if (usage > threshold) {
    // The flush resets the counter, so it has to be read beforehand.
    final int excludedBeforeFlush = this.excluded;
    LOG.info("Top-N hash is flushing rows");
    flushInternal();
    if (excludedBeforeFlush == 0) {
      // Nothing was filtered out since the last flush: the hash is not paying for itself.
      LOG.info("Top-N hash has been disabled");
      isEnabled = false;
      return FORWARD;
    }
  }

  // Set up the context for the new batch.
  batchSize = size;
  final boolean resultsTooSmall =
      batchIndexToResult == null || batchIndexToResult.length < batchSize;
  if (resultsTooSmall) {
    batchIndexToResult = new int[Math.max(batchSize, VectorizedRowBatch.DEFAULT_SIZE)];
  }
  if (indexToBatchIndex == null) {
    indexToBatchIndex = new int[topN + 1];
  }
  Arrays.fill(indexToBatchIndex, -1);
  batchNumForwards = 0;
  return 0;
}
/**
 * Try to put the key from the current vectorized batch into the heap.
 * <p>
 * Nothing is returned: the outcome is written to batchIndexToResult, because a row accepted
 * now can still be evicted by a later row of the same batch. Read the final outcome with
 * {@link #getVectorizedBatchResult(int)} once the batch has been processed.
 * @param key the key.
 * @param partColsIsNull not read by this implementation.
 * @param batchIndex The index of the key in the vectorized batch (sequential, not .selected).
 */
public void tryStoreVectorizedKey(HiveKey key, boolean partColsIsNull, int batchIndex)
throws HiveException, IOException {
// Assumption - batchIndex is increasing; startVectorizedBatch was called
int size = indexes.size();
// While the heap is still filling up take the next free slot; afterwards reuse the spare slot.
int index = size < topN ? size : evicted;
keys[index] = Arrays.copyOf(key.getBytes(), key.getLength());
distKeyLengths[index] = key.getDistKeyLength();
hashes[index] = key.hashCode();
// A non-null result is the slot of an equal key that is already in the index store.
Integer collisionIndex = indexes.store(index);
if (null != collisionIndex) {
/*
* since there is a collision index will be used for the next value
* so have the map point back to original index.
*/
if ( indexes instanceof HashForGroup ) {
indexes.store(collisionIndex);
}
// forward conditional on the survival of the corresponding key currently in indexes.
++batchNumForwards;
// Encoded as a value <= MAY_FORWARD so that the colliding slot can be recovered later.
batchIndexToResult[batchIndex] = MAY_FORWARD - collisionIndex;
return;
}
// Accepted for now: link the slot and the batch row in both directions.
indexToBatchIndex[index] = batchIndex;
batchIndexToResult[batchIndex] = index;
// The heap was not full before this insert, so nothing has to be evicted.
if (size != topN) return;
evicted = indexes.removeBiggest(); // remove the biggest key
if (index == evicted) {
excluded++;
batchIndexToResult[batchIndex] = EXCLUDE;
indexToBatchIndex[index] = -1;
return; // input key is bigger than any of keys in hash
}
// An older key lost its place: release its memory accounting and its slot contents.
removed(evicted);
int evictedBatchIndex = indexToBatchIndex[evicted];
if (evictedBatchIndex >= 0) {
// reset the result for the evicted index
batchIndexToResult[evictedBatchIndex] = EXCLUDE;
indexToBatchIndex[evicted] = -1;
}
// Evict all results grouped with this index; it cannot be any key further in the batch.
// If we evict a key from this batch, the keys grouped with it cannot be earlier than that key.
// If we evict a key that is not from this batch, initial i = (-1) + 1 = 0, as intended.
int evictedForward = (MAY_FORWARD - evicted);
// The scan stops early once no may-forward rows remain in the batch.
for (int i = evictedBatchIndex + 1; i < batchIndex && (batchNumForwards > 0); ++i) {
if (batchIndexToResult[i] == evictedForward) {
batchIndexToResult[i] = EXCLUDE;
--batchNumForwards;
}
}
}
/**
 * Returns the final outcome recorded for one row of the vectorized batch.
 * Rows still marked as "may forward" are reported as FORWARD.
 *
 * @param batchIndex index of the key in the batch.
 * @return the result, same as from {@link #tryStoreKey(HiveKey, boolean)}
 */
public int getVectorizedBatchResult(int batchIndex) {
  final int recorded = batchIndexToResult[batchIndex];
  if (recorded > MAY_FORWARD) {
    return recorded;
  }
  return FORWARD;
}
/**
 * After the vectorized batch is processed, rebuilds the key that caused a particular row
 * to be forwarded. A row is only marked to forward because its key equals one already in
 * the heap (for GBY), so the key stored in the heap is used to emit the forwarded row.
 *
 * @param batchIndex index of the key in the batch; its recorded result must be a
 *        "may forward" value.
 * @return a new key carrying the bytes, hash code and distribution-key length of the
 *         matching stored key.
 */
public HiveKey getVectorizedKeyToForward(int batchIndex) {
  // Undo the (MAY_FORWARD - slot) encoding to get back the slot of the stored key.
  final int slot = MAY_FORWARD - batchIndexToResult[batchIndex];
  final HiveKey forwarded = new HiveKey();
  final byte[] storedKey = keys[slot];
  forwarded.set(storedKey, 0, storedKey.length);
  forwarded.setHashCode(hashes[slot]);
  forwarded.setDistKeyLength(distKeyLengths[slot]);
  return forwarded;
}
/**
 * After the vectorized batch is processed, returns the distribution-key length of a key.
 * The result recorded for the row is used directly as a slot, so the row must be a stored one.
 *
 * @param batchIndex index of the key in the batch.
 * @return The distribution length corresponding to the key.
 */
public int getVectorizedKeyDistLength(int batchIndex) {
  final int slot = batchIndexToResult[batchIndex];
  return distKeyLengths[slot];
}
/**
 * After the vectorized batch is processed, returns the hash code of a key.
 * The result recorded for the row is used directly as a slot, so the row must be a stored one.
 *
 * @param batchIndex index of the key in the batch.
 * @return The hashCode corresponding to the key.
 */
public int getVectorizedKeyHashCode(int batchIndex) {
  final int slot = batchIndexToResult[batchIndex];
  return hashes[slot];
}
/**
 * Stores a copy of the value next to the key already held in the given slot and adds
 * its size to the memory accounting.
 *
 * @param index The index, either from tryStoreKey or from tryStoreVectorizedKey result.
 * @param hashCode hashCode of the key; not read by this implementation.
 * @param value The value to store.
 * @param vectorized Whether the result is coming from a vectorized batch.
 */
public void storeValue(int index, int hashCode, BytesWritable value, boolean vectorized) {
  final byte[] copy = Arrays.copyOf(value.getBytes(), value.getLength());
  values[index] = copy;

  int accounted = copy.length;
  if (vectorized) {
    // The vectorized path does not count key bytes while the batch is processed.
    accounted += keys[index].length;
  }
  usage += accounted;
}
/**
 * Sends every row cached in the heap to the collector.
 * Does nothing when the hash is disabled or the limit is zero.
 *
 * @throws HiveException wrapping any IOException raised while collecting rows
 */
public void flush() throws HiveException {
  final boolean nothingCached = !isEnabled || topN == 0;
  if (nothingCached) {
    return;
  }
  try {
    flushInternal();
  } catch (IOException ioe) {
    throw new HiveException(ioe);
  }
}
/**
 * Copies the key into the next usable slot and registers that slot with the index store.
 * <p>
 * When memory usage exceeds the threshold the stored rows are flushed instead, and the
 * hash disables itself if no row had been excluded since the previous flush.
 *
 * @param key serialized key to insert
 * @return the non-negative slot index if the key was stored;
 *         FORWARD (-1) if the row should be forwarded to the output collector (for GBY,
 *         or right after a flush);
 *         EXCLUDE (-2) if the key is not in the top-n and should be ignored
 * @throws IOException if flushing stored rows to the collector fails
 * @throws HiveException declared for the flush performed when memory usage is too high
 */
private int insertKeyIntoHeap(HiveKey key) throws IOException, HiveException {
  if (usage > threshold) {
    // flushInternal() resets the excluded counter, so it must be sampled first;
    // testing it after the flush would always disable the hash.
    final int excludedBeforeFlush = excluded;
    flushInternal();
    if (excludedBeforeFlush == 0) {
      LOG.info("Top-N hash is disabled");
      isEnabled = false;
    }
    // we can now retry adding key/value into hash, which is flushed.
    // but for simplicity, just forward them
    return FORWARD;
  }
  int size = indexes.size();
  // While the heap is still filling up take the next free slot; afterwards reuse the spare slot.
  int index = size < topN ? size : evicted;
  keys[index] = Arrays.copyOf(key.getBytes(), key.getLength());
  distKeyLengths[index] = key.getDistKeyLength();
  hashes[index] = key.hashCode();
  if (null != indexes.store(index)) {
    // it's only for GBY which should forward all values associated with the key in the range
    // of limit. new value should be attached with the key but in current implementation,
    // only one value is allowed. with map-aggregation which is true by default,
    // this is not common case, so just forward new key/value and forget that (todo)
    return FORWARD;
  }
  if (size == topN) {
    evicted = indexes.removeBiggest(); // remove the biggest key
    if (index == evicted) {
      excluded++;
      return EXCLUDE; // input key is bigger than any of keys in hash
    }
    removed(evicted);
  }
  return index;
}
/**
 * Clears the slot of a removed key/value and gives its bytes back to the memory accounting.
 *
 * @param index slot whose key (and value, if one was stored) has been removed
 */
private void removed(int index) {
  final byte[] oldKey = keys[index];
  final byte[] oldValue = values[index];

  usage -= oldKey.length;
  if (oldValue != null) {
    usage -= oldValue.length;
  }

  keys[index] = null;
  values[index] = null;
  hashes[index] = -1;
  distKeyLengths[index] = -1;
}
/**
 * Hands every stored row that has a value to the collector, in index-store order, and
 * resets the excluded counter. Keys stay registered in the index store; only the values
 * are released and subtracted from the memory accounting.
 *
 * @throws IOException if the collector fails
 * @throws HiveException declared for callers; not thrown by the statements in this method
 */
private void flushInternal() throws IOException, HiveException {
  for (int index : indexes.indexes()) {
    final byte[] value = values[index];
    if (index == evicted || value == null) {
      continue; // spare slot, or a key whose value was never stored or already flushed
    }
    collector.collect(keys[index], value, hashes[index]);
    usage -= value.length;
    values[index] = null;
    hashes[index] = -1;
  }
  excluded = 0;
}
/**
 * Ordered collection of slot indexes; the ordering is defined by the keys the slots hold.
 */
private interface IndexStore {
  /** @return number of slot indexes currently held */
  int size();
  /**
   * @return the index which caused the item to be rejected; or null if accepted
   */
  Integer store(int index);
  /** Removes the slot index whose key is the biggest and returns it. */
  int removeBiggest();
  /** @return the held slot indexes; typed so that callers can iterate them as ints */
  Iterable<Integer> indexes();
}
/**
* for order by, same keys are counted (For 1-2-2-3-4, limit 3 is 1-2-2)
* MinMaxPriorityQueue is used because it alows duplication and fast access to biggest one
*/
private class HashForRow implements IndexStore {
private final MinMaxPriorityQueue indexes = MinMaxPriorityQueue.orderedBy(C).create();
public int size() {
return indexes.size();
}
// returns null always
public Integer store(int index) {
boolean result = indexes.add(index);
assert result;
return null;
}
public int removeBiggest() {
return indexes.removeLast();
}
public Iterable indexes() {
Integer[] array = indexes.toArray(new Integer[indexes.size()]);
Arrays.sort(array, 0, array.length, C);
return Arrays.asList(array);
}
}
/**
* for group by, same keys are not counted (For 1-2-2-3-4, limit 3 is 1-2-(2)-3)
* simple TreeMap is used because group by does not need keep duplicated keys
*/
private class HashForGroup implements IndexStore {
// TreeSet anyway uses TreeMap; so use plain TreeMap to be able to get value in collisions.
private final TreeMap indexes = new TreeMap(C);
public int size() {
return indexes.size();
}
// returns false if index already exists in map
public Integer store(int index) {
return indexes.put(index, index);
}
public int removeBiggest() {
Integer last = indexes.lastKey();
indexes.remove(last);
return last;
}
public Iterable indexes() {
return indexes.keySet();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy