org.apache.hadoop.hive.ql.exec.PTFTopNHash
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.io.BytesWritable;
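
/**
 * A {@link TopNHash} that keeps one top-N heap per PTF partition instead of a single
 * global heap, so that a "top N rows per partition" limit can be enforced on the map
 * side. It tracks the combined memory usage of all per-partition heaps and flushes
 * the largest heap whenever the overall threshold is exceeded.
 */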
public class PTFTopNHash extends TopNHash {

  protected float memUsage;
  protected boolean isMapGroupBy;
  private Map<Key, TopNHash> partitionHeaps;
  private TopNHash largestPartition;
  private boolean prevIndexPartIsNull;
  private Set<Integer> indexesWithNullPartKey;
  private OperatorDesc conf;
  private Configuration hconf;
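
  /**
   * Records the shared limit and memory settings and the downstream collector so that
   * per-partition TopNHash instances can later be created lazily with the same
   * configuration.
   */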
  public void initialize(
      int topN, float memUsage, boolean isMapGroupBy, BinaryCollector collector, final OperatorDesc conf,
      final Configuration hconf) {
    super.initialize(topN, memUsage, isMapGroupBy, collector, conf, hconf);
    this.conf = conf;
    this.hconf = hconf;
    this.isMapGroupBy = isMapGroupBy;
    this.memUsage = memUsage;
    partitionHeaps = new HashMap<Key, TopNHash>();
    indexesWithNullPartKey = new HashSet<Integer>();
  }
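
  /**
   * Row-mode entry point. Remembers whether this row's partition columns were all
   * null (needed later by storeValue) and delegates to _tryStoreKey with no batch
   * index.
   */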
  public int tryStoreKey(HiveKey key, boolean partColsIsNull) throws HiveException, IOException {
    prevIndexPartIsNull = partColsIsNull;
    return _tryStoreKey(key, partColsIsNull, -1);
  }
  private void updateLargest(TopNHash p) {
    if (largestPartition == null || largestPartition.usage < p.usage) {
      largestPartition = p;
    }
  }

  private void findLargest() {
    for (TopNHash p : partitionHeaps.values()) {
      updateLargest(p);
    }
  }
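
  /**
   * Finds or lazily creates the heap for this row's partition, offers the key to it,
   * and keeps the combined usage counter in sync. When combined usage exceeds the
   * threshold, the largest partition heap is flushed to the collector and the next
   * largest is located. A batchIndex >= 0 selects the vectorized path.
   */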
  public int _tryStoreKey(HiveKey key, boolean partColsIsNull, int batchIndex) throws HiveException, IOException {
    if (!isEnabled) {
      return FORWARD; // short-circuit quickly - forward all rows
    }
    if (topN == 0) {
      return EXCLUDE; // short-circuit quickly - eat all rows
    }
    Key pk = new Key(partColsIsNull, key.hashCode());
    TopNHash partHeap = partitionHeaps.get(pk);
    if (partHeap == null) {
      partHeap = new TopNHash();
      partHeap.initialize(topN, memUsage, isMapGroupBy, collector, conf, hconf);
      if (batchIndex >= 0) {
        partHeap.startVectorizedBatch(batchSize);
      }
      partitionHeaps.put(pk, partHeap);
    }
    usage = usage - partHeap.usage;
    int r = 0;
    if (batchIndex >= 0) {
      partHeap.tryStoreVectorizedKey(key, false, batchIndex);
    } else {
      r = partHeap.tryStoreKey(key, false);
    }
    usage = usage + partHeap.usage;
    updateLargest(partHeap);
    if (usage > threshold) {
      usage -= largestPartition.usage;
      largestPartition.flush();
      usage += largestPartition.usage;
      largestPartition = null;
      findLargest();
    }
    return r;
  }
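
  /**
   * Stores the value for a key that was parked by tryStoreKey, routing it to the same
   * partition heap via the hash code and the null flag recorded for that row.
   */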
  public void storeValue(int index, int hashCode, BytesWritable value, boolean vectorized) {
    Key pk = new Key(prevIndexPartIsNull, hashCode);
    TopNHash partHeap = partitionHeaps.get(pk);
    usage = usage - partHeap.usage;
    partHeap.storeValue(index, hashCode, value, vectorized);
    usage = usage + partHeap.usage;
    updateLargest(partHeap);
  }
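
  // Flush all partition heaps, emitting their contents to the collector.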
  public void flush() throws HiveException {
    if (!isEnabled || (topN == 0)) {
      return;
    }
    for (TopNHash partHash : partitionHeaps.values()) {
      partHash.flush();
    }
  }
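
  /**
   * Prepares every existing partition heap for a new vectorized batch. This class
   * reuses batchIndexToResult to remember each row's key hash code, so the matching
   * partition heap can be looked up again by batch index.
   */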
  public int startVectorizedBatch(int size) throws IOException, HiveException {
    if (!isEnabled) {
      return FORWARD; // short-circuit quickly - forward all rows
    } else if (topN == 0) {
      return EXCLUDE; // short-circuit quickly - eat all rows
    }
    for (TopNHash partHash : partitionHeaps.values()) {
      usage = usage - partHash.usage;
      partHash.startVectorizedBatch(size);
      usage = usage + partHash.usage;
      updateLargest(partHash);
    }
    batchSize = size;
    if (batchIndexToResult == null || batchIndexToResult.length < batchSize) {
      batchIndexToResult = new int[Math.max(batchSize, VectorizedRowBatch.DEFAULT_SIZE)];
    }
    indexesWithNullPartKey.clear();
    return 0;
  }
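
  /**
   * Vectorized entry point: stores the key, then records per batch index both the
   * key's hash code and whether the partition columns were null, so later accessors
   * can find the owning partition heap.
   */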
  public void tryStoreVectorizedKey(HiveKey key, boolean partColsIsNull, int batchIndex)
      throws HiveException, IOException {
    _tryStoreKey(key, partColsIsNull, batchIndex);
    if (partColsIsNull) {
      indexesWithNullPartKey.add(batchIndex);
    }
    batchIndexToResult[batchIndex] = key.hashCode();
  }
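
  // The four accessors below share one pattern: rebuild the partition key from the
  // hash code and null flag recorded for this batch index, then delegate to that
  // partition's heap.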
  public int getVectorizedBatchResult(int batchIndex) {
    prevIndexPartIsNull = indexesWithNullPartKey.contains(batchIndex);
    Key pk = new Key(prevIndexPartIsNull, batchIndexToResult[batchIndex]);
    TopNHash partHeap = partitionHeaps.get(pk);
    return partHeap.getVectorizedBatchResult(batchIndex);
  }

  public HiveKey getVectorizedKeyToForward(int batchIndex) {
    prevIndexPartIsNull = indexesWithNullPartKey.contains(batchIndex);
    Key pk = new Key(prevIndexPartIsNull, batchIndexToResult[batchIndex]);
    TopNHash partHeap = partitionHeaps.get(pk);
    return partHeap.getVectorizedKeyToForward(batchIndex);
  }

  public int getVectorizedKeyDistLength(int batchIndex) {
    prevIndexPartIsNull = indexesWithNullPartKey.contains(batchIndex);
    Key pk = new Key(prevIndexPartIsNull, batchIndexToResult[batchIndex]);
    TopNHash partHeap = partitionHeaps.get(pk);
    return partHeap.getVectorizedKeyDistLength(batchIndex);
  }

  public int getVectorizedKeyHashCode(int batchIndex) {
    prevIndexPartIsNull = indexesWithNullPartKey.contains(batchIndex);
    Key pk = new Key(prevIndexPartIsNull, batchIndexToResult[batchIndex]);
    TopNHash partHeap = partitionHeaps.get(pk);
    return partHeap.getVectorizedKeyHashCode(batchIndex);
  }
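
  /**
   * Map key for the per-partition heaps: the hash code of the partition columns plus
   * a flag marking rows whose partition columns are all null, so the null partition
   * cannot collide with a real partition that happens to share its hash code.
   */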
  static class Key {
    boolean isNull;
    int hashCode;

    public Key(boolean isNull, int hashCode) {
      this.isNull = isNull;
      this.hashCode = hashCode;
    }

    @Override
    public int hashCode() {
      return hashCode;
    }

    @Override
    public boolean equals(Object obj) {
      if (this == obj) {
        return true;
      }
      if (obj == null || getClass() != obj.getClass()) {
        return false;
      }
      Key other = (Key) obj;
      return hashCode == other.hashCode && isNull == other.isNull;
    }

    @Override
    public String toString() {
      return "" + hashCode + "," + isNull;
    }
  }
}
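
For orientation, the row-mode call protocol looks roughly as follows. The harness below is a minimal sketch, not part of Hive: it assumes the usual TopNHash contract in this tree, where tryStoreKey returns FORWARD to emit the row immediately, EXCLUDE to drop it, or a non-negative heap slot index that the caller must follow up with storeValue. The class name PTFTopNHashSketch and the feedRow helper are hypothetical; in Hive the real caller is the ReduceSink operator, which also supplies the BinaryCollector and configuration through initialize.

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.io.BytesWritable;

// Hypothetical harness (not in Hive); placed in the same package so the TopNHash
// result constants are visible regardless of their access modifiers.
public class PTFTopNHashSketch {

  /**
   * Feeds one already-serialized row to an initialized PTFTopNHash, following the
   * FORWARD / EXCLUDE / slot-index contract sketched above.
   */
  public static void feedRow(PTFTopNHash topNHash, HiveKey key, BytesWritable value,
      boolean partColsIsNull) throws HiveException, IOException {
    int result = topNHash.tryStoreKey(key, partColsIsNull);
    if (result == TopNHash.FORWARD) {
      // already known to be in the current top N: the caller emits the row itself
    } else if (result == TopNHash.EXCLUDE) {
      // can never make the top N: the row is simply dropped
    } else {
      // the key was parked in heap slot 'result'; hand the value to the same
      // partition heap (vectorized = false for the row-mode path)
      topNHash.storeValue(result, key.hashCode(), value, false);
    }
  }
}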