Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1.
You can buy this project and download/modify it as often as you want.
org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec.persistence;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.MemoryEstimate;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hive.common.util.HashCodeUtil;
import com.facebook.presto.hive.$internal.org.slf4j.Logger;
import com.facebook.presto.hive.$internal.org.slf4j.LoggerFactory;
/**
* Table container that serializes keys and values using LazyBinarySerDe into
* BytesBytesMultiHashMap, with very low memory overhead. However,
* there may be some perf overhead when retrieving rows.
*/
public class MapJoinBytesTableContainer
implements MapJoinTableContainer, MapJoinTableContainerDirectAccess {
// NOTE(review): the logger is keyed on MapJoinTableContainer rather than this class; confirm
// that is intentional before changing it, since log configuration may depend on the name.
private static final Logger LOG = LoggerFactory.getLogger(MapJoinTableContainer.class);
// TODO: For object inspector fields, assigning 16KB for now. To better estimate the memory size every
// object inspectors have to implement MemoryEstimate interface which is a lot of change with little benefit compared
// to writing an instrumentation agent for object size estimation
public static final long DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE = 16 * 1024L;
/** Backing store: every put and lookup in this container goes through this map. */
private final BytesBytesMultiHashMap hashMap;
/** The OI used to deserialize values. We never deserialize keys. */
private LazyBinaryStructObjectInspector internalValueOi;
/**
 * This is kind of hacky. Currently we get BinarySortableSerDe-serialized keys; we could
 * re-serialize them into LazyBinarySerDe, but instead we just reuse the bytes. However, to
 * compare the large table keys correctly when we do, we need to serialize them with correct
 * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe.
 */
private boolean[] sortableSortOrders;
/** Null markers taken from the BinarySortableSerDe key serde; null when that serde is not used. */
private byte[] nullMarkers;
/** Not-null markers taken from the BinarySortableSerDe key serde; null when that serde is not used. */
private byte[] notNullMarkers;
/** Key/value source used by putRow; assigned by setSerde. */
private KeyValueHelper writeHelper;
/** Key/value source used by the direct-access put. */
private DirectKeyValueWriter directWriteHelper;
/** Row returned for zero-length values (no fields); type arguments restored from the raw form. */
private final List<Object> EMPTY_LIST = new ArrayList<Object>(0);
/**
 * Creates a container whose hash-table tuning parameters are read from the configuration.
 *
 * Reads HIVEHASHTABLEKEYCOUNTADJUSTMENT, HIVEHASHTABLETHRESHOLD, HIVEHASHTABLELOADFACTOR and
 * HIVEHASHTABLEWBSIZE from {@code hconf} and delegates to the private constructor.
 *
 * @param hconf configuration the four hash-table settings are read from
 * @param valCtx value serde context, forwarded to the delegated constructor
 * @param keyCount key count, forwarded to the delegated constructor
 * @param memUsage memory usage figure, forwarded to the delegated constructor
 * @throws SerDeException declared by the delegated constructor
 */
public MapJoinBytesTableContainer(Configuration hconf,
MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage) throws SerDeException {
this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
valCtx, keyCount, memUsage);
}
/**
 * Allocates the backing hash map and the direct-access write helper.
 *
 * The map's initial size comes from {@code HashMapWrapper.calculateTableSize}; the load factor,
 * write-buffer size and memory usage are handed to the map unchanged. {@code valCtx} is accepted
 * but not used here.
 */
private MapJoinBytesTableContainer(float keyCountAdj, int threshold, float loadFactor,
    int wbSize, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
    throws SerDeException {
  this.hashMap = new BytesBytesMultiHashMap(
      HashMapWrapper.calculateTableSize(keyCountAdj, threshold, loadFactor, keyCount),
      loadFactor,
      wbSize,
      memUsage);
  this.directWriteHelper = new DirectKeyValueWriter();
}
/**
 * Wraps an already-built hash map.
 *
 * Only {@code hashMap} is assigned here: {@code directWriteHelper} stays null, so the
 * direct-access {@code put} would dereference null on an instance built this way.
 * NOTE(review): presumably such instances are only probed, never written; confirm with callers.
 *
 * @param hashMap the map to use as this container's backing store
 */
public MapJoinBytesTableContainer(BytesBytesMultiHashMap hashMap) {
this.hashMap = hashMap;
}
private LazyBinaryStructObjectInspector createInternalOi(
MapJoinObjectSerDeContext valCtx) throws SerDeException {
// We are going to use LBSerDe to serialize values; create OI for retrieval.
List extends StructField> fields = ((StructObjectInspector)
valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs();
List colNames = new ArrayList(fields.size());
List colOis = new ArrayList(fields.size());
for (int i = 0; i < fields.size(); ++i) {
StructField field = fields.get(i);
colNames.add(field.getFieldName());
// It would be nice if OI could return typeInfo...
TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
field.getFieldObjectInspector().getTypeName());
colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo));
}
return LazyBinaryObjectInspectorFactory
.getLazyBinaryStructObjectInspector(colNames, colOis);
}
/**
 * Replaces the object inspector used to deserialize values.
 *
 * @param internalValueOi the LazyBinary struct OI to use from now on; stored as given
 */
public void setInternalValueOi(LazyBinaryStructObjectInspector internalValueOi) {
this.internalValueOi = internalValueOi;
}
/**
 * Replaces the per-key-column sort orders used when serializing probe keys.
 *
 * @param sortableSortOrders the new sort orders; the array reference is stored without copying
 */
public void setSortableSortOrders(boolean[] sortableSortOrders) {
this.sortableSortOrders = sortableSortOrders;
}
/**
 * Replaces the null markers used when serializing probe keys.
 *
 * @param nullMarkers the new markers; the array reference is stored without copying
 */
public void setNullMarkers(byte[] nullMarkers) {
this.nullMarkers = nullMarkers;
}
/**
 * Replaces the not-null markers used when serializing probe keys.
 *
 * @param notNullMarkers the new markers; the array reference is stored without copying
 */
public void setNotNullMarkers(byte[] notNullMarkers) {
this.notNullMarkers = notNullMarkers;
}
/**
 * A key/value source for the hash map that can be re-pointed at a new key/value pair and
 * that reports its own estimated memory footprint.
 */
public static interface KeyValueHelper extends BytesBytesMultiHashMap.KvSource, MemoryEstimate {
/** Sets the key/value pair that subsequent write and hash calls operate on. */
void setKeyValue(Writable key, Writable val) throws SerDeException;
/** Get hash value from the key. */
int getHashFromKey() throws SerDeException;
}
private static class KeyValueWriter implements KeyValueHelper {
private final AbstractSerDe keySerDe, valSerDe;
private final StructObjectInspector keySoi, valSoi;
private final List keyOis, valOis;
private final Object[] keyObjs, valObjs;
private final boolean hasFilterTag;
public KeyValueWriter(
AbstractSerDe keySerDe, AbstractSerDe valSerDe, boolean hasFilterTag) throws SerDeException {
this.keySerDe = keySerDe;
this.valSerDe = valSerDe;
keySoi = (StructObjectInspector)keySerDe.getObjectInspector();
valSoi = (StructObjectInspector)valSerDe.getObjectInspector();
List extends StructField> keyFields = keySoi.getAllStructFieldRefs(),
valFields = valSoi.getAllStructFieldRefs();
keyOis = new ArrayList(keyFields.size());
valOis = new ArrayList(valFields.size());
for (int i = 0; i < keyFields.size(); ++i) {
keyOis.add(keyFields.get(i).getFieldObjectInspector());
}
for (int i = 0; i < valFields.size(); ++i) {
valOis.add(valFields.get(i).getFieldObjectInspector());
}
keyObjs = new Object[keyOis.size()];
valObjs = new Object[valOis.size()];
this.hasFilterTag = hasFilterTag;
}
@Override
public void writeKey(RandomAccessOutput dest) throws SerDeException {
LazyBinarySerDe.serializeStruct(dest, keyObjs, keyOis);
}
@Override
public void writeValue(RandomAccessOutput dest) throws SerDeException {
LazyBinarySerDe.serializeStruct(dest, valObjs, valOis);
}
@Override
public void setKeyValue(Writable key, Writable val) throws SerDeException {
Object keyObj = keySerDe.deserialize(key), valObj = valSerDe.deserialize(val);
List extends StructField> keyFields = keySoi.getAllStructFieldRefs(),
valFields = valSoi.getAllStructFieldRefs();
for (int i = 0; i < keyFields.size(); ++i) {
keyObjs[i] = keySoi.getStructFieldData(keyObj, keyFields.get(i));
}
for (int i = 0; i < valFields.size(); ++i) {
valObjs[i] = valSoi.getStructFieldData(valObj, valFields.get(i));
}
}
@Override
public byte updateStateByte(Byte previousValue) {
if (!hasFilterTag) return (byte)0xff;
byte aliasFilter = (previousValue == null) ? (byte)0xff : previousValue.byteValue();
aliasFilter &= ((ShortWritable)valObjs[valObjs.length - 1]).get();
return aliasFilter;
}
@Override
public int getHashFromKey() throws SerDeException {
throw new UnsupportedOperationException("Not supported for MapJoinBytesTableContainer");
}
@Override
public long getEstimatedMemorySize() {
JavaDataModel jdm = JavaDataModel.get();
long size = 0;
size += keySerDe == null ? 0 : jdm.object();
size += valSerDe == null ? 0 : jdm.object();
size += keySoi == null ? 0 : DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
size += valSoi == null ? 0 : DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
size += keyOis == null ? 0 : jdm.arrayList() + keyOis.size() * DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
size += valOis == null ? 0 : jdm.arrayList() + valOis.size() * DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
size += keyObjs == null ? 0 : jdm.array() + keyObjs.length * jdm.object();
size += valObjs == null ? 0 : jdm.array() + valObjs.length * jdm.object();
size += jdm.primitive1();
return size;
}
}
static class LazyBinaryKvWriter implements KeyValueHelper {
private final LazyBinaryStruct.SingleFieldGetter filterGetter;
private Writable key, value;
private final AbstractSerDe keySerDe;
private Boolean hasTag = null; // sanity check - we should not receive keys with tags
public LazyBinaryKvWriter(AbstractSerDe keySerDe, LazyBinaryStructObjectInspector valSoi,
boolean hasFilterTag) throws SerDeException {
this.keySerDe = keySerDe;
if (hasFilterTag) {
List extends StructField> fields = valSoi.getAllStructFieldRefs();
int ix = fields.size() - 1;
if (!(fields.get(ix).getFieldObjectInspector() instanceof ShortObjectInspector)) {
throw new SerDeException("Has filter tag, but corresponding OI is " +
fields.get(ix).getFieldObjectInspector());
}
filterGetter = new LazyBinaryStruct.SingleFieldGetter(valSoi, fields.size() - 1);
} else {
filterGetter = null;
}
}
@Override
public void writeKey(RandomAccessOutput dest) throws SerDeException {
if (!(key instanceof BinaryComparable)) {
throw new SerDeException("Unexpected type " + key.getClass().getCanonicalName());
}
sanityCheckKeyForTag();
BinaryComparable b = (BinaryComparable)key;
dest.write(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
}
@Override
public int getHashFromKey() throws SerDeException {
if (!(key instanceof BinaryComparable)) {
throw new SerDeException("Unexpected type " + key.getClass().getCanonicalName());
}
sanityCheckKeyForTag();
BinaryComparable b = (BinaryComparable)key;
return HashCodeUtil.murmurHash(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
}
/**
* If we received data with tags from ReduceSinkOperators, no keys will match. This should
* not happen, but is important enough that we want to find out and work around it if some
* optimized change causes RSO to pass on tags.
*/
private void sanityCheckKeyForTag() throws SerDeException {
if (hasTag != null) return;
BinaryComparable b = (BinaryComparable)key;
Object o = keySerDe.deserialize(key);
StructObjectInspector soi = (StructObjectInspector)keySerDe.getObjectInspector();
List extends StructField> fields = soi.getAllStructFieldRefs();
Object[] data = new Object[fields.size()];
List fois = new ArrayList(fields.size());
for (int i = 0; i < fields.size(); i++) {
data[i] = soi.getStructFieldData(o, fields.get(i));
fois.add(fields.get(i).getFieldObjectInspector());
}
Output output = new Output();
boolean[] sortableSortOrders = new boolean[fields.size()];
Arrays.fill(sortableSortOrders, false);
byte[] columnNullMarker = new byte[fields.size()];
Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
byte[] columnNotNullMarker = new byte[fields.size()];
Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
BinarySortableSerDe.serializeStruct(output, data, fois, sortableSortOrders,
columnNullMarker, columnNotNullMarker);
hasTag = (output.getLength() != b.getLength());
if (hasTag) {
LOG.error("Tag found in keys and will be removed. This should not happen.");
if (output.getLength() != (b.getLength() - 1)) {
throw new SerDeException(
"Unexpected tag: " + b.getLength() + " reserialized to " + output.getLength());
}
}
}
@Override
public void writeValue(RandomAccessOutput dest) throws SerDeException {
if (!(value instanceof BinaryComparable)) {
throw new SerDeException("Unexpected type " + value.getClass().getCanonicalName());
}
BinaryComparable b = (BinaryComparable)value;
dest.write(b.getBytes(), 0, b.getLength());
}
@Override
public void setKeyValue(Writable key, Writable val) {
this.key = key;
this.value = val;
}
@Override
public byte updateStateByte(Byte previousValue) {
if (filterGetter == null) return (byte)0xff;
byte aliasFilter = (previousValue == null) ? (byte)0xff : previousValue.byteValue();
filterGetter.init((BinaryComparable)value);
aliasFilter &= filterGetter.getShort();
return aliasFilter;
}
@Override
public long getEstimatedMemorySize() {
JavaDataModel jdm = JavaDataModel.get();
long size = 0;
size += (4 * jdm.object());
size += jdm.primitive1();
return size;
}
}
/**
 * KvSource implementation for callers that already hold both key and value as
 * {@link BytesWritable}; the bytes are handed to the hash map without any re-encoding.
 */
protected static class DirectKeyValueWriter implements KeyValueHelper {
  private BytesWritable key;
  private BytesWritable val;

  /** Stores the pair; both arguments are cast to {@link BytesWritable}. */
  @Override
  public void setKeyValue(Writable key, Writable val) throws SerDeException {
    this.key = (BytesWritable) key;
    this.val = (BytesWritable) val;
  }

  /** Copies the valid portion of the key's backing array into {@code dest}. */
  @Override
  public void writeKey(RandomAccessOutput dest) throws SerDeException {
    dest.write(key.getBytes(), 0, key.getLength());
  }

  /** Copies the valid portion of the value's backing array into {@code dest}. */
  @Override
  public void writeValue(RandomAccessOutput dest) throws SerDeException {
    dest.write(val.getBytes(), 0, val.getLength());
  }

  /** Unsupported for this writer. */
  @Override
  public byte updateStateByte(Byte previousValue) {
    // Not used by the direct access client -- native vector map join.
    throw new UnsupportedOperationException("Updating the state by not supported");
  }

  /** Murmur hash over the valid portion of the key's backing array. */
  @Override
  public int getHashFromKey() throws SerDeException {
    return HashCodeUtil.murmurHash(key.getBytes(), 0, key.getLength());
  }

  /** One object reference per slot plus the capacity of whichever writables are set. */
  @Override
  public long getEstimatedMemorySize() {
    final JavaDataModel model = JavaDataModel.get();
    long estimate = model.object() + (key != null ? key.getCapacity() : 0);
    estimate += model.object() + (val != null ? val.getCapacity() : 0);
    return estimate;
  }
}
/**
 * Picks the write helper for this container the first time it is called; later calls are
 * no-ops once a helper exists.
 *
 * With a BinarySortableSerDe key serde and a LazyBinarySerDe value serde the incoming bytes are
 * reused and the key serde's sort orders and markers are remembered. Any other combination
 * falls back to deserialize-and-reserialize, with sort orders and markers cleared.
 */
@Override
public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext)
    throws SerDeException {
  final AbstractSerDe keySerde = keyContext.getSerDe();
  final AbstractSerDe valSerde = valueContext.getSerDe();
  if (writeHelper != null) {
    return;
  }
  LOG.info("Initializing container with " + keySerde.getClass().getName() + " and "
      + valSerde.getClass().getName());

  final boolean canReuseBytes =
      keySerde instanceof BinarySortableSerDe && valSerde instanceof LazyBinarySerDe;
  if (!canReuseBytes) {
    writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag());
    internalValueOi = createInternalOi(valueContext);
    sortableSortOrders = null;
    nullMarkers = null;
    notNullMarkers = null;
    return;
  }

  final BinarySortableSerDe sortableKeySerde = (BinarySortableSerDe) keySerde;
  final LazyBinaryStructObjectInspector lazyValueOi =
      (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
  writeHelper = new LazyBinaryKvWriter(keySerde, lazyValueOi, valueContext.hasFilterTag());
  internalValueOi = lazyValueOi;
  sortableSortOrders = sortableKeySerde.getSortOrders();
  nullMarkers = sortableKeySerde.getNullMarkers();
  notNullMarkers = sortableKeySerde.getNotNullMarkers();
}
/**
 * Adds one key/value pair to the hash map through the write helper.
 *
 * The write helper is only assigned in setSerde, so calling this before setSerde
 * dereferences null.
 *
 * @return always null, because this container does not materialize key objects
 */
@SuppressWarnings("deprecation")
@Override
public MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException {
writeHelper.setKeyValue(currentKey, currentValue);
// NOTE(review): -1 presumably means "no precomputed hash"; confirm against BytesBytesMultiHashMap.put.
hashMap.put(writeHelper, -1);
return null; // there's no key to return
}
/** Intentionally does nothing; the backing hash map is left untouched. */
@Override
public void clear() {
// Don't clear the hash table - reuse is possible. GC will take care of it.
}
/** @return always null; this container stores serialized bytes, not key objects */
@Override
public MapJoinKey getAnyKey() {
return null; // This table has no keys.
}
/**
 * Creates a fresh lookup adaptor bound to this container.
 *
 * @param keyTypeFromLoader must be null, since this container never hands out key objects
 * @return a new adaptor
 * @throws AssertionError if a non-null key is supplied
 */
@Override
public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) {
  if (keyTypeFromLoader == null) {
    return new GetAdaptor();
  }
  throw new AssertionError("No key expected from loader but got " + keyTypeFromLoader);
}
/** Delegates to the backing hash map's {@code seal()}. */
@Override
public void seal() {
hashMap.seal();
}
// Direct access interfaces.
/**
 * Adds one key/value pair through the direct write helper, which casts both arguments to
 * BytesWritable and copies their bytes unchanged.
 *
 * The direct write helper is null on instances built from an existing hash map, in which
 * case this call dereferences null.
 */
@Override
public void put(Writable currentKey, Writable currentValue) throws SerDeException {
directWriteHelper.setKeyValue(currentKey, currentValue);
// NOTE(review): -1 presumably means "no precomputed hash"; confirm against BytesBytesMultiHashMap.put.
hashMap.put(directWriteHelper, -1);
}
public static boolean hasComplexObjects(LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
List extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
for (StructField field : fields) {
if (field.getFieldObjectInspector().getCategory() != Category.PRIMITIVE) {
return true;
}
}
return false;
}
/*
* For primitive types, use LazyBinary's object.
* For complex types, make a standard (Java) object from LazyBinary's object.
*/
public static List getComplexFieldsAsList(LazyBinaryStruct lazyBinaryStruct,
ArrayList objectArrayBuffer, LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
List extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
for (int i = 0; i < fields.size(); i++) {
StructField field = fields.get(i);
ObjectInspector objectInspector = field.getFieldObjectInspector();
Category category = objectInspector.getCategory();
Object object = lazyBinaryStruct.getField(i);
if (category == Category.PRIMITIVE) {
objectArrayBuffer.set(i, object);
} else {
objectArrayBuffer.set(i, ObjectInspectorUtils.copyToStandardObject(
object, objectInspector, ObjectInspectorCopyOption.WRITABLE));
}
}
return objectArrayBuffer;
}
/** Implementation of ReusableGetAdaptor that has Output for key serialization; row
 * container is also created once and reused for every row. */
private class GetAdaptor implements ReusableGetAdaptor, ReusableGetAdaptorDirectAccess {
  /** Key field objects of the most recent lookup; allocated on first use. */
  private Object[] currentKey;
  /** Per-field null flags matching {@code currentKey}; allocated on first use. */
  private boolean[] nulls;
  /** Object inspectors of the vectorized key writers; built on the first setFromVector call. */
  private List<ObjectInspector> vectorKeyOIs;
  private final ReusableRowContainer currentValue;
  private final Output output;

  public GetAdaptor() {
    currentValue = new ReusableRowContainer();
    output = new Output();
  }

  /**
   * Extracts the key fields from a vectorized key wrapper, serializes them and looks the
   * result up in the hash map.
   */
  @Override
  public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapper kw,
      VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch)
      throws HiveException {
    if (nulls == null) {
      nulls = new boolean[keyOutputWriters.length];
      currentKey = new Object[keyOutputWriters.length];
      vectorKeyOIs = new ArrayList<ObjectInspector>();
      for (int i = 0; i < keyOutputWriters.length; i++) {
        vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector());
      }
    } else {
      assert nulls.length == keyOutputWriters.length;
    }
    for (int i = 0; i < keyOutputWriters.length; i++) {
      currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]);
      nulls[i] = currentKey[i] == null;
    }
    return currentValue.setFromOutput(
        MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs,
            sortableSortOrders, nullMarkers, notNullMarkers));
  }

  /**
   * Evaluates each key expression against {@code row}, serializes the results and looks them
   * up in the hash map.
   */
  @Override
  public JoinUtil.JoinResult setFromRow(Object row, List<ExprNodeEvaluator> fields,
      List<ObjectInspector> ois) throws HiveException {
    if (nulls == null) {
      nulls = new boolean[fields.size()];
      currentKey = new Object[fields.size()];
    }
    for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) {
      currentKey[keyIndex] = fields.get(keyIndex).evaluate(row);
      nulls[keyIndex] = currentKey[keyIndex] == null;
    }
    return currentValue.setFromOutput(
        MapJoinKey.serializeRow(output, currentKey, ois,
            sortableSortOrders, nullMarkers, notNullMarkers));
  }

  /**
   * Reuses another adaptor's already-serialized key: shares its key and null arrays by
   * reference and looks its output bytes up in this container's hash map.
   */
  @Override
  public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) {
    assert other instanceof GetAdaptor;
    GetAdaptor other2 = (GetAdaptor) other;
    nulls = other2.nulls;
    currentKey = other2.currentKey;
    return currentValue.setFromOutput(other2.output);
  }

  /**
   * Reports whether any key field of the last lookup was null and not marked null-safe.
   * {@code fieldCount} is not consulted; the length of the internal null array is used.
   */
  @Override
  public boolean hasAnyNulls(int fieldCount, boolean[] nullsafes) {
    if (nulls == null || nulls.length == 0) {
      return false;
    }
    for (int i = 0; i < nulls.length; i++) {
      if (nulls[i] && (nullsafes == null || !nullsafes[i])) {
        return true;
      }
    }
    return false;
  }

  /** @return the reusable row container, or null when the last lookup produced no rows */
  @Override
  public MapJoinRowContainer getCurrentRows() {
    return !currentValue.hasRows() ? null : currentValue;
  }

  /** @return the internal key array itself (not a copy) */
  @Override
  public Object[] getCurrentKey() {
    return currentKey;
  }

  // Direct access interfaces.
  /** Looks up already-serialized key bytes, placing the result in the caller's result object. */
  @Override
  public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
      BytesBytesMultiHashMap.Result hashMapResult) {
    return currentValue.setDirect(bytes, offset, length, hashMapResult);
  }

  /** Unsupported for this adaptor. */
  @Override
  public int directSpillPartitionId() {
    throw new UnsupportedOperationException("Getting the spill hash partition not supported");
  }
}
/** Row container that gets and deserializes the rows on demand from bytes provided. */
private class ReusableRowContainer
implements MapJoinRowContainer, AbstractRowContainer.RowIterator> {
private byte aliasFilter;
/** Hash table wrapper specific to the container. */
private final BytesBytesMultiHashMap.Result hashMapResult;
/**
* Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row.
* This container does not normally support adding rows; this is for the dummy row.
*/
private List dummyRow = null;
private final ByteArrayRef uselessIndirection; // LBStruct needs ByteArrayRef
private final LazyBinaryStruct valueStruct;
private final boolean needsComplexObjectFixup;
private final ArrayList complexObjectArrayBuffer;
public ReusableRowContainer() {
if (internalValueOi != null) {
valueStruct = (LazyBinaryStruct)
LazyBinaryFactory.createLazyBinaryObject(internalValueOi);
needsComplexObjectFixup = hasComplexObjects(internalValueOi);
if (needsComplexObjectFixup) {
complexObjectArrayBuffer =
new ArrayList(
Collections.nCopies(internalValueOi.getAllStructFieldRefs().size(), null));
} else {
complexObjectArrayBuffer = null;
}
} else {
valueStruct = null; // No rows?
needsComplexObjectFixup = false;
complexObjectArrayBuffer = null;
}
uselessIndirection = new ByteArrayRef();
hashMapResult = new BytesBytesMultiHashMap.Result();
clearRows();
}
public JoinUtil.JoinResult setFromOutput(Output output) {
aliasFilter = hashMap.getValueResult(
output.getData(), 0, output.getLength(), hashMapResult);
dummyRow = null;
if (hashMapResult.hasRows()) {
return JoinUtil.JoinResult.MATCH;
} else {
aliasFilter = (byte) 0xff;
return JoinUtil.JoinResult.NOMATCH;
}
}
@Override
public boolean hasRows() {
return hashMapResult.hasRows() || (dummyRow != null);
}
@Override
public boolean isSingleRow() {
if (!hashMapResult.hasRows()) {
return (dummyRow != null);
}
return hashMapResult.isSingleRow();
}
// Implementation of row container
@Override
public AbstractRowContainer.RowIterator> rowIter() throws HiveException {
return this;
}
@Override
public int rowCount() throws HiveException {
// For performance reasons we do not want to chase the values to the end to determine
// the count. Use hasRows and isSingleRow instead.
throw new UnsupportedOperationException("Getting the row count not supported");
}
@Override
public void clearRows() {
// Doesn't clear underlying hashtable
hashMapResult.forget();
dummyRow = null;
aliasFilter = (byte) 0xff;
}
@Override
public byte getAliasFilter() throws HiveException {
return aliasFilter;
}
@Override
public MapJoinRowContainer copy() throws HiveException {
return this; // Independent of hashtable and can be modified, no need to copy.
}
// Implementation of row iterator
@Override
public List first() throws HiveException {
// A little strange that we forget the dummy row on read.
if (dummyRow != null) {
List result = dummyRow;
dummyRow = null;
return result;
}
WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
if (byteSegmentRef == null) {
return null;
} else {
return unpack(byteSegmentRef);
}
}
@Override
public List next() throws HiveException {
WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next();
if (byteSegmentRef == null) {
return null;
} else {
return unpack(byteSegmentRef);
}
}
private List unpack(WriteBuffers.ByteSegmentRef ref) throws HiveException {
if (ref.getLength() == 0) {
return EMPTY_LIST; // shortcut, 0 length means no fields
}
uselessIndirection.setData(ref.getBytes());
valueStruct.init(uselessIndirection, (int)ref.getOffset(), ref.getLength());
List result;
if (!needsComplexObjectFixup) {
// Good performance for common case where small table has no complex objects.
result = valueStruct.getFieldsAsList();
} else {
// Convert the complex LazyBinary objects to standard (Java) objects so downstream
// operators like FileSinkOperator can serialize complex objects in the form they expect
// (i.e. Java objects).
result = getComplexFieldsAsList(
valueStruct, complexObjectArrayBuffer, internalValueOi);
}
return result;
}
@Override
public void addRow(List t) {
if (dummyRow != null || hashMapResult.hasRows()) {
throw new RuntimeException("Cannot add rows when not empty");
}
dummyRow = t;
}
// Various unsupported methods.
@Override
public void addRow(Object[] value) {
throw new RuntimeException(this.getClass().getCanonicalName() + " cannot add arrays");
}
@Override
public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out) {
throw new RuntimeException(this.getClass().getCanonicalName() + " cannot be serialized");
}
// Direct access.
public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
BytesBytesMultiHashMap.Result hashMapResult) {
aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult);
dummyRow = null;
if (hashMapResult.hasRows()) {
return JoinUtil.JoinResult.MATCH;
} else {
aliasFilter = (byte) 0xff;
return JoinUtil.JoinResult.NOMATCH;
}
}
}
public static boolean isSupportedKey(ObjectInspector keyOi) {
List extends StructField> keyFields = ((StructObjectInspector)keyOi).getAllStructFieldRefs();
for (StructField field : keyFields) {
if (!MapJoinKey.isSupportedField(field.getFieldObjectInspector())) {
return false;
}
}
return true;
}
/** Delegates to the backing hash map's {@code debugDumpMetrics()}. */
@Override
public void dumpMetrics() {
hashMap.debugDumpMetrics();
}
/** @return always false for this container */
@Override
public boolean hasSpill() {
return false;
}
/** @return the size reported by the backing hash map */
@Override
public int size() {
return hashMap.size();
}
/**
 * Estimates this container's memory footprint: the backing hash map, whichever write helpers
 * and marker arrays are present, the empty list, and a flat default charge for the value
 * object inspector.
 */
@Override
public long getEstimatedMemorySize() {
  final JavaDataModel model = JavaDataModel.get();
  long total = hashMap.getEstimatedMemorySize();
  if (directWriteHelper != null) {
    total += directWriteHelper.getEstimatedMemorySize();
  }
  if (writeHelper != null) {
    total += writeHelper.getEstimatedMemorySize();
  }
  if (sortableSortOrders != null) {
    total += model.lengthForBooleanArrayOfSize(sortableSortOrders.length);
  }
  if (nullMarkers != null) {
    total += model.lengthForByteArrayOfSize(nullMarkers.length);
  }
  if (notNullMarkers != null) {
    total += model.lengthForByteArrayOfSize(notNullMarkers.length);
  }
  total += model.arrayList(); // empty list
  total += DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
  return total;
}
}