All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.persistence;


import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.MemoryEstimate;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator;
import org.apache.hadoop.hive.ql.exec.JoinUtil;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.NonMatchedSmallTableIterator;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBase;
import org.apache.hadoop.hive.ql.exec.vector.wrapper.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.util.JavaDataModel;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.ByteStream.RandomAccessOutput;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.WriteBuffers;
import org.apache.hadoop.hive.serde2.WriteBuffers.ByteSegmentRef;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazy.ByteArrayRef;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryFactory;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryStruct;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.lazybinary.objectinspector.LazyBinaryStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.ShortObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.BinaryComparable;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hive.common.util.HashCodeUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

/**
 * Table container that serializes keys and values using LazyBinarySerDe into
 * BytesBytesMultiHashMap, with very low memory overhead. However,
 * there may be some perf overhead when retrieving rows.
 */
public class MapJoinBytesTableContainer
         implements MapJoinTableContainer, MapJoinTableContainerDirectAccess {
  // NOTE(review): the logger is created for MapJoinTableContainer.class rather than for this class,
  // so messages emitted here are logged under that name -- confirm whether this is intentional.
  private static final Logger LOG = LoggerFactory.getLogger(MapJoinTableContainer.class);

  // TODO: For object inspector fields, assigning 16KB for now. To estimate the memory size better,
  // every object inspector would have to implement the MemoryEstimate interface, which is a lot of
  // change with little benefit compared to writing an instrumentation agent for object size estimation.
  public static final long DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE = 16 * 1024L;

  /** Backing store: rows are written by putRow()/put() and probed by the row container. */
  private final BytesBytesMultiHashMap hashMap;
  /** The OI used to deserialize values. We never deserialize keys. */
  private LazyBinaryStructObjectInspector internalValueOi;
  /**
   * This is kind of hacky. Currently we get BinarySortableSerDe-serialized keys; we could
   * re-serialize them into LazyBinarySerDe, but instead we just reuse the bytes. However, to
   * compare the large table keys correctly when we do, we need to serialize them with correct
   * ordering. Hence, remember the ordering here; it is null if we do use LazyBinarySerDe.
   */
  private AbstractSerDe keySerde;
  // The next three arrays are copied from the BinarySortableSerDe key serde in setSerde() (or set to
  // null on the other path) and are handed to MapJoinKey.serializeRow when probe keys are serialized.
  private boolean[] sortableSortOrders;
  private byte[] nullMarkers;
  private byte[] notNullMarkers;
  /** Helper used by putRow(); chosen in setSerde() the first time it is called. */
  private KeyValueHelper writeHelper;
  /** Helper used by put(); only assigned by the private sizing constructor. */
  private DirectKeyValueWriter directWriteHelper;

  // Returned by ReusableRowContainer.unpack() for zero-length values.
  // NOTE(review): this is a mutable per-instance list despite the constant-style name.
  private final List EMPTY_LIST = new ArrayList(0);
  /** Opaque identifier exposed through setKey()/getKey(). */
  private String key;

  /**
   * Creates a container whose sizing parameters are read from the given configuration.
   *
   * <p>Reads HIVEHASHTABLEKEYCOUNTADJUSTMENT, HIVEHASHTABLETHRESHOLD, HIVEHASHTABLELOADFACTOR and
   * HIVEHASHTABLEWBSIZE from {@code hconf} and delegates to the private sizing constructor.
   *
   * @param hconf configuration the four hash table settings are read from
   * @param valCtx value serde context, forwarded unchanged
   * @param keyCount key count, forwarded unchanged
   * @param memUsage memory usage figure, forwarded unchanged
   * @throws SerDeException declared by the delegated constructor
   */
  public MapJoinBytesTableContainer(Configuration hconf,
      MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage) throws SerDeException {
    this(HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEKEYCOUNTADJUSTMENT),
        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLETHRESHOLD),
        HiveConf.getFloatVar(hconf, HiveConf.ConfVars.HIVEHASHTABLELOADFACTOR),
        HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEHASHTABLEWBSIZE),
        valCtx, keyCount, memUsage);
  }

  private MapJoinBytesTableContainer(float keyCountAdj, int threshold, float loadFactor,
      int wbSize, MapJoinObjectSerDeContext valCtx, long keyCount, long memUsage)
          throws SerDeException {
    int newThreshold = HashMapWrapper.calculateTableSize(
        keyCountAdj, threshold, loadFactor, keyCount);
    hashMap = new BytesBytesMultiHashMap(newThreshold, loadFactor, wbSize, memUsage);
    directWriteHelper = new DirectKeyValueWriter();
  }

  /**
   * Wraps an already existing hash map.
   *
   * <p>NOTE(review): unlike the sizing constructor, this one leaves {@code directWriteHelper}
   * unset, so calling {@code put(Writable, Writable)} on an instance created here would throw a
   * NullPointerException -- confirm that such instances are only ever read from.
   *
   * @param hashMap the hash map to use as backing store
   */
  public MapJoinBytesTableContainer(BytesBytesMultiHashMap hashMap) {
    this.hashMap = hashMap;
  }

  /**
   * Builds the LazyBinary struct object inspector that mirrors the value serde's struct.
   *
   * <p>For every field of the value serde's struct inspector, the field name is kept and a
   * LazyBinary object inspector is derived from the field inspector's type name.
   *
   * @param valCtx context providing the value serde
   * @return a LazyBinary struct inspector with the same column names as the value struct
   * @throws SerDeException if the value serde cannot provide its object inspector
   */
  private LazyBinaryStructObjectInspector createInternalOi(
      MapJoinObjectSerDeContext valCtx) throws SerDeException {
    // We are going to use LBSerDe to serialize values; create OI for retrieval.
    List<? extends StructField> fields = ((StructObjectInspector)
        valCtx.getSerDe().getObjectInspector()).getAllStructFieldRefs();
    List<String> colNames = new ArrayList<String>(fields.size());
    List<ObjectInspector> colOis = new ArrayList<ObjectInspector>(fields.size());
    for (StructField field : fields) {
      colNames.add(field.getFieldName());
      // It would be nice if OI could return typeInfo...
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(
          field.getFieldObjectInspector().getTypeName());
      colOis.add(LazyBinaryUtils.getLazyBinaryObjectInspectorFromTypeInfo(typeInfo));
    }

    return LazyBinaryObjectInspectorFactory
        .getLazyBinaryStructObjectInspector(colNames, colOis);
  }

  /**
   * Replaces the object inspector used to deserialize values.
   *
   * @param internalValueOi the LazyBinary struct inspector to store
   */
  public void setInternalValueOi(LazyBinaryStructObjectInspector internalValueOi) {
    this.internalValueOi = internalValueOi;
  }

  /**
   * Stores the sort-order flags that are passed to {@code MapJoinKey.serializeRow} when probe
   * keys are serialized. The array is kept by reference, not copied.
   *
   * @param sortableSortOrders sort-order flags, or null
   */
  public void setSortableSortOrders(boolean[] sortableSortOrders) {
    this.sortableSortOrders = sortableSortOrders;
  }

  /**
   * Stores the null markers that are passed to {@code MapJoinKey.serializeRow} when probe keys
   * are serialized. The array is kept by reference, not copied.
   *
   * @param nullMarkers null marker bytes, or null
   */
  public void setNullMarkers(byte[] nullMarkers) {
    this.nullMarkers = nullMarkers;
  }

  /**
   * Stores the not-null markers that are passed to {@code MapJoinKey.serializeRow} when probe
   * keys are serialized. The array is kept by reference, not copied.
   *
   * @param notNullMarkers not-null marker bytes, or null
   */
  public void setNotNullMarkers(byte[] notNullMarkers) {
    this.notNullMarkers = notNullMarkers;
  }

  /**
   * A key/value source for the hash map that can be re-pointed at a new pair before every put
   * and that reports its own memory footprint.
   */
  public interface KeyValueHelper extends BytesBytesMultiHashMap.KvSource, MemoryEstimate {

    /**
     * Points this helper at the pair that the next write should use.
     *
     * @param key the key to write
     * @param val the value to write
     * @throws SerDeException if the pair cannot be prepared
     */
    void setKeyValue(Writable key, Writable val) throws SerDeException;

    /**
     * Get hash value from the key.
     *
     * @return the hash of the current key
     * @throws SerDeException if the key cannot be hashed
     */
    int getHashFromKey() throws SerDeException;
  }

  /**
   * {@link KeyValueHelper} that deserializes each key and value with the supplied serdes and
   * re-serializes their fields with LazyBinarySerDe when writing into the hash map.
   */
  private static class KeyValueWriter implements KeyValueHelper {
    private final AbstractSerDe keySerDe, valSerDe;
    private final StructObjectInspector keySoi, valSoi;
    /** Field inspectors of the key/value structs, in field order. */
    private final List<ObjectInspector> keyOis, valOis;
    /** Reused buffers holding the field data of the current key/value. */
    private final Object[] keyObjs, valObjs;
    private final boolean hasFilterTag;

    /**
     * @param keySerDe serde used to deserialize keys; its inspector must be a struct inspector
     * @param valSerDe serde used to deserialize values; its inspector must be a struct inspector
     * @param hasFilterTag whether the last value field is a filter tag read by
     *     {@link #updateStateByte(Byte)}
     * @throws SerDeException if a serde cannot provide its object inspector
     */
    public KeyValueWriter(
        AbstractSerDe keySerDe, AbstractSerDe valSerDe, boolean hasFilterTag) throws SerDeException {
      this.keySerDe = keySerDe;
      this.valSerDe = valSerDe;
      keySoi = (StructObjectInspector) keySerDe.getObjectInspector();
      valSoi = (StructObjectInspector) valSerDe.getObjectInspector();
      keyOis = fieldInspectors(keySoi);
      valOis = fieldInspectors(valSoi);
      keyObjs = new Object[keyOis.size()];
      valObjs = new Object[valOis.size()];
      this.hasFilterTag = hasFilterTag;
    }

    /** Collects the field object inspectors of a struct inspector, in field order. */
    private static List<ObjectInspector> fieldInspectors(StructObjectInspector soi) {
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      List<ObjectInspector> ois = new ArrayList<ObjectInspector>(fields.size());
      for (int i = 0; i < fields.size(); ++i) {
        ois.add(fields.get(i).getFieldObjectInspector());
      }
      return ois;
    }

    /** Copies the field data of {@code struct} into {@code dest}, one slot per struct field. */
    private static void copyFields(StructObjectInspector soi, Object struct, Object[] dest) {
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      for (int i = 0; i < fields.size(); ++i) {
        dest[i] = soi.getStructFieldData(struct, fields.get(i));
      }
    }

    @Override
    public void writeKey(RandomAccessOutput dest) throws SerDeException {
      LazyBinarySerDe.serializeStruct(dest, keyObjs, keyOis);
    }

    @Override
    public void writeValue(RandomAccessOutput dest) throws SerDeException {
      LazyBinarySerDe.serializeStruct(dest, valObjs, valOis);
    }

    @Override
    public void setKeyValue(Writable key, Writable val) throws SerDeException {
      Object keyObj = keySerDe.deserialize(key);
      Object valObj = valSerDe.deserialize(val);
      copyFields(keySoi, keyObj, keyObjs);
      copyFields(valSoi, valObj, valObjs);
    }

    /**
     * Without a filter tag this always returns 0xff; otherwise the previous state (0xff when
     * there is none) is AND-ed with the short held in the last value field.
     */
    @Override
    public byte updateStateByte(Byte previousValue) {
      if (!hasFilterTag) {
        return (byte) 0xff;
      }
      byte aliasFilter = (previousValue == null) ? (byte) 0xff : previousValue.byteValue();
      aliasFilter &= ((ShortWritable) valObjs[valObjs.length - 1]).get();
      return aliasFilter;
    }

    @Override
    public int getHashFromKey() throws SerDeException {
      throw new UnsupportedOperationException("Not supported for MapJoinBytesTableContainer");
    }

    @Override
    public long getEstimatedMemorySize() {
      JavaDataModel jdm = JavaDataModel.get();
      long size = 0;
      size += keySerDe == null ? 0 : jdm.object();
      size += valSerDe == null ? 0 : jdm.object();
      size += keySoi == null ? 0 : DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
      size += valSoi == null ? 0 : DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
      size += keyOis == null ? 0 : jdm.arrayList() + keyOis.size() * DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
      size += valOis == null ? 0 : jdm.arrayList() + valOis.size() * DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
      size += keyObjs == null ? 0 : jdm.array() + keyObjs.length * jdm.object();
      size += valObjs == null ? 0 : jdm.array() + valObjs.length * jdm.object();
      size += jdm.primitive1();
      return size;
    }
  }

  /**
   * {@link KeyValueHelper} that writes the incoming key and value bytes as they are, without
   * re-serializing them. The first key seen is re-serialized once to detect whether keys carry
   * an extra trailing tag byte; if so, that byte is dropped when writing and hashing keys.
   */
  static class LazyBinaryKvWriter implements KeyValueHelper {
    /** Reads the filter tag from the last value field; null when there is no filter tag. */
    private final LazyBinaryStruct.SingleFieldGetter filterGetter;
    private Writable key, value;
    private final AbstractSerDe keySerDe;
    private Boolean hasTag = null; // sanity check - we should not receive keys with tags

    /**
     * @param keySerDe serde used only by the one-off tag sanity check
     * @param valSoi struct inspector describing the values
     * @param hasFilterTag whether the last value field is a filter tag
     * @throws SerDeException if a filter tag is announced but the last field's inspector is not
     *     a ShortObjectInspector
     */
    public LazyBinaryKvWriter(AbstractSerDe keySerDe, LazyBinaryStructObjectInspector valSoi,
        boolean hasFilterTag) throws SerDeException {
      this.keySerDe = keySerDe;
      if (hasFilterTag) {
        List<? extends StructField> fields = valSoi.getAllStructFieldRefs();
        int ix = fields.size() - 1;
        if (!(fields.get(ix).getFieldObjectInspector() instanceof ShortObjectInspector)) {
          throw new SerDeException("Has filter tag, but corresponding OI is " +
              fields.get(ix).getFieldObjectInspector());
        }
        filterGetter = new LazyBinaryStruct.SingleFieldGetter(valSoi, ix);
      } else {
        filterGetter = null;
      }
    }

    /**
     * Validates the current key's type, runs the one-off tag check, and returns the key.
     * After this returns, {@code hasTag} is guaranteed to be non-null.
     */
    private BinaryComparable checkedKey() throws SerDeException {
      if (!(key instanceof BinaryComparable)) {
        throw new SerDeException("Unexpected type " + key.getClass().getCanonicalName());
      }
      sanityCheckKeyForTag();
      return (BinaryComparable) key;
    }

    @Override
    public void writeKey(RandomAccessOutput dest) throws SerDeException {
      BinaryComparable b = checkedKey();
      // Drop the trailing tag byte, if one was detected.
      dest.write(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
    }

    @Override
    public int getHashFromKey() throws SerDeException {
      BinaryComparable b = checkedKey();
      return HashCodeUtil.murmurHash(b.getBytes(), 0, b.getLength() - (hasTag ? 1 : 0));
    }

    /**
     * If we received data with tags from ReduceSinkOperators, no keys will match. This should
     * not happen, but is important enough that we want to find out and work around it if some
     * optimized change causes RSO to pass on tags.
     *
     * <p>Runs only once: the current key is deserialized and re-serialized with
     * BinarySortableSerDe, and the two lengths are compared.
     *
     * @throws SerDeException if the lengths differ by anything other than exactly one byte
     */
    private void sanityCheckKeyForTag() throws SerDeException {
      if (hasTag != null) {
        return;
      }
      BinaryComparable b = (BinaryComparable) key;
      Object o = keySerDe.deserialize(key);
      StructObjectInspector soi = (StructObjectInspector) keySerDe.getObjectInspector();
      List<? extends StructField> fields = soi.getAllStructFieldRefs();
      Object[] data = new Object[fields.size()];
      List<ObjectInspector> fois = new ArrayList<ObjectInspector>(fields.size());
      for (int i = 0; i < fields.size(); i++) {
        data[i] = soi.getStructFieldData(o, fields.get(i));
        fois.add(fields.get(i).getFieldObjectInspector());
      }
      Output output = new Output();
      boolean[] sortableSortOrders = new boolean[fields.size()];
      Arrays.fill(sortableSortOrders, false);
      byte[] columnNullMarker = new byte[fields.size()];
      Arrays.fill(columnNullMarker, BinarySortableSerDe.ZERO);
      byte[] columnNotNullMarker = new byte[fields.size()];
      Arrays.fill(columnNotNullMarker, BinarySortableSerDe.ONE);
      BinarySortableSerDe.serializeStruct(output, data, fois, sortableSortOrders,
              columnNullMarker, columnNotNullMarker);
      hasTag = (output.getLength() != b.getLength());
      if (hasTag) {
        LOG.error("Tag found in keys and will be removed. This should not happen.");
        if (output.getLength() != (b.getLength() - 1)) {
          throw new SerDeException(
              "Unexpected tag: " + b.getLength() + " reserialized to " + output.getLength());
        }
      }
    }

    @Override
    public void writeValue(RandomAccessOutput dest) throws SerDeException {
      if (!(value instanceof BinaryComparable)) {
        throw new SerDeException("Unexpected type " + value.getClass().getCanonicalName());
      }
      BinaryComparable b = (BinaryComparable) value;
      dest.write(b.getBytes(), 0, b.getLength());
    }

    @Override
    public void setKeyValue(Writable key, Writable val) {
      this.key = key;
      this.value = val;
    }

    @Override
    public byte updateStateByte(Byte previousValue) {
      // NOTE(review): hasTag is a nullable Boolean assigned only by sanityCheckKeyForTag(), so
      // this unboxing throws NullPointerException if neither writeKey() nor getHashFromKey() has
      // run yet. Also, hasTag describes a tag on the KEY; with hasTag == false this returns 0xff
      // even when a filter getter exists -- confirm that this condition is what is intended.
      if (!hasTag || filterGetter == null) {
        return (byte) 0xff;
      }
      byte aliasFilter = (previousValue == null) ? (byte) 0xff : previousValue.byteValue();
      BinaryComparable binaryComparableValue = (BinaryComparable) value;
      if (binaryComparableValue.getLength() == 0) {

        // Skip empty values just like MapJoinEagerRowContainer.read does.
        return (byte) 0xff;
      }
      filterGetter.init(binaryComparableValue);
      aliasFilter &= filterGetter.getShort();
      return aliasFilter;
    }

    @Override
    public long getEstimatedMemorySize() {
      JavaDataModel jdm = JavaDataModel.get();
      long size = 0;
      size += (4 * jdm.object());
      size += jdm.primitive1();
      return size;
    }
  }

  /**
   * A {@link KeyValueHelper} for callers that already hold the key and the value as
   * {@link BytesWritable}: the bytes are copied to the destination unchanged.
   */
  protected static class DirectKeyValueWriter implements KeyValueHelper {

    private BytesWritable key;
    private BytesWritable val;

    /** Both arguments must be {@link BytesWritable}; anything else fails the cast. */
    @Override
    public void setKeyValue(Writable key, Writable val) throws SerDeException {
      this.key = (BytesWritable) key;
      this.val = (BytesWritable) val;
    }

    @Override
    public void writeKey(RandomAccessOutput dest) throws SerDeException {
      dest.write(key.getBytes(), 0, key.getLength());
    }

    @Override
    public void writeValue(RandomAccessOutput dest) throws SerDeException {
      dest.write(val.getBytes(), 0, val.getLength());
    }

    @Override
    public byte updateStateByte(Byte previousValue) {
      // Not used by the direct access client -- native vector map join.
      throw new UnsupportedOperationException("Updating the state by not supported");
    }

    /** Murmur hash over the valid bytes of the current key. */
    @Override
    public int getHashFromKey() throws SerDeException {
      return HashCodeUtil.murmurHash(key.getBytes(), 0, key.getLength());
    }

    /** One object reference per field, plus the capacity of whichever writables are set. */
    @Override
    public long getEstimatedMemorySize() {
      final JavaDataModel model = JavaDataModel.get();
      final int keyCapacity = (key == null) ? 0 : key.getCapacity();
      final int valCapacity = (val == null) ? 0 : val.getCapacity();
      long total = 0;
      total += model.object() + keyCapacity;
      total += model.object() + valCapacity;
      return total;
    }
  }

  /**
   * Records the key serde and, the first time it is called, picks the write helper.
   *
   * <p>When the key serde is a BinarySortableSerDe and the value serde a LazyBinarySerDe, the
   * incoming bytes are reused (LazyBinaryKvWriter) and the key serde's sort orders and markers
   * are remembered. Otherwise keys and values are re-serialized (KeyValueWriter), a matching
   * value inspector is built, and sort orders and markers are set to null. Once a write helper
   * exists, later calls only update the stored key serde.
   */
  @Override
  public void setSerde(MapJoinObjectSerDeContext keyContext, MapJoinObjectSerDeContext valueContext)
      throws SerDeException {
    keySerde = keyContext.getSerDe();
    AbstractSerDe valSerde = valueContext.getSerDe();
    if (writeHelper != null) {
      return; // already initialized
    }

    LOG.info("Initializing container with " + keySerde.getClass().getName() + " and "
        + valSerde.getClass().getName());

    boolean canReuseBytes =
        keySerde instanceof BinarySortableSerDe && valSerde instanceof LazyBinarySerDe;
    if (!canReuseBytes) {
      writeHelper = new KeyValueWriter(keySerde, valSerde, valueContext.hasFilterTag());
      internalValueOi = createInternalOi(valueContext);
      sortableSortOrders = null;
      nullMarkers = null;
      notNullMarkers = null;
      return;
    }

    LazyBinaryStructObjectInspector lazyValueOi =
        (LazyBinaryStructObjectInspector) valSerde.getObjectInspector();
    writeHelper = new LazyBinaryKvWriter(keySerde, lazyValueOi, valueContext.hasFilterTag());
    internalValueOi = lazyValueOi;
    BinarySortableSerDe sortableKeySerde = (BinarySortableSerDe) keySerde;
    sortableSortOrders = sortableKeySerde.getSortOrders();
    nullMarkers = sortableKeySerde.getNullMarkers();
    notNullMarkers = sortableKeySerde.getNotNullMarkers();
  }

  /**
   * Stores the identifier later returned by {@code getKey()}.
   *
   * @param key the identifier to store
   */
  @Override
  public void setKey(String key) {
    this.key = key;
  }

  /**
   * @return the identifier last passed to {@code setKey}, or null if none was set
   */
  @Override
  public String getKey() {
    return this.key;
  }

  /**
   * Adds one key/value pair through the write helper chosen by {@code setSerde}.
   *
   * @return always null, because this container does not materialize keys
   * @throws SerDeException if the helper cannot prepare the pair
   */
  @SuppressWarnings("deprecation")
  @Override
  public MapJoinKey putRow(Writable currentKey, Writable currentValue) throws SerDeException {
    final KeyValueHelper helper = writeHelper;
    helper.setKeyValue(currentKey, currentValue);
    hashMap.put(helper, -1);
    // there's no key to return
    return null;
  }

  /** Intentionally does nothing; the hash map is left untouched. */
  @Override
  public void clear() {
    // Don't clear the hash table - reuse is possible. GC will take care of it.
  }

  /**
   * @return always null; this container never holds MapJoinKey objects
   */
  @Override
  public MapJoinKey getAnyKey() {
    return null; // This table has no keys.
  }

  /**
   * Creates a new lookup adaptor bound to this container.
   *
   * @param keyTypeFromLoader must be null, since this container has no key objects
   * @return a fresh adaptor
   * @throws AssertionError if a key is supplied
   */
  @Override
  public ReusableGetAdaptor createGetter(MapJoinKey keyTypeFromLoader) {
    if (keyTypeFromLoader == null) {
      return new GetAdaptor();
    }
    throw new AssertionError("No key expected from loader but got " + keyTypeFromLoader);
  }

  /**
   * Creates an iterator implementation bound to the given match tracker.
   *
   * @param matchTracker tracker handed to the iterator's constructor
   * @return a new iterator
   */
  @Override
  public NonMatchedSmallTableIterator createNonMatchedSmallTableIterator(
      MatchTracker matchTracker) {
    NonMatchedSmallTableIterator nonMatched = new NonMatchedSmallTableIteratorImpl(matchTracker);
    return nonMatched;
  }

  /** Seals the container by delegating to the backing hash map's {@code seal()}. */
  @Override
  public void seal() {
    hashMap.seal();
  }

  // Direct access interfaces.

  /**
   * Direct-access variant of {@code putRow}: stores the pair through the direct write helper,
   * which expects both arguments to be BytesWritable.
   *
   * @throws SerDeException declared by the helper
   */
  @Override
  public void put(Writable currentKey, Writable currentValue) throws SerDeException {
    final DirectKeyValueWriter writer = directWriteHelper;
    writer.setKeyValue(currentKey, currentValue);
    hashMap.put(writer, -1);
  }

  /**
   * Tells whether a struct has at least one field whose category is not PRIMITIVE.
   *
   * @param lazyBinaryStructObjectInspector the struct inspector to examine
   * @return true if any field is non-primitive; false otherwise, including for a struct with no
   *     fields
   */
  public static boolean hasComplexObjects(LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {
    List<? extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();

    for (StructField field : fields) {
      if (field.getFieldObjectInspector().getCategory() != Category.PRIMITIVE) {
        return true;
      }
    }
    return false;
  }

  /**
   * Fills the given buffer with the struct's field values and returns that same buffer.
   *
   * <p>For primitive types, use LazyBinary's object.
   * For complex types, make a standard (Java) object from LazyBinary's object.
   *
   * @param lazyBinaryStruct the struct to read fields from
   * @param objectArrayBuffer reused output buffer; slots are overwritten with {@code set}, so it
   *     must already contain at least as many elements as the struct has fields
   * @param lazyBinaryStructObjectInspector inspector describing the struct's fields
   * @return {@code objectArrayBuffer}, after being filled
   */
  public static List<Object> getComplexFieldsAsList(LazyBinaryStruct lazyBinaryStruct,
      ArrayList<Object> objectArrayBuffer, LazyBinaryStructObjectInspector lazyBinaryStructObjectInspector) {

    List<? extends StructField> fields = lazyBinaryStructObjectInspector.getAllStructFieldRefs();
    for (int i = 0; i < fields.size(); i++) {
      StructField field = fields.get(i);
      ObjectInspector objectInspector = field.getFieldObjectInspector();
      Category category = objectInspector.getCategory();
      Object object = lazyBinaryStruct.getField(i);
      if (category == Category.PRIMITIVE) {
        objectArrayBuffer.set(i, object);
      } else {
        objectArrayBuffer.set(i, ObjectInspectorUtils.copyToStandardObject(
            object, objectInspector, ObjectInspectorCopyOption.WRITABLE));
      }
    }
    return objectArrayBuffer;
  }

  /**
   * Implementation of ReusableGetAdaptor that has Output for key serialization; row
   * container is also created once and reused for every row.
   *
   * <p>Each {@code setFrom*} call evaluates the probe key, serializes it with
   * {@code MapJoinKey.serializeRow} into the reused {@code output}, and looks it up in the hash
   * map through the reused row container.
   */
  private class GetAdaptor implements ReusableGetAdaptor, ReusableGetAdaptorDirectAccess {

    /** Evaluated key fields of the most recent lookup; allocated on first use. */
    private Object[] currentKey;
    /** {@code nulls[i]} is true when {@code currentKey[i]} is null; allocated on first use. */
    private boolean[] nulls;
    /** Inspectors of the vectorized key writers; only set by the vector lookups. */
    private List<ObjectInspector> vectorKeyOIs;

    private final ReusableRowContainer currentValue;
    private final Output output;

    public GetAdaptor() {
      currentValue = new ReusableRowContainer();
      output = new Output();
    }

    /**
     * Allocates the key buffers and collects the writers' inspectors on the first vector
     * lookup; on later calls only asserts that the key width has not changed.
     */
    private void prepareVectorKeyState(VectorExpressionWriter[] keyOutputWriters) {
      if (nulls != null) {
        assert nulls.length == keyOutputWriters.length;
        return;
      }
      nulls = new boolean[keyOutputWriters.length];
      currentKey = new Object[keyOutputWriters.length];
      vectorKeyOIs = new ArrayList<ObjectInspector>();
      for (int i = 0; i < keyOutputWriters.length; i++) {
        vectorKeyOIs.add(keyOutputWriters[i].getObjectInspector());
      }
    }

    /**
     * Loads the key columns of a vectorized row into {@code currentKey}/{@code nulls}.
     *
     * @return true if at least one key column is null
     */
    private boolean loadVectorKey(VectorHashKeyWrapperBase kw,
        VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch)
        throws HiveException {
      boolean hasNulls = false;
      for (int i = 0; i < keyOutputWriters.length; i++) {
        currentKey[i] = keyWrapperBatch.getWritableKeyValue(kw, i, keyOutputWriters[i]);
        nulls[i] = currentKey[i] == null;
        hasNulls |= nulls[i];
      }
      return hasNulls;
    }

    /** Allocates the key buffers on the first row lookup. */
    private void prepareRowKeyState(int keyCount) {
      if (nulls == null) {
        nulls = new boolean[keyCount];
        currentKey = new Object[keyCount];
      }
    }

    /**
     * Evaluates the key expressions against {@code row} into {@code currentKey}/{@code nulls}.
     *
     * @return true if at least one key field evaluated to null
     */
    private boolean loadRowKey(Object row, List<ExprNodeEvaluator> fields) throws HiveException {
      boolean hasNulls = false;
      for (int keyIndex = 0; keyIndex < fields.size(); ++keyIndex) {
        currentKey[keyIndex] = fields.get(keyIndex).evaluate(row);
        nulls[keyIndex] = currentKey[keyIndex] == null;
        hasNulls |= nulls[keyIndex];
      }
      return hasNulls;
    }

    @Override
    public JoinUtil.JoinResult setFromVector(VectorHashKeyWrapperBase kw,
        VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch)
        throws HiveException {
      prepareVectorKeyState(keyOutputWriters);
      loadVectorKey(kw, keyOutputWriters, keyWrapperBatch);
      return currentValue.setFromOutput(
          MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs,
                  sortableSortOrders, nullMarkers, notNullMarkers));
    }

    /*
     * This variation is for FULL OUTER MapJoin.  It does key match tracking only if the key has
     * no NULLs.
     */
    @Override
    public JoinUtil.JoinResult setFromVectorNoNulls(VectorHashKeyWrapperBase kw,
        VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch,
        MatchTracker matchTracker)
        throws HiveException {
      prepareVectorKeyState(keyOutputWriters);
      if (loadVectorKey(kw, keyOutputWriters, keyWrapperBatch)) {
        // A key with a NULL column is not looked up at all.
        currentValue.reset();
        return JoinUtil.JoinResult.NOMATCH;
      }
      return currentValue.setFromOutput(
          MapJoinKey.serializeRow(output, currentKey, vectorKeyOIs,
                  sortableSortOrders, nullMarkers, notNullMarkers), matchTracker);
    }

    @Override
    public JoinUtil.JoinResult setFromRow(Object row, List<ExprNodeEvaluator> fields,
        List<ObjectInspector> ois) throws HiveException {
      prepareRowKeyState(fields.size());
      loadRowKey(row, fields);
      return currentValue.setFromOutput(
          MapJoinKey.serializeRow(output, currentKey, ois,
                  sortableSortOrders, nullMarkers, notNullMarkers));
    }

    /*
     * This variation is for FULL OUTER MapJoin.  It does key match tracking only if the key has
     * no NULLs.
     */
    @Override
    public JoinUtil.JoinResult setFromRowNoNulls(Object row, List<ExprNodeEvaluator> fields,
        List<ObjectInspector> ois, MatchTracker matchTracker) throws HiveException {
      prepareRowKeyState(fields.size());
      if (loadRowKey(row, fields)) {
        // A key with a NULL field is not looked up at all.
        currentValue.reset();
        return JoinUtil.JoinResult.NOMATCH;
      }
      return currentValue.setFromOutput(
          MapJoinKey.serializeRow(output, currentKey, ois,
                  sortableSortOrders, nullMarkers, notNullMarkers), matchTracker);
    }

    /**
     * Repeats the lookup of another adaptor: shares its key and null arrays (by reference) and
     * probes the hash map with the bytes in its output buffer.
     */
    @Override
    public JoinUtil.JoinResult setFromOther(ReusableGetAdaptor other) {
      assert other instanceof GetAdaptor;
      GetAdaptor other2 = (GetAdaptor) other;
      nulls = other2.nulls;
      currentKey = other2.currentKey;
      return currentValue.setFromOutput(other2.output);
    }

    /**
     * @param fieldCount not used by this implementation
     * @param nullsafes per-field flags; a null field is ignored when its flag is true
     * @return true if any key field is null and not marked null-safe
     */
    @Override
    public boolean hasAnyNulls(int fieldCount, boolean[] nullsafes) {
      if (nulls == null || nulls.length == 0) {
        return false;
      }
      for (int i = 0; i < nulls.length; i++) {
        if (nulls[i] && (nullsafes == null || !nullsafes[i])) {
          return true;
        }
      }
      return false;
    }

    /** @return the reused row container, or null when the last lookup produced no rows */
    @Override
    public MapJoinRowContainer getCurrentRows() {
      return !currentValue.hasRows() ? null : currentValue;
    }

    @Override
    public Object[] getCurrentKey() {
      return currentKey;
    }

    // Direct access interfaces.

    @Override
    public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
        BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) {
      return currentValue.setDirect(
          bytes, offset, length, hashMapResult, matchTracker);
    }

    @Override
    public MatchTracker createMatchTracker() {
      return MatchTracker.create(hashMap.getNumHashBuckets());
    }

    @Override
    public int directSpillPartitionId() {
      throw new UnsupportedOperationException("Getting the spill hash partition not supported");
    }
  }

  /** Row container that gets and deserializes the rows on demand from bytes provided. */
  private class ReusableRowContainer
    implements MapJoinRowContainer, AbstractRowContainer.RowIterator> {
    // Filter byte produced by the most recent hash map lookup; reset to 0xff by clearRows() and
    // whenever a lookup finds no rows.
    private byte aliasFilter;

    /** Hash table wrapper specific to the container. */
    private final BytesBytesMultiHashMap.Result hashMapResult;

    /**
     * Sometimes, when container is empty in multi-table mapjoin, we need to add a dummy row.
     * This container does not normally support adding rows; this is for the dummy row.
     */
    private List dummyRow = null;
    // TODO: the API here is not ideal, first/next + hasRows are redundant.
    // Set by first() when it hands out the dummy row; consulted by hasRows() and next().
    private boolean wasFirstCalledOnDummyRow = false;

    private final ByteArrayRef uselessIndirection; // LBStruct needs ByteArrayRef
    // Reused struct that unpack() re-points at each value's bytes; null when the enclosing
    // container has no value object inspector.
    private final LazyBinaryStruct valueStruct;
    // Result of hasComplexObjects(internalValueOi); false when there is no value inspector.
    private final boolean needsComplexObjectFixup;
    // Reused output buffer with one (initially null) slot per value field; null when no fixup is
    // needed.
    private final ArrayList complexObjectArrayBuffer;
    // Allocated in the constructor; its use is outside the portion of the file reviewed here.
    private final WriteBuffers.Position noResultReadPos;

    /**
     * Sets up the reusable deserialization state from the enclosing container's value
     * inspector, if there is one, and starts out with no rows.
     */
    public ReusableRowContainer() {
      if (internalValueOi == null) {
        // No rows?
        valueStruct = null;
        needsComplexObjectFixup = false;
        complexObjectArrayBuffer = null;
      } else {
        valueStruct = (LazyBinaryStruct)
            LazyBinaryFactory.createLazyBinaryObject(internalValueOi);
        needsComplexObjectFixup = hasComplexObjects(internalValueOi);
        // One pre-filled null slot per field, so the buffer can be written with set().
        complexObjectArrayBuffer = needsComplexObjectFixup
            ? new ArrayList(
                Collections.nCopies(internalValueOi.getAllStructFieldRefs().size(), null))
            : null;
      }
      uselessIndirection = new ByteArrayRef();
      hashMapResult = new BytesBytesMultiHashMap.Result();
      noResultReadPos = new WriteBuffers.Position();
      clearRows();
    }

    /** @return the lookup result object owned and reused by this container */
    public BytesBytesMultiHashMap.Result getHashMapResult() {
      return this.hashMapResult;
    }

    /**
     * Looks up the serialized key held in {@code output} without match tracking.
     *
     * @param output buffer holding the serialized probe key
     * @return MATCH if the lookup found rows, NOMATCH otherwise
     */
    public JoinUtil.JoinResult setFromOutput(Output output) {
      // Same lookup as the tracking overload, with no tracker supplied.
      return setFromOutput(output, (MatchTracker) null);
    }

    /**
     * Looks up the serialized key held in {@code output}, passing the tracker on to the hash
     * map. Any dummy row is discarded. When nothing is found the alias filter is reset to 0xff.
     *
     * @param output buffer holding the serialized probe key
     * @param matchTracker tracker forwarded to the hash map lookup; may be null
     * @return MATCH if the lookup found rows, NOMATCH otherwise
     */
    public JoinUtil.JoinResult setFromOutput(Output output, MatchTracker matchTracker) {
      aliasFilter = hashMap.getValueResult(
          output.getData(), 0, output.getLength(), hashMapResult, matchTracker);
      dummyRow = null;
      wasFirstCalledOnDummyRow = false;

      if (!hashMapResult.hasRows()) {
        aliasFilter = (byte) 0xff;
        return JoinUtil.JoinResult.NOMATCH;
      }
      return JoinUtil.JoinResult.MATCH;
    }

    /**
     * Forgets the current lookup result. Unlike {@code clearRows()}, this leaves the dummy row
     * and the alias filter as they are.
     */
    public void reset() {
      hashMapResult.forget();
    }

    /**
     * @return true if the last lookup found rows, or if a dummy row is present that has not yet
     *     been handed out by {@code first()}
     */
    @Override
    public boolean hasRows() {
      if (hashMapResult.hasRows()) {
        return true;
      }
      return dummyRow != null && !wasFirstCalledOnDummyRow;
    }

    /**
     * @return whether the lookup result holds a single row; when the lookup has no rows,
     *     whether a dummy row is present
     */
    @Override
    public boolean isSingleRow() {
      return hashMapResult.hasRows() ? hashMapResult.isSingleRow() : (dummyRow != null);
    }

    // Implementation of row container
    @Override
    public AbstractRowContainer.RowIterator> rowIter() throws HiveException {
      return this;
    }

    /**
     * Not supported by this container.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public int rowCount() throws HiveException {
      // For performance reasons we do not want to chase the values to the end to determine
      // the count.  Use hasRows and isSingleRow instead.
      throw new UnsupportedOperationException("Getting the row count not supported");
    }

    /**
     * Returns this container to its empty state: the lookup result is forgotten, the alias
     * filter goes back to 0xff and any dummy row is dropped. The underlying hash table is not
     * touched.
     */
    @Override
    public void clearRows() {
      hashMapResult.forget();
      aliasFilter = (byte) 0xff;
      wasFirstCalledOnDummyRow = false;
      dummyRow = null;
    }

    /**
     * Returns the alias filter byte currently held by this container. The lookup methods store
     * the value returned by the hash map lookup here, and replace it with {@code 0xff} when the
     * lookup finds no rows; {@code clearRows()} also sets it to {@code 0xff}.
     *
     * @return the current alias filter
     * @throws HiveException declared by the overridden method; not thrown by this body
     */
    @Override
    public byte getAliasFilter() throws HiveException {
      return aliasFilter;
    }

    /**
     * Returns this same instance rather than creating a new container.
     *
     * @return this container
     * @throws HiveException declared by the overridden method; not thrown by this body
     */
    @Override
    public MapJoinRowContainer copy() throws HiveException {
      return this; // Independent of hashtable and can be modified, no need to copy.
    }

    // Implementation of row iterator
    /**
     * Starts iteration and returns the first row.
     *
     * <p>If a dummy row is set it is returned as-is and marked as handed out. Otherwise the
     * first value segment of the hash map result is unpacked into a row.
     *
     * @return the first row, or {@code null} if the hash map result has no first segment
     * @throws HiveException if unpacking the value bytes fails
     */
    @Override
    public List<Object> first() throws HiveException {
      if (dummyRow != null) {
        // Remember that the dummy row was consumed so hasRows()/next() behave accordingly.
        wasFirstCalledOnDummyRow = true;
        return dummyRow;
      }

      WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.first();
      return (byteSegmentRef == null) ? null : unpack(byteSegmentRef);
    }

    /**
     * Returns the row following the one last returned by {@code first()} or {@code next()}.
     *
     * <p>A dummy row is always the only row, so after {@code first()} this returns
     * {@code null}. Otherwise the next value segment of the hash map result is unpacked.
     *
     * @return the next row, or {@code null} when there are no more rows
     * @throws HiveException if unpacking the value bytes fails
     * @throws AssertionError if a dummy row is set and {@code first()} was never called
     */
    @Override
    public List<Object> next() throws HiveException {
      if (dummyRow != null) {
        // TODO: what should we do if first was never called? for now, assert for clarity
        if (!wasFirstCalledOnDummyRow) {
          throw new AssertionError("next called without first");
        }
        return null;
      }

      WriteBuffers.ByteSegmentRef byteSegmentRef = hashMapResult.next();
      return (byteSegmentRef == null) ? null : unpack(byteSegmentRef);
    }

    /**
     * Turns one value segment from the hash map into a row of field objects.
     *
     * @param ref segment describing the bytes, offset and length of the serialized value
     * @return {@code EMPTY_LIST} for a zero-length segment; otherwise the fields read through
     *         {@code valueStruct}, converted via {@code getComplexFieldsAsList} when
     *         {@code needsComplexObjectFixup} is set
     * @throws HiveException if the complex-object conversion fails
     */
    private List<Object> unpack(WriteBuffers.ByteSegmentRef ref) throws HiveException {
      if (ref.getLength() == 0) {
        return EMPTY_LIST; // shortcut, 0 length means no fields
      }

      // Point the reusable struct at the segment's bytes; the offset is narrowed to int here.
      uselessIndirection.setData(ref.getBytes());
      valueStruct.init(uselessIndirection, (int) ref.getOffset(), ref.getLength());

      if (!needsComplexObjectFixup) {
        // Good performance for common case where small table has no complex objects.
        return valueStruct.getFieldsAsList();
      }

      // Convert the complex LazyBinary objects to standard (Java) objects so downstream
      // operators like FileSinkOperator can serialize complex objects in the form they expect
      // (i.e. Java objects).
      return getComplexFieldsAsList(valueStruct, complexObjectArrayBuffer, internalValueOi);
    }

    /**
     * Installs {@code t} as this container's single dummy row. Only allowed while the
     * container is empty, i.e. no dummy row is set and the hash map result has no rows.
     *
     * @param t the row to hold; stored by reference, not copied
     * @throws RuntimeException if the container already has a dummy row or hash map rows
     */
    @Override
    public void addRow(List<Object> t) {
      if (dummyRow != null || hashMapResult.hasRows()) {
        throw new RuntimeException("Cannot add rows when not empty");
      }
      dummyRow = t;
      // The new dummy row has not been handed out by first() yet.
      wasFirstCalledOnDummyRow = false;
    }

    // Various unsupported methods.
    /**
     * Unsupported: this container does not accept rows given as arrays.
     *
     * @param value ignored
     * @throws RuntimeException always
     */
    @Override
    public void addRow(Object[] value) {
      final String containerName = this.getClass().getCanonicalName();
      throw new RuntimeException(containerName + " cannot add arrays");
    }
    /**
     * Unsupported: this container cannot be written to a stream.
     *
     * @param valueContext ignored
     * @param out ignored
     * @throws RuntimeException always
     */
    @Override
    public void write(MapJoinObjectSerDeContext valueContext, ObjectOutputStream out) {
      final String containerName = this.getClass().getCanonicalName();
      throw new RuntimeException(containerName + " cannot be serialized");
    }

    // Direct access.

    /**
     * Looks up a key given directly as a byte range and writes the outcome into a
     * caller-supplied result object.
     *
     * <p>Note that the {@code hashMapResult} parameter shadows the member of the same name used
     * by the other methods of this container: both the lookup and the {@code hasRows()} check
     * here operate on the argument, not on that member.
     *
     * <p>Any previously added dummy row is discarded, together with its iteration flag, in the
     * same way the {@code setFromOutput} methods and {@code clearRows()} do.
     *
     * @param bytes array containing the serialized key
     * @param offset start of the key within {@code bytes}
     * @param length number of key bytes
     * @param hashMapResult result object the lookup populates
     * @param matchTracker tracker passed through to the hash map lookup
     * @return {@code MATCH} if the supplied result has rows, otherwise {@code NOMATCH}
     */
    public JoinUtil.JoinResult setDirect(byte[] bytes, int offset, int length,
        BytesBytesMultiHashMap.Result hashMapResult, MatchTracker matchTracker) {
      aliasFilter = hashMap.getValueResult(bytes, offset, length, hashMapResult, matchTracker);

      // Reset both pieces of dummy-row state together so the flag never outlives the row.
      dummyRow = null;
      wasFirstCalledOnDummyRow = false;

      if (hashMapResult.hasRows()) {
        return JoinUtil.JoinResult.MATCH;
      } else {
        // No rows for this key: overwrite the filter with all bits set.
        aliasFilter = (byte) 0xff;
        return JoinUtil.JoinResult.NOMATCH;
      }
    }
  }

  /**
   * For FULL OUTER MapJoin: Iterates through the Small Table hash table and returns the key and
   * value rows for any non-matched keys.
   *
   * <p>Usage is cursor style: call {@link #isNext()} to advance, then read the current key and
   * rows through the getters. The key reference and row container are reused across calls, so
   * the getters always reflect the most recent successful {@code isNext()}.
   */
  private class NonMatchedSmallTableIteratorImpl implements NonMatchedSmallTableIterator {

    /** Tracker handed to the hash map when searching for non-matched keys. */
    private final MatchTracker matchTracker;

    /**
     * Position returned by the last search. {@code -1} is both the starting value and the value
     * that signals no further key was found.
     */
    private int currentIndex;

    /** Reusable reference that the hash map points at the current key's bytes. */
    private final WriteBuffers.ByteSegmentRef keyRef;

    /** Scratch writable passed to key deserialization. */
    private final BytesWritable bytesWritable;

    /** Reusable container whose hash map result receives the current key's value rows. */
    private final ReusableRowContainer currentValue;

    /**
     * @param matchTracker tracker used to decide which keys count as non-matched
     * @throws IllegalStateException if the enclosing container has no {@code keySerde}
     */
    NonMatchedSmallTableIteratorImpl(MatchTracker matchTracker) {
      this.matchTracker = matchTracker;

      // Keys can only be deserialized in getCurrentKey() when a key SerDe is available.
      Preconditions.checkState(keySerde != null);

      currentIndex = -1;

      keyRef = new WriteBuffers.ByteSegmentRef();
      bytesWritable = new BytesWritable();

      currentValue = new ReusableRowContainer();
    }

    /**
     * Advances to the next non-matched key, if any.
     *
     * @return {@code true} if a key was found; {@code false} when the search returned
     *         {@code -1}
     */
    @Override
    public boolean isNext() {

      // If another non-matched key is found, the key bytes will be referenced by keyRef, and
      // our ReusableRowContainer's BytesBytesMultiHashMap.Result will reference the value rows.
      // NOTE(review): after a false return currentIndex is -1 again, so a further call
      // presumably restarts the scan -- confirm against findNextNonMatched.
      currentIndex =
          hashMap.findNextNonMatched(
              currentIndex, keyRef, currentValue.getHashMapResult(), matchTracker);
      return (currentIndex != -1);
    }

    /**
     * Deserializes the current key's bytes with the enclosing container's {@code keySerde}.
     *
     * @return the deserialized key fields
     * @throws HiveException if deserialization fails
     */
    @Override
    public List<Object> getCurrentKey() throws HiveException {
      return MapJoinKey.deserializeRow(
          keyRef.getBytes(),
          (int) keyRef.getOffset(),
          keyRef.getLength(),
          bytesWritable, keySerde);
    }

    /**
     * @return the reusable reference to the current key's bytes (not a copy)
     */
    @Override
    public WriteBuffers.ByteSegmentRef getCurrentKeyAsRef() {
      return keyRef;
    }

    /**
     * @return the reusable row container positioned on the current key's value rows
     */
    @Override
    public MapJoinRowContainer getCurrentRows() {
      return currentValue;
    }

    /**
     * @return the hash map result backing {@link #getCurrentRows()}
     */
    @Override
    public BytesBytesMultiHashMap.Result getHashMapResult() {
      return currentValue.getHashMapResult();
    }
  }

  /**
   * Checks whether every field of a struct-typed key can be handled by this container.
   *
   * @param keyOi object inspector of the key; cast to {@code StructObjectInspector} without a
   *              check, so any other inspector kind fails with a {@code ClassCastException}
   * @return {@code true} if {@code MapJoinKey.isSupportedField} accepts every field's
   *         inspector (trivially {@code true} for a struct with no fields), {@code false}
   *         as soon as one field is rejected
   */
  public static boolean isSupportedKey(ObjectInspector keyOi) {
    // The element type must be declared for the typed for-each below to compile.
    List<? extends StructField> keyFields =
        ((StructObjectInspector) keyOi).getAllStructFieldRefs();
    for (StructField field : keyFields) {
      if (!MapJoinKey.isSupportedField(field.getFieldObjectInspector())) {
        return false;
      }
    }
    return true;
  }

  /**
   * Delegates to the underlying hash map's {@code debugDumpMetrics()}.
   */
  @Override
  public void dumpMetrics() {
    hashMap.debugDumpMetrics();
  }

  /**
   * @return always {@code false} for this container
   */
  @Override
  public boolean hasSpill() {
    return false;
  }

  /**
   * @return the size reported by the underlying hash map
   */
  @Override
  public int size() {
    return hashMap.size();
  }

  /**
   * Estimates the memory footprint of this container by summing the estimates of its parts:
   * the hash map, whichever write helpers exist, the sort-order and null-marker arrays that
   * are present, one empty array list, and a fixed allowance for the object inspector.
   *
   * @return the estimated size; absent (null) components contribute nothing
   */
  @Override
  public long getEstimatedMemorySize() {
    final JavaDataModel model = JavaDataModel.get();

    long total = hashMap.getEstimatedMemorySize();

    // Optional helpers and arrays only count when they have been created.
    if (directWriteHelper != null) {
      total += directWriteHelper.getEstimatedMemorySize();
    }
    if (writeHelper != null) {
      total += writeHelper.getEstimatedMemorySize();
    }
    if (sortableSortOrders != null) {
      total += model.lengthForBooleanArrayOfSize(sortableSortOrders.length);
    }
    if (nullMarkers != null) {
      total += model.lengthForByteArrayOfSize(nullMarkers.length);
    }
    if (notNullMarkers != null) {
      total += model.lengthForByteArrayOfSize(notNullMarkers.length);
    }

    total += model.arrayList(); // empty list
    total += DEFAULT_OBJECT_INSPECTOR_MEMORY_SIZE;
    return total;
  }
}