/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec;

import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Future;
import java.util.concurrent.locks.ReentrantLock;

import org.apache.commons.lang3.tuple.ImmutablePair;
import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.common.ObjectPair;
import org.apache.hadoop.hive.conf.Constants;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.HashTableLoaderFactory;
import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
import org.apache.hadoop.hive.ql.exec.persistence.BytesBytesMultiHashMap;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.HybridHashTableContainer.HashPartition;
import org.apache.hadoop.hive.ql.exec.persistence.KeyValueContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinBytesTableContainer.KeyValueHelper;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectSerDeContext;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinRowContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainer.ReusableGetAdaptor;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinTableContainerSerDe;
import org.apache.hadoop.hive.ql.exec.persistence.ObjectContainer;
import org.apache.hadoop.hive.ql.exec.persistence.UnwrapRowContainer;
import org.apache.hadoop.hive.ql.exec.spark.SparkUtilities;
import org.apache.hadoop.hive.ql.exec.tez.LlapObjectCache;
import org.apache.hadoop.hive.ql.exec.tez.LlapObjectSubCache;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.log.PerfLogger;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.JoinCondDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.annotations.VisibleForTesting;

import com.esotericsoftware.kryo.KryoException;

/**
 * Map side Join operator implementation.
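 *
 * Rows of the big table are streamed through process() and probed against the
 * in-memory hash tables built from the small table(s) (mapJoinTables). With
 * Hybrid Grace Hash Join, partitions that do not fit in memory are spilled to
 * disk together with the matching big table rows and re-processed in closeOp().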
 */
public class MapJoinOperator extends AbstractMapJoinOperator<MapJoinDesc> implements Serializable {

  private static final long serialVersionUID = 1L;
  private static final Logger LOG = LoggerFactory.getLogger(MapJoinOperator.class.getName());
  private static final String CLASS_NAME = MapJoinOperator.class.getName();
  private transient final PerfLogger perfLogger = SessionState.getPerfLogger();

  private transient String cacheKey;
  private transient ObjectCache cache;

  protected HashTableLoader loader;

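  // Hash table containers for the small table(s) and the serdes used to load or
  // restore them, both indexed by table position (tag).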
  protected transient MapJoinTableContainer[] mapJoinTables;
  private transient MapJoinTableContainerSerDe[] mapJoinTableSerdes;
  private transient boolean hashTblInitedOnce;
  protected transient ReusableGetAdaptor[] hashMapRowGetters;

  private UnwrapRowContainer[] unwrapContainer;
  private transient Configuration hconf;
  private transient boolean hybridMapJoinLeftover;  // whether there's spilled data to be processed
  protected transient MapJoinBytesTableContainer[] spilledMapJoinTables;  // used to hold restored
                                                                          // spilled small tables
  protected HybridHashTableContainer firstSmallTable; // The first small table;
                                                      // Only this table has spilled big table rows

  protected transient boolean isTestingNoHashTableLoad;
  // Only used in bucket map join.
  private transient int numBuckets = -1;
  private transient int bucketId = -1;
  private transient ReentrantLock subCacheLock = new ReentrantLock();

  /** Kryo ctor. */
  protected MapJoinOperator() {
    super();
  }

  public MapJoinOperator(CompilationOpContext ctx) {
    super(ctx);
  }

  public MapJoinOperator(AbstractMapJoinOperator mjop) {
    super(mjop);
  }

  /*
   * We need the base (operator.java) implementation of start/endGroup.
   * The parent class has functionality in those that map join can't use.
   * Note: The mapjoin can be run in the reducer only on Tez.
   */
  @Override
  public void endGroup() throws HiveException {
    defaultEndGroup();
  }

  @Override
  public void startGroup() throws HiveException {
    defaultStartGroup();
  }

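  /**
   * Returns the HashTableLoader for the current execution engine, as selected by
   * HashTableLoaderFactory; may be overridden by subclasses.
   */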
  protected HashTableLoader getHashTableLoader(Configuration hconf) {
    return HashTableLoaderFactory.getLoader(hconf);
  }

  public String getCacheKey() {
    return cacheKey;
  }

  @Override
  protected void initializeOp(Configuration hconf) throws HiveException {
    this.hconf = hconf;
    unwrapContainer = new UnwrapRowContainer[conf.getTagLength()];

    super.initializeOp(hconf);

    int tagLen = conf.getTagLength();

    // On Tez only: The hash map might already be cached in the container we run
    // the task in. On MR: The cache is a no-op.
    String queryId = HiveConf.getVar(hconf, HiveConf.ConfVars.HIVEQUERYID);
    cacheKey = "HASH_MAP_" + this.getOperatorId() + "_container";
    cache = ObjectCacheFactory.getCache(hconf, queryId, false);
    loader = getHashTableLoader(hconf);

    bucketId = hconf.getInt(Constants.LLAP_BUCKET_ID, -1);
    numBuckets = hconf.getInt(Constants.LLAP_NUM_BUCKETS, -1);

    hashMapRowGetters = null;

    mapJoinTables = new MapJoinTableContainer[tagLen];
    mapJoinTableSerdes = new MapJoinTableContainerSerDe[tagLen];
    hashTblInitedOnce = false;

    // Reset grace hashjoin context so that there is no state maintained when operator/work is
    // retrieved from object cache
    hybridMapJoinLeftover = false;
    firstSmallTable = null;

    generateMapMetaData();

    isTestingNoHashTableLoad = HiveConf.getBoolVar(hconf,
        HiveConf.ConfVars.HIVE_MAPJOIN_TESTING_NO_HASH_TABLE_LOAD);
    if (isTestingNoHashTableLoad) {
      return;
    }

    final ExecMapperContext mapContext = getExecContext();
    final MapredContext mrContext = MapredContext.get();

    if (!conf.isBucketMapJoin() && !conf.isDynamicPartitionHashJoin()) {
      /*
       * The issue with caching in case of bucket map join is that different tasks
       * process different buckets and if the container is reused to join a different bucket,
       * join results can be incorrect. The cache is keyed on operator id and for bucket map join
       * the operator does not change but data needed is different. For a proper fix, this
       * requires changes in the Tez API with regard to finding bucket id and
       * also ability to schedule tasks to re-use containers that have cached the specific bucket.
       */
      if (LOG.isDebugEnabled()) {
        LOG.debug("This is not bucket map join, so cache");
      }

      Future<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> future =
          cache.retrieveAsync(
              cacheKey, () -> loadHashTable(mapContext, mrContext));
      asyncInitOperations.add(future);
    } else if (!isInputFileChangeSensitive(mapContext)) {
      loadHashTable(mapContext, mrContext);
      hashTblInitedOnce = true;
    }
  }

  @SuppressWarnings("unchecked")
  @Override
  protected void completeInitializationOp(Object[] os) throws HiveException {
    if (os.length != 0) {
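      // os[0] holds the (tables, serdes) pair produced by the asynchronous
      // loadHashTable call scheduled from initializeOp() through the object cache.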
      Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> pair =
          (Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>) os[0];

      boolean spilled = false;
      for (MapJoinTableContainer container : pair.getLeft()) {
        if (container != null) {
          spilled = spilled || container.hasSpill();
        }
      }

      if (spilled) {
        // we can't use the cached table because it has spilled.

        loadHashTable(getExecContext(), MapredContext.get());
      } else {
        if (LOG.isDebugEnabled()) {
          String s = "Using tables from cache: [";
          for (MapJoinTableContainer c : pair.getLeft()) {
            s += ((c == null) ? "null" : c.getClass().getSimpleName()) + ", ";
          }
          LOG.debug(s + "]");
        }
        // let's use the table from the cache.
        mapJoinTables = pair.getLeft();
        mapJoinTableSerdes = pair.getRight();
      }
      hashTblInitedOnce = true;
    }

    if (this.getExecContext() != null) {
      // reset exec context so that initialization of the map operator happens
      // properly
      this.getExecContext().setLastInputPath(null);
      this.getExecContext().setCurrentInputPath(null);
    }
  }

  @VisibleForTesting
  public void setTestMapJoinTableContainer(int posSmallTable,
      MapJoinTableContainer testMapJoinTableContainer,
      MapJoinTableContainerSerDe mapJoinTableContainerSerDe) {
    mapJoinTables[posSmallTable] = testMapJoinTableContainer;
    mapJoinTableSerdes[posSmallTable] = mapJoinTableContainerSerDe;
  }

  @Override
  protected List<ObjectInspector> getValueObjectInspectors(
      byte alias, List<ObjectInspector>[] aliasToObjectInspectors) {
    int[] valueIndex = conf.getValueIndex(alias);
    if (valueIndex == null) {
      return super.getValueObjectInspectors(alias, aliasToObjectInspectors);
    }

    List<ObjectInspector> inspectors = aliasToObjectInspectors[alias];
    int bigPos = conf.getPosBigTable();
    Converter[] converters = new Converter[valueIndex.length];
    List<ObjectInspector> valueOI = new ArrayList<ObjectInspector>();
    for (int i = 0; i < valueIndex.length; i++) {
      if (valueIndex[i] >= 0 && !joinKeysObjectInspectors[bigPos].isEmpty()) {
        if (conf.getNoOuterJoin()) {
          valueOI.add(joinKeysObjectInspectors[bigPos].get(valueIndex[i]));
        } else {
          // It is an outer join. We are going to add the inspector from the
          // inner side, but the key value will come from the outer side, so
          // we need to create a converter from inputOI to outputOI.
          valueOI.add(inspectors.get(i));
          converters[i] = ObjectInspectorConverters.getConverter(
                  joinKeysObjectInspectors[bigPos].get(valueIndex[i]), inspectors.get(i));
        }
      } else {
        valueOI.add(inspectors.get(i));
      }
    }

    unwrapContainer[alias] = new UnwrapRowContainer(alias, valueIndex, converters, hasFilter(alias));

    return valueOI;
  }

  public void generateMapMetaData() throws HiveException {
    // generate the meta data for key
    // index for key is -1

    try {
      TableDesc keyTableDesc = conf.getKeyTblDesc();
      AbstractSerDe keySerializer = (AbstractSerDe) ReflectionUtil.newInstance(
          keyTableDesc.getDeserializerClass(), null);
      SerDeUtils.initializeSerDe(keySerializer, null, keyTableDesc.getProperties(), null);
      MapJoinObjectSerDeContext keyContext = new MapJoinObjectSerDeContext(keySerializer, false);
      for (int pos = 0; pos < order.length; pos++) {
        if (pos == posBigTable) {
          continue;
        }
        TableDesc valueTableDesc;
        if (conf.getNoOuterJoin()) {
          valueTableDesc = conf.getValueTblDescs().get(pos);
        } else {
          valueTableDesc = conf.getValueFilteredTblDescs().get(pos);
        }
        AbstractSerDe valueSerDe = (AbstractSerDe) ReflectionUtil.newInstance(
            valueTableDesc.getDeserializerClass(), null);
        SerDeUtils.initializeSerDe(valueSerDe, null, valueTableDesc.getProperties(), null);
        MapJoinObjectSerDeContext valueContext =
            new MapJoinObjectSerDeContext(valueSerDe, hasFilter(pos));
        mapJoinTableSerdes[pos] = new MapJoinTableContainerSerDe(keyContext, valueContext);
      }
    } catch (SerDeException e) {
      throw new HiveException(e);
    }
  }

  // Core logic to load hash table using HashTableLoader
  private Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTableInternal(
          ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
    perfLogger.PerfLogBegin(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);
    loader.init(mapContext, mrContext, hconf, this);
    try {
      loader.load(mapJoinTables, mapJoinTableSerdes);
    } catch (HiveException e) {
      if (LOG.isInfoEnabled()) {
        LOG.info("Exception loading hash tables. Clearing partially loaded hash table containers.");
      }

      // there could be some spilled partitions which need to be cleaned up
      clearAllTableContainers();
      throw e;
    }

    hashTblInitedOnce = true;

    Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> pair =
            new ImmutablePair<>(mapJoinTables, mapJoinTableSerdes);

    perfLogger.PerfLogEnd(CLASS_NAME, PerfLogger.LOAD_HASHTABLE);

    if (canSkipJoinProcessing(mapContext)) {
      LOG.info("Skipping big table join processing for " + this.toString());
      this.setDone(true);
    }
    return pair;
  }

  // Load Hash table for Bucket MapJoin
  private Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTableBMJ(
          ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
    // Bucket MapJoin in LLAP, make sure the caches are populated.
    // Get the subcache.

    LlapObjectSubCache<Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]>> subCache =
            new LlapObjectSubCache<>(cache, cacheKey + "_BMJ", numBuckets);

    subCache.lock(bucketId);
    try {
      Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> pair =
              subCache.get(bucketId);
      if (pair != null) {
        // match found! use it
        // update the tables.
        mapJoinTables = pair.getLeft();
        mapJoinTableSerdes = pair.getRight();
        return pair;
      }
      pair = loadHashTableInternal(mapContext, mrContext);

      // update the subcache
      subCache.set(pair, bucketId);
      return pair;
    } finally {
      subCache.unlock(bucketId);
    }
  }

  protected Pair<MapJoinTableContainer[], MapJoinTableContainerSerDe[]> loadHashTable(
      ExecMapperContext mapContext, MapredContext mrContext) throws HiveException {
    if (canSkipReload(mapContext)) {
      // no need to reload
      return new ImmutablePair<>(mapJoinTables, mapJoinTableSerdes);
    }

    if (conf.isBucketMapJoin() && cache instanceof LlapObjectCache &&
            numBuckets > 0 && HiveConf.getBoolVar(hconf,
            ConfVars.HIVE_TEZ_BMJ_USE_SUBCACHE)) {
      // Bucket MapJoin in LLAP
      return loadHashTableBMJ(mapContext, mrContext);
    }

    return loadHashTableInternal(mapContext, mrContext);
  }

  // Load the hash table
  @Override
  public void cleanUpInputFileChangedOp() throws HiveException {
    loadHashTable(getExecContext(), MapredContext.get());
  }

  protected JoinUtil.JoinResult setMapJoinKey(
      ReusableGetAdaptor dest, Object row, byte alias) throws HiveException {
    return dest.setFromRow(row, joinKeys[alias], joinKeysObjectInspectors[alias]);
  }

  protected MapJoinKey getRefKey(byte alias) {
    // We assume that since we are joining on the same key, all tables would have either
    // optimized or non-optimized key; hence, we can pass any key in any table as reference.
    // We do it so that MJKB could determine whether it can use optimized keys.
    for (byte pos = 0; pos < order.length; pos++) {
      if (pos == alias) continue;
      MapJoinKey refKey = mapJoinTables[pos].getAnyKey();
      if (refKey != null) return refKey;
    }
    return null; // All join tables have 0 keys, doesn't matter what we generate.
  }

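  /**
   * Processes one big table row: the join key is deserialized once, probed against
   * every small-table hash map, and matching rows are emitted via checkAndGenObject().
   * Rows whose key falls into a spilled hash partition are spilled as well and
   * re-joined later from closeOp().
   */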
  @Override
  public void process(Object row, int tag) throws HiveException {
    try {
      alias = (byte) tag;
      if (hashMapRowGetters == null) {
        hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
        MapJoinKey refKey = getRefKey(alias);
        for (byte pos = 0; pos < order.length; pos++) {
          if (pos != alias) {
            hashMapRowGetters[pos] = mapJoinTables[pos].createGetter(refKey);
          }
        }
      }

      // As we're calling processOp again to process the leftover "tuples", we know the "row" is
      // coming from the spilled matchfile. We need to recreate hashMapRowGetters against the new hashtables.
      if (hybridMapJoinLeftover) {
        MapJoinKey refKey = getRefKey(alias);
        for (byte pos = 0; pos < order.length; pos++) {
          if (pos != alias && spilledMapJoinTables[pos] != null) {
            hashMapRowGetters[pos] = spilledMapJoinTables[pos].createGetter(refKey);
          }
        }
      }

      // compute keys and values as StandardObjects
      ReusableGetAdaptor firstSetKey = null;
      int fieldCount = joinKeys[alias].size();
      boolean joinNeeded = false;
      boolean bigTableRowSpilled = false;
      for (byte pos = 0; pos < order.length; pos++) {
        if (pos != alias) {
          JoinUtil.JoinResult joinResult;
          ReusableGetAdaptor adaptor;
          if (firstSetKey == null) {
            adaptor = firstSetKey = hashMapRowGetters[pos];
            joinResult = setMapJoinKey(firstSetKey, row, alias);
          } else {
            // Keys for all tables are the same, so only the first has to deserialize them.
            adaptor = hashMapRowGetters[pos];
            joinResult = adaptor.setFromOther(firstSetKey);
          }
          MapJoinRowContainer rowContainer = adaptor.getCurrentRows();
          if (joinResult != JoinUtil.JoinResult.MATCH) {
            assert (rowContainer == null || !rowContainer.hasRows()) :
                "Expecting an empty result set for no match";
          }
          if (rowContainer != null && unwrapContainer[pos] != null) {
            Object[] currentKey = firstSetKey.getCurrentKey();
            rowContainer = unwrapContainer[pos].setInternal(rowContainer, currentKey);
          }
          // there is no join value, or the join key contains nulls (for non null-safe positions)
          if (rowContainer == null || firstSetKey.hasAnyNulls(fieldCount, nullsafes)) {
            if (!noOuterJoin) {
              // For Hybrid Grace Hash Join, during the 1st round processing,
              // we only keep the LEFT side if the row is not spilled
              if (!conf.isHybridHashJoin() || hybridMapJoinLeftover ||
                  (joinResult != JoinUtil.JoinResult.SPILL && !bigTableRowSpilled)) {
                joinNeeded = true;
                storage[pos] = dummyObjVectors[pos];
              } else {
                joinNeeded = false;
              }
            } else {
              storage[pos] = emptyList;
            }
          } else {
            joinNeeded = true;
            storage[pos] = rowContainer.copy();
            aliasFilterTags[pos] = rowContainer.getAliasFilter();
          }
          // Spill the big table rows into appropriate partition:
          // When the JoinResult is SPILL, it means the corresponding small table row may have been
          // spilled to disk (at least the partition that holds this row is on disk). So we need to
          // postpone the join processing for this pair by also spilling this big table row.
          if (joinResult == JoinUtil.JoinResult.SPILL &&
              !bigTableRowSpilled) {  // For n-way join, only spill big table rows once
            spillBigTableRow(mapJoinTables[pos], row);
            bigTableRowSpilled = true;
          }
        }
      }
      if (joinNeeded) {
        List<Object> value = getFilteredValue(alias, row);
        // Add the value to the ArrayList
        storage[alias].addRow(value);
        // generate the output records
        checkAndGenObject();
      }
      // done with the row
      storage[tag].clearRows();
      for (byte pos = 0; pos < order.length; pos++) {
        if (pos != tag) {
          storage[pos] = null;
        }
      }
    } catch (Exception e) {
      String msg = "Unexpected exception from "
          + this.getClass().getSimpleName() + " : " + e.getMessage();
      LOG.error(msg, e);
      throw new HiveException(msg, e);
    }
  }

  /**
   * Postpone processing the big table row temporarily by spilling it to a row container
   * @param hybridHtContainer Hybrid hashtable container
   * @param row big table row
   */
  protected void spillBigTableRow(MapJoinTableContainer hybridHtContainer, Object row) throws HiveException {
    HybridHashTableContainer ht = (HybridHashTableContainer) hybridHtContainer;
    int partitionId = ht.getToSpillPartitionId();
    HashPartition hp = ht.getHashPartitions()[partitionId];
    ObjectContainer bigTable = hp.getMatchfileObjContainer();
    bigTable.add(row);
  }

  @Override
  public void closeOp(boolean abort) throws HiveException {
    boolean spilled = false;
    for (MapJoinTableContainer container : mapJoinTables) {
      if (container != null) {
        spilled = spilled || container.hasSpill();
        container.dumpMetrics();
      }
    }

    // For Hybrid Grace Hash Join, we need to see if there is any spilled data to be processed next
    if (spilled) {
      if (!abort) {
        if (hashMapRowGetters == null) {
          hashMapRowGetters = new ReusableGetAdaptor[mapJoinTables.length];
        }
        int numPartitions = 0;
        // Find out the number of partitions for each small table (should be the same across tables)
        for (byte pos = 0; pos < mapJoinTables.length; pos++) {
          if (pos != conf.getPosBigTable()) {
            firstSmallTable = (HybridHashTableContainer) mapJoinTables[pos];
            numPartitions = firstSmallTable.getHashPartitions().length;
            break;
          }
        }
        assert numPartitions != 0 : "Number of partitions must be greater than 0!";

        if (firstSmallTable.hasSpill()) {
          spilledMapJoinTables = new MapJoinBytesTableContainer[mapJoinTables.length];
          hybridMapJoinLeftover = true;

          // Clear all in-memory partitions first
          for (byte pos = 0; pos < mapJoinTables.length; pos++) {
            MapJoinTableContainer tableContainer = mapJoinTables[pos];
            if (tableContainer != null && tableContainer instanceof HybridHashTableContainer) {
              HybridHashTableContainer hybridHtContainer = (HybridHashTableContainer) tableContainer;
              hybridHtContainer.dumpStats();

              HashPartition[] hashPartitions = hybridHtContainer.getHashPartitions();
              // Clear all in memory partitions first
              for (int i = 0; i < hashPartitions.length; i++) {
                if (!hashPartitions[i].isHashMapOnDisk()) {
                  hybridHtContainer.setTotalInMemRowCount(
                      hybridHtContainer.getTotalInMemRowCount() -
                          hashPartitions[i].getHashMapFromMemory().getNumValues());
                  hashPartitions[i].getHashMapFromMemory().clear();
                }
              }
              assert hybridHtContainer.getTotalInMemRowCount() == 0;
            }
          }

          // Reprocess the spilled data
          for (int i = 0; i < numPartitions; i++) {
            HashPartition[] hashPartitions = firstSmallTable.getHashPartitions();
            if (hashPartitions[i].isHashMapOnDisk()) {
              try {
                continueProcess(i);     // Re-process spilled data
              } catch (KryoException ke) {
                LOG.error("Processing the spilled data failed due to Kryo error!");
                LOG.error("Cleaning up all spilled data!");
                cleanupGraceHashJoin();
                throw new HiveException(ke);
              } catch (Exception e) {
                throw new HiveException(e);
              }
              for (byte pos = 0; pos < order.length; pos++) {
                if (pos != conf.getPosBigTable())
                  spilledMapJoinTables[pos] = null;
              }
            }
          }
        }
      }

      if (LOG.isInfoEnabled()) {
        LOG.info("spilled: " + spilled + " abort: " + abort + ". Clearing spilled partitions.");
      }

      // spilled tables are always loaded fresh (no sharing), so clear them
      clearAllTableContainers();
      cache.remove(cacheKey);
    }

    // in the MapReduce case, we always need to clear up, as MapReduce doesn't have an object registry.
    if ((this.getExecContext() != null) && (this.getExecContext().getLocalWork() != null)
        && (this.getExecContext().getLocalWork().getInputFileChangeSensitive())
        && !(HiveConf.getVar(hconf, ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")
            && SparkUtilities.isDedicatedCluster(hconf))) {
      if (LOG.isInfoEnabled()) {
        LOG.info("MR: Clearing all map join table containers.");
      }
      clearAllTableContainers();
    }

    this.loader = null;
    super.closeOp(abort);
  }

  private void clearAllTableContainers() {
    if (mapJoinTables != null) {
      for (MapJoinTableContainer tableContainer : mapJoinTables) {
        if (tableContainer != null) {
          tableContainer.clear();
        }
      }
    }
  }

  /**
   * Continue processing join between spilled hashtable(s) and spilled big table
   * @param partitionId the partition number across all small tables to process
   * @throws HiveException
   * @throws IOException
   * @throws SerDeException
   */
  private void continueProcess(int partitionId)
      throws HiveException, IOException, SerDeException, ClassNotFoundException {
    for (byte pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos != conf.getPosBigTable()) {
        LOG.info("Going to reload hash partition " + partitionId);
        reloadHashTable(pos, partitionId);
      }
    }
    reProcessBigTable(partitionId);
  }

  /**
   * Reload hashtable from the hash partition.
   * It can have two steps:
   * 1) Deserialize a serialized hash table, and
   * 2) Merge every key/value pair from small table container into the hash table
   * @param pos position of small table
   * @param partitionId the partition of the small table to be reloaded from
   * @throws IOException
   * @throws HiveException
   * @throws SerDeException
   */
  protected void reloadHashTable(byte pos, int partitionId)
      throws IOException, HiveException, SerDeException, ClassNotFoundException {
    HybridHashTableContainer container = (HybridHashTableContainer)mapJoinTables[pos];
    HashPartition partition = container.getHashPartitions()[partitionId];

    // Merge the sidefile into the newly created hash table
    // This is where the spilling may happen again
    LOG.info("Going to restore sidefile...");
    KeyValueContainer kvContainer = partition.getSidefileKVContainer();
    int rowCount = kvContainer.size();
    LOG.info("Hybrid Grace Hash Join: Number of rows restored from KeyValueContainer: " +
        kvContainer.size());

    // Deserialize the on-disk hash table
    // We're sure this part is smaller than memory limit
    if (rowCount <= 0) {
      rowCount = 1024 * 1024; // Since rowCount is used later to instantiate a BytesBytesMultiHashMap
                              // as the initialCapacity which cannot be 0, we provide a reasonable
                              // positive number here
    }
    LOG.info("Going to restore hashmap...");
    BytesBytesMultiHashMap restoredHashMap = partition.getHashMapFromDisk(rowCount);
    rowCount += restoredHashMap.getNumValues();
    LOG.info("Hybrid Grace Hash Join: Deserializing spilled hash partition...");
    LOG.info("Hybrid Grace Hash Join: Number of rows in hashmap: " + rowCount);

    // The size of the deserialized partition shouldn't exceed half of the memory limit.
    // If the estimate based on the restored row count is larger, we can only warn:
    // recursive spilling of a reloaded partition is not supported, so it is loaded anyway.
    if (rowCount * container.getTableRowSize() >= container.getMemoryThreshold() / 2) {
      LOG.warn("Hybrid Grace Hash Join: Hash table cannot be reloaded since it" +
          " will be greater than memory limit. Recursive spilling is currently not supported");
    }

    KeyValueHelper writeHelper = container.getWriteHelper();
    while (kvContainer.hasNext()) {
      ObjectPair<HiveKey, BytesWritable> pair = kvContainer.next();
      Writable key = pair.getFirst();
      Writable val = pair.getSecond();
      writeHelper.setKeyValue(key, val);
      restoredHashMap.put(writeHelper, -1);
    }

    container.setTotalInMemRowCount(container.getTotalInMemRowCount()
        + restoredHashMap.getNumValues());
    kvContainer.clear();

    spilledMapJoinTables[pos] = new MapJoinBytesTableContainer(restoredHashMap);
    spilledMapJoinTables[pos].setInternalValueOi(container.getInternalValueOi());
    spilledMapJoinTables[pos].setSortableSortOrders(container.getSortableSortOrders());
    spilledMapJoinTables[pos].setNullMarkers(container.getNullMarkers());
    spilledMapJoinTables[pos].setNotNullMarkers(container.getNotNullMarkers());
  }

  /**
   * Iterate over the big table row container and feed process() with leftover rows
   * @param partitionId the partition from which to take out spilled big table rows
   * @throws HiveException
   */
  protected void reProcessBigTable(int partitionId) throws HiveException {
    // For a binary join, firstSmallTable is the only small table and holds the reference to the
    // spilled big table rows.
    // For an n-way join, big table rows are spilled only once (while processing the first small
    // table), so again only firstSmallTable holds the reference to the spilled big table rows.
    HashPartition partition = firstSmallTable.getHashPartitions()[partitionId];
    ObjectContainer bigTable = partition.getMatchfileObjContainer();
    LOG.info("Hybrid Grace Hash Join: Going to process spilled big table rows in partition " +
        partitionId + ". Number of rows: " + bigTable.size());
    while (bigTable.hasNext()) {
      Object row = bigTable.next();
      process(row, conf.getPosBigTable());
    }
    bigTable.clear();
  }

  /**
   * Clean up data participating in the join, i.e. in-memory and on-disk data for the small table(s) and the big table
   */
  private void cleanupGraceHashJoin() {
    for (byte pos = 0; pos < mapJoinTables.length; pos++) {
      if (pos != conf.getPosBigTable()) {
        LOG.info("Cleaning up small table data at pos: " + pos);
        HybridHashTableContainer container = (HybridHashTableContainer) mapJoinTables[pos];
        container.clear();
      }
    }
  }

  /**
   * Implements the getName function for the Node Interface.
   *
   * @return the name of the operator
   */
  @Override
  public String getName() {
    return getOperatorName();
  }

  static public String getOperatorName() {
    return "MAPJOIN";
  }

  @Override
  public OperatorType getType() {
    return OperatorType.MAPJOIN;
  }

  protected boolean isInputFileChangeSensitive(ExecMapperContext mapContext) {
    return !(mapContext == null
        || mapContext.getLocalWork() == null
        || mapContext.getLocalWork().getInputFileChangeSensitive() == false);
  }

  protected boolean canSkipReload(ExecMapperContext mapContext) {
    return (this.hashTblInitedOnce && !isInputFileChangeSensitive(mapContext));
  }

  // If the loaded hash table is empty, for some conditions we can skip processing the big table rows.
  protected boolean canSkipJoinProcessing(ExecMapperContext mapContext) {
    if (!canSkipReload(mapContext)) {
      return false;
    }

    JoinCondDesc[] joinConds = getConf().getConds();
    if (joinConds.length > 0) {
      for (JoinCondDesc joinCond : joinConds) {
        if (joinCond.getType() != JoinDesc.INNER_JOIN) {
          return false;
        }
      }
    } else {
      return false;
    }

    boolean skipJoinProcessing = false;
    for (int idx = 0; idx < mapJoinTables.length; ++idx) {
      if (idx == getConf().getPosBigTable()) {
        continue;
      }
      MapJoinTableContainer mapJoinTable = mapJoinTables[idx];
      if (mapJoinTable.size() == 0) {
        // If any table is empty, an inner join involving the tables should yield 0 rows.
        LOG.info("Hash table number " + idx + " is empty");
        skipJoinProcessing = true;
        break;
      }
    }
    return skipJoinProcessing;
  }
}