org.apache.hadoop.hive.ql.exec.JoinUtil
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.exec;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.metastore.api.hive_metastoreConstants;
import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.JoinDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.AbstractSerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.io.ShortWritable;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory;
import org.apache.hadoop.mapred.Reporter;
import org.apache.hadoop.mapred.SequenceFileInputFormat;
import org.apache.hive.common.util.ReflectionUtil;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class JoinUtil {

  private static final Logger LOG = LoggerFactory.getLogger(JoinUtil.class);

  /**
   * Represents the join result between two tables
   */
  public static enum JoinResult {
    MATCH,    // A match is found
    NOMATCH,  // No match is found, and the current row will be dropped
    SPILL     // The current row has been spilled to disk, as the join is postponed
  }

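  /**
   * Initializes each alias's value expression evaluators against the corresponding input
   * ObjectInspector and returns the resulting field ObjectInspectors, indexed by alias tag.
   * Entries for the big table alias (and for null inputs) are left null.
   */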
  public static List<ObjectInspector>[] getObjectInspectorsFromEvaluators(
      List<ExprNodeEvaluator>[] exprEntries,
      ObjectInspector[] inputObjInspector,
      int posBigTableAlias, int tagLen) throws HiveException {
    List<ObjectInspector>[] result = new List[tagLen];

    int iterate = Math.min(exprEntries.length, inputObjInspector.length);
    for (byte alias = 0; alias < iterate; alias++) {
      ObjectInspector inputOI = inputObjInspector[alias];

      // For vectorized reduce-side operators getting inputs from a reduce sink,
      // the row object inspector will get a flattened version of the object inspector
      // where the nested key/value structs are replaced with a single struct:
      // Example: { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
      // Would get converted to the following for a vectorized input:
      //   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
      // The ExprNodeEvaluator initialization below gets broken with the flattened
      // object inspectors, so convert it back to a form that contains the
      // nested key/value structs.
      inputOI = unflattenObjInspector(inputOI);

      if (alias == (byte) posBigTableAlias ||
          exprEntries[alias] == null || inputOI == null) {
        // skip the driver and directly loadable tables
        continue;
      }

      List<ExprNodeEvaluator> exprList = exprEntries[alias];
      List<ObjectInspector> fieldOIList = new ArrayList<ObjectInspector>();
      for (int i = 0; i < exprList.size(); i++) {
        fieldOIList.add(exprList.get(i).initialize(inputOI));
      }
      result[alias] = fieldOIList;
    }
    return result;
  }


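  /**
   * Converts each alias's field ObjectInspectors into standard (writable) ObjectInspectors,
   * indexed by alias tag. The big table alias and null entries are skipped.
   */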
  public static List<ObjectInspector>[] getStandardObjectInspectors(
      List<ObjectInspector>[] aliasToObjectInspectors,
      int posBigTableAlias, int tagLen) {
    List<ObjectInspector>[] result = new List[tagLen];
    for (byte alias = 0; alias < aliasToObjectInspectors.length; alias++) {
      // skip the big table alias and any null entries
      if (alias == (byte) posBigTableAlias || aliasToObjectInspectors[alias] == null) {
        continue;
      }

      List<ObjectInspector> oiList = aliasToObjectInspectors[alias];
      ArrayList<ObjectInspector> fieldOIList = new ArrayList<ObjectInspector>(
          oiList.size());
      for (int i = 0; i < oiList.size(); i++) {
        fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(oiList
            .get(i), ObjectInspectorCopyOption.WRITABLE));
      }
      result[alias] = fieldOIList;
    }
    return result;

  }

  public static int populateJoinKeyValue(List<ExprNodeEvaluator>[] outMap,
      Map<Byte, List<ExprNodeDesc>> inputMap, int posBigTableAlias, Configuration conf) throws HiveException {
    return populateJoinKeyValue(outMap, inputMap, null, posBigTableAlias, conf);
  }

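  /**
   * Builds one ExprNodeEvaluator list per alias tag from the descriptor map and returns the
   * total number of expressions processed. The big table alias only gets null placeholders,
   * since its expressions are not evaluated here.
   *
   * For example (illustrative values only): with inputMap = {0 -> [c1, c2], 1 -> [c3]} and
   * posBigTableAlias = 0, outMap[0] becomes [null, null], outMap[1] holds one evaluator for
   * c3, and the method returns 3.
   */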
  public static int populateJoinKeyValue(List<ExprNodeEvaluator>[] outMap,
      Map<Byte, List<ExprNodeDesc>> inputMap,
      Byte[] order,
      int posBigTableAlias, Configuration conf) throws HiveException {
    int total = 0;
    for (Entry<Byte, List<ExprNodeDesc>> e : inputMap.entrySet()) {
      if (e.getValue() == null) {
        continue;
      }
      Byte key = order == null ? e.getKey() : order[e.getKey()];
      List<ExprNodeEvaluator> valueFields = new ArrayList<ExprNodeEvaluator>();
      for (ExprNodeDesc expr : e.getValue()) {
        if (key == (byte) posBigTableAlias) {
          valueFields.add(null);
        } else {
          valueFields.add(expr == null ? null : ExprNodeEvaluatorFactory.get(expr, conf));
        }
      }
      outMap[key] = valueFields;
      total += valueFields.size();
    }

    return total;
  }


  /**
   * Return the key as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   */
  public static ArrayList<Object> computeKeys(Object row,
      List<ExprNodeEvaluator> keyFields, List<ObjectInspector> keyFieldsOI)
      throws HiveException {

    // Compute the keys
    ArrayList<Object> nr = new ArrayList<Object>(keyFields.size());
    for (int i = 0; i < keyFields.size(); i++) {

      nr.add(ObjectInspectorUtils.copyToStandardObject(keyFields.get(i)
          .evaluate(row), keyFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE));
    }

    return nr;
  }

  /**
   * Return the value as a standard object. StandardObject can be inspected by a
   * standard ObjectInspector.
   */
  public static Object[] computeMapJoinValues(Object row,
      List<ExprNodeEvaluator> valueFields, List<ObjectInspector> valueFieldsOI,
      List<ExprNodeEvaluator> filters, List<ObjectInspector> filtersOI,
      int[] filterMap) throws HiveException {

    // Compute the values
    Object[] nr;
    if (filterMap != null) {
      nr = new Object[valueFields.size() + 1];
      // add whether the row is filtered or not.
      nr[valueFields.size()] = new ShortWritable(isFiltered(row, filters, filtersOI, filterMap));
    } else {
      nr = new Object[valueFields.size()];
    }

    for (int i = 0; i < valueFields.size(); i++) {
      nr[i] = ObjectInspectorUtils.copyToStandardObject(valueFields.get(i)
          .evaluate(row), valueFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE);
    }

    return nr;
  }

  /**
   * Return the values as standard objects. StandardObject can be inspected by a
   * standard ObjectInspector.
   * If hasFilter is set, one extra slot is reserved for the filter tag.
   */
  public static List<Object> computeValues(Object row,
      List<ExprNodeEvaluator> valueFields, List<ObjectInspector> valueFieldsOI, boolean hasFilter)
      throws HiveException {

    // Compute the values
    int reserve = hasFilter ? valueFields.size() + 1 : valueFields.size();
    List<Object> nr = new ArrayList<Object>(reserve);
    for (int i = 0; i < valueFields.size(); i++) {
      nr.add(ObjectInspectorUtils.copyToStandardObject(valueFields.get(i)
          .evaluate(row), valueFieldsOI.get(i),
          ObjectInspectorCopyOption.WRITABLE));
    }
    return nr;
  }

  private static final short[] MASKS;
  static {
    int num = 32;
    MASKS = new short[num];
    MASKS[0] = 1;
    for (int idx = 1; idx < num; idx++) {
      MASKS[idx] = (short)(2 * MASKS[idx-1]);
    }
  }
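  // MASKS[i] is the bit used to mark alias/tag i in the short filter word:
  // MASKS[0] = 0x1, MASKS[1] = 0x2, MASKS[2] = 0x4, and so on. A filter word of
  // 0b0101 therefore means the row failed the filters of tags 0 and 2.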

  /**
   * Returns true if the row does not pass through filters.
   */
  protected static boolean isFiltered(Object row, List<ExprNodeEvaluator> filters,
          List<ObjectInspector> filtersOIs) throws HiveException {
    for (int i = 0; i < filters.size(); i++) {
      ExprNodeEvaluator evaluator = filters.get(i);
      Object condition = evaluator.evaluate(row);
      Boolean result = (Boolean) ((PrimitiveObjectInspector) filtersOIs.get(i)).
              getPrimitiveJavaObject(condition);
      if (result == null || !result) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns a short bitmask with one bit per filter tag; a set bit means the row
   * did not pass the filters for that tag.
   */
  protected static short isFiltered(Object row, List<ExprNodeEvaluator> filters,
      List<ObjectInspector> ois, int[] filterMap) throws HiveException {
    // apply join filters on the row.
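    // filterMap is laid out as {tag0, filterCount0, tag1, filterCount1, ...}: each pair names
    // an alias tag and how many consecutive entries of 'filters' belong to it. For example,
    // {0, 2, 1, 1} means filters 0-1 apply to tag 0 and filter 2 applies to tag 1.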
    short ret = 0;
    int j = 0;
    for (int i = 0; i < filterMap.length; i += 2) {
      int tag = filterMap[i];
      int length = filterMap[i + 1];

      boolean passed = true;
      for (; length > 0; length--, j++) {
        if (passed) {
          Object condition = filters.get(j).evaluate(row);
          Boolean result = (Boolean) ((PrimitiveObjectInspector)
              ois.get(j)).getPrimitiveJavaObject(condition);
          if (result == null || !result) {
            passed = false;
          }
        }
      }
      if (!passed) {
        ret |= MASKS[tag];
      }
    }
    return ret;
  }

  protected static boolean isFiltered(short filter, int tag) {
    return (filter & MASKS[tag]) != 0;
  }

  protected static boolean hasAnyFiltered(short tag) {
    return tag != 0;
  }

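  /**
   * Returns the spill TableDesc for the given alias, building the descriptors via
   * initSpillTables if none were supplied.
   */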
  public static TableDesc getSpillTableDesc(Byte alias, TableDesc[] spillTableDesc,
      JoinDesc conf, boolean noFilter) {
    if (spillTableDesc == null || spillTableDesc.length == 0) {
      spillTableDesc = initSpillTables(conf, noFilter);
    }
    return spillTableDesc[alias];
  }

  public static AbstractSerDe getSpillSerDe(byte alias, TableDesc[] spillTableDesc,
      JoinDesc conf, boolean noFilter) {
    TableDesc desc = getSpillTableDesc(alias, spillTableDesc, conf, noFilter);
    if (desc == null) {
      return null;
    }
    AbstractSerDe sd = (AbstractSerDe) ReflectionUtil.newInstance(desc.getSerDeClass(),
        null);
    try {
      sd.initialize(null, desc.getProperties(), null);
    } catch (SerDeException e) {
      LOG.warn("Error getting spill table", e);
      return null;
    }
    return sd;
  }

  public static TableDesc[] initSpillTables(JoinDesc conf, boolean noFilter) {
    int tagLen = conf.getTagLength();
    Map<Byte, List<ExprNodeDesc>> exprs = conf.getExprs();
    TableDesc[] spillTableDesc = new TableDesc[tagLen];
    for (int tag = 0; tag < exprs.size(); tag++) {
      List<ExprNodeDesc> valueCols = exprs.get((byte) tag);
      int columnSize = valueCols.size();
      StringBuilder colNames = new StringBuilder();
      StringBuilder colTypes = new StringBuilder();
      if (columnSize <= 0 && noFilter) {
        continue;
      }
      for (int k = 0; k < columnSize; k++) {
        String newColName = tag + "_VALUE_" + k; // any name will do; it does not matter
        colNames.append(newColName);
        colNames.append(',');
        colTypes.append(valueCols.get(k).getTypeString());
        colTypes.append(',');
      }
      if (!noFilter) {
        colNames.append("filtered");
        colNames.append(',');
        colTypes.append(TypeInfoFactory.shortTypeInfo.getTypeName());
        colTypes.append(',');
      }
      if (colNames.length() > 0) {
        // remove the last ','
        colNames.setLength(colNames.length() - 1);
        colTypes.setLength(colTypes.length() - 1);
      }
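      // E.g. for tag 0 with two int value columns and filters enabled (noFilter == false):
      // colNames = "0_VALUE_0,0_VALUE_1,filtered", colTypes = "int,int,smallint"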

      Properties props = new Properties();
      props.put(serdeConstants.SERIALIZATION_FORMAT, "" + Utilities.ctrlaCode);
      props.put(serdeConstants.LIST_COLUMNS, colNames.toString());
      props.put(serdeConstants.LIST_COLUMN_TYPES, colTypes.toString());
      props.put(serdeConstants.SERIALIZATION_LIB, LazyBinarySerDe.class.getName());
      props.put(hive_metastoreConstants.TABLE_BUCKETING_VERSION, "-1");
      TableDesc tblDesc = new TableDesc(
          SequenceFileInputFormat.class,
          HiveSequenceFileOutputFormat.class,
          props);
      spillTableDesc[tag] = tblDesc;
    }
    return spillTableDesc;
  }


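  /**
   * Creates a RowContainer for the given alias, wired to the spill table SerDe and TableDesc
   * built by initSpillTables so that buffered rows can spill to disk. When no spill SerDe is
   * available, containerSize is forced to -1.
   */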
  public static RowContainer<List<Object>> getRowContainer(Configuration hconf,
      List<ObjectInspector> structFieldObjectInspectors,
      Byte alias, int containerSize, TableDesc[] spillTableDesc,
      JoinDesc conf, boolean noFilter, Reporter reporter) throws HiveException {

    TableDesc tblDesc = JoinUtil.getSpillTableDesc(alias, spillTableDesc, conf, noFilter);
    AbstractSerDe serde = JoinUtil.getSpillSerDe(alias, spillTableDesc, conf, noFilter);

    if (serde == null) {
      containerSize = -1;
    }

    RowContainer<List<Object>> rc = new RowContainer<List<Object>>(containerSize, hconf, reporter);
    StructObjectInspector rcOI = null;
    if (tblDesc != null) {
      // arbitrary column names used internally for serializing to spill table
      List<String> colNames = Utilities.getColumnNames(tblDesc.getProperties());
      // object inspector for serializing input tuples
      rcOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames,
          structFieldObjectInspectors);
    }

    rc.setSerDe(serde, rcOI);
    rc.setTableDesc(tblDesc);
    return rc;
  }

  private static final String KEY_FIELD_PREFIX = (Utilities.ReduceField.KEY + ".").toLowerCase();
  private static final String VALUE_FIELD_PREFIX = (Utilities.ReduceField.VALUE + ".").toLowerCase();

  /**
   * Create a new struct object inspector for the list of struct fields, first removing the
   * prefix from each field name.
   * @param fields the struct fields to include
   * @param prefixToRemove the prefix (without the trailing '.') to strip from each field name
   * @return a standard struct ObjectInspector over the renamed fields
   */
  private static ObjectInspector createStructFromFields(List<StructField> fields, String prefixToRemove) {
    int prefixLength = prefixToRemove.length() + 1; // also remove the '.' after the prefix
    ArrayList<String> fieldNames = new ArrayList<String>();
    ArrayList<ObjectInspector> fieldOIs = new ArrayList<ObjectInspector>();
    for (StructField field : fields) {
      fieldNames.add(field.getFieldName().substring(prefixLength));
      fieldOIs.add(field.getFieldObjectInspector());
    }
    return ObjectInspectorFactory.getStandardStructObjectInspector(fieldNames, fieldOIs);
  }

  /**
   * Checks the input object inspector to see if it is in the form of a flattened struct
   * like the ones generated by a vectorized reduce sink input:
   *   { 'key.reducesinkkey0':int, 'value._col0':int, 'value._col1':int, .. }
   * If so, then it creates an "unflattened" struct that contains nested key/value
   * structs:
   *   { key: { reducesinkkey0:int }, value: { _col0:int, _col1:int, .. } }
   *
   * @param oi the input object inspector
   * @return unflattened object inspector if unflattening is needed,
   *         otherwise the original object inspector
   */
  private static ObjectInspector unflattenObjInspector(ObjectInspector oi) {
    if (oi instanceof StructObjectInspector) {
      // Check if all fields start with "key." or "value."
      // If so, then unflatten by adding a level of nested key and value structs
      // Example: { "key.reducesinkkey0":int, "key.reducesinkkey1": int, "value._col6":int }
      // Becomes
      //   { "key": { "reducesinkkey0":int, "reducesinkkey1":int }, "value": { "_col6":int } }
      ArrayList<StructField> keyFields = new ArrayList<StructField>();
      ArrayList<StructField> valueFields = new ArrayList<StructField>();
      for (StructField field : ((StructObjectInspector) oi).getAllStructFieldRefs()) {
        String fieldNameLower = field.getFieldName().toLowerCase();
        if (fieldNameLower.startsWith(KEY_FIELD_PREFIX)) {
          keyFields.add(field);
        } else if (fieldNameLower.startsWith(VALUE_FIELD_PREFIX)) {
          valueFields.add(field);
        } else {
          // Not a flattened struct, no need to unflatten
          return oi;
        }
      }

      // All field names start with "key." or "value."
      // Create key/value structs and add the respective fields to each one
      ArrayList<ObjectInspector> reduceFieldOIs = new ArrayList<ObjectInspector>();
      reduceFieldOIs.add(createStructFromFields(keyFields, Utilities.ReduceField.KEY.toString()));
      reduceFieldOIs.add(createStructFromFields(valueFields, Utilities.ReduceField.VALUE.toString()));

      // Finally create the outer struct to contain the key, value structs
      return ObjectInspectorFactory.getStandardStructObjectInspector(
          Utilities.reduceFieldNameList,
          reduceFieldOIs);
    }

    return oi;
  }
}