org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.ql.exec.persistence;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapper;
import org.apache.hadoop.hive.ql.exec.vector.VectorHashKeyWrapperBatch;
import org.apache.hadoop.hive.ql.exec.vector.expressions.VectorExpressionWriter;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Writable;

/**
 * The base class for MapJoinKey.
 * Ideally, this class should now be removed, as some table wrappers have no key object.
 */
public abstract class MapJoinKey {
  private static final byte[] EMPTY_BYTE_ARRAY = new byte[0];

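  /**
   * Serializes this key to the given stream using the SerDe from the supplied context.
   */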
  public abstract void write(MapJoinObjectSerDeContext context, ObjectOutputStream out)
      throws IOException, SerDeException;

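  /**
   * Returns whether any of the first {@code fieldCount} key fields is null,
   * ignoring fields whose corresponding {@code nullsafes} flag is set.
   */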
  public abstract boolean hasAnyNulls(int fieldCount, boolean[] nullsafes);

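  /**
   * Deserializes a key from the given writable using the SerDe in the context
   * and wraps it in a {@link MapJoinKeyObject}.
   */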
  @SuppressWarnings("deprecation")
  public static MapJoinKey read(Output output, MapJoinObjectSerDeContext context,
      Writable writable) throws SerDeException, HiveException {
    SerDe serde = context.getSerDe();
    Object obj = serde.deserialize(writable);
    MapJoinKeyObject result = new MapJoinKeyObject();
    result.read(serde.getObjectInspector(), obj);
    return result;
  }

  private static final HashSet<PrimitiveCategory> SUPPORTED_PRIMITIVES
      = new HashSet<PrimitiveCategory>();
  static {
    // All but decimal.
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VOID);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BOOLEAN);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BYTE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.SHORT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.INT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.LONG);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.FLOAT);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.DOUBLE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.STRING);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.DATE);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.TIMESTAMP);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.INTERVAL_YEAR_MONTH);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.INTERVAL_DAY_TIME);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.BINARY);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.VARCHAR);
    SUPPORTED_PRIMITIVES.add(PrimitiveCategory.CHAR);
  }

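  /**
   * Returns whether the field described by the given object inspector has a
   * primitive category contained in SUPPORTED_PRIMITIVES.
   */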
  public static boolean isSupportedField(ObjectInspector foi) {
    if (foi.getCategory() != Category.PRIMITIVE) return false; // not supported
    PrimitiveCategory pc = ((PrimitiveObjectInspector)foi).getPrimitiveCategory();
    if (!SUPPORTED_PRIMITIVES.contains(pc)) return false; // not supported
    return true;
  }

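  /**
   * Same check as {@link #isSupportedField(ObjectInspector)}, but starting from a type name string.
   */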
  public static boolean isSupportedField(String typeName) {
    TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(typeName);

    if (typeInfo.getCategory() != Category.PRIMITIVE) return false; // not supported
    PrimitiveTypeInfo primitiveTypeInfo = (PrimitiveTypeInfo) typeInfo;
    PrimitiveCategory pc = primitiveTypeInfo.getPrimitiveCategory();
    if (!SUPPORTED_PRIMITIVES.contains(pc)) return false; // not supported
    return true;
  }


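  /**
   * Builds a key from objects already extracted on the vectorized path,
   * reusing the passed-in key when {@code mayReuseKey} is true.
   */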
  public static MapJoinKey readFromVector(Output output, MapJoinKey key, Object[] keyObject,
      List<ObjectInspector> keyOIs, boolean mayReuseKey) throws HiveException {
    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
    result.setKeyObjects(keyObject);
    return result;
  }

  /**
   * Serializes a row of key columns to output for the vectorized path.
   * @param byteStream Output to reuse; may be null, in which case a new one is created.
   */
  public static Output serializeVector(Output byteStream, VectorHashKeyWrapper kw,
      VectorExpressionWriter[] keyOutputWriters, VectorHashKeyWrapperBatch keyWrapperBatch,
      boolean[] nulls, boolean[] sortableSortOrders) throws HiveException, SerDeException {
    Object[] fieldData = new Object[keyOutputWriters.length];
    List<ObjectInspector> fieldOis = new ArrayList<ObjectInspector>();
    for (int i = 0; i < keyOutputWriters.length; ++i) {
      VectorExpressionWriter writer = keyOutputWriters[i];
      fieldOis.add(writer.getObjectInspector());
      // This is rather convoluted... to simplify for perf, we could call getRawKeyValue
      // instead of writable, and serialize based on Java type as opposed to OI.
      fieldData[i] = keyWrapperBatch.getWritableKeyValue(kw, i, writer);
      if (nulls != null) {
        nulls[i] = (fieldData[i] == null);
      }
    }
    return serializeRow(byteStream, fieldData, fieldOis, sortableSortOrders);
  }

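  /**
   * Builds a key from row-mode key objects and their object inspectors,
   * reusing the passed-in key when {@code mayReuseKey} is true.
   */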
  public static MapJoinKey readFromRow(Output output, MapJoinKey key, Object[] keyObject,
      List<ObjectInspector> keyFieldsOI, boolean mayReuseKey) throws HiveException {
    MapJoinKeyObject result = mayReuseKey ? (MapJoinKeyObject)key : new MapJoinKeyObject();
    result.readFromRow(keyObject, keyFieldsOI);
    return result;
  }

  /**
   * Serializes a row of key columns to output.
   * @param byteStream Output to reuse; may be null, in which case a new one is created.
   */
  public static Output serializeRow(Output byteStream, Object[] fieldData,
      List<ObjectInspector> fieldOis, boolean[] sortableSortOrders) throws HiveException {
    if (byteStream == null) {
      byteStream = new Output();
    } else {
      byteStream.reset();
    }
    try {
      if (fieldData.length == 0) {
        byteStream.reset();
      } else if (sortableSortOrders == null) {
        LazyBinarySerDe.serializeStruct(byteStream, fieldData, fieldOis);
      } else {
        BinarySortableSerDe.serializeStruct(byteStream, fieldData, fieldOis, sortableSortOrders);
      }
    } catch (SerDeException e) {
      throw new HiveException("Serialization error", e);
    }
    return byteStream;
  }
}
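
For reference, a minimal usage sketch of the static serializeRow helper. The example class name, the sample Text/LongWritable key values, and the choice of writable object inspectors are illustrative assumptions, not something this file prescribes; passing null for both the reusable Output and the sort orders simply exercises the LazyBinarySerDe branch shown above.

import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.hive.ql.exec.persistence.MapJoinKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;

// Illustrative sketch only; not part of the Hive codebase.
public class MapJoinKeySerializeExample {
  public static void main(String[] args) throws HiveException {
    // Two key columns backed by Hadoop writables, with matching writable object inspectors.
    Object[] fieldData = new Object[] { new Text("k1"), new LongWritable(42L) };
    List<ObjectInspector> fieldOis = Arrays.<ObjectInspector>asList(
        PrimitiveObjectInspectorFactory.writableStringObjectInspector,
        PrimitiveObjectInspectorFactory.writableLongObjectInspector);

    // byteStream == null lets serializeRow allocate a new Output;
    // sortableSortOrders == null selects the LazyBinarySerDe path.
    Output out = MapJoinKey.serializeRow(null, fieldData, fieldOis, null);
    System.out.println("Serialized key length: " + out.getLength());
  }
}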