/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.hbase;

import java.io.IOException;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hive.hbase.ColumnMappings.ColumnMapping;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeUtils;
import org.apache.hadoop.hive.serde2.lazy.LazyUtils;
import org.apache.hadoop.hive.serde2.lazy.LazySerDeParameters;
import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.MapObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.io.Writable;

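/**
 * Serializes one Hive row (a STRUCT value) into an HBase {@link Put}, using the column
 * mappings, key factory, escaping options and optional put timestamp carried by
 * {@link HBaseSerDeParameters}. Within Hive this class is typically driven by the HBase
 * storage handler's SerDe on the write path rather than instantiated directly.
 *
 * <p>A minimal usage sketch, assuming an already initialized {@code HBaseSerDeParameters}
 * instance {@code params} plus a row object {@code row} and its
 * {@code StructObjectInspector} {@code rowOI}:
 * <pre>{@code
 *   HBaseRowSerializer serializer = new HBaseRowSerializer(params);
 *   Writable w = serializer.serialize(row, rowOI); // a PutWritable wrapping the HBase Put
 * }</pre>
 */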
public class HBaseRowSerializer {

  private final HBaseKeyFactory keyFactory;
  private final HBaseSerDeParameters hbaseParam;
  private final LazySerDeParameters serdeParam;

  private final int keyIndex;
  private final int timestampIndex;
  private final ColumnMapping keyMapping;
  private final ColumnMapping timestampMapping;
  private final ColumnMapping[] columnMappings;
  private final byte[] separators;      // the separators array
  private final boolean escaped;        // whether we need to escape the data when writing out
  private final byte escapeChar;        // which char to use as the escape char, e.g. '\\'
  private final boolean[] needsEscape;  // which chars need to be escaped. 

  private final long putTimestamp;
  private final ByteStream.Output output = new ByteStream.Output();

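  /**
   * Captures the SerDe options resolved at initialization time. A negative put timestamp is
   * treated as "not configured" by {@link #serialize(Object, ObjectInspector)}, which then
   * falls back to the mapped timestamp column, if any, or lets HBase assign the timestamp
   * at write time.
   */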
  public HBaseRowSerializer(HBaseSerDeParameters hbaseParam) {
    this.hbaseParam = hbaseParam;
    this.keyFactory = hbaseParam.getKeyFactory();
    this.serdeParam = hbaseParam.getSerdeParams();
    this.separators = serdeParam.getSeparators();
    this.escaped = serdeParam.isEscaped();
    this.escapeChar = serdeParam.getEscapeChar();
    this.needsEscape = serdeParam.getNeedsEscape();
    this.keyIndex = hbaseParam.getKeyIndex();
    this.timestampIndex = hbaseParam.getTimestampIndex();
    this.columnMappings = hbaseParam.getColumnMappings().getColumnsMapping();
    this.keyMapping = hbaseParam.getColumnMappings().getKeyMapping();
    this.timestampMapping = hbaseParam.getColumnMappings().getTimestampMapping();
    this.putTimestamp = hbaseParam.getPutTimestamp();
  }

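  /**
   * Turns one Hive row into an HBase {@code Put} wrapped in a {@link PutWritable}. The row
   * must be a STRUCT: the column at {@code keyIndex} becomes the row key via the configured
   * {@link HBaseKeyFactory}, the configured put timestamp or the mapped timestamp column (if
   * any) sets the cell timestamp, and every remaining column is written through
   * {@link #serializeField} according to its column mapping.
   */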
  public Writable serialize(Object obj, ObjectInspector objInspector) throws Exception {
    if (objInspector.getCategory() != ObjectInspector.Category.STRUCT) {
      throw new SerDeException(getClass().toString()
          + " can only serialize struct types, but we got: "
          + objInspector.getTypeName());
    }

    // Prepare the field ObjectInspectors
    StructObjectInspector soi = (StructObjectInspector) objInspector;
    List<? extends StructField> fields = soi.getAllStructFieldRefs();
    List<Object> values = soi.getStructFieldsDataAsList(obj);

    StructField field = fields.get(keyIndex);
    Object value = values.get(keyIndex);

    byte[] key = keyFactory.serializeKey(value, field);
    if (key == null) {
      throw new SerDeException("HBase row key cannot be NULL");
    }
    long timestamp = putTimestamp;
    if (timestamp < 0 && timestampIndex >= 0) {
      ObjectInspector inspector = fields.get(timestampIndex).getFieldObjectInspector();
      value = values.get(timestampIndex);
      if (inspector instanceof LongObjectInspector) {
        timestamp = ((LongObjectInspector)inspector).get(value);
      } else {
        PrimitiveObjectInspector primitive = (PrimitiveObjectInspector) inspector;
        timestamp = PrimitiveObjectInspectorUtils.getTimestamp(value, primitive).getTime();
      }
    }

    Put put = timestamp >= 0 ? new Put(key, timestamp) : new Put(key);

    // Serialize each field
    for (int i = 0; i < fields.size(); i++) {
      if (i == keyIndex || i == timestampIndex) {
        continue;
      }
      field = fields.get(i);
      value = values.get(i);
      serializeField(value, field, columnMappings[i], put);
    }

    return new PutWritable(put);
  }

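  /**
   * Serializes the row key column. A non-primitive value mapped to a primitive key column is
   * rendered as a JSON string; otherwise the key mapping's binary-storage flag decides between
   * fixed width binary and delimited UTF8 output.
   */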
  byte[] serializeKeyField(Object keyValue, StructField keyField, ColumnMapping keyMapping)
      throws IOException {
    if (keyValue == null) {
      throw new IOException("HBase row key cannot be NULL");
    }
    ObjectInspector keyFieldOI = keyField.getFieldObjectInspector();

    if (!keyFieldOI.getCategory().equals(ObjectInspector.Category.PRIMITIVE) &&
        keyMapping.isCategory(ObjectInspector.Category.PRIMITIVE)) {
      // we always serialize the String type using the escaped algorithm for LazyString
      return serialize(SerDeUtils.getJSONString(keyValue, keyFieldOI),
          PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
    }
    // use the serialization option switch to write primitive values either as a variable
    // length UTF8 string or as fixed width bytes when serializing in binary format
    boolean writeBinary = keyMapping.binaryStorage.get(0);
    return serialize(keyValue, keyFieldOI, 1, writeBinary);
  }

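  /**
   * Writes one non-key, non-timestamp column into the Put. A mapping without a qualifier name
   * stands for a whole column family: the Hive value must then be a MAP whose keys become the
   * column qualifiers. Otherwise a single cell is written under the mapping's family and
   * qualifier. Null values, and map entries whose serialized key or value comes back null,
   * are skipped.
   */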
  private void serializeField(
      Object value, StructField field, ColumnMapping colMap, Put put) throws IOException {
    if (value == null) {
      // null values are not serialized
      return;
    }
    // Get the field objectInspector and the field object.
    ObjectInspector foi = field.getFieldObjectInspector();

    // If the field corresponds to a column family in HBase
    if (colMap.qualifierName == null) {
      MapObjectInspector moi = (MapObjectInspector) foi;
      Map<?, ?> map = moi.getMap(value);
      if (map == null) {
        return;
      }
      ObjectInspector koi = moi.getMapKeyObjectInspector();
      ObjectInspector voi = moi.getMapValueObjectInspector();

      for (Map.Entry<?, ?> entry: map.entrySet()) {
        // Get the Key
        // Map keys are required to be primitive and may be serialized in binary format
        byte[] columnQualifierBytes = serialize(entry.getKey(), koi, 3, colMap.binaryStorage.get(0));
        if (columnQualifierBytes == null) {
          continue;
        }

        // Map values may be serialized in binary format when they are primitive and binary
        // serialization is the option selected
        byte[] bytes = serialize(entry.getValue(), voi, 3, colMap.binaryStorage.get(1));
        if (bytes == null) {
          continue;
        }

        put.add(colMap.familyNameBytes, columnQualifierBytes, bytes);
      }
    } else {
      byte[] bytes;
      // If the field passed in is NOT a primitive, and either the field is not declared
      // (no schema was given at initialization) or the field is declared as a primitive
      // in initialization, serialize the data as a JSON string. Otherwise serialize the
      // data in the delimited way.
      if (!foi.getCategory().equals(ObjectInspector.Category.PRIMITIVE)
          && colMap.isCategory(ObjectInspector.Category.PRIMITIVE)) {
        // we always serialize the String type using the escaped algorithm for LazyString
        bytes = serialize(SerDeUtils.getJSONString(value, foi),
            PrimitiveObjectInspectorFactory.javaStringObjectInspector, 1, false);
      } else {
        // use the serialization option switch to write primitive values either as a variable
        // length UTF8 string or as fixed width bytes when serializing in binary format
        bytes = serialize(value, foi, 1, colMap.binaryStorage.get(0));
      }

      if (bytes == null) {
        return;
      }

      put.add(colMap.familyNameBytes, colMap.qualifierNameBytes, bytes);
    }
  }

  /**
   * Serialize a single field value into the reusable ByteStream and return its bytes.
   *
   * @param obj           The object for the current field.
   * @param objInspector  The ObjectInspector for the current object.
   * @param level         The current level of separator.
   * @param writeBinary   Whether to write a primitive value as fixed width bytes (true) or as
   *                      a variable length UTF8 string (false).
   * @throws IOException  On error in writing to the serialization stream.
   * @return the serialized bytes, or null if the value could not be serialized
   *         (e.g. a null list, map or struct).
   */
  private byte[] serialize(Object obj, ObjectInspector objInspector, int level, boolean writeBinary)
      throws IOException {
    output.reset();
    if (objInspector.getCategory() == ObjectInspector.Category.PRIMITIVE && writeBinary) {
      LazyUtils.writePrimitive(output, obj, (PrimitiveObjectInspector) objInspector);
    } else {
      if (!serialize(obj, objInspector, level, output)) {
        return null;
      }
    }
    return output.toByteArray();
  }

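  /**
   * Recursively writes a value in Hive's delimited (LazySimple style) text encoding, using
   * {@code separators[level]} between elements and deeper separators for nested levels.
   * Returns false, writing nothing, for null lists, maps and structs; UNION values are not
   * supported here (see HIVE-2390).
   */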
  private boolean serialize(
      Object obj,
      ObjectInspector objInspector,
      int level, ByteStream.Output ss) throws IOException {

    switch (objInspector.getCategory()) {
      case PRIMITIVE:
        LazyUtils.writePrimitiveUTF8(ss, obj,
            (PrimitiveObjectInspector) objInspector, escaped, escapeChar, needsEscape);
        return true;
      case LIST:
        char separator = (char) separators[level];
        ListObjectInspector loi = (ListObjectInspector)objInspector;
        List<?> list = loi.getList(obj);
        ObjectInspector eoi = loi.getListElementObjectInspector();
        if (list == null) {
          return false;
        } else {
          for (int i = 0; i < list.size(); i++) {
            if (i > 0) {
              ss.write(separator);
            }
            serialize(list.get(i), eoi, level + 1, ss);
          }
        }
        return true;
      case MAP:
        char sep = (char) separators[level];
        char keyValueSeparator = (char) separators[level+1];
        MapObjectInspector moi = (MapObjectInspector) objInspector;
        ObjectInspector koi = moi.getMapKeyObjectInspector();
        ObjectInspector voi = moi.getMapValueObjectInspector();

        Map<?, ?> map = moi.getMap(obj);
        if (map == null) {
          return false;
        } else {
          boolean first = true;
          for (Map.Entry<?, ?> entry: map.entrySet()) {
            if (first) {
              first = false;
            } else {
              ss.write(sep);
            }
            serialize(entry.getKey(), koi, level+2, ss);
            ss.write(keyValueSeparator);
            serialize(entry.getValue(), voi, level+2, ss);
          }
        }
        return true;
      case STRUCT:
        sep = (char)separators[level];
        StructObjectInspector soi = (StructObjectInspector)objInspector;
        List<? extends StructField> fields = soi.getAllStructFieldRefs();
        list = soi.getStructFieldsDataAsList(obj);
        if (list == null) {
          return false;
        } else {
          for (int i = 0; i < list.size(); i++) {
            if (i > 0) {
              ss.write(sep);
            }

            serialize(list.get(i), fields.get(i).getFieldObjectInspector(),
                level + 1, ss);
          }
        }
        return true;
      case UNION:
        // union type currently not totally supported. See HIVE-2390
        return false;
      default:
        throw new RuntimeException("Unknown category type: " + objInspector.getCategory());
    }
  }
}