All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.serde2.lazybinary.fast.LazyBinarySerializeWrite Maven / Gradle / Ivy

There is a newer version: 3.0.0-10
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hive.serde2.lazybinary.fast;

import java.io.IOException;
import java.sql.Date;
import java.sql.Timestamp;

import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveChar;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
import org.apache.hadoop.hive.common.type.HiveIntervalYearMonth;
import org.apache.hadoop.hive.common.type.HiveVarchar;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.io.DateWritable;
import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalDayTimeWritable;
import org.apache.hadoop.hive.serde2.io.HiveIntervalYearMonthWritable;
import org.apache.hadoop.hive.serde2.io.TimestampWritable;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.fast.SerializeWrite;
import org.apache.hive.common.util.DateUtils;

/*
 * Directly serialize, field-by-field, the LazyBinary format.
*
 * This is an alternative way to serialize than what is provided by LazyBinarySerDe.
  */
public class LazyBinarySerializeWrite implements SerializeWrite {
  public static final Log LOG = LogFactory.getLog(LazyBinarySerializeWrite.class.getName());

  private Output output;

  private int fieldCount;
  private int fieldIndex;
  private byte nullByte;
  private long nullOffset;

  // For thread safety, we allocate private writable objects for our use only.
  private HiveDecimalWritable hiveDecimalWritable;
  private TimestampWritable timestampWritable;
  private HiveIntervalYearMonthWritable hiveIntervalYearMonthWritable;
  private HiveIntervalDayTimeWritable hiveIntervalDayTimeWritable;
  private HiveIntervalDayTime hiveIntervalDayTime;

  public LazyBinarySerializeWrite(int fieldCount) {
    this();
    this.fieldCount = fieldCount;
  }

  // Not public since we must have the field count and other information.
  private LazyBinarySerializeWrite() {
  }

  /*
   * Set the buffer that will receive the serialized data.  The output buffer will be reset.
   */
  @Override
  public void set(Output output) {
    this.output = output;
    output.reset();
    fieldIndex = 0;
    nullByte = 0;
    nullOffset = 0;
  }

  /*
   * Set the buffer that will receive the serialized data.  The output buffer will NOT be reset.
   */
  @Override
  public void setAppend(Output output) {
    this.output = output;
    fieldIndex = 0;
    nullByte = 0;
    nullOffset = output.getLength();
  }

  /*
   * Reset the previously supplied buffer that will receive the serialized data.
   */
  @Override
  public void reset() {
    output.reset();
    fieldIndex = 0;
    nullByte = 0;
    nullOffset = 0;
  }

  /*
   * General Pattern:
   *
   *  // Every 8 fields we write a NULL byte.
   *  IF ((fieldIndex % 8) == 0), then
   *    IF (fieldIndex > 0), then
   *       Write back previous NullByte
   *       NullByte = 0
   *       Remember write position
   *    Allocate room for next NULL byte.
   *
   *  WHEN NOT NULL: Set bit in NULL byte; Write value.
   *  OTHERWISE NULL: We do not set a bit in the nullByte when we are writing a null.
   *
   *  Increment fieldIndex
   *
   *  IF (fieldIndex == fieldCount), then
   *     Write back final NullByte
   *
   */

  /*
   * Write a NULL field.
   */
  @Override
  public void writeNull() throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // We DO NOT set a bit in the NULL byte when we are writing a NULL.

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * BOOLEAN.
   */
  @Override
  public void writeBoolean(boolean v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    output.write((byte) (v ? 1 : 0));

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * BYTE.
   */
  @Override
  public void writeByte(byte v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    output.write(v);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * SHORT.
   */
  @Override
  public void writeShort(short v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    output.write((byte) (v >> 8));
    output.write((byte) (v));

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * INT.
   */
  @Override
  public void writeInt(int v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeVInt(output, v);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * LONG.
   */
  @Override
  public void writeLong(long v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeVLong(output, v);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * FLOAT.
   */
  @Override
  public void writeFloat(float vf) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    int v = Float.floatToIntBits(vf);
    output.write((byte) (v >> 24));
    output.write((byte) (v >> 16));
    output.write((byte) (v >> 8));
    output.write((byte) (v));

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * DOUBLE.
   */
  @Override
  public void writeDouble(double v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeDouble(output, v);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * STRING.
   * 
   * Can be used to write CHAR and VARCHAR when the caller takes responsibility for
   * truncation/padding issues.
   */
  @Override
  public void writeString(byte[] v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    int length = v.length;
    LazyBinaryUtils.writeVInt(output, length);

    output.write(v, 0, length);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  @Override
  public void writeString(byte[] v, int start, int length) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeVInt(output, length);

    output.write(v, start, length);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * CHAR.
   */
  @Override
  public void writeHiveChar(HiveChar hiveChar) throws IOException {
    String string = hiveChar.getStrippedValue();
    byte[] bytes = string.getBytes();
    writeString(bytes);
  }

  /*
   * VARCHAR.
   */
  @Override
  public void writeHiveVarchar(HiveVarchar hiveVarchar) throws IOException {
    String string = hiveVarchar.getValue();
    byte[] bytes = string.getBytes();
    writeString(bytes);
  }

  /*
   * BINARY.
   */
  @Override
  public void writeBinary(byte[] v) throws IOException {
    writeString(v);
  }

  @Override
  public void writeBinary(byte[] v, int start, int length) throws IOException {
    writeString(v, start, length);
  }

  /*
   * DATE.
   */
  @Override
  public void writeDate(Date date) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeVInt(output, DateWritable.dateToDays(date));

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  // We provide a faster way to write a date without a Date object.
  @Override
  public void writeDate(int dateAsDays) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    LazyBinaryUtils.writeVInt(output, dateAsDays);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * TIMESTAMP.
   */
  @Override
  public void writeTimestamp(Timestamp v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (timestampWritable == null) {
      timestampWritable = new TimestampWritable();
    }
    timestampWritable.set(v);
    timestampWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * INTERVAL_YEAR_MONTH.
   */
  @Override
  public void writeHiveIntervalYearMonth(HiveIntervalYearMonth viyt) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (hiveIntervalYearMonthWritable == null) {
      hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
    }
    hiveIntervalYearMonthWritable.set(viyt);
    hiveIntervalYearMonthWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  @Override
  public void writeHiveIntervalYearMonth(int totalMonths) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (hiveIntervalYearMonthWritable == null) {
      hiveIntervalYearMonthWritable = new HiveIntervalYearMonthWritable();
    }
    hiveIntervalYearMonthWritable.set(totalMonths);
    hiveIntervalYearMonthWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * INTERVAL_DAY_TIME.
   */
  @Override
  public void writeHiveIntervalDayTime(HiveIntervalDayTime vidt) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (hiveIntervalDayTimeWritable == null) {
      hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
    }
    hiveIntervalDayTimeWritable.set(vidt);
    hiveIntervalDayTimeWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  @Override
  public void writeHiveIntervalDayTime(long totalNanos) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (hiveIntervalDayTime == null) {
      hiveIntervalDayTime = new HiveIntervalDayTime();
    }
    if (hiveIntervalDayTimeWritable == null) {
      hiveIntervalDayTimeWritable = new HiveIntervalDayTimeWritable();
    }
    DateUtils.setIntervalDayTimeTotalNanos(hiveIntervalDayTime, totalNanos);
    hiveIntervalDayTimeWritable.set(hiveIntervalDayTime);
    hiveIntervalDayTimeWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }

  /*
   * DECIMAL.
   */
  @Override
  public void writeHiveDecimal(HiveDecimal v) throws IOException {

    // Every 8 fields we write a NULL byte.
    if ((fieldIndex % 8) == 0) {
      if (fieldIndex > 0) {
        // Write back previous 8 field's NULL byte.
        output.writeByte(nullOffset, nullByte);
        nullByte = 0;
        nullOffset = output.getLength();
      }
      // Allocate next NULL byte.
      output.reserve(1);
    }

    // Set bit in NULL byte when a field is NOT NULL.
    nullByte |= 1 << (fieldIndex % 8);

    if (hiveDecimalWritable == null) {
      hiveDecimalWritable = new HiveDecimalWritable();
    }
    hiveDecimalWritable.set(v);
    hiveDecimalWritable.writeToByteStream(output);

    fieldIndex++;

    if (fieldIndex == fieldCount) {
      // Write back the final NULL byte before the last fields.
      output.writeByte(nullOffset, nullByte);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy