All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hive.serde2.io.TimestampWritable Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.serde2.io;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.sql.Timestamp;
import java.text.DateFormat;
import java.text.SimpleDateFormat;
import java.util.Arrays;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.common.type.HiveDecimal;
import org.apache.hadoop.hive.serde2.ByteStream.Output;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils;
import org.apache.hadoop.hive.serde2.lazybinary.LazyBinaryUtils.VInt;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hadoop.io.WritableUtils;

/**
 * TimestampWritable
 * Writable equivalent of java.sql.Timestamp
 *
 * Timestamps are of the format
 *    YYYY-MM-DD HH:MM:SS.[fff...]
 *
 * We encode Unix timestamp in seconds in 4 bytes, using the MSB to signify
 * whether the timestamp has a fractional portion.
 *
 * The fractional portion is reversed, and encoded as a VInt
 * so timestamps with less precision use fewer bytes.
 *
 *      0.1    -> 1
 *      0.01   -> 10
 *      0.001  -> 100
 *
 * The value is held either as a {@link Timestamp} object or as encoded bytes
 * (in an internal buffer or in a caller-supplied array); conversion between
 * the two happens lazily. Because the MSB of the seconds word is used as a
 * flag, only 31 bits of seconds survive a round trip through the byte form.
 *
 * Instances keep unsynchronized mutable state and may alias caller-supplied
 * byte arrays and Timestamp objects, so they must not be shared between
 * threads without external coordination.
 */
public class TimestampWritable implements WritableComparable<TimestampWritable> {
  private static final Log LOG = LogFactory.getLog(TimestampWritable.class);

  public static final byte[] nullBytes = {0x0, 0x0, 0x0, 0x0};

  private static final int NO_DECIMAL_MASK = 0x7FFFFFFF;
  private static final int HAS_DECIMAL_MASK = 0x80000000;

  private static final int NANOS_PER_SECOND = 1000000000;

  /** Number of decimal digits in the nanosecond field. */
  private static final int NANOS_DIGITS = 9;

  /** 4 bytes of seconds plus at most 5 bytes of VInt-encoded reversed nanos. */
  private static final int MAX_ENCODED_LENGTH = 9;

  /* SimpleDateFormat is not thread-safe, so each thread gets its own instance. */
  private static final ThreadLocal<DateFormat> threadLocalDateFormat =
      new ThreadLocal<DateFormat>() {
        @Override
        protected DateFormat initialValue() {
          return new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
        }
      };

  private Timestamp timestamp = new Timestamp(0);

  /**
   * true if data is stored in timestamp field rather than byte arrays.
   *      allows for lazy conversion to bytes when necessary
   * false otherwise
   */
  private boolean bytesEmpty;

  /**
   * true if the timestamp field does not hold the current value and has to be
   * (re)populated from the byte representation before it is handed out.
   */
  private boolean timestampEmpty;

  /* Allow use of external byte[] for efficiency */
  private byte[] currentBytes;
  private final byte[] internalBytes = new byte[MAX_ENCODED_LENGTH];
  private byte[] externalBytes;
  private int offset;

  /* Constructors */

  /**
   * Creates a writable backed by the zero-filled internal buffer, i.e. the
   * epoch with no fractional part.
   */
  public TimestampWritable() {
    // internalBytes is freshly allocated and therefore already zero-filled.
    bytesEmpty = false;
    currentBytes = internalBytes;
    offset = 0;

    clearTimestamp();
  }

  /**
   * Creates a writable that reads its value from the given array without
   * copying it.
   *
   * @param bytes array holding an encoded timestamp
   * @param offset index of the first encoded byte in bytes
   */
  public TimestampWritable(byte[] bytes, int offset) {
    set(bytes, offset);
  }

  /**
   * Creates a writable from a private copy of the encoded bytes of t.
   *
   * @param t the writable to copy
   */
  public TimestampWritable(TimestampWritable t) {
    this(t.getBytes(), 0);
  }

  /**
   * Creates a writable holding the given Timestamp (by reference).
   *
   * @param t the timestamp to hold; null is treated as the epoch
   */
  public TimestampWritable(Timestamp t) {
    set(t);
  }

  /**
   * Points this writable at an encoded timestamp inside a caller-supplied
   * array. The array is referenced, not copied.
   *
   * @param bytes array holding an encoded timestamp
   * @param offset index of the first encoded byte in bytes
   */
  public void set(byte[] bytes, int offset) {
    externalBytes = bytes;
    this.offset = offset;
    bytesEmpty = false;
    currentBytes = externalBytes;

    clearTimestamp();
  }

  /**
   * Makes this writable hold the given Timestamp. The object is stored by
   * reference; the byte form is derived lazily when needed.
   *
   * @param t the timestamp to hold; null resets this writable to the epoch
   */
  public void set(Timestamp t) {
    if (t == null) {
      // Install a fresh epoch value instead of mutating the currently held
      // Timestamp (which may belong to a caller), and fall through so the
      // representation flags are updated like for any other value.
      t = new Timestamp(0);
    }
    this.timestamp = t;
    bytesEmpty = true;
    timestampEmpty = false;
  }

  /**
   * Makes this writable hold the same value as t. The underlying Timestamp
   * object or byte array of t is shared, not copied.
   *
   * @param t the writable whose value is taken over
   */
  public void set(TimestampWritable t) {
    if (t.bytesEmpty) {
      set(t.getTimestamp());
      return;
    }
    // offset is always reset to 0 whenever currentBytes is switched to the
    // internal buffer, so t.offset is valid for both backing arrays.
    set(t.currentBytes, t.offset);
  }

  /** Marks the timestamp field as not holding the current value. */
  private void clearTimestamp() {
    timestampEmpty = true;
  }

  /**
   * Appends the encoded form of this timestamp to the given byte stream.
   *
   * @param byteStream destination stream
   */
  public void writeToByteStream(Output byteStream) {
    checkBytes();
    byteStream.write(currentBytes, offset, getTotalLength());
  }

  /**
   *
   * @return seconds corresponding to this TimestampWritable; taken from the
   *         Timestamp (milliseconds / 1000, narrowed to int) when the value is
   *         Timestamp-backed, otherwise the 31-bit value stored in the bytes
   */
  public int getSeconds() {
    if (bytesEmpty) {
      return (int) (timestamp.getTime() / 1000);
    }
    return TimestampWritable.getSeconds(currentBytes, offset);
  }

  /**
   *
   * @return nanoseconds in this TimestampWritable
   */
  public int getNanos() {
    if (!timestampEmpty) {
      return timestamp.getNanos();
    }

    return hasDecimal() ? TimestampWritable.getNanos(currentBytes, offset + 4) : 0;
  }

  /**
   *
   * @return length of serialized TimestampWritable data
   */
  private int getTotalLength() {
    return 4 + getDecimalLength();
  }

  /**
   *
   * @return number of bytes the variable length decimal takes up
   */
  private int getDecimalLength() {
    checkBytes();
    return hasDecimal() ? WritableUtils.decodeVIntSize(currentBytes[offset + 4]) : 0;
  }

  /**
   * Returns the value as a Timestamp, populating the internally held object
   * from the byte form first if necessary. The returned object is the one
   * held by this writable, not a copy.
   *
   * @return the Timestamp held by this writable
   */
  public Timestamp getTimestamp() {
    if (timestampEmpty) {
      populateTimestamp();
    }
    return timestamp;
  }

  /**
   * Used to create copies of objects
   * @return a copy of the internal TimestampWritable byte[]
   */
  public byte[] getBytes() {
    checkBytes();

    int len = getTotalLength();
    byte[] b = new byte[len];

    System.arraycopy(currentBytes, offset, b, 0, len);
    return b;
  }

  /**
   * @return byte[] representation of TimestampWritable that is binary
   * sortable (4 byte seconds, 4 bytes for nanoseconds)
   */
  public byte[] getBinarySortable() {
    byte[] b = new byte[8];
    int nanos = getNanos();
    // The high bit of the seconds word is always set in the sortable form.
    int seconds = HAS_DECIMAL_MASK | getSeconds();
    intToBytes(seconds, b, 0);
    intToBytes(nanos, b, 4);
    return b;
  }

  /**
   * Given a byte[] that has binary sortable data, initialize the internal
   * structures to hold that data
   * @param bytes array holding 4 bytes of seconds followed by 4 bytes of nanos
   * @param offset index of the first of those 8 bytes
   */
  public void setBinarySortable(byte[] bytes, int offset) {
    int seconds = bytesToInt(bytes, offset);
    int nanos = bytesToInt(bytes, offset + 4);
    if (nanos == 0) {
      seconds &= NO_DECIMAL_MASK;
    } else {
      seconds |= HAS_DECIMAL_MASK;
    }
    intToBytes(seconds, internalBytes, 0);
    setNanosBytes(nanos, internalBytes, 4);
    currentBytes = internalBytes;
    this.offset = 0;

    // The bytes are now authoritative; without this a writable that was
    // previously set from a Timestamp would keep reporting the old value.
    bytesEmpty = false;
    clearTimestamp();
  }

  /**
   * The data of TimestampWritable can be stored either in a byte[]
   * or in a Timestamp object. Calling this method ensures that the byte[]
   * is populated from the Timestamp object if previously empty.
   */
  private void checkBytes() {
    if (bytesEmpty) {
      // Populate byte[] from Timestamp
      convertTimestampToBytes(timestamp, internalBytes, 0);
      offset = 0;
      currentBytes = internalBytes;
      bytesEmpty = false;
    }
  }

  /**
   *
   * @return the timestamp as seconds since the epoch, with the nanosecond
   *         field contributing the fractional part
   */
  public double getDouble() {
    double seconds, nanos;
    if (bytesEmpty) {
      seconds = timestamp.getTime() / 1000;
      nanos = timestamp.getNanos();
    } else {
      seconds = getSeconds();
      nanos = getNanos();
    }
    return seconds + nanos / NANOS_PER_SECOND;
  }

  /**
   * Reads an encoded timestamp into the internal buffer: 4 bytes of seconds
   * and, if the MSB of the first byte is set, a VInt with the reversed
   * fractional digits.
   *
   * @param in source of the encoded bytes
   * @throws IOException if reading fails or the VInt header announces more
   *         bytes than an encoded timestamp can occupy
   */
  public void readFields(DataInput in) throws IOException {
    in.readFully(internalBytes, 0, 4);
    if (TimestampWritable.hasDecimal(internalBytes[0])) {
      in.readFully(internalBytes, 4, 1);
      int len = WritableUtils.decodeVIntSize(internalBytes[4]);
      if (4 + len > internalBytes.length) {
        throw new IOException(
            "Invalid timestamp encoding: fractional part claims " + len + " bytes");
      }
      in.readFully(internalBytes, 5, len - 1);
    }
    currentBytes = internalBytes;
    this.offset = 0;

    // The bytes just read are authoritative; drop any Timestamp-backed state
    // left over from a previous use of this (possibly reused) writable.
    bytesEmpty = false;
    clearTimestamp();
  }

  /**
   * Writes the encoded form of this timestamp to the given stream.
   *
   * @param out destination stream
   * @throws IOException if the stream rejects the write
   */
  public void write(OutputStream out) throws IOException {
    checkBytes();
    out.write(currentBytes, offset, getTotalLength());
  }

  /**
   * Writes the encoded form of this timestamp to the given DataOutput.
   *
   * @param out destination
   * @throws IOException if the destination rejects the write
   */
  public void write(DataOutput out) throws IOException {
    // Use DataOutput's own bulk write; not every DataOutput is an
    // OutputStream, so casting would fail for such implementations.
    checkBytes();
    out.write(currentBytes, offset, getTotalLength());
  }

  /**
   * Orders by seconds, then by nanoseconds.
   *
   * @param t the writable to compare against
   * @return a negative value, zero or a positive value if this timestamp is
   *         before, equal to or after t
   */
  public int compareTo(TimestampWritable t) {
    checkBytes();
    int s1 = this.getSeconds();
    int s2 = t.getSeconds();
    // Explicit comparisons instead of subtraction, which can overflow.
    if (s1 != s2) {
      return s1 < s2 ? -1 : 1;
    }
    int n1 = this.getNanos();
    int n2 = t.getNanos();
    if (n1 == n2) {
      return 0;
    }
    return n1 < n2 ? -1 : 1;
  }

  /**
   * @param o the object to compare with
   * @return true if o is a TimestampWritable with the same seconds and nanos
   */
  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }
    if (!(o instanceof TimestampWritable)) {
      // Also covers null.
      return false;
    }
    return compareTo((TimestampWritable) o) == 0;
  }

  /**
   * Formats the value as "yyyy-MM-dd HH:mm:ss" followed by the fractional
   * part reported by Timestamp.toString(), unless that part is just ".0".
   */
  @Override
  public String toString() {
    if (timestampEmpty) {
      populateTimestamp();
    }

    String formatted = threadLocalDateFormat.get().format(timestamp);
    String timestampString = timestamp.toString();
    if (timestampString.length() > 19) {
      // Everything from index 19 on is taken as the fractional suffix.
      String fraction = timestampString.substring(19);
      if (!".0".equals(fraction)) {
        return formatted + fraction;
      }
    }
    return formatted;
  }

  @Override
  public int hashCode() {
    long seconds = getSeconds();
    seconds <<= 32;
    seconds |= getNanos();
    return (int) ((seconds >>> 32) ^ seconds);
  }

  /**
   * Loads seconds and nanos into the held Timestamp object. Note that this
   * mutates whichever Timestamp object this writable currently references.
   */
  private void populateTimestamp() {
    long seconds = getSeconds();
    int nanos = getNanos();
    timestamp.setTime(seconds * 1000);
    timestamp.setNanos(nanos);
  }

  /** Static methods **/

  /**
   * Gets seconds stored as integer at bytes[offset]
   * @param bytes array holding an encoded timestamp
   * @param offset index of the first encoded byte
   * @return the number of seconds (the flag bit is masked off)
   */
  public static int getSeconds(byte[] bytes, int offset) {
    return NO_DECIMAL_MASK & bytesToInt(bytes, offset);
  }

  /**
   * Decodes the VInt holding the reversed fractional digits and turns it back
   * into nanoseconds.
   *
   * @param bytes array holding the VInt
   * @param offset index of the first byte of the VInt
   * @return the nanoseconds; the stored value is expected to be a
   *         non-negative number of at most nine digits
   */
  public static int getNanos(byte[] bytes, int offset) {
    // A per-call VInt: a shared static scratch object would be raced on by
    // concurrent callers of this static method.
    VInt vInt = new VInt();
    LazyBinaryUtils.readVInt(bytes, offset, vInt);
    int remaining = vInt.value;

    // Reverse the decimal digits, counting them as we go.
    int nanos = 0;
    int digits = 0;
    while (remaining != 0) {
      nanos = nanos * 10 + remaining % 10;
      remaining /= 10;
      digits++;
    }

    // The stored number has its trailing zeros stripped (they were leading
    // zeros before the reversal); pad back to nine digits.
    for (; digits < NANOS_DIGITS; digits++) {
      nanos *= 10;
    }
    return nanos;
  }

  /**
   * Writes a Timestamp's serialized value to byte array b at offset
   * @param t the timestamp to encode
   * @param b destination array
   * @param offset index in b at which the encoding starts
   */
  public static void convertTimestampToBytes(Timestamp t, byte[] b,
      int offset) {
    // Only logged, not rejected: an encoding without a fractional part needs
    // just 4 bytes, so a shorter buffer can still be sufficient.
    if (b.length - offset < MAX_ENCODED_LENGTH) {
      LOG.error("byte array too short");
    }
    long millis = t.getTime();
    int nanos = t.getNanos();

    boolean hasDecimal = nanos != 0 && setNanosBytes(nanos, b, offset + 4);
    setSecondsBytes(millis, b, offset, hasDecimal);
  }

  /**
   * Given a time in milliseconds, write its whole seconds (with the flag bit
   * set or cleared according to hasDecimal) to the byte array b at offset
   * @param millis time in milliseconds
   * @param b destination array
   * @param offset index in b at which the 4 bytes are written
   * @param hasDecimal whether a fractional part follows the seconds
   */
  private static void setSecondsBytes(long millis, byte[] b, int offset, boolean hasDecimal) {
    int seconds = (int) (millis / 1000);

    if (hasDecimal) {
      seconds |= HAS_DECIMAL_MASK;
    } else {
      seconds &= NO_DECIMAL_MASK;
    }

    intToBytes(seconds, b, offset);
  }

  /**
   * Given an integer representing nanoseconds, write its serialized
   * value to the byte array b at offset
   *
   * @param nanos nanoseconds to encode
   * @param b destination array
   * @param offset index in b at which the VInt starts
   * @return true if the encoded (reversed) value is non-zero
   */
  private static boolean setNanosBytes(int nanos, byte[] b, int offset) {
    // Reverse all nine digits so that trailing zeros of the nanosecond value
    // become leading zeros and vanish from the encoded number.
    int decimal = 0;
    for (int i = 0; i < NANOS_DIGITS; i++) {
      decimal = decimal * 10 + nanos % 10;
      nanos /= 10;
    }

    LazyBinaryUtils.writeVLongToByteArray(b, offset, decimal);
    return decimal != 0;
  }

  /**
   * Interprets a float as a unix timestamp and returns a Timestamp object
   * @param f seconds since the epoch
   * @return the equivalent Timestamp object
   */
  public static Timestamp floatToTimestamp(float f) {
    return doubleToTimestamp((double) f);
  }

  /**
   * Interprets a decimal as seconds since the epoch and returns a Timestamp.
   *
   * @param d seconds since the epoch, possibly fractional or negative
   * @return the equivalent Timestamp object
   */
  public static Timestamp decimalToTimestamp(HiveDecimal d) {
    long seconds = d.longValue();
    int nanos = d.bigDecimalValue()
        .subtract(new BigDecimal(seconds))
        .multiply(new BigDecimal(NANOS_PER_SECOND))
        .intValue();

    // Timestamp.setNanos rejects negative values, so for a negative fraction
    // borrow one second and express the remainder as positive nanos.
    if (nanos < 0) {
      seconds -= 1;
      nanos += NANOS_PER_SECOND;
    }

    Timestamp t = new Timestamp(seconds * 1000);
    t.setNanos(nanos);

    return t;
  }

  /**
   * Interprets a double as seconds since the epoch and returns a Timestamp.
   *
   * @param f seconds since the epoch, possibly fractional or negative
   * @return the equivalent Timestamp object
   */
  public static Timestamp doubleToTimestamp(double f) {
    long seconds = (long) f;

    // We must ensure the exactness of the double's fractional portion.
    // 0.6 as the fraction part will be converted to 0.59999... and
    // significantly reduce the savings from binary serialization
    BigDecimal bd = new BigDecimal(String.valueOf(f));
    bd = bd.subtract(new BigDecimal(seconds)).multiply(new BigDecimal(NANOS_PER_SECOND));
    int nanos = bd.intValue();

    // Convert to millis
    long millis = seconds * 1000;

    // Timestamp.setNanos rejects negative values, so for a negative fraction
    // borrow one second and express the remainder as positive nanos.
    if (nanos < 0) {
      millis -= 1000;
      nanos += NANOS_PER_SECOND;
    }
    Timestamp t = new Timestamp(millis);

    // Set remaining fractional portion to nanos
    t.setNanos(nanos);
    return t;
  }

  /**
   * Decodes the timestamp stored at bytes[offset] into an existing Timestamp.
   *
   * @param t the Timestamp to overwrite
   * @param bytes array holding an encoded timestamp
   * @param offset index of the first encoded byte
   */
  public static void setTimestamp(Timestamp t, byte[] bytes, int offset) {
    boolean hasDecimal = hasDecimal(bytes[offset]);
    t.setTime(((long) TimestampWritable.getSeconds(bytes, offset)) * 1000);
    if (hasDecimal) {
      t.setNanos(TimestampWritable.getNanos(bytes, offset + 4));
    }
  }

  /**
   * Decodes the timestamp stored at bytes[offset] into a new Timestamp.
   *
   * @param bytes array holding an encoded timestamp
   * @param offset index of the first encoded byte
   * @return the decoded Timestamp
   */
  public static Timestamp createTimestamp(byte[] bytes, int offset) {
    Timestamp t = new Timestamp(0);
    TimestampWritable.setTimestamp(t, bytes, offset);
    return t;
  }

  /**
   * @return true if the encoded form of this timestamp carries a fractional
   *         portion
   */
  public boolean hasDecimal() {
    // Make sure the bytes reflect the current value; for a Timestamp-backed
    // writable currentBytes may otherwise be unset or out of date.
    checkBytes();
    return hasDecimal(currentBytes[offset]);
  }

  /**
   *
   * @param b first byte in an encoded TimestampWritable
   * @return true if it has a decimal portion, false otherwise
   */
  public static boolean hasDecimal(byte b) {
    return (b >> 7) != 0;
  }

  /**
   * Writes value into dest at offset, most significant byte first
   * @param value the int to write
   * @param dest destination array
   * @param offset index in dest of the most significant byte
   */
  private static void intToBytes(int value, byte[] dest, int offset) {
    dest[offset] = (byte) ((value >> 24) & 0xFF);
    dest[offset + 1] = (byte) ((value >> 16) & 0xFF);
    dest[offset + 2] = (byte) ((value >> 8) & 0xFF);
    dest[offset + 3] = (byte) (value & 0xFF);
  }

  /**
   *
   * @param bytes source array
   * @param offset index of the most significant byte
   * @return integer represented by the four bytes in bytes
   *  beginning at offset
   */
  private static int bytesToInt(byte[] bytes, int offset) {
    return ((0xFF & bytes[offset]) << 24)
        | ((0xFF & bytes[offset + 1]) << 16)
        | ((0xFF & bytes[offset + 2]) << 8)
        | (0xFF & bytes[offset + 3]);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy