/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.io;
/**
* This class makes sense of {@link RecordIdentifier#getBucketProperty()}. Up until ASF Hive 3.0 this
* field was simply the bucket ID. Since 3.0 it does bit packing to store several things:
* top 3 bits - version describing the format (we can only have 8).
* The rest is version specific - see below.
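* <p>
* A minimal decoding sketch (illustrative only; the property would normally come from
* {@link RecordIdentifier#getBucketProperty()}, and {@code recordIdentifier} below is a hypothetical instance):
* <pre>{@code
* int bucketProperty = recordIdentifier.getBucketProperty();
* BucketCodec codec = BucketCodec.determineVersion(bucketProperty); // inspects the top 3 bits
* int bucketId = codec.decodeWriterId(bucketProperty);
* int statementId = codec.decodeStatementId(bucketProperty);
* }</pre>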
*/
public enum BucketCodec {
  /**
   * This is the "legacy" version. The whole {@code bucket} value just has the bucket ID in it.
   * The numeric code for this version is 0. (Assumes the bucket ID fits in the low 29 bits, which
   * implies the top 3 bits are 000, so data written before Hive 3.0 is readable with this scheme.)
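   * <p>
   * A small illustration (the value 7 is arbitrary): {@code V0.decodeWriterId(7)} returns
   * {@code 7} unchanged and {@code V0.decodeStatementId(7)} always returns {@code 0}.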
   */
  V0(0) {
    @Override
    public int decodeWriterId(int bucketProperty) {
      return bucketProperty;
    }
    @Override
    public int decodeStatementId(int bucketProperty) {
      return 0;
    }
    @Override
    public int encode(AcidOutputFormat.Options options) {
      return options.getBucketId();
    }
  },
  /**
   * Represents the format of the "bucket" property in Hive 3.0.
   * top 3 bits - version code.
   * next 1 bit - reserved for future use.
   * next 12 bits - the bucket ID.
   * next 4 bits - reserved for future use.
   * remaining 12 bits - the statement ID - 0-based numbering of all statements within a
   * transaction. Each leg of a multi-insert statement gets a separate statement ID.
   * The reserved bits align the fields so that the value is easier to interpret in hex.
   *
   * Constructs like Merge and Multi-Insert may have multiple tasks writing data that belongs to
   * the same physical bucket file. For example, a Merge stmt with update and insert clauses
   * (and split update enabled - should be the default in 3.0): a task on behalf of the insert may
   * be writing a row into bucket 0 and another task in the update branch may be writing an insert
   * event into bucket 0. Each of these tasks writes to a different delta directory, distinguished
   * by statement ID. By including both bucket ID and statement ID in {@link RecordIdentifier}
   * we ensure that {@link RecordIdentifier} is unique.
   *
   * The intent is that sorting rows by {@link RecordIdentifier} groups rows in the same physical
   * bucket next to each other.
   * For any row created by a given version of Hive, the top 3 bits are constant. The next
   * most significant bits are the bucket ID, then the statement ID. This ensures that
   * {@link org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer} works, since it is
   * designed so that each task only needs to keep 1 writer open at a time. It could be
   * configured such that a single writer sees data for multiple buckets, so it must "group" data
   * by bucket ID (and then sort within each bucket as required); this is achieved by sorting
   * on {@link RecordIdentifier}, which includes {@link RecordIdentifier#getBucketProperty()},
   * which has the actual bucket ID in its high-order bits. This scheme also ensures that
   * {@link org.apache.hadoop.hive.ql.exec.FileSinkOperator#process(Object, int)} works in case
   * numBuckets > numReducers. (The latter could be fixed by changing how writers are
   * initialized in "if (fpaths.acidLastBucket != bucketNum) {".)
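   * <p>
   * A purely illustrative example of the bit layout (the bucket and statement IDs are arbitrary):
   * encoding bucket ID 1 with statement ID 0 under V1 yields
   * {@code (1 << 29) | (1 << 16) | 0}, i.e. {@code 0x20010000} (536936448). Decoding reverses this:
   * <pre>{@code
   * int prop = 0x20010000;
   * BucketCodec codec = BucketCodec.determineVersion(prop); // top 3 bits = 001 -> V1
   * int bucketId = codec.decodeWriterId(prop);              // (prop & 0x0FFF0000) >>> 16 == 1
   * int stmtId = codec.decodeStatementId(prop);             // prop & 0x00000FFF == 0
   * }</pre>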
   */
  V1(1) {
    @Override
    public int decodeWriterId(int bucketProperty) {
      // bucket/writer ID occupies bits 16..27 (12 bits)
      return (bucketProperty & 0b0000_1111_1111_1111_0000_0000_0000_0000) >>> 16;
    }
    @Override
    public int decodeStatementId(int bucketProperty) {
      // statement ID occupies the low 12 bits
      return (bucketProperty & 0b0000_0000_0000_0000_0000_1111_1111_1111);
    }
    @Override
    public int encode(AcidOutputFormat.Options options) {
      int statementId = options.getStatementId() >= 0 ? options.getStatementId() : 0;
      assert this.version >= 0 && this.version <= MAX_VERSION
          : "Version out of range: " + version;
      if (!(options.getBucketId() >= 0 && options.getBucketId() <= MAX_BUCKET_ID)) {
        throw new IllegalArgumentException("bucketId out of range: " + options.getBucketId());
      }
      if (!(statementId >= 0 && statementId <= MAX_STATEMENT_ID)) {
        throw new IllegalArgumentException("statementId out of range: " + statementId);
      }
      // layout: [version:3][reserved:1][bucketId:12][reserved:4][statementId:12]
      return this.version << (1 + NUM_BUCKET_ID_BITS + 4 + NUM_STATEMENT_ID_BITS) |
          options.getBucketId() << (4 + NUM_STATEMENT_ID_BITS) | statementId;
    }
  };
  private static final int TOP3BITS_MASK = 0b1110_0000_0000_0000_0000_0000_0000_0000;
  private static final int NUM_VERSION_BITS = 3;
  private static final int NUM_BUCKET_ID_BITS = 12;
  private static final int NUM_STATEMENT_ID_BITS = 12;
  private static final int MAX_VERSION = (1 << NUM_VERSION_BITS) - 1;
  private static final int MAX_BUCKET_ID = (1 << NUM_BUCKET_ID_BITS) - 1;
  private static final int MAX_STATEMENT_ID = (1 << NUM_STATEMENT_ID_BITS) - 1;
  public static BucketCodec determineVersion(int bucket) {
    assert 7 << 29 == BucketCodec.TOP3BITS_MASK;
    // look at top 3 bits and return appropriate enum
    try {
      return getCodec((BucketCodec.TOP3BITS_MASK & bucket) >>> 29);
    }
    catch (IllegalArgumentException ex) {
      throw new IllegalArgumentException(ex.getMessage() + " Cannot decode version from " + bucket);
    }
  }
  public static BucketCodec getCodec(int version) {
    switch (version) {
      case 0:
        return BucketCodec.V0;
      case 1:
        return BucketCodec.V1;
      default:
        throw new IllegalArgumentException("Illegal 'bucket' format. Version=" + version);
    }
  }
  final int version;
  BucketCodec(int version) {
    this.version = version;
  }
  /**
   * For bucketed tables this is the bucketId; otherwise it is the writerId.
   */
  public abstract int decodeWriterId(int bucketProperty);
  public abstract int decodeStatementId(int bucketProperty);
  public abstract int encode(AcidOutputFormat.Options options);
  public int getVersion() {
    return version;
  }
}