All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.io.hfile.HFileBlock Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.io.hfile;

import org.apache.hudi.io.compress.CompressionCodec;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;

import static org.apache.hudi.io.hfile.DataSize.MAGIC_LENGTH;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_BYTE;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT32;
import static org.apache.hudi.io.hfile.DataSize.SIZEOF_INT64;
import static org.apache.hudi.io.util.IOUtils.readInt;

/**
 * Represents a block in a HFile. The types of blocks are defined in {@link HFileBlockType}.
 */
public abstract class HFileBlock {
  // The HFile block header size without checksum
  public static final int HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM =
      MAGIC_LENGTH + 2 * SIZEOF_INT32 + SIZEOF_INT64;
  // The HFile block header size with checksum
  // There is a 1 byte checksum type, followed by a 4 byte bytesPerChecksum
  // followed by another 4 byte value to store sizeofDataOnDisk.
  public static final int HFILEBLOCK_HEADER_SIZE =
      HFILEBLOCK_HEADER_SIZE_NO_CHECKSUM + SIZEOF_BYTE + 2 * SIZEOF_INT32;

  // Each checksum value is an integer that can be stored in 4 bytes.
  static final int CHECKSUM_SIZE = SIZEOF_INT32;

  static class Header {
    // Format of header is:
    // 8 bytes - block magic
    // 4 bytes int - onDiskSizeWithoutHeader
    // 4 bytes int - uncompressedSizeWithoutHeader
    // 8 bytes long - prevBlockOffset
    // The following 3 are only present if header contains checksum information
    // (which are present for HFile version 3)
    // 1 byte - checksum type
    // 4 byte int - bytes per checksum
    // 4 byte int - onDiskDataSizeWithHeader
    static int BLOCK_MAGIC_INDEX = 0;
    static int ON_DISK_SIZE_WITHOUT_HEADER_INDEX = 8;
    static int UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX = 12;
    static int PREV_BLOCK_OFFSET_INDEX = 16;
    static int CHECKSUM_TYPE_INDEX = 24;
    static int BYTES_PER_CHECKSUM_INDEX = 25;
    static int ON_DISK_DATA_SIZE_WITH_HEADER_INDEX = 29;
  }

  protected final HFileContext context;
  protected final byte[] byteBuff;
  protected final int startOffsetInBuff;
  protected final int sizeCheckSum;
  protected final int uncompressedEndOffset;
  private final HFileBlockType blockType;
  protected final int onDiskSizeWithoutHeader;
  protected final int uncompressedSizeWithoutHeader;
  protected final int bytesPerChecksum;
  private boolean isUnpacked = false;
  protected byte[] compressedByteBuff;
  protected int startOffsetInCompressedBuff;

  protected HFileBlock(HFileContext context,
                       HFileBlockType blockType,
                       byte[] byteBuff,
                       int startOffsetInBuff) {
    this.context = context;
    this.blockType = blockType;
    this.onDiskSizeWithoutHeader = readInt(
        byteBuff, startOffsetInBuff + Header.ON_DISK_SIZE_WITHOUT_HEADER_INDEX);
    this.uncompressedSizeWithoutHeader = readInt(
        byteBuff, startOffsetInBuff + Header.UNCOMPRESSED_SIZE_WITHOUT_HEADER_INDEX);
    this.bytesPerChecksum = readInt(
        byteBuff, startOffsetInBuff + Header.BYTES_PER_CHECKSUM_INDEX);
    this.sizeCheckSum = numChecksumBytes(getOnDiskSizeWithHeader(), bytesPerChecksum);
    if (CompressionCodec.NONE.equals(context.getCompressionCodec())) {
      isUnpacked = true;
      this.startOffsetInBuff = startOffsetInBuff;
      this.byteBuff = byteBuff;
    } else {
      this.startOffsetInCompressedBuff = startOffsetInBuff;
      this.compressedByteBuff = byteBuff;
      this.startOffsetInBuff = 0;
      this.byteBuff = allocateBufferForUnpacking();
    }
    this.uncompressedEndOffset =
        this.startOffsetInBuff + HFILEBLOCK_HEADER_SIZE + uncompressedSizeWithoutHeader;
  }

  /**
   * Parses the HFile block header and returns the {@link HFileBlock} instance based on the input.
   *
   * @param context           HFile context.
   * @param byteBuff          input data.
   * @param startOffsetInBuff offset to start parsing.
   * @return the {@link HFileBlock} instance based on the input.
   * @throws IOException if the block cannot be parsed.
   */
  public static HFileBlock parse(HFileContext context, byte[] byteBuff, int startOffsetInBuff)
      throws IOException {
    HFileBlockType blockType = HFileBlockType.parse(byteBuff, startOffsetInBuff);
    switch (blockType) {
      case ROOT_INDEX:
        return new HFileRootIndexBlock(context, byteBuff, startOffsetInBuff);
      case FILE_INFO:
        return new HFileFileInfoBlock(context, byteBuff, startOffsetInBuff);
      case DATA:
        return new HFileDataBlock(context, byteBuff, startOffsetInBuff);
      case META:
        return new HFileMetaBlock(context, byteBuff, startOffsetInBuff);
      default:
        throw new IOException(
            "Parsing of the HFile block type " + blockType + " is not supported");
    }
  }

  /**
   * Returns the number of bytes needed to store the checksums based on data size.
   *
   * @param numBytes         number of bytes of data.
   * @param bytesPerChecksum number of bytes covered by one checksum.
   * @return the number of bytes needed to store the checksum values.
   */
  static int numChecksumBytes(long numBytes, int bytesPerChecksum) {
    return numChecksumChunks(numBytes, bytesPerChecksum) * HFileBlock.CHECKSUM_SIZE;
  }

  /**
   * Returns the number of checksum chunks needed to store the checksums based on data size.
   *
   * @param numBytes         number of bytes of data.
   * @param bytesPerChecksum number of bytes in a checksum chunk.
   * @return the number of checksum chunks.
   */
  static int numChecksumChunks(long numBytes, int bytesPerChecksum) {
    long numChunks = numBytes / bytesPerChecksum;
    if (numBytes % bytesPerChecksum != 0) {
      numChunks++;
    }
    if (numChunks > Integer.MAX_VALUE / HFileBlock.CHECKSUM_SIZE) {
      throw new IllegalArgumentException("The number of chunks is too large: " + numChunks);
    }
    return (int) numChunks;
  }

  public HFileBlockType getBlockType() {
    return blockType;
  }

  public byte[] getByteBuff() {
    return byteBuff;
  }

  public int getOnDiskSizeWithHeader() {
    return onDiskSizeWithoutHeader + HFILEBLOCK_HEADER_SIZE;
  }

  /**
   * Decodes and decompresses the block content if the block content is compressed.
   * 

* This must be called for an encoded and compressed block before any reads. * * @throws IOException upon decoding and decompression error. */ public void unpack() throws IOException { if (!isUnpacked) { // Should only be called for compressed blocks CompressionCodec compression = context.getCompressionCodec(); if (compression != CompressionCodec.NONE) { // Copy the block header which is not compressed System.arraycopy( compressedByteBuff, startOffsetInCompressedBuff, byteBuff, 0, HFILEBLOCK_HEADER_SIZE); try (InputStream byteBuffInputStream = new ByteArrayInputStream( compressedByteBuff, startOffsetInCompressedBuff + HFILEBLOCK_HEADER_SIZE, onDiskSizeWithoutHeader)) { context.getDecompressor().decompress( byteBuffInputStream, byteBuff, HFILEBLOCK_HEADER_SIZE, byteBuff.length - HFILEBLOCK_HEADER_SIZE); } } isUnpacked = true; } } /** * Allocates new byte buffer for the uncompressed bytes. * * @return a new byte array based on the size of uncompressed data, holding the same header * bytes. */ protected byte[] allocateBufferForUnpacking() { int capacity = HFILEBLOCK_HEADER_SIZE + uncompressedSizeWithoutHeader + sizeCheckSum; return new byte[capacity]; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy