All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.io.hfile.HFileDataBlock Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.io.hfile;

import org.apache.hudi.common.util.Option;

import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_FOUND;
import static org.apache.hudi.io.hfile.HFileReader.SEEK_TO_IN_RANGE;
import static org.apache.hudi.io.hfile.KeyValue.KEY_OFFSET;

/**
 * Represents a {@link HFileBlockType#DATA} block.
 */
public class HFileDataBlock extends HFileBlock {
  // Hudi does not use HFile MVCC timestamp version so the version
  // is always 0, thus the byte length of the version is always 1.
  // This assumption is also validated when parsing {@link HFileInfo},
  // i.e., the maximum MVCC timestamp in a HFile must be 0.
  private static final long ZERO_TS_VERSION_BYTE_LENGTH = 1;

  // End offset of content in the block, relative to the start of the block
  protected final int uncompressedContentEndRelativeOffset;

  /**
   * Creates a data block view over the given buffer.
   *
   * @param context           {@link HFileContext} passed through to the parent block.
   * @param byteBuff          buffer holding the block bytes.
   * @param startOffsetInBuff offset in {@code byteBuff} at which the block starts.
   */
  protected HFileDataBlock(HFileContext context,
                           byte[] byteBuff,
                           int startOffsetInBuff) {
    super(context, HFileBlockType.DATA, byteBuff, startOffsetInBuff);

    // The content ends where the checksum begins; express that end relative to the block start
    // so it can be compared directly against block-relative cursor offsets.
    this.uncompressedContentEndRelativeOffset =
        this.uncompressedEndOffset - this.sizeCheckSum - this.startOffsetInBuff;
  }

  /**
   * Seeks to the key to look up. The key may not have an exact match.
   * <p>
   * The scan starts at the cursor's current position and moves forward one key-value pair at a
   * time until a key equal to or greater than the lookup key is found, or the end of the block
   * content is reached. If no key smaller than the lookup key is visited, the cursor is left at
   * the position it started from.
   *
   * @param cursor                 {@link HFileCursor} containing the current position relative
   *                               to the beginning of the HFile (not the block start offset).
   * @param key                    key to look up.
   * @param blockStartOffsetInFile the start offset of the block relative to the beginning of the
   *                               HFile.
   * @return 0 if the block contains the exact same key as the lookup key, and the cursor points
   * to the key; or 1 if the lookup key does not exist, and the cursor points to the
   * lexicographically largest key that is smaller than the lookup key.
   */
  public int seekTo(HFileCursor cursor, Key key, int blockStartOffsetInFile) {
    int relativeOffset = cursor.getOffset() - blockStartOffsetInFile;
    int lastRelativeOffset = relativeOffset;
    Option<KeyValue> lastKeyValue = cursor.getKeyValue();
    while (relativeOffset < uncompressedContentEndRelativeOffset) {
      // Full length is not known yet until parsing
      KeyValue kv = readKeyValue(relativeOffset);
      int comp = kv.getKey().compareTo(key);
      if (comp == 0) {
        // The lookup key equals the key `relativeOffset` points to; the key is found.
        // Set the cursor to the current offset that points to the exact match
        cursor.set(relativeOffset + blockStartOffsetInFile, kv);
        return SEEK_TO_FOUND;
      } else if (comp > 0) {
        // There is no matched key (otherwise, the method should already stop there and return 0)
        // and the key `relativeOffset` points to is already greater than the lookup key.
        // So set the cursor to the previous offset, pointing to the greatest key in the file
        // that is less than the lookup key.
        positionCursor(cursor, lastRelativeOffset + blockStartOffsetInFile, lastKeyValue);
        return SEEK_TO_IN_RANGE;
      }
      lastRelativeOffset = relativeOffset;
      // Compound assignment narrows the long length back to int, as offsets are ints here.
      relativeOffset += serializedLength(kv);
      lastKeyValue = Option.of(kv);
    }
    // We reach the end of the block. Set the cursor to the offset of last key.
    // In this case, the lookup key is greater than the last key.
    positionCursor(cursor, lastRelativeOffset + blockStartOffsetInFile, lastKeyValue);
    return SEEK_TO_IN_RANGE;
  }

  /**
   * Reads the key value at the offset.
   * <p>
   * NOTE(review): the offset is handed to {@link KeyValue} as an index into {@code byteBuff},
   * while {@link #seekTo} and {@link #next} pass offsets relative to the block start. These only
   * agree if the block starts at index 0 of {@code byteBuff} -- confirm against the callers that
   * construct this block.
   *
   * @param offset offset to read relative to the start of {@code byteBuff}.
   * @return the {@link KeyValue} instance.
   */
  public KeyValue readKeyValue(int offset) {
    return new KeyValue(byteBuff, offset);
  }

  /**
   * Moves the cursor to next {@link KeyValue}.
   *
   * @param cursor                 {@link HFileCursor} instance containing the current position.
   * @param blockStartOffsetInFile the start offset of the block relative to the beginning of the
   *                               HFile.
   * @return {@code true} if there is next {@link KeyValue}; {@code false} otherwise.
   */
  public boolean next(HFileCursor cursor, int blockStartOffsetInFile) {
    int offset = cursor.getOffset() - blockStartOffsetInFile;
    Option<KeyValue> keyValue = cursor.getKeyValue();
    if (!keyValue.isPresent()) {
      // The cursor has no cached key-value pair; parse the one at the current position so its
      // length is known.
      keyValue = Option.of(readKeyValue(offset));
    }
    cursor.increment(serializedLength(keyValue.get()));
    return cursor.getOffset() - blockStartOffsetInFile < uncompressedContentEndRelativeOffset;
  }

  /**
   * Computes the number of bytes from the start of a key-value pair to the start of the next
   * one: the fixed prefix before the key, the key, the value, and the one-byte MVCC version.
   */
  private static long serializedLength(KeyValue kv) {
    return (long) KEY_OFFSET + (long) kv.getKeyLength() + (long) kv.getValueLength()
        + ZERO_TS_VERSION_BYTE_LENGTH;
  }

  /**
   * Points the cursor at {@code offsetInFile}, attaching the key-value pair if it has already
   * been read; otherwise only the offset is set and the read is deferred till it's needed.
   */
  private static void positionCursor(HFileCursor cursor,
                                     int offsetInFile,
                                     Option<KeyValue> keyValue) {
    if (keyValue.isPresent()) {
      cursor.set(offsetInFile, keyValue.get());
    } else {
      cursor.setOffset(offsetInFile);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy