All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.io.hfile.HFileUtils Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.io.hfile;

import org.apache.hudi.io.compress.CompressionCodec;
import org.apache.hudi.io.util.IOUtils;

import java.util.Collections;
import java.util.HashMap;
import java.util.Map;

import static org.apache.hudi.common.util.StringUtils.fromUTF8Bytes;

/**
 * Util methods for reading and writing HFile.
 *
 * <p>All methods are static and stateless; the only shared state is an unmodifiable
 * lookup table from on-storage compression codec ID to {@link CompressionCodec}.
 */
public class HFileUtils {
  // Typed explicitly so lookups return CompressionCodec without a cast.
  private static final Map<Integer, CompressionCodec> HFILE_COMPRESSION_CODEC_MAP =
      createCompressionCodecMap();

  /**
   * Gets the compression codec based on the ID.  This ID is written to the HFile on storage.
   *
   * @param id ID indicating the compression codec.
   * @return compression codec based on the ID.
   * @throws IllegalArgumentException if no codec is registered for the given ID.
   */
  public static CompressionCodec decodeCompressionCodec(int id) {
    CompressionCodec codec = HFILE_COMPRESSION_CODEC_MAP.get(id);
    if (codec == null) {
      throw new IllegalArgumentException("Compression code not found for ID: " + id);
    }
    return codec;
  }

  /**
   * Reads the HFile major version from the input.
   *
   * <p>The value is assembled from the three bytes at {@code offset}, {@code offset + 1}
   * and {@code offset + 2}, treated as unsigned, with the first byte most significant.
   *
   * @param bytes  input data.
   * @param offset offset to start reading.
   * @return major version of the file.
   */
  public static int readMajorVersion(byte[] bytes, int offset) {
    // Mask with 0xFF so that bytes >= 0x80 are not sign-extended.
    int ch1 = bytes[offset] & 0xFF;
    int ch2 = bytes[offset + 1] & 0xFF;
    int ch3 = bytes[offset + 2] & 0xFF;
    return ((ch1 << 16) + (ch2 << 8) + ch3);
  }

  /**
   * Compares two HFile {@link Key}.
   *
   * <p>Only the content region of each key (content offset and content length) is
   * passed to the byte comparison.
   *
   * @param key1 left operand key.
   * @param key2 right operand key.
   * @return 0 if equal, a negative value if left is less than right, a positive value otherwise.
   */
  public static int compareKeys(Key key1, Key key2) {
    return IOUtils.compareTo(
        key1.getBytes(), key1.getContentOffset(), key1.getContentLength(),
        key2.getBytes(), key2.getContentOffset(), key2.getContentLength());
  }

  /**
   * Checks whether the content of {@code key} starts with the content of {@code prefix}.
   *
   * @param prefix the prefix to check
   * @param key    key to check
   * @return whether the key starts with the prefix.
   */
  public static boolean isPrefixOfKey(Key prefix, Key key) {
    int prefixLength = prefix.getContentLength();
    // Bound by the key's content length, the same bound compareKeys uses: the loop
    // below walks the content region starting at getContentOffset(), and getLength()
    // is not guaranteed to equal the content length.
    int keyLength = key.getContentLength();
    if (prefixLength > keyLength) {
      return false;
    }

    byte[] prefixBytes = prefix.getBytes();
    byte[] keyBytes = key.getBytes();
    // Offsets do not change during the comparison, so resolve them once.
    int prefixOffset = prefix.getContentOffset();
    int keyOffset = key.getContentOffset();
    for (int i = 0; i < prefixLength; i++) {
      if (prefixBytes[prefixOffset + i] != keyBytes[keyOffset + i]) {
        return false;
      }
    }
    return true;
  }

  /**
   * Gets the value in String.
   *
   * @param kv {@link KeyValue} instance.
   * @return the String with UTF-8 decoding.
   */
  public static String getValue(KeyValue kv) {
    return fromUTF8Bytes(kv.getBytes(), kv.getValueOffset(), kv.getValueLength());
  }

  /**
   * The ID mapping cannot change or else that breaks all existing HFiles out there,
   * even the ones that are not compressed! (They use the NONE algorithm)
   * This is because HFile stores the ID to indicate which compression codec is used.
   *
   * @return the unmodifiable mapping of ID to compression codec.
   */
  private static Map<Integer, CompressionCodec> createCompressionCodecMap() {
    Map<Integer, CompressionCodec> result = new HashMap<>();
    result.put(0, CompressionCodec.LZO);
    result.put(1, CompressionCodec.GZIP);
    result.put(2, CompressionCodec.NONE);
    result.put(3, CompressionCodec.SNAPPY);
    result.put(4, CompressionCodec.LZ4);
    result.put(5, CompressionCodec.BZIP2);
    result.put(6, CompressionCodec.ZSTD);
    return Collections.unmodifiableMap(result);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy