All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hudi.io.util.IOUtils Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.io.util;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;

/**
 * Util methods on I/O.
 *
 * <p>All fixed-width readers and writers in this class use big-endian byte order
 * (most significant byte first). No method performs explicit bounds checking; out-of-range
 * offsets or lengths surface as {@link ArrayIndexOutOfBoundsException} or
 * {@link IndexOutOfBoundsException} from the underlying array access.
 */
public class IOUtils {
  /**
   * Reads four bytes starting from the offset in the input and returns {@code int} value.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code int} value, decoded as big-endian.
   */
  public static int readInt(byte[] bytes, int offset) {
    // Mask each byte with 0xff so that sign extension does not pollute the higher bits.
    return (((bytes[offset] & 0xff) << 24)
        | ((bytes[offset + 1] & 0xff) << 16)
        | ((bytes[offset + 2] & 0xff) << 8)
        | (bytes[offset + 3] & 0xff));
  }

  /**
   * Reads eight bytes starting from the offset in the input and returns {@code long} value.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code long} value, decoded as big-endian.
   */
  public static long readLong(byte[] bytes, int offset) {
    // Widen to long BEFORE shifting; an int shift by 32 or more would wrap around.
    return (((long) (bytes[offset] & 0xff) << 56)
        | ((long) (bytes[offset + 1] & 0xff) << 48)
        | ((long) (bytes[offset + 2] & 0xff) << 40)
        | ((long) (bytes[offset + 3] & 0xff) << 32)
        | ((long) (bytes[offset + 4] & 0xff) << 24)
        | ((long) (bytes[offset + 5] & 0xff) << 16)
        | ((long) (bytes[offset + 6] & 0xff) << 8)
        | (long) (bytes[offset + 7] & 0xff));
  }

  /**
   * Reads two bytes starting from the offset in the input and returns {@code short} value.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code short} value, decoded as big-endian.
   */
  public static short readShort(byte[] bytes, int offset) {
    return (short) (((bytes[offset] & 0xFF) << 8) | (bytes[offset + 1] & 0xFF));
  }

  /**
   * Parses the first byte of a variable-length encoded number (integer or long value) to determine
   * total number of bytes representing the number on disk.
   *
   * @param bytes  input byte array of the encoded number.
   * @param offset offset to start reading.
   * @return the total number of bytes (1 to 9) on disk.
   */
  public static int decodeVarLongSizeOnDisk(byte[] bytes, int offset) {
    return decodeVarLongSize(bytes[offset]);
  }

  /**
   * Parses the first byte of a variable-length encoded number (integer or long value) to determine
   * total number of bytes representing the number on disk.
   *
   * <p>A first byte in {@code [-112, 127]} is the value itself (one byte in total). A first byte
   * in {@code [-120, -113]} or {@code [-128, -121]} is a marker that is followed by 1 to 8
   * payload bytes.
   *
   * @param value the first byte of the encoded number.
   * @return the total number of bytes (1 to 9) on disk.
   */
  public static int decodeVarLongSize(byte value) {
    if (value >= -112) {
      return 1;
    } else if (value < -120) {
      // Marker range [-128, -121]: 2 to 9 bytes in total.
      return -119 - value;
    }
    // Marker range [-120, -113]: 2 to 9 bytes in total.
    return -111 - value;
  }

  /**
   * Reads a variable-length encoded number from input bytes and returns it.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return decoded {@code long} from the input.
   */
  public static long readVarLong(byte[] bytes, int offset) {
    return readVarLong(bytes, offset, decodeVarLongSizeOnDisk(bytes, offset));
  }

  /**
   * Reads a variable-length encoded number from input bytes and the decoded size on disk,
   * and returns it.
   *
   * @param bytes             input byte array.
   * @param offset            offset to start reading.
   * @param varLongSizeOnDisk the total number of bytes (1 to 9) on disk.
   * @return decoded {@code long} from the input.
   */
  public static long readVarLong(byte[] bytes, int offset, int varLongSizeOnDisk) {
    byte firstByte = bytes[offset];
    if (varLongSizeOnDisk == 1) {
      // Single-byte encoding: the byte is the (signed) value.
      return firstByte;
    }
    // The payload after the marker byte is a big-endian magnitude.
    long value = 0;
    for (int i = 0; i < varLongSizeOnDisk - 1; i++) {
      value = value << 8;
      value = value | (bytes[offset + 1 + i] & 0xFF);
    }
    // Negative numbers are stored as the bitwise complement of the value.
    return (isNegativeVarLong(firstByte) ? (~value) : value);
  }

  /**
   * Given the first byte of a variable-length encoded number, determines the sign.
   *
   * @param value the first byte.
   * @return is the value negative.
   */
  public static boolean isNegativeVarLong(byte value) {
    return value < -120 || (value >= -112 && value < 0);
  }

  /**
   * Copies a range of the input byte array into a newly allocated array.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @param length length of bytes to copy.
   * @return a new copy of the byte array.
   */
  public static byte[] copy(byte[] bytes, int offset, int length) {
    byte[] copy = new byte[length];
    System.arraycopy(bytes, offset, copy, 0, length);
    return copy;
  }

  /**
   * Lexicographically compares two byte arrays.
   *
   * @param bytes1 left operand.
   * @param bytes2 right operand.
   * @return 0 if equal, &lt; 0 if left is less than right, &gt; 0 otherwise.
   */
  public static int compareTo(byte[] bytes1, byte[] bytes2) {
    return compareTo(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length);
  }

  /**
   * Lexicographically compares two byte arrays.
   *
   * <p>Bytes are compared as unsigned values. When one range is a prefix of the other, the
   * shorter range is considered smaller.
   *
   * @param bytes1  left operand.
   * @param bytes2  right operand.
   * @param offset1 where to start comparing in the left buffer.
   * @param offset2 where to start comparing in the right buffer.
   * @param length1 how much to compare from the left buffer.
   * @param length2 how much to compare from the right buffer.
   * @return 0 if equal, &lt; 0 if left is less than right, &gt; 0 otherwise.
   */
  public static int compareTo(byte[] bytes1, int offset1, int length1,
                              byte[] bytes2, int offset2, int length2) {
    // Short circuit: the very same range of the very same array.
    if (bytes1 == bytes2 && offset1 == offset2 && length1 == length2) {
      return 0;
    }
    int end1 = offset1 + length1;
    int end2 = offset2 + length2;
    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
      int a = (bytes1[i] & 0xff);
      int b = (bytes2[j] & 0xff);
      if (a != b) {
        // Both operands are within [0, 255], so the subtraction cannot overflow.
        return a - b;
      }
    }
    return length1 - length2;
  }

  /**
   * Returns the start position of the first occurrence of the specified {@code
   * target} within {@code array}, or {@code -1} if there is no such occurrence.
   *
   * <p>More formally, returns the lowest index {@code i} such that the range
   * [i, i + target.length) in {@code array} contains exactly the same elements
   * as {@code target}.
   *
   * @param array  the array to search for the sequence {@code target}.
   * @param target the array to search for as a sub-sequence of {@code array}.
   * @return the start position if found; {@code -1} if there is no such occurrence.
   */
  public static int indexOf(byte[] array, byte[] target) {
    if (target.length == 0) {
      return 0;
    }

    outer:
    for (int i = 0; i < array.length - target.length + 1; i++) {
      for (int j = 0; j < target.length; j++) {
        if (array[i + j] != target[j]) {
          continue outer;
        }
      }
      return i;
    }
    return -1;
  }

  /**
   * Converts a range of bytes to a {@link String}, mapping every byte to one {@code char}.
   *
   * <p>NOTE: each byte is cast to {@code char} directly, so bytes with the high bit set
   * (negative as a Java {@code byte}) are sign-extended to {@code U+FF80..U+FFFF}. Only ASCII
   * input (0x00 to 0x7F) maps to the characters with the same code points.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @param length length of bytes to read.
   * @return {@link String} value based on the byte array.
   */
  public static String bytesToString(byte[] bytes, int offset, int length) {
    StringBuilder sb = new StringBuilder();
    for (int i = offset; i < offset + length; i++) {
      sb.append((char) bytes[i]);
    }
    return sb.toString();
  }

  /**
   * Converts an int value to a byte array using big-endian.
   *
   * @param val value to convert.
   * @return the byte array.
   */
  public static byte[] toBytes(int val) {
    byte[] b = new byte[4];
    // Fill from the least significant byte (last index) towards the front.
    for (int i = 3; i > 0; i--) {
      b[i] = (byte) val;
      val >>>= 8;
    }
    b[0] = (byte) val;
    return b;
  }

  /**
   * Converts a long value to a byte array using big-endian.
   *
   * @param val value to convert.
   * @return the byte array.
   */
  public static byte[] toBytes(long val) {
    byte[] b = new byte[8];
    // Fill from the least significant byte (last index) towards the front.
    for (int i = 7; i > 0; i--) {
      b[i] = (byte) val;
      val >>>= 8;
    }
    b[0] = (byte) val;
    return b;
  }

  /**
   * Computes a hash code over a range of bytes, starting from 1 and folding in each (signed)
   * byte as {@code hash = 31 * hash + b}.
   *
   * @param bytes  byte array to hash.
   * @param offset offset to start hashing.
   * @param length length of bytes to hash.
   * @return the generated hash code.
   */
  public static int hashCode(byte[] bytes, int offset, int length) {
    int hash = 1;
    for (int i = offset; i < offset + length; i++) {
      hash = (31 * hash) + bytes[i];
    }
    return hash;
  }

  /**
   * Reads the data fully from the {@link InputStream} to the byte array.
   *
   * <p>Keeps reading until {@code length} bytes have been read or the end of the stream is
   * reached, whichever comes first.
   *
   * @param inputStream     {@link InputStream} containing the data.
   * @param targetByteArray target byte array.
   * @param offset          offset in the target byte array to start to write data.
   * @param length          maximum amount of data to write.
   * @return size of bytes read; less than {@code length} if the stream ended early.
   * @throws IOException upon error.
   */
  public static int readFully(InputStream inputStream,
                              byte[] targetByteArray,
                              int offset,
                              int length) throws IOException {
    int totalBytesRead = 0;
    int bytesRead;
    while (totalBytesRead < length) {
      bytesRead = inputStream.read(targetByteArray, offset + totalBytesRead, length - totalBytesRead);
      if (bytesRead < 0) {
        // End of stream reached before the requested amount was read.
        break;
      }
      totalBytesRead += bytesRead;
    }
    return totalBytesRead;
  }

  /**
   * Reads all remaining data from the {@link InputStream} into a new byte array.
   *
   * @param input      {@link InputStream} containing the data.
   * @param outputSize initial capacity of the intermediate buffer; the stream is read until
   *                   its end regardless of this value.
   * @return the bytes read from the stream.
   * @throws IOException upon error.
   */
  public static byte[] readAsByteArray(InputStream input, int outputSize) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(outputSize);
    copy(input, bos);
    return bos.toByteArray();
  }

  /**
   * Copies all remaining data from the {@link InputStream} to the {@link OutputStream}.
   * Neither stream is closed by this method.
   *
   * @param inputStream  source stream.
   * @param outputStream destination stream.
   * @throws IOException upon error.
   */
  public static void copy(InputStream inputStream, OutputStream outputStream) throws IOException {
    byte[] buffer = new byte[1024];
    int len;
    while ((len = inputStream.read(buffer)) != -1) {
      outputStream.write(buffer, 0, len);
    }
  }

  /**
   * Creates a {@link DataInputStream} over the content of the byte buffer, from the index 0
   * (inclusive) to the limit (exclusive), regardless of the current position. The stream reads
   * from the backing array directly, without copying.
   *
   * @param byteBuffer {@link ByteBuffer} containing the bytes; must be backed by an accessible
   *                   array.
   * @return {@link DataInputStream} based on the byte buffer.
   * @throws UnsupportedOperationException    if the buffer is not backed by an array.
   * @throws java.nio.ReadOnlyBufferException if the buffer is read-only.
   */
  public static DataInputStream getDataInputStream(ByteBuffer byteBuffer) {
    // Buffer index i lives at array()[arrayOffset() + i], so the readable region of the backing
    // array is [arrayOffset, arrayOffset + limit). The length is therefore limit(), NOT
    // limit() - arrayOffset(), which would truncate the stream for sliced buffers.
    return new DataInputStream(new ByteArrayInputStream(
        byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit()));
  }

  /**
   * Returns a new byte array, copied from the given {@code buf}, from the index 0 (inclusive)
   * to the limit (exclusive), regardless of the current position.
   * The position and the other index parameters are not changed.
   *
   * @param buf a byte buffer.
   * @return the byte array.
   */
  public static byte[] toBytes(ByteBuffer buf) {
    // Work on a duplicate so that the position of the caller's buffer is untouched.
    ByteBuffer dup = buf.duplicate();
    dup.position(0);
    return readBytes(dup);
  }

  /**
   * Reads all remaining bytes (position to limit) of the buffer into a new array, advancing
   * the buffer's position to its limit.
   *
   * @param buf a byte buffer.
   * @return the remaining bytes.
   */
  private static byte[] readBytes(ByteBuffer buf) {
    byte[] result = new byte[buf.remaining()];
    buf.get(result);
    return result;
  }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy