// org.apache.hudi.io.util.IOUtils Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.io.util;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
/**
 * Util methods on I/O.
 *
 * <p>All helpers are static. The byte-array readers and writers use big-endian byte order
 * (most significant byte first).
 */
public class IOUtils {
  /**
   * Reads four bytes starting from the offset in the input and returns {@code int} value.
   * The bytes are interpreted in big-endian order.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code int} value.
   */
  public static int readInt(byte[] bytes, int offset) {
    return (((bytes[offset] & 0xff) << 24)
        | ((bytes[offset + 1] & 0xff) << 16)
        | ((bytes[offset + 2] & 0xff) << 8)
        | (bytes[offset + 3] & 0xff));
  }

  /**
   * Reads eight bytes starting from the offset in the input and returns {@code long} value.
   * The bytes are interpreted in big-endian order.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code long} value.
   */
  public static long readLong(byte[] bytes, int offset) {
    return (((long) (bytes[offset] & 0xff) << 56)
        | ((long) (bytes[offset + 1] & 0xff) << 48)
        | ((long) (bytes[offset + 2] & 0xff) << 40)
        | ((long) (bytes[offset + 3] & 0xff) << 32)
        | ((long) (bytes[offset + 4] & 0xff) << 24)
        | ((long) (bytes[offset + 5] & 0xff) << 16)
        | ((long) (bytes[offset + 6] & 0xff) << 8)
        | (long) (bytes[offset + 7] & 0xff));
  }

  /**
   * Reads two bytes starting from the offset in the input and returns {@code short} value.
   * The bytes are interpreted in big-endian order.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return the {@code short} value.
   */
  public static short readShort(byte[] bytes, int offset) {
    // Same shift/OR form as readInt and readLong; the cast keeps only the low 16 bits.
    return (short) (((bytes[offset] & 0xff) << 8)
        | (bytes[offset + 1] & 0xff));
  }

  /**
   * Parses the first byte of a variable-length encoded number (integer or long value) to determine
   * total number of bytes representing the number on disk.
   *
   * @param bytes  input byte array of the encoded number.
   * @param offset offset to start reading.
   * @return the total number of bytes (1 to 9) on disk.
   */
  public static int decodeVarLongSizeOnDisk(byte[] bytes, int offset) {
    byte firstByte = bytes[offset];
    return decodeVarLongSize(firstByte);
  }

  /**
   * Parses the first byte of a variable-length encoded number (integer or long value) to determine
   * total number of bytes representing the number on disk.
   *
   * <p>A first byte in [-112, 127] is the whole number (size 1). A first byte in [-120, -113]
   * or in [-128, -121] is a header that is followed by 1 to 8 payload bytes.
   *
   * @param value the first byte of the encoded number.
   * @return the total number of bytes (1 to 9) on disk.
   */
  public static int decodeVarLongSize(byte value) {
    if (value >= -112) {
      return 1;
    } else if (value < -120) {
      // -121..-128 maps to 2..9
      return -119 - value;
    }
    // -113..-120 maps to 2..9
    return -111 - value;
  }

  /**
   * Reads a variable-length encoded number from input bytes and returns it.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @return decoded {@code long} from the input.
   */
  public static long readVarLong(byte[] bytes, int offset) {
    return readVarLong(bytes, offset, decodeVarLongSizeOnDisk(bytes, offset));
  }

  /**
   * Reads a variable-length encoded number from input bytes and the decoded size on disk,
   * and returns it.
   *
   * <p>When the size is 1 the first byte itself is the value. Otherwise the bytes after the
   * first byte are accumulated in big-endian order, and the result is bitwise-complemented
   * when the first byte marks the number as negative.
   *
   * @param bytes             input byte array.
   * @param offset            offset to start reading.
   * @param varLongSizeOnDisk the total number of bytes (1 to 9) on disk.
   * @return decoded {@code long} from the input.
   */
  public static long readVarLong(byte[] bytes, int offset, int varLongSizeOnDisk) {
    byte firstByte = bytes[offset];
    if (varLongSizeOnDisk == 1) {
      return firstByte;
    }
    long value = 0;
    for (int i = 0; i < varLongSizeOnDisk - 1; i++) {
      value = value << 8;
      value = value | (bytes[offset + 1 + i] & 0xFF);
    }
    return (isNegativeVarLong(firstByte) ? (~value) : value);
  }

  /**
   * Given the first byte of a variable-length encoded number, determines the sign.
   *
   * @param value the first byte.
   * @return is the value negative.
   */
  public static boolean isNegativeVarLong(byte value) {
    return value < -120 || (value >= -112 && value < 0);
  }

  /**
   * Copies a range of the input into a newly allocated byte array.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @param length length of bytes to copy.
   * @return a new copy of the byte array.
   */
  public static byte[] copy(byte[] bytes, int offset, int length) {
    byte[] copy = new byte[length];
    System.arraycopy(bytes, offset, copy, 0, length);
    return copy;
  }

  /**
   * Lexicographically compares two byte arrays.
   *
   * @param bytes1 left operand.
   * @param bytes2 right operand.
   * @return 0 if equal, {@code < 0} if left is less than right, {@code > 0} otherwise.
   */
  public static int compareTo(byte[] bytes1, byte[] bytes2) {
    return compareTo(bytes1, 0, bytes1.length, bytes2, 0, bytes2.length);
  }

  /**
   * Lexicographically compares two byte arrays. Bytes are compared as unsigned values; when one
   * range is a prefix of the other, the shorter range is the smaller one.
   *
   * @param bytes1  left operand.
   * @param bytes2  right operand.
   * @param offset1 where to start comparing in the left buffer.
   * @param offset2 where to start comparing in the right buffer.
   * @param length1 how much to compare from the left buffer.
   * @param length2 how much to compare from the right buffer.
   * @return 0 if equal, {@code < 0} if left is less than right, {@code > 0} otherwise.
   */
  public static int compareTo(byte[] bytes1, int offset1, int length1,
                              byte[] bytes2, int offset2, int length2) {
    // Same array and same range: nothing to compare.
    if (bytes1 == bytes2 && offset1 == offset2 && length1 == length2) {
      return 0;
    }
    int end1 = offset1 + length1;
    int end2 = offset2 + length2;
    for (int i = offset1, j = offset2; i < end1 && j < end2; i++, j++) {
      int a = (bytes1[i] & 0xff);
      int b = (bytes2[j] & 0xff);
      if (a != b) {
        // Both operands are in [0, 255], so the subtraction cannot overflow.
        return a - b;
      }
    }
    return length1 - length2;
  }

  /**
   * Returns the start position of the first occurrence of the specified {@code
   * target} within {@code array}, or {@code -1} if there is no such occurrence.
   *
   * <p>More formally, returns the lowest index {@code i} such that the range
   * [i, i + target.length) in {@code array} contains exactly the same elements
   * as {@code target}. An empty {@code target} is found at position 0.
   *
   * @param array  the array to search for the sequence {@code target}.
   * @param target the array to search for as a sub-sequence of {@code array}.
   * @return the start position if found; {@code -1} if there is no such occurrence.
   */
  public static int indexOf(byte[] array, byte[] target) {
    if (target.length == 0) {
      return 0;
    }

    outer:
    for (int i = 0; i < array.length - target.length + 1; i++) {
      for (int j = 0; j < target.length; j++) {
        if (array[i + j] != target[j]) {
          continue outer;
        }
      }
      return i;
    }
    return -1;
  }

  /**
   * Builds a {@link String} by widening each byte in the range to a {@code char}.
   *
   * <p>No charset decoding is applied: each byte becomes one character, and the widening is
   * signed, so only bytes in the ASCII range (0 to 127) map to the character with the same
   * code.
   *
   * @param bytes  input byte array.
   * @param offset offset to start reading.
   * @param length length of bytes to read.
   * @return {@link String} value based on the byte array.
   */
  public static String bytesToString(byte[] bytes, int offset, int length) {
    StringBuilder sb = new StringBuilder();
    for (int i = offset; i < offset + length; i++) {
      sb.append((char) bytes[i]);
    }
    return sb.toString();
  }

  /**
   * Converts an int value to a byte array using big-endian.
   *
   * @param val value to convert.
   * @return the byte array.
   */
  public static byte[] toBytes(int val) {
    byte[] b = new byte[4];
    // Fill from the least significant byte (last index) backwards.
    for (int i = 3; i > 0; i--) {
      b[i] = (byte) val;
      val >>>= 8;
    }
    b[0] = (byte) val;
    return b;
  }

  /**
   * Converts a long value to a byte array using big-endian.
   *
   * @param val value to convert.
   * @return the byte array.
   */
  public static byte[] toBytes(long val) {
    byte[] b = new byte[8];
    // Fill from the least significant byte (last index) backwards.
    for (int i = 7; i > 0; i--) {
      b[i] = (byte) val;
      val >>>= 8;
    }
    b[0] = (byte) val;
    return b;
  }

  /**
   * Computes a hash code over a range of the input, starting from 1 and folding each
   * (signed) byte in with a multiplier of 31.
   *
   * @param bytes  byte array to hash.
   * @param offset offset to start hashing.
   * @param length length of bytes to hash.
   * @return the generated hash code.
   */
  public static int hashCode(byte[] bytes, int offset, int length) {
    int hash = 1;
    for (int i = offset; i < offset + length; i++) {
      hash = (31 * hash) + bytes[i];
    }
    return hash;
  }

  /**
   * Reads the data fully from the {@link InputStream} to the byte array.
   *
   * <p>Keeps reading until {@code length} bytes have been read or the stream reports end of
   * stream, whichever comes first.
   *
   * @param inputStream     {@link InputStream} containing the data.
   * @param targetByteArray target byte array.
   * @param offset          offset in the target byte array to start to write data.
   * @param length          maximum amount of data to write.
   * @return size of bytes read; less than {@code length} if the stream ended early.
   * @throws IOException upon error.
   */
  public static int readFully(InputStream inputStream,
                              byte[] targetByteArray,
                              int offset,
                              int length) throws IOException {
    int totalBytesRead = 0;
    int bytesRead;
    while (totalBytesRead < length) {
      bytesRead = inputStream.read(targetByteArray, offset + totalBytesRead, length - totalBytesRead);
      if (bytesRead < 0) {
        // End of stream before the requested length was reached.
        break;
      }
      totalBytesRead += bytesRead;
    }
    return totalBytesRead;
  }

  /**
   * Reads all remaining data of the {@link InputStream} into a new byte array.
   *
   * @param input      {@link InputStream} containing the data.
   * @param outputSize initial capacity of the intermediate buffer; the returned array holds
   *                   everything read from the stream, which may be more or less than this.
   * @return the bytes read from the stream.
   * @throws IOException upon error.
   */
  public static byte[] readAsByteArray(InputStream input, int outputSize) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream(outputSize);
    copy(input, bos);
    return bos.toByteArray();
  }

  /**
   * Copies all remaining data of the {@link InputStream} to the {@link OutputStream}, in chunks
   * of up to 1024 bytes. Neither stream is flushed or closed by this method.
   *
   * @param inputStream  {@link InputStream} to read from.
   * @param outputStream {@link OutputStream} to write to.
   * @throws IOException upon error.
   */
  public static void copy(InputStream inputStream, OutputStream outputStream) throws IOException {
    byte[] buffer = new byte[1024];
    int len;
    while ((len = inputStream.read(buffer)) != -1) {
      outputStream.write(buffer, 0, len);
    }
  }

  /**
   * Creates a {@link DataInputStream} over the bytes of the buffer from index 0 (inclusive)
   * to the limit (exclusive), regardless of the current position.
   *
   * <p>For an array-backed buffer the stream reads the backing array directly without copying;
   * otherwise (e.g., direct or read-only buffers) the bytes are copied first.
   *
   * @param byteBuffer {@link ByteBuffer} containing the bytes.
   * @return {@link DataInputStream} based on the byte buffer.
   */
  public static DataInputStream getDataInputStream(ByteBuffer byteBuffer) {
    if (!byteBuffer.hasArray()) {
      // No accessible backing array: array() would throw, so read from a copy instead.
      return new DataInputStream(new ByteArrayInputStream(toBytes(byteBuffer)));
    }
    // arrayOffset() is where buffer index 0 lives in the backing array, so the readable
    // range [0, limit) of the buffer is exactly limit() bytes starting at arrayOffset().
    return new DataInputStream(new ByteArrayInputStream(
        byteBuffer.array(), byteBuffer.arrayOffset(), byteBuffer.limit()));
  }

  /**
   * Returns a new byte array, copied from the given {@code buf}, from the index 0 (inclusive)
   * to the limit (exclusive), regardless of the current position.
   * The position and the other index parameters are not changed.
   *
   * @param buf a byte buffer.
   * @return the byte array.
   */
  public static byte[] toBytes(ByteBuffer buf) {
    // Work on a duplicate so the caller's position stays untouched.
    ByteBuffer dup = buf.duplicate();
    dup.position(0);
    return readBytes(dup);
  }

  /**
   * Reads all remaining bytes (position to limit) of the buffer into a new array,
   * advancing the buffer's position.
   */
  private static byte[] readBytes(ByteBuffer buf) {
    byte[] result = new byte[buf.remaining()];
    buf.get(result);
    return result;
  }
}