All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.parquet.bytes.BytesUtils Maven / Gradle / Ivy

There is a newer version: 1.15.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.parquet.bytes;

import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * utility methods to deal with bytes
 */
public class BytesUtils {
  private static final Logger LOG = LoggerFactory.getLogger(BytesUtils.class);

  /**
   * @deprecated Use {@link StandardCharsets#UTF_8} instead
   */
  @Deprecated
  public static final Charset UTF8 = StandardCharsets.UTF_8;

  /**
   * give the number of bits needed to encode an int given the max value
   * <p>
   * Returns 0 for a bound of 0 and 32 for any negative bound (the sign bit is set).
   *
   * @param bound max int that we want to encode
   * @return the number of bits required
   */
  public static int getWidthFromMaxInt(int bound) {
    return 32 - Integer.numberOfLeadingZeros(bound);
  }

  /**
   * reads an int in little endian at the given position
   * <p>
   * Uses absolute gets, so the position of the buffer is not modified.
   *
   * @param in     a byte buffer
   * @param offset an offset into the byte buffer
   * @return the integer at position offset read using little endian byte order
   * @throws IOException if there is an exception reading from the byte buffer
   */
  public static int readIntLittleEndian(ByteBuffer in, int offset) throws IOException {
    int ch4 = in.get(offset) & 0xff;
    int ch3 = in.get(offset + 1) & 0xff;
    int ch2 = in.get(offset + 2) & 0xff;
    int ch1 = in.get(offset + 3) & 0xff;
    return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
  }

  /**
   * reads an int in little endian at the given position
   *
   * @param in     a byte array
   * @param offset an offset into the byte array
   * @return the integer at position offset read using little endian byte order
   * @throws IOException if there is an exception reading from the byte array
   */
  public static int readIntLittleEndian(byte[] in, int offset) throws IOException {
    int ch4 = in[offset] & 0xff;
    int ch3 = in[offset + 1] & 0xff;
    int ch2 = in[offset + 2] & 0xff;
    int ch1 = in[offset + 3] & 0xff;
    return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + (ch4 << 0));
  }

  /**
   * reads a 4 byte little endian int from the stream
   *
   * @param in an input stream
   * @return the int assembled from the next four bytes, least significant byte first
   * @throws EOFException if the stream ends before four bytes were read
   * @throws IOException  if there is an exception while reading
   */
  public static int readIntLittleEndian(InputStream in) throws IOException {
    // TODO: this is duplicated code in LittleEndianDataInputStream
    int ch1 = in.read();
    int ch2 = in.read();
    int ch3 = in.read();
    int ch4 = in.read();
    // read() returns -1 at end of stream; OR-ing keeps the sign bit if any read failed
    if ((ch1 | ch2 | ch3 | ch4) < 0) {
      throw new EOFException();
    }
    return ((ch4 << 24) + (ch3 << 16) + (ch2 << 8) + (ch1 << 0));
  }

  /**
   * reads a single byte from the stream as an unsigned int
   *
   * @param in an input stream
   * @return the next byte as a value in the range 0 to 255
   * @throws EOFException if the stream is at its end
   * @throws IOException  if there is an exception while reading
   */
  public static int readIntLittleEndianOnOneByte(InputStream in) throws IOException {
    int ch1 = in.read();
    if (ch1 < 0) {
      throw new EOFException();
    }
    return ch1;
  }

  /**
   * reads a 2 byte little endian unsigned value from the stream
   *
   * @param in an input stream
   * @return the int assembled from the next two bytes, least significant byte first
   * @throws EOFException if the stream ends before two bytes were read
   * @throws IOException  if there is an exception while reading
   */
  public static int readIntLittleEndianOnTwoBytes(InputStream in) throws IOException {
    int ch1 = in.read();
    int ch2 = in.read();
    if ((ch1 | ch2) < 0) {
      throw new EOFException();
    }
    return ((ch2 << 8) + (ch1 << 0));
  }

  /**
   * reads a 3 byte little endian unsigned value from the stream
   *
   * @param in an input stream
   * @return the int assembled from the next three bytes, least significant byte first
   * @throws EOFException if the stream ends before three bytes were read
   * @throws IOException  if there is an exception while reading
   */
  public static int readIntLittleEndianOnThreeBytes(InputStream in) throws IOException {
    int ch1 = in.read();
    int ch2 = in.read();
    int ch3 = in.read();
    if ((ch1 | ch2 | ch3) < 0) {
      throw new EOFException();
    }
    return ((ch3 << 16) + (ch2 << 8) + (ch1 << 0));
  }

  /**
   * Read a little endian int from in, consuming the number of bytes required by
   * bit width
   *
   * @param in       an input stream
   * @param bitWidth bit width used to derive the number of bytes to read
   * @return the value read, or 0 when the bit width maps to zero bytes
   * @throws IOException if the bit width does not map to 0 to 4 bytes, or if there
   *                     is an exception while reading
   */
  public static int readIntLittleEndianPaddedOnBitWidth(InputStream in, int bitWidth) throws IOException {

    int bytesWidth = paddedByteCountFromBits(bitWidth);
    switch (bytesWidth) {
      case 0:
        return 0;
      case 1:
        return BytesUtils.readIntLittleEndianOnOneByte(in);
      case 2:
        return BytesUtils.readIntLittleEndianOnTwoBytes(in);
      case 3:
        return BytesUtils.readIntLittleEndianOnThreeBytes(in);
      case 4:
        return BytesUtils.readIntLittleEndian(in);
      default:
        throw new IOException(
            String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
    }
  }

  /**
   * writes the lowest byte of v to out
   *
   * @param out an output stream
   * @param v   an int value; only bits 0 to 7 are written
   * @throws IOException if there is an exception while writing
   */
  public static void writeIntLittleEndianOnOneByte(OutputStream out, int v) throws IOException {
    out.write((v >>> 0) & 0xFF);
  }

  /**
   * writes the two lowest bytes of v to out, least significant byte first
   *
   * @param out an output stream
   * @param v   an int value; only bits 0 to 15 are written
   * @throws IOException if there is an exception while writing
   */
  public static void writeIntLittleEndianOnTwoBytes(OutputStream out, int v) throws IOException {
    out.write((v >>> 0) & 0xFF);
    out.write((v >>> 8) & 0xFF);
  }

  /**
   * writes the three lowest bytes of v to out, least significant byte first
   *
   * @param out an output stream
   * @param v   an int value; only bits 0 to 23 are written
   * @throws IOException if there is an exception while writing
   */
  public static void writeIntLittleEndianOnThreeBytes(OutputStream out, int v) throws IOException {
    out.write((v >>> 0) & 0xFF);
    out.write((v >>> 8) & 0xFF);
    out.write((v >>> 16) & 0xFF);
  }

  /**
   * writes the four bytes of v to out, least significant byte first
   *
   * @param out an output stream
   * @param v   an int value
   * @throws IOException if there is an exception while writing
   */
  public static void writeIntLittleEndian(OutputStream out, int v) throws IOException {
    // TODO: this is duplicated code in LittleEndianDataOutputStream
    int b0 = (v >>> 0) & 0xFF;
    int b1 = (v >>> 8) & 0xFF;
    int b2 = (v >>> 16) & 0xFF;
    int b3 = (v >>> 24) & 0xFF;
    out.write(b0);
    out.write(b1);
    out.write(b2);
    out.write(b3);
    // guarded so the message is only built when debug logging is on
    if (LOG.isDebugEnabled()) {
      LOG.debug("write le int: " + v + " => " + b0 + " " + b1 + " " + b2 + " " + b3);
    }
  }

  /**
   * Write a little endian int to out, using the number of bytes required by
   * bit width
   *
   * @param out      an output stream
   * @param v        an int value
   * @param bitWidth bit width for padding
   * @throws IOException if the bit width does not map to 0 to 4 bytes, or if there
   *                     is an exception while writing
   */
  public static void writeIntLittleEndianPaddedOnBitWidth(OutputStream out, int v, int bitWidth) throws IOException {

    int bytesWidth = paddedByteCountFromBits(bitWidth);
    switch (bytesWidth) {
      case 0:
        // nothing to write for a zero bit width
        break;
      case 1:
        writeIntLittleEndianOnOneByte(out, v);
        break;
      case 2:
        writeIntLittleEndianOnTwoBytes(out, v);
        break;
      case 3:
        writeIntLittleEndianOnThreeBytes(out, v);
        break;
      case 4:
        writeIntLittleEndian(out, v);
        break;
      default:
        // the offending input is the bit width, not the value: report it,
        // consistently with readIntLittleEndianPaddedOnBitWidth
        throw new IOException(
            String.format("Encountered bitWidth (%d) that requires more than 4 bytes", bitWidth));
    }
  }

  /**
   * reads an unsigned var-int: 7 bits of payload per byte, least significant group
   * first, with the high bit of each byte set when another byte follows
   *
   * @param in an input stream
   * @return the decoded value
   * @throws EOFException if the stream ends before the terminating byte was read
   * @throws IOException  if there is an exception while reading
   */
  public static int readUnsignedVarInt(InputStream in) throws IOException {
    int value = 0;
    int shift = 0;
    while (true) {
      int b = in.read();
      if (b < 0) {
        // read() returns -1 at end of stream; -1 has the continuation bit set, so
        // without this check a truncated stream would never leave the loop
        throw new EOFException();
      }
      if ((b & 0x80) == 0) {
        // last byte: no continuation bit
        return value | (b << shift);
      }
      value |= (b & 0x7F) << shift;
      shift += 7;
    }
  }

  /**
   * uses a trick mentioned in https://developers.google.com/protocol-buffers/docs/encoding to read zigZag encoded data
   *
   * @param in an input stream
   * @return the value of a zig-zag varint read from the current position in the stream
   * @throws IOException if there is an exception while reading
   */
  public static int readZigZagVarInt(InputStream in) throws IOException {
    int raw = readUnsignedVarInt(in);
    int temp = (((raw << 31) >> 31) ^ raw) >> 1;
    return temp ^ (raw & (1 << 31));
  }

  /**
   * writes value as an unsigned var-int: 7 bits per byte, least significant group
   * first, with the high bit set on every byte except the last
   *
   * @param value the value to write; treated as unsigned
   * @param out   an output stream
   * @throws IOException if there is an exception while writing
   */
  public static void writeUnsignedVarInt(int value, OutputStream out) throws IOException {
    while ((value & 0xFFFFFF80) != 0) {
      out.write((value & 0x7F) | 0x80);
      value >>>= 7;
    }
    out.write(value & 0x7F);
  }

  /**
   * writes value as an unsigned var-int at the current position of the buffer
   *
   * @param value the value to write; treated as unsigned
   * @param dest  the destination byte buffer
   * @throws IOException declared for signature compatibility with the stream variant
   */
  public static void writeUnsignedVarInt(int value, ByteBuffer dest) throws IOException {
    while ((value & 0xFFFFFF80) != 0) {
      dest.put((byte) ((value & 0x7F) | 0x80));
      value >>>= 7;
    }
    dest.put((byte) (value & 0x7F));
  }

  /**
   * writes intValue as a zig-zag encoded var-int
   *
   * @param intValue the signed value to write
   * @param out      an output stream
   * @throws IOException if there is an exception while writing
   */
  public static void writeZigZagVarInt(int intValue, OutputStream out) throws IOException {
    writeUnsignedVarInt((intValue << 1) ^ (intValue >> 31), out);
  }

  /**
   * uses a trick mentioned in https://developers.google.com/protocol-buffers/docs/encoding to read zigZag encoded data
   * TODO: the implementation is compatible with readZigZagVarInt. Is there a need for different functions?
   *
   * @param in an input stream
   * @return the value of a zig-zag var-long read from the current position in the stream
   * @throws IOException if there is an exception while reading
   */
  public static long readZigZagVarLong(InputStream in) throws IOException {
    long raw = readUnsignedVarLong(in);
    long temp = (((raw << 63) >> 63) ^ raw) >> 1;
    return temp ^ (raw & (1L << 63));
  }

  /**
   * reads an unsigned var-long: 7 bits of payload per byte, least significant group
   * first, with the high bit of each byte set when another byte follows
   *
   * @param in an input stream
   * @return the decoded value
   * @throws EOFException if the stream ends before the terminating byte was read
   * @throws IOException  if there is an exception while reading
   */
  public static long readUnsignedVarLong(InputStream in) throws IOException {
    long value = 0;
    int shift = 0;
    while (true) {
      // kept as a long so the shifts below are 64 bit shifts
      long b = in.read();
      if (b < 0) {
        // read() returns -1 at end of stream; -1 has the continuation bit set, so
        // without this check a truncated stream would never leave the loop
        throw new EOFException();
      }
      if ((b & 0x80) == 0) {
        // last byte: no continuation bit
        return value | (b << shift);
      }
      value |= (b & 0x7F) << shift;
      shift += 7;
    }
  }

  /**
   * writes value as an unsigned var-long: 7 bits per byte, least significant group
   * first, with the high bit set on every byte except the last
   *
   * @param value the value to write; treated as unsigned
   * @param out   an output stream
   * @throws IOException if there is an exception while writing
   */
  public static void writeUnsignedVarLong(long value, OutputStream out) throws IOException {
    while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) {
      out.write((int) ((value & 0x7F) | 0x80));
      value >>>= 7;
    }
    out.write((int) (value & 0x7F));
  }

  /**
   * writes value as an unsigned var-long at the current position of the buffer
   *
   * @param value the value to write; treated as unsigned
   * @param out   the destination byte buffer
   */
  public static void writeUnsignedVarLong(long value, ByteBuffer out) {
    while ((value & 0xFFFFFFFFFFFFFF80L) != 0L) {
      out.put((byte) ((value & 0x7F) | 0x80));
      value >>>= 7;
    }
    out.put((byte) (value & 0x7F));
  }

  /**
   * writes longValue as a zig-zag encoded var-long
   *
   * @param longValue the signed value to write
   * @param out       an output stream
   * @throws IOException if there is an exception while writing
   */
  public static void writeZigZagVarLong(long longValue, OutputStream out) throws IOException {
    writeUnsignedVarLong((longValue << 1) ^ (longValue >> 63), out);
  }

  /**
   * @param bitLength a count of bits
   * @return the corresponding byte count padded to the next byte
   */
  public static int paddedByteCountFromBits(int bitLength) {
    return (bitLength + 7) / 8;
  }

  /**
   * @param value an int value
   * @return a new 4 byte array holding value in little endian byte order
   */
  public static byte[] intToBytes(int value) {
    byte[] outBuffer = new byte[4];
    outBuffer[3] = (byte) (value >>> 24);
    outBuffer[2] = (byte) (value >>> 16);
    outBuffer[1] = (byte) (value >>> 8);
    outBuffer[0] = (byte) (value >>> 0);
    return outBuffer;
  }

  /**
   * @param bytes an array whose first 4 bytes hold an int in little endian byte order
   * @return the int read from bytes[0] to bytes[3]
   */
  public static int bytesToInt(byte[] bytes) {
    return ((int) (bytes[3] & 255) << 24)
        + ((int) (bytes[2] & 255) << 16)
        + ((int) (bytes[1] & 255) << 8)
        + ((int) (bytes[0] & 255) << 0);
  }

  /**
   * @param value a long value
   * @return a new 8 byte array holding value in little endian byte order
   */
  public static byte[] longToBytes(long value) {
    byte[] outBuffer = new byte[8];
    outBuffer[7] = (byte) (value >>> 56);
    outBuffer[6] = (byte) (value >>> 48);
    outBuffer[5] = (byte) (value >>> 40);
    outBuffer[4] = (byte) (value >>> 32);
    outBuffer[3] = (byte) (value >>> 24);
    outBuffer[2] = (byte) (value >>> 16);
    outBuffer[1] = (byte) (value >>> 8);
    outBuffer[0] = (byte) (value >>> 0);
    return outBuffer;
  }

  /**
   * @param bytes an array whose first 8 bytes hold a long in little endian byte order
   * @return the long read from bytes[0] to bytes[7]
   */
  public static long bytesToLong(byte[] bytes) {
    // bytes[7] needs no mask: the sign extension is shifted out by << 56
    return (((long) bytes[7] << 56)
        + ((long) (bytes[6] & 255) << 48)
        + ((long) (bytes[5] & 255) << 40)
        + ((long) (bytes[4] & 255) << 32)
        + ((long) (bytes[3] & 255) << 24)
        + ((long) (bytes[2] & 255) << 16)
        + ((long) (bytes[1] & 255) << 8)
        + ((long) (bytes[0] & 255) << 0));
  }

  /**
   * @param value a boolean value
   * @return a new 1 byte array holding 1 for true and 0 for false
   */
  public static byte[] booleanToBytes(boolean value) {
    byte[] outBuffer = new byte[1];
    outBuffer[0] = (byte) (value ? 1 : 0);
    return outBuffer;
  }

  /**
   * @param bytes an array with at least one byte
   * @return true if the first byte is non-zero
   */
  public static boolean bytesToBool(byte[] bytes) {
    return ((int) (bytes[0] & 255) != 0);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy