All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.avro.io.BinaryData Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.avro.io;

import java.io.IOException;

import org.apache.avro.Schema;
import org.apache.avro.Schema.Field;
import org.apache.avro.AvroRuntimeException;
import org.apache.avro.generic.GenericDatumReader;

/** Utilities for binary-encoded data. */
public class BinaryData {

  /** Private constructor: this class only exposes static utility methods. */
  private BinaryData() {
  }

  /**
   * Holds the two reusable {@link BinaryDecoder}s that {@code compare} reads
   * from, one per side of the comparison.
   */
  private static class Decoders {
    private final BinaryDecoder d1 = new BinaryDecoder(new byte[0], 0, 0);
    private final BinaryDecoder d2 = new BinaryDecoder(new byte[0], 0, 0);

    public Decoders() {
      // both decoders start out on an empty buffer; see the field initializers
    }

    /** Points the first decoder at the left-hand bytes and the second at the right-hand bytes. */
    public void set(byte[] left, int leftOff, int leftLen, byte[] right, int rightOff, int rightLen) {
      d1.setBuf(left, leftOff, leftLen);
      d2.setBuf(right, rightOff, rightLen);
    }

    /** Calls {@code clearBuf()} on both decoders. */
    public void clear() {
      d1.clearBuf();
      d2.clearBuf();
    }
  }

  // Per-thread decoder pair, created lazily on first use. The type argument is
  // required: with a raw ThreadLocal, get() returns Object and cannot be
  // assigned to a Decoders variable without a cast.
  private static final ThreadLocal<Decoders> DECODERS = ThreadLocal.withInitial(Decoders::new);

  /**
   * Compare two binary-encoded data, each running from its start offset to the
   * end of its array. Order is consistent with that of
   * {@link org.apache.avro.generic.GenericData#compare(Object, Object, Schema)}.
   *
   * @param b1     array holding the first datum
   * @param s1     offset of the first datum in {@code b1}
   * @param b2     array holding the second datum
   * @param s2     offset of the second datum in {@code b2}
   * @param schema schema both data were written with
   * @return zero if equal, a positive value if the first datum is greater, a
   *         negative value if it is less
   */
  public static int compare(byte[] b1, int s1, byte[] b2, int s2, Schema schema) {
    final int remaining1 = b1.length - s1;
    final int remaining2 = b2.length - s2;
    return compare(b1, s1, remaining1, b2, s2, remaining2, schema);
  }

  /**
   * Compare two binary-encoded data given as explicit (array, offset, length)
   * ranges. Order is consistent with that of
   * {@link org.apache.avro.generic.GenericData#compare(Object, Object, Schema)}.
   *
   * @return zero if equal, a positive value if the first datum is greater, a
   *         negative value if it is less
   * @throws AvroRuntimeException wrapping any {@link IOException} raised while
   *                              decoding
   */
  public static int compare(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2, Schema schema) {
    final Decoders pair = DECODERS.get();
    pair.set(b1, s1, l1, b2, s2, l2);
    try {
      return compare(pair, schema);
    } catch (IOException ioe) {
      throw new AvroRuntimeException(ioe);
    } finally {
      // always clear the thread-local decoders' buffers, even on failure
      pair.clear();
    }
  }

  /**
   * Recursively compares the next datum of type {@code schema} read from each
   * of the two decoders in {@code d}. Both decoders are advanced as values are
   * read; when a difference is found the method returns immediately, without
   * reading the rest of the enclosing record or array.
   *
   * @param d      the pair of decoders positioned at the data to compare
   * @param schema the schema describing the datum at the current position
   * @return zero if the data are equal, a negative value if the first is less,
   *         a positive value if the first is greater
   * @throws IOException          if reading from either decoder fails
   * @throws AvroRuntimeException for map schemas and unexpected schema types
   */
  private static int compare(Decoders d, Schema schema) throws IOException {
    Decoder d1 = d.d1;
    Decoder d2 = d.d2;
    switch (schema.getType()) {
    case RECORD: {
      // Fields are compared in schema order; the first unequal field decides.
      for (Field field : schema.getFields()) {
        if (field.order() == Field.Order.IGNORE) {
          // Ignored fields do not take part in ordering, but must still be
          // consumed from both decoders so the next field lines up.
          GenericDatumReader.skip(field.schema(), d1);
          GenericDatumReader.skip(field.schema(), d2);
          continue;
        }
        int c = compare(d, field.schema());
        if (c != 0) {
          // A DESCENDING field inverts the sign of the result.
          return (field.order() != Field.Order.DESCENDING) ? c : -c;
        }
      }
      return 0;
    }
    case ENUM: // enum values are read with readInt(), exactly like ints
    case INT:
      return Integer.compare(d1.readInt(), d2.readInt());
    case LONG:
      return Long.compare(d1.readLong(), d2.readLong());
    case FLOAT:
      return Float.compare(d1.readFloat(), d2.readFloat());
    case DOUBLE:
      return Double.compare(d1.readDouble(), d2.readDouble());
    case BOOLEAN:
      return Boolean.compare(d1.readBoolean(), d2.readBoolean());
    case ARRAY: {
      long i = 0; // position in array
      long r1 = 0, r2 = 0; // remaining in current block
      long l1 = 0, l2 = 0; // total array length
      while (true) {
        if (r1 == 0) { // refill blocks(s)
          r1 = d1.readLong();
          if (r1 < 0) {
            // A negative count is followed by one more long, which is read and
            // discarded here (per the Avro spec it is the block size in bytes).
            r1 = -r1;
            d1.readLong();
          }
          l1 += r1;
        }
        if (r2 == 0) {
          r2 = d2.readLong();
          if (r2 < 0) {
            r2 = -r2;
            d2.readLong();
          }
          l2 += r2;
        }
        // A zero count ends an array: all shared elements were equal, so the
        // array with fewer elements read so far orders first.
        if (r1 == 0 || r2 == 0) // empty block: done
          return Long.compare(l1, l2);
        // Compare elements up to the end of whichever side's block ends first.
        long l = Math.min(l1, l2);
        while (i < l) { // compare to end of block
          int c = compare(d, schema.getElementType());
          if (c != 0)
            return c;
          i++;
          r1--;
          r2--;
        }
      }
    }
    case MAP:
      throw new AvroRuntimeException("Can't compare maps!");
    case UNION: {
      // Different branch indexes order by index; equal indexes compare the
      // values using that branch's schema.
      int i1 = d1.readInt();
      int i2 = d2.readInt();
      int c = Integer.compare(i1, i2);
      return c == 0 ? compare(d, schema.getTypes().get(i1)) : c;
    }
    case FIXED: {
      // Compare the raw bytes in place in the decoders' buffers, then advance
      // both decoders past them.
      int size = schema.getFixedSize();
      int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), size, d.d2.getBuf(), d.d2.getPos(), size);
      d.d1.skipFixed(size);
      d.d2.skipFixed(size);
      return c;
    }
    case STRING:
    case BYTES: {
      // Each value is a length (read with readInt()) followed by that many
      // bytes, which are compared in place and then skipped.
      int l1 = d1.readInt();
      int l2 = d2.readInt();
      int c = compareBytes(d.d1.getBuf(), d.d1.getPos(), l1, d.d2.getBuf(), d.d2.getPos(), l2);
      d.d1.skipFixed(l1);
      d.d2.skipFixed(l2);
      return c;
    }
    case NULL:
      // Nothing is read for null; two nulls are always equal.
      return 0;
    default:
      throw new AvroRuntimeException("Unexpected schema to compare!");
    }
  }

  /**
   * Lexicographically compare two byte ranges, treating each byte as unsigned.
   *
   * @param b1 array holding the first range
   * @param s1 start offset of the first range
   * @param l1 length of the first range
   * @param b2 array holding the second range
   * @param s2 start offset of the second range
   * @param l2 length of the second range
   * @return zero if equal, a positive value if the first range is greater, a
   *         negative value if it is less; when one range is a prefix of the
   *         other, the result is {@code l1 - l2}
   */
  public static int compareBytes(byte[] b1, int s1, int l1, byte[] b2, int s2, int l2) {
    final int stop1 = s1 + l1;
    final int stop2 = s2 + l2;
    int p = s1;
    int q = s2;
    while (p < stop1 && q < stop2) {
      // mask to 0..255 so bytes >= 0x80 sort after bytes < 0x80
      int diff = (b1[p++] & 0xff) - (b2[q++] & 0xff);
      if (diff != 0) {
        return diff;
      }
    }
    return l1 - l2;
  }

  /** Holds the single reusable {@link BinaryDecoder} that {@code hashCode} reads from. */
  private static class HashData {
    private final BinaryDecoder decoder = new BinaryDecoder(new byte[0], 0, 0);

    public HashData() {
      // the decoder starts out on an empty buffer; see the field initializer
    }

    /** Points the decoder at {@code length} bytes of {@code buf} starting at {@code offset}. */
    public void set(byte[] buf, int offset, int length) {
      decoder.setBuf(buf, offset, length);
    }
  }

  // Per-thread hashing state, created lazily on first use. The type argument is
  // required: with a raw ThreadLocal, get() returns Object and cannot be
  // assigned to a HashData variable without a cast.
  private static final ThreadLocal<HashData> HASH_DATA = ThreadLocal.withInitial(HashData::new);

  /**
   * Hash binary encoded data. Consistent with
   * {@link org.apache.avro.generic.GenericData#hashCode(Object, Schema)}.
   *
   * @param bytes  array holding the encoded datum
   * @param start  offset of the datum in {@code bytes}
   * @param length number of bytes available from {@code start}
   * @param schema schema the datum was written with
   * @return the hash code of the encoded datum
   * @throws AvroRuntimeException wrapping any {@link IOException} raised while
   *                              decoding, or for schemas that cannot be hashed
   */
  public static int hashCode(byte[] bytes, int start, int length, Schema schema) {
    HashData data = HASH_DATA.get();
    data.set(bytes, start, length);
    try {
      return hashCode(data, schema);
    } catch (IOException e) {
      throw new AvroRuntimeException(e);
    } finally {
      // Mirror compare(): clear the thread-local decoder's buffer so it does
      // not keep the caller's array referenced after the call.
      data.decoder.clearBuf();
    }
  }

  /**
   * Recursively hashes the next datum of type {@code schema} read from the
   * decoder in {@code data}, advancing the decoder past it.
   *
   * @throws IOException          if reading from the decoder fails
   * @throws AvroRuntimeException for map schemas and unexpected schema types
   */
  private static int hashCode(HashData data, Schema schema) throws IOException {
    final Decoder in = data.decoder;
    switch (schema.getType()) {
    case RECORD: {
      int result = 1;
      for (Field f : schema.getFields()) {
        if (f.order() == Field.Order.IGNORE) {
          // ignored fields contribute nothing but must still be consumed
          GenericDatumReader.skip(f.schema(), in);
        } else {
          result = result * 31 + hashCode(data, f.schema());
        }
      }
      return result;
    }
    case ENUM: // read with readInt(), exactly like ints
    case INT:
      return in.readInt();
    case BOOLEAN:
      return Boolean.hashCode(in.readBoolean());
    case FLOAT:
      return Float.hashCode(in.readFloat());
    case LONG:
      return Long.hashCode(in.readLong());
    case DOUBLE:
      return Double.hashCode(in.readDouble());
    case ARRAY: {
      final Schema itemSchema = schema.getElementType();
      int result = 1;
      long blockCount = in.readArrayStart();
      while (blockCount != 0) {
        for (long k = 0; k < blockCount; k++) {
          result = result * 31 + hashCode(data, itemSchema);
        }
        blockCount = in.arrayNext();
      }
      return result;
    }
    case MAP:
      throw new AvroRuntimeException("Can't hashCode maps!");
    case UNION:
      // the branch index selects which member schema to hash with
      return hashCode(data, schema.getTypes().get(in.readInt()));
    case FIXED:
      return hashBytes(1, data, schema.getFixedSize(), false);
    case STRING:
      return hashBytes(0, data, in.readInt(), false);
    case BYTES:
      return hashBytes(1, data, in.readInt(), true);
    case NULL:
      return 0;
    default:
      throw new AvroRuntimeException("Unexpected schema to hashCode!");
    }
  }

  /**
   * Folds {@code len} bytes at the decoder's current position into a
   * {@code 31 * h + b} style hash seeded with {@code init}, then advances the
   * decoder past those bytes.
   *
   * @param init seed value of the hash
   * @param data holder of the decoder to read from
   * @param len  number of bytes to hash
   * @param rev  if true the bytes are folded last-to-first, otherwise
   *             first-to-last
   */
  private static int hashBytes(int init, HashData data, int len, boolean rev) throws IOException {
    final BinaryDecoder in = data.decoder;
    final byte[] buf = in.getBuf();
    final int from = in.getPos();
    final int to = from + len;
    int result = init;
    if (rev) {
      for (int k = to - 1; k >= from; k--) {
        result = result * 31 + buf[k];
      }
    } else {
      for (int k = from; k < to; k++) {
        result = result * 31 + buf[k];
      }
    }
    in.skipFixed(len);
    return result;
  }

  /**
   * Skip a binary-encoded long, returning the position after it.
   *
   * @param bytes array holding the encoded long
   * @param start offset of its first byte
   * @return index of the first byte after the encoded long
   */
  public static int skipLong(byte[] bytes, int start) {
    int cursor = start;
    while ((bytes[cursor++] & 0x80) != 0) {
      // high bit set: another byte of this value follows
    }
    return cursor;
  }

  /**
   * Encode a boolean as a single byte (1 for true, 0 for false) at the given
   * position. Will throw IndexOutOfBounds if the position is not valid.
   *
   * @return The number of bytes written to the buffer, 1.
   */
  public static int encodeBoolean(boolean b, byte[] buf, int pos) {
    buf[pos] = (byte) (b ? 1 : 0);
    return 1;
  }

  /**
   * Encode an integer as a zig-zag variable-length value at the given position.
   * Will throw IndexOutOfBounds if it overflows. Users should ensure that there
   * are at least 5 bytes left in the buffer before calling this method.
   *
   * @return The number of bytes written to the buffer, between 1 and 5.
   */
  public static int encodeInt(int n, byte[] buf, int pos) {
    // zig-zag: move sign to low-order bit, and flip others if negative
    int remaining = (n << 1) ^ (n >> 31);
    int cursor = pos;
    // emit 7 bits per byte, low group first; the high bit marks "more follows"
    while ((remaining & ~0x7F) != 0) {
      buf[cursor++] = (byte) ((remaining & 0x7F) | 0x80);
      remaining >>>= 7;
    }
    buf[cursor++] = (byte) remaining;
    return cursor - pos;
  }

  /**
   * Encode a long as a zig-zag variable-length value at the given position.
   * Will throw IndexOutOfBounds if it overflows. Users should ensure that there
   * are at least 10 bytes left in the buffer before calling this method.
   *
   * @return The number of bytes written to the buffer, between 1 and 10.
   */
  public static int encodeLong(long n, byte[] buf, int pos) {
    // zig-zag: move sign to low-order bit, and flip others if negative
    long remaining = (n << 1) ^ (n >> 63);
    int cursor = pos;
    // emit 7 bits per byte, low group first; the high bit marks "more follows"
    while ((remaining & ~0x7FL) != 0) {
      buf[cursor++] = (byte) ((remaining & 0x7F) | 0x80);
      remaining >>>= 7;
    }
    buf[cursor++] = (byte) remaining;
    return cursor - pos;
  }

  /**
   * Encode a float as its 4 raw IEEE-754 bytes, least-significant byte first,
   * at the given position. Will throw IndexOutOfBounds if it overflows. Users
   * should ensure that there are at least 4 bytes left in the buffer before
   * calling this method.
   *
   * @return The number of bytes written to the buffer, 4.
   */
  public static int encodeFloat(float f, byte[] buf, int pos) {
    final int raw = Float.floatToRawIntBits(f);
    // the narrowing cast keeps only the low 8 bits of each shifted value
    buf[pos] = (byte) raw;
    buf[pos + 1] = (byte) (raw >>> 8);
    buf[pos + 2] = (byte) (raw >>> 16);
    buf[pos + 3] = (byte) (raw >>> 24);
    return 4;
  }

  /**
   * Encode a double as its 8 raw IEEE-754 bytes, least-significant byte first,
   * at the given position. Will throw IndexOutOfBounds if it overflows. Users
   * should ensure that there are at least 8 bytes left in the buffer before
   * calling this method.
   *
   * @return The number of bytes written to the buffer, 8.
   */
  public static int encodeDouble(double d, byte[] buf, int pos) {
    final long raw = Double.doubleToRawLongBits(d);
    final int low = (int) raw; // bits 0..31
    final int high = (int) (raw >>> 32); // bits 32..63
    // The stores are deliberately kept in this interleaved order; each
    // narrowing cast keeps only the low 8 bits of the shifted value.
    buf[pos] = (byte) low;
    buf[pos + 4] = (byte) high;
    buf[pos + 5] = (byte) (high >>> 8);
    buf[pos + 1] = (byte) (low >>> 8);
    buf[pos + 2] = (byte) (low >>> 16);
    buf[pos + 6] = (byte) (high >>> 16);
    buf[pos + 7] = (byte) (high >>> 24);
    buf[pos + 3] = (byte) (low >>> 24);
    return 8;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy