All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.kitesdk.data.hbase.avro.io.MemcmpEncoder Maven / Gradle / Ivy

There is a newer version: 1.1.0
Show newest version
/**
 * Copyright 2013 Cloudera Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.kitesdk.data.hbase.avro.io;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import org.apache.avro.util.Utf8;
import org.apache.avro.io.Encoder;

/**
 * A class that will encode Avro types, whose sort order can be determined by a
 * memcmp.
 */
public class MemcmpEncoder extends Encoder {
  private OutputStream out;

  public MemcmpEncoder(OutputStream out) {
    this.out = out;
  }

  @Override
  public void flush() throws IOException {
    if (out != null) {
      out.flush();
    }
  }

  @Override
  public void writeNull() throws IOException {
  }

  /**
   * A boolean is encoded as a byte, with 1 being true, and 0 being false.
   * 
   * @param b
   *          The boolean to encode.
   */
  @Override
  public void writeBoolean(boolean b) throws IOException {
    out.write(b ? 1 : 0);
  }

  /**
   * An int is written by flipping the sign bit, and writing it as a big endian
   * int.
   * 
   * @param n
   *          The int to encode.
   */
  @Override
  public void writeInt(int n) throws IOException {
    byte[] intBytes = new byte[] { (byte) ((n >>> 24) ^ 0x80),
        (byte) (n >>> 16), (byte) (n >>> 8), (byte) n };
    out.write(intBytes);
  }

  /**
   * A long is written by flipping the sign bit, and writing it as a big endian
   * long.
   * 
   * @param n
   *          The long to encode.
   */
  @Override
  public void writeLong(long n) throws IOException {
    byte[] intBytes = new byte[] { (byte) ((n >>> 56) ^ 0x80),
        (byte) (n >>> 48), (byte) (n >>> 40), (byte) (n >>> 32),
        (byte) (n >>> 24), (byte) (n >>> 16), (byte) (n >>> 8), (byte) n };
    out.write(intBytes);
  }

  /**
   * A float is written as 4 bytes. It is encoded as a 32 bit integer using the
   * int encoding defined in this class. If it is a negative number, the
   * complement of the 31 bits (every bit except the sign bit) is taken. This is
   * to ensure proper ordering of negative numbers.
   * 
   * @param f
   *          The float to encode.
   */
  @Override
  public void writeFloat(float f) throws IOException {
    int n = Float.floatToIntBits(f);
    if ((n >>> 31) > 0) {
      n ^= 0x7fffffff;
    }
    writeInt(n);
  }

  /**
   * A double is written as 8 bytes. It is encoded as a 64 bit long using the
   * long encoding defined in this class. If it is a negative number, the
   * complement of the 63 bits (every bit except the sign bit) is taken. This is
   * to ensure proper ordering of negative numbers.
   * 
   * @param d
   *          The double to encode.
   */
  @Override
  public void writeDouble(double d) throws IOException {
    long n = Double.doubleToLongBits(d);
    if ((n >>> 63) > 0) {
      n ^= 0x7fffffffffffffffL;
    }
    writeLong(n);
  }

  /**
   * Strings are encoded by fetching the bytes of the UTF8 encoding, and using
   * the writeBytes method of this class.
   * 
   * @param utf8
   *          The utf8 string to encode.
   */
  @Override
  public void writeString(Utf8 utf8) throws IOException {
    writeBytes(utf8.getBytes(), 0, utf8.getByteLength());
  }

  /**
   * Fixed values are encoded as the raw bytes.
   * 
   * @param bytes
   *          The bytes to encode.
   * @param start
   *          The start of the byte array to encode.
   * @param len
   *          The length of the byte array to encode.
   */
  @Override
  public void writeFixed(byte[] bytes, int start, int len) throws IOException {
    out.write(bytes, start, len);
  }

  /**
   * Bytes are encoded by writing all bytes out as themselves, except for bytes
   * of value 0x00. Bytes of value 0x00 are encoded as two bytes, 0x00 0x01. The
   * end marker is signified by two 0x00 bytes. This guarantees that the end
   * marker is the least possible value.
   * 
   * @param bytes
   *          The bytes to encode.
   */
  @Override
  public void writeBytes(ByteBuffer bytes) throws IOException {
    writeBytes(bytes.array(), bytes.position(), bytes.remaining());
  }

  /**
   * Bytes are encoded by writing all bytes out as themselves, except for bytes
   * of value 0x00. Bytes of value 0x00 are encoded as two bytes, 0x00 0x01. The
   * end marker is signified by two 0x00 bytes. This guarantees that the end
   * marker is the least possible value.
   * 
   * @param bytes
   *          The bytes to encode.
   * @param start
   *          The start of the byte array to encode.
   * @param len
   *          The length of the byte array to encode.
   */
  @Override
  public void writeBytes(byte[] bytes, int start, int len) throws IOException {
    for (int i = start; i < start + len; ++i) {
      if (bytes[i] == 0x00) {
        out.write(0);
        out.write(1);
      } else {
        out.write((int) bytes[i]);
      }
    }
    out.write(0);
    out.write(0);
  }

  /**
   * Enums are encoded using the Integer encoding of this class.
   */
  @Override
  public void writeEnum(int e) throws IOException {
    writeInt(e);
  }

  @Override
  public void writeArrayStart() throws IOException {
  }

  @Override
  public void setItemCount(long itemCount) throws IOException {
  }

  /**
   * Each item in an array is prepended by a 1 byte. This is to ensure that one
   * array longer than another with equal elements up to that point will always
   * be greater.
   */
  @Override
  public void startItem() throws IOException {
    out.write(1);
  }

  /**
   * Arrays are appended with a 0 byte.
   */
  @Override
  public void writeArrayEnd() throws IOException {
    out.write(0);
  }

  /**
   * Memcmp encoding for maps not supported, since ordering of Map in Avro is
   * undefined.
   */
  @Override
  public void writeMapStart() throws IOException {
    throw new IOException("MemcmpEncoder does not support writing Map types.");
  }

  /**
   * Memcmp encoding for maps not supported, since ordering of Map in Avro is
   * undefined.
   */
  @Override
  public void writeMapEnd() throws IOException {
    throw new IOException("MemcmpEncoder does not support writing Map types.");
  }

  /**
   * Union indexes are written using the int encoding of this class.
   * 
   * @param unionIndex
   *          The union index to encode.
   */
  @Override
  public void writeIndex(int unionIndex) throws IOException {
    writeInt(unionIndex);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy