org.apache.thrift.protocol.TCompactProtocol Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of thrift Show documentation
Thrift Artifacts republished here as maven
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package org.apache.thrift.protocol;

import java.util.Stack;
import java.io.UnsupportedEncodingException;

import org.apache.thrift.transport.TTransport;
import org.apache.thrift.TException;

/**
 * TCompactProtocol is the Java implementation of the compact protocol specified
 * in THRIFT-110. The fundamental approach to reducing the overhead of 
 * structures is a) use variable-length integers all over the place and b) make
 * use of unused bits wherever possible. Your savings will obviously vary 
 * based on the specific makeup of your structs, but in general, the more 
 * fields, nested structures, short strings and collections, and low-value i32
 * and i64 fields you have, the more benefit you'll see.
 */
public final class TCompactProtocol extends TProtocol {

  /** Struct descriptor returned by readStructBegin; struct names are not on the wire. */
  private final static TStruct ANONYMOUS_STRUCT = new TStruct("");

  /** Field descriptor returned by readFieldBegin when the STOP marker is read. */
  private final static TField TSTOP = new TField("", TType.STOP, (short)0);

  /** Lookup table indexed by TType constant, yielding the compact wire type code. */
  private final static byte[] ttypeToCompactType = new byte[16];
  
  static {
    ttypeToCompactType[TType.STOP] = TType.STOP;
    // Booleans map to BOOLEAN_TRUE by default; field headers override this
    // with the actual value (see writeBool).
    ttypeToCompactType[TType.BOOL] = Types.BOOLEAN_TRUE;
    ttypeToCompactType[TType.BYTE] = Types.BYTE;
    ttypeToCompactType[TType.I16] = Types.I16;
    ttypeToCompactType[TType.I32] = Types.I32;
    ttypeToCompactType[TType.I64] = Types.I64;
    ttypeToCompactType[TType.DOUBLE] = Types.DOUBLE;
    ttypeToCompactType[TType.STRING] = Types.BINARY;
    ttypeToCompactType[TType.LIST] = Types.LIST;
    ttypeToCompactType[TType.SET] = Types.SET;
    ttypeToCompactType[TType.MAP] = Types.MAP;
    ttypeToCompactType[TType.STRUCT] = Types.STRUCT;
  }
  
  /**
   * TProtocolFactory that produces TCompactProtocols.
   */
  public static class Factory implements TProtocolFactory {
    public Factory() {}
    
    public TProtocol getProtocol(TTransport trans) {
      return new TCompactProtocol(trans);
    }
  }
  
  private static final byte PROTOCOL_ID = (byte)0x82;
  private static final byte VERSION = 1;
  private static final byte VERSION_MASK = 0x1f; // 0001 1111
  private static final byte TYPE_MASK = (byte)0xE0; // 1110 0000
  // Mask applied after shifting the type back down; must cover the same three
  // bits that TYPE_MASK covers on the write side.
  private static final byte TYPE_BITS = 0x07; // 0000 0111
  private static final int  TYPE_SHIFT_AMOUNT = 5;
  
  /**
   * All of the on-wire type codes.
   */
  private static class Types {
    public static final byte BOOLEAN_TRUE   = 0x01;
    public static final byte BOOLEAN_FALSE  = 0x02;
    public static final byte BYTE           = 0x03;
    public static final byte I16            = 0x04;
    public static final byte I32            = 0x05;
    public static final byte I64            = 0x06;
    public static final byte DOUBLE         = 0x07;
    public static final byte BINARY         = 0x08;
    public static final byte LIST           = 0x09;
    public static final byte SET            = 0x0A;
    public static final byte MAP            = 0x0B;
    public static final byte STRUCT         = 0x0C;
  }
  
  /** 
   * Saved last-field ids of the enclosing structs. The id for the struct 
   * currently being processed lives in lastFieldId_; it is pushed here when a
   * nested struct begins and restored when that struct ends, so the field id
   * deltas stay correct.
   */
  private Stack<Short> lastField_ = new Stack<Short>();
  
  /** Id of the last field written or read in the current struct. */
  private short lastFieldId_ = 0;
  
  /** 
   * If we encounter a boolean field begin, save the TField here so it can 
   * have the value incorporated.
   */
  private TField booleanField_ = null;
  
  /**
   * If we read a field header, and it's a boolean field, save the boolean 
   * value here so that readBool can use it.
   */
  private Boolean boolValue_ = null;
  
  /**
   * Create a TCompactProtocol.
   *
   * @param transport the TTransport object to read from or write to.
   */
  public TCompactProtocol(TTransport transport) {
    super(transport);
  }
  
  
  //
  // Public Writing methods.
  //

  /**
   * Write a message header to the wire. Compact Protocol messages contain the
   * protocol version so we can migrate forwards in the future if need be.
   * The header is: protocol id byte, one byte holding the version (low 5 
   * bits) and message type (high 3 bits), the seqid as a varint, and the 
   * message name as a string.
   */
  public void writeMessageBegin(TMessage message) throws TException {
    writeByteDirect(PROTOCOL_ID);
    writeByteDirect((VERSION & VERSION_MASK) | ((message.type << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
    writeVarint32(message.seqid);
    writeString(message.name);
  }

  /**
   * Write a struct begin. This doesn't actually put anything on the wire. We 
   * use it as an opportunity to save the enclosing struct's last field id on 
   * the field stack and restart the delta base at zero.
   */
  public void writeStructBegin(TStruct struct) throws TException {
    lastField_.push(lastFieldId_);
    lastFieldId_ = 0;
  }

  /**
   * Write a struct end. This doesn't actually put anything on the wire. We use
   * this as an opportunity to restore the enclosing struct's last field id 
   * from the field stack.
   */
  public void writeStructEnd() throws TException {
    lastFieldId_ = lastField_.pop();
  }
  
  /**
   * Write a field header containing the field id and field type. Boolean 
   * fields are deferred: the header is only written once writeBool supplies 
   * the value, because the value is folded into the type nibble.
   */ 
  public void writeFieldBegin(TField field) throws TException {
    if (field.type == TType.BOOL) {
      // we want to possibly include the value, so we'll wait.
      booleanField_ = field;
    } else {
      writeFieldBeginInternal(field, (byte)-1);
    }
  }

  /**
   * The workhorse of writeFieldBegin. It has the option of doing a 
   * 'type override' of the type header. This is used specifically in the 
   * boolean field case.
   *
   * If the field id is greater than the last one by 1 to 15, the delta is 
   * encoded in the 4 MSB of the type byte. Otherwise the type byte is written
   * with a zero delta nibble and the field id follows as a zigzag varint.
   *
   * @param field the field whose header is written.
   * @param typeOverride compact type code to write, or -1 to derive it from
   *        field.type.
   */
  private void writeFieldBeginInternal(TField field, byte typeOverride) throws TException {
    // if there's a type override, use that.
    byte typeToWrite = typeOverride == -1 ? getCompactType(field.type) : typeOverride;

    // check if we can use delta encoding for the field id
    if (field.id > lastFieldId_ && field.id - lastFieldId_ <= 15) {
      // write them together
      writeByteDirect((field.id - lastFieldId_) << 4 | typeToWrite);
    } else {
      // write them separate
      writeByteDirect(typeToWrite);
      writeI16(field.id);
    }

    lastFieldId_ = field.id;
  }

  /**
   * Write the STOP symbol so we know there are no more fields in this struct.
   */
  public void writeFieldStop() throws TException {
    writeByteDirect(TType.STOP);
  }

  /**
   * Write a map header. If the map is empty, omit the key and value type 
   * headers, as we don't need any additional information to skip it.
   */
  public void writeMapBegin(TMap map) throws TException {
    if (map.size == 0) {
      writeByteDirect(0);
    } else {
      writeVarint32(map.size);
      writeByteDirect(getCompactType(map.keyType) << 4 | getCompactType(map.valueType));
    }
  }
  
  /** 
   * Write a list header.
   */
  public void writeListBegin(TList list) throws TException {
    writeCollectionBegin(list.elemType, list.size);
  }

  /**
   * Write a set header.
   */
  public void writeSetBegin(TSet set) throws TException {
    writeCollectionBegin(set.elemType, set.size);
  }

  /**
   * Write a boolean value. Potentially, this could be a boolean field, in 
   * which case the field header info isn't written yet. If so, decide what the
   * right type header is for the value and then write the field header. 
   * Otherwise, write a single byte.
   */
  public void writeBool(boolean b) throws TException {
    if (booleanField_ != null) {
      // we haven't written the field header yet
      writeFieldBeginInternal(booleanField_, b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
      booleanField_ = null;
    } else {
      // we're not part of a field, so just write the value.
      writeByteDirect(b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
    }
  }

  /** 
   * Write a byte. Nothing to see here!
   */
  public void writeByte(byte b) throws TException {
    writeByteDirect(b);
  }

  /**
   * Write an I16 as a zigzag varint.
   */
  public void writeI16(short i16) throws TException {
    writeVarint32(intToZigZag(i16));
  }
  
  /**
   * Write an i32 as a zigzag varint.
   */
  public void writeI32(int i32) throws TException {
    writeVarint32(intToZigZag(i32));
  }

  /**
   * Write an i64 as a zigzag varint.
   */
  public void writeI64(long i64) throws TException {
    writeVarint64(longToZigzag(i64));
  }

  /**
   * Write a double to the wire as 8 bytes, least significant byte first 
   * (see fixedLongToBytes).
   */ 
  public void writeDouble(double dub) throws TException {
    byte[] data = new byte[]{0, 0, 0, 0, 0, 0, 0, 0};
    fixedLongToBytes(Double.doubleToLongBits(dub), data, 0);
    trans_.write(data);
  }

  /**
   * Write a string to the wire, UTF-8 encoded, with a varint size preceding.
   */
  public void writeString(String str) throws TException {
    try {
      writeBinary(str.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException e) {
      throw new TException("UTF-8 not supported!");
    }
  }

  /**
   * Write a byte array, using a varint for the size. 
   */
  public void writeBinary(byte[] bin) throws TException {
    writeVarint32(bin.length);
    trans_.write(bin);
  }

  //
  // These methods are called by structs, but don't actually have any wire 
  // output or purpose.
  // 
  
  public void writeMessageEnd() throws TException {}
  public void writeMapEnd() throws TException {}
  public void writeListEnd() throws TException {}
  public void writeSetEnd() throws TException {}
  public void writeFieldEnd() throws TException {}

  //
  // Internal writing methods
  //

  /**
   * Shared implementation for writing the start of lists and sets. Sizes of 
   * 0-14 are packed into the 4 MSB of the type byte; larger sizes set those 
   * bits to 0xF and follow with the size as a varint.
   */
  protected void writeCollectionBegin(byte elemType, int size) throws TException {
    if (size <= 14) {
      writeByteDirect(size << 4 | getCompactType(elemType));
    } else {
      writeByteDirect(0xf0 | getCompactType(elemType));
      writeVarint32(size);
    }
  }

  /** Reusable scratch buffer for writeVarint32. */
  byte[] i32buf = new byte[5];

  /**
   * Write an i32 as a varint. Results in 1-5 bytes on the wire. Each byte 
   * carries 7 bits of the value, least significant group first, with the MSB
   * set on every byte except the last.
   */
  private void writeVarint32(int n) throws TException {
    int idx = 0;
    while (true) {
      if ((n & ~0x7F) == 0) {
        // last group: fits in 7 bits, no continuation bit.
        i32buf[idx++] = (byte)n;
        break;
      } else {
        i32buf[idx++] = (byte)((n & 0x7F) | 0x80);
        // unsigned shift so negative values terminate.
        n >>>= 7;
      }
    }
    trans_.write(i32buf, 0, idx);
  }

  /** Reusable scratch buffer for writeVarint64. */
  byte[] varint64out = new byte[10];

  /**
   * Write an i64 as a varint. Results in 1-10 bytes on the wire.
   */
  private void writeVarint64(long n) throws TException {
    int idx = 0;
    while (true) {
      if ((n & ~0x7FL) == 0) {
        varint64out[idx++] = (byte)n;
        break;
      } else {
        varint64out[idx++] = ((byte)((n & 0x7F) | 0x80));
        n >>>= 7;
      }
    }
    trans_.write(varint64out, 0, idx);
  }
  
  /**
   * Convert l into a zigzag long. This allows negative numbers to be 
   * represented compactly as a varint.
   */
  private long longToZigzag(long l) {
    return (l << 1) ^ (l >> 63);
  }
  
  /**
   * Convert n into a zigzag int. This allows negative numbers to be 
   * represented compactly as a varint.
   */
  private int intToZigZag(int n) {
    return (n << 1) ^ (n >> 31);
  }
  
  /**
   * Convert a long into little-endian bytes in buf starting at off and going 
   * until off+7.
   */
  private void fixedLongToBytes(long n, byte[] buf, int off) {
    buf[off+0] = (byte)( n        & 0xff);
    buf[off+1] = (byte)((n >> 8 ) & 0xff);
    buf[off+2] = (byte)((n >> 16) & 0xff);
    buf[off+3] = (byte)((n >> 24) & 0xff);
    buf[off+4] = (byte)((n >> 32) & 0xff);
    buf[off+5] = (byte)((n >> 40) & 0xff);
    buf[off+6] = (byte)((n >> 48) & 0xff);
    buf[off+7] = (byte)((n >> 56) & 0xff);
  }

  /** Reusable one-byte scratch buffer for writeByteDirect. */
  private byte[] byteDirectBuffer = new byte[1];

  /** 
   * Writes a byte without any possibility of all that field header nonsense. 
   * Used internally by other writing methods that know they need to write a byte.
   */
  private void writeByteDirect(byte b) throws TException {
    byteDirectBuffer[0] = b;
    trans_.write(byteDirectBuffer);
  }

  /** 
   * Writes the low 8 bits of n as a byte without any possibility of all that 
   * field header nonsense.
   */
  private void writeByteDirect(int n) throws TException {
    writeByteDirect((byte)n);
  }


  // 
  // Reading methods.
  // 

  /**
   * Read a message header. 
   *
   * @throws TProtocolException if the protocol id or version on the wire does
   *         not match this implementation.
   */
  public TMessage readMessageBegin() throws TException {
    byte protocolId = readByte();
    if (protocolId != PROTOCOL_ID) {
      // mask to 8 bits so negative bytes print as "82" rather than "ffffff82".
      throw new TProtocolException("Expected protocol id " + Integer.toHexString(PROTOCOL_ID & 0xff) + " but got " + Integer.toHexString(protocolId & 0xff));
    }
    byte versionAndType = readByte();
    byte version = (byte)(versionAndType & VERSION_MASK);
    if (version != VERSION) {
      throw new TProtocolException("Expected version " + VERSION + " but got " + version);
    }
    // the writer stores the type in the top three bits, so recover all three.
    byte type = (byte)((versionAndType >> TYPE_SHIFT_AMOUNT) & TYPE_BITS);
    int seqid = readVarint32();
    String messageName = readString();
    return new TMessage(messageName, type, seqid);
  }

  /**
   * Read a struct begin. There's nothing on the wire for this, but it is our
   * opportunity to save the enclosing struct's last field id on the field 
   * stack and restart the delta base at zero.
   */
  public TStruct readStructBegin() throws TException {
    lastField_.push(lastFieldId_);
    lastFieldId_ = 0;
    return ANONYMOUS_STRUCT;
  }

  /**
   * Doesn't actually consume any wire data, just restores the enclosing 
   * struct's last field id from the field stack.
   */
  public void readStructEnd() throws TException {
    lastFieldId_ = lastField_.pop();
  }
  
  /**
   * Read a field header off the wire. Returns the shared STOP field when the
   * struct is over. For boolean fields the value is carried in the type 
   * nibble and is stashed for the following readBool call.
   */
  public TField readFieldBegin() throws TException {
    byte type = readByte();
    
    // if it's a stop, then we can return immediately, as the struct is over.
    if ((type & 0x0f) == TType.STOP) {
      return TSTOP;
    }
    
    short fieldId;

    // mask off the 4 MSB of the type header. it could contain a field id delta.
    short modifier = (short)((type & 0xf0) >> 4);
    if (modifier == 0) {
      // not a delta. look ahead for the zigzag varint field id.
      fieldId = readI16();
    } else {
      // has a delta. add the delta to the last read field id.
      fieldId = (short)(lastFieldId_ + modifier);
    }
    
    TField field = new TField("", getTType((byte)(type & 0x0f)), fieldId);

    // if this happens to be a boolean field, the value is encoded in the type
    if (isBoolType(type)) {
      // save the boolean value in a special instance variable.
      boolValue_ = (byte)(type & 0x0f) == Types.BOOLEAN_TRUE ? Boolean.TRUE : Boolean.FALSE;
    } 

    // remember this field id so we can keep the deltas going.
    lastFieldId_ = field.id;
    return field;
  }

  /** 
   * Read a map header off the wire. If the size is zero, skip reading the key
   * and value type. This means that 0-length maps will yield TMaps without the
   * "correct" types.
   */
  public TMap readMapBegin() throws TException {
    int size = readVarint32();
    byte keyAndValueType = size == 0 ? 0 : readByte();
    return new TMap(getTType((byte)(keyAndValueType >> 4)), getTType((byte)(keyAndValueType & 0xf)), size);
  }

  /**
   * Read a list header off the wire. If the list size is 0-14, the size will 
   * be packed into the element type header. If it's a longer list, the 4 MSB
   * of the element type header will be 0xF, and a varint will follow with the
   * true size.
   */
  public TList readListBegin() throws TException {
    byte size_and_type = readByte();
    int size = (size_and_type >> 4) & 0x0f;
    if (size == 15) {
      size = readVarint32();
    }
    // getTType only looks at the low nibble, so the size bits are ignored.
    byte type = getTType(size_and_type);
    return new TList(type, size);
  }

  /**
   * Read a set header off the wire. If the set size is 0-14, the size will 
   * be packed into the element type header. If it's a longer set, the 4 MSB
   * of the element type header will be 0xF, and a varint will follow with the
   * true size.
   */
  public TSet readSetBegin() throws TException {
    return new TSet(readListBegin());
  }

  /**
   * Read a boolean off the wire. If this is a boolean field, the value should
   * already have been read during readFieldBegin, so we'll just consume the
   * pre-stored value. Otherwise, read a byte.
   */
  public boolean readBool() throws TException {
    if (boolValue_ != null) {
      boolean result = boolValue_.booleanValue();
      boolValue_ = null;
      return result;
    }
    return readByte() == Types.BOOLEAN_TRUE;
  }

  /** Reusable one-byte scratch buffer for readByte. */
  byte[] byteRawBuf = new byte[1];

  /**
   * Read a single byte off the wire. Nothing interesting here.
   */
  public byte readByte() throws TException {
    trans_.readAll(byteRawBuf, 0, 1);
    return byteRawBuf[0];
  }

  /**
   * Read an i16 from the wire as a zigzag varint.
   */
  public short readI16() throws TException {
    return (short)zigzagToInt(readVarint32());
  }

  /**
   * Read an i32 from the wire as a zigzag varint.
   */
  public int readI32() throws TException {
    return zigzagToInt(readVarint32());
  }

  /**
   * Read an i64 from the wire as a zigzag varint.
   */
  public long readI64() throws TException {
    return zigzagToLong(readVarint64());
  }

  /**
   * No magic here - just read a double off the wire as 8 bytes, least 
   * significant byte first (see bytesToLong).
   */
  public double readDouble() throws TException {
    byte[] longBits = new byte[8];
    trans_.readAll(longBits, 0, 8);
    return Double.longBitsToDouble(bytesToLong(longBits));
  }

  /**
   * Reads a byte[] (via readBinary), and then UTF-8 decodes it.
   */
  public String readString() throws TException {
    try {
      return new String(readBinary(), "UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new TException("UTF-8 not supported!");
    }
  }

  /**
   * Read a byte[] from the wire: a varint length followed by that many bytes.
   *
   * @throws TProtocolException if the length read off the wire is negative.
   */
  public byte[] readBinary() throws TException {
    int length = readVarint32();
    // a corrupt or hostile length must surface as a protocol error rather
    // than a NegativeArraySizeException from the allocation below.
    if (length < 0) {
      throw new TProtocolException("Negative length: " + length);
    }
    if (length == 0) return new byte[0];

    byte[] buf = new byte[length];
    trans_.readAll(buf, 0, length);
    return buf;
  }


  //
  // These methods are here for the struct to call, but don't have any wire 
  // encoding.
  //
  public void readMessageEnd() throws TException {}
  public void readFieldEnd() throws TException {}
  public void readMapEnd() throws TException {}
  public void readListEnd() throws TException {}
  public void readSetEnd() throws TException {}
  
  //
  // Internal reading methods
  //
  
  /**
   * Read an i32 from the wire as a varint. The MSB of each byte is set
   * if there is another byte to follow. This delegates to readVarint64 and 
   * truncates, so a well-formed i32 occupies 1-5 bytes but up to 10 bytes 
   * will be consumed if the wire contains them.
   */
  private int readVarint32() throws TException {
    // if the wire contains the right stuff, this will just truncate the i64 we
    // read and get us the right sign.
    return (int)readVarint64();
  }

  /**
   * Read an i64 from the wire as a proper varint. The MSB of each byte is set 
   * if there is another byte to follow. This can read up to 10 bytes.
   *
   * @throws TProtocolException if the 10th byte still has its continuation
   *         bit set.
   */
  private long readVarint64() throws TException {
    int shift = 0;
    long result = 0;
    while (true) {
      byte b = readByte();
      result |= (long) (b & 0x7f) << shift;
      if ((b & 0x80) != 0x80) break;
      shift += 7;
      // ten 7-bit groups cover all 64 bits; beyond that the shift distance
      // would wrap and silently corrupt the result, so reject the input.
      if (shift > 63) {
        throw new TProtocolException("Variable-length int over 10 bytes.");
      }
    }
    return result;
  }

  //
  // encoding helpers
  //
  
  /**
   * Convert from zigzag int to int.
   */
  private int zigzagToInt(int n) {
    return (n >>> 1) ^ -(n & 1);
  }
  
  /** 
   * Convert from zigzag long to long.
   */
  private long zigzagToLong(long n) {
    return (n >>> 1) ^ -(n & 1);
  }

  /**
   * Assemble a long from 8 little-endian bytes (bytes[0] is least 
   * significant).
   *
   * Note that it's important that the mask bytes are long literals, 
   * otherwise they'll default to ints, and when you shift an int left 56 bits,
   * you just get a messed up int.
   */
  private long bytesToLong(byte[] bytes) {
    return
      ((bytes[7] & 0xffL) << 56) |
      ((bytes[6] & 0xffL) << 48) |
      ((bytes[5] & 0xffL) << 40) |
      ((bytes[4] & 0xffL) << 32) |
      ((bytes[3] & 0xffL) << 24) |
      ((bytes[2] & 0xffL) << 16) |
      ((bytes[1] & 0xffL) <<  8) |
      ((bytes[0] & 0xffL));
  }

  //
  // type testing and converting
  //

  /**
   * Tell whether the low nibble of b is one of the two boolean type codes.
   */
  private boolean isBoolType(byte b) {
    return (b & 0x0f) == Types.BOOLEAN_TRUE || (b & 0x0f) == Types.BOOLEAN_FALSE;
  }

  /**
   * Given a TCompactProtocol.Types constant, convert it to its corresponding 
   * TType value. Only the low nibble of the argument is examined.
   *
   * @throws RuntimeException if the low nibble is not a known type code.
   */
  private byte getTType(byte type) {
    switch ((byte)(type & 0x0f)) {
      case TType.STOP:
        return TType.STOP;
      case Types.BOOLEAN_FALSE:
      case Types.BOOLEAN_TRUE:
        return TType.BOOL;
      case Types.BYTE:
        return TType.BYTE;
      case Types.I16:
        return TType.I16;
      case Types.I32:
        return TType.I32;
      case Types.I64:
        return TType.I64;
      case Types.DOUBLE:
        return TType.DOUBLE;
      case Types.BINARY:
        return TType.STRING;
      case Types.LIST:
        return TType.LIST;
      case Types.SET:
        return TType.SET;
      case Types.MAP:
        return TType.MAP;
      case Types.STRUCT:
        return TType.STRUCT;
      default:
        throw new RuntimeException("don't know what type: " + (byte)(type & 0x0f));
    }
  }

  /**
   * Given a TType value, find the appropriate TCompactProtocol.Types constant.
   */
  private byte getCompactType(byte ttype) {
    return ttypeToCompactType[ttype];
  }
  
}