All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.thrift.protocol.TCompactProtocol Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */


package org.apache.thrift.protocol;

import java.util.Stack;
import java.io.UnsupportedEncodingException;

import org.apache.thrift.transport.TTransport;
import org.apache.thrift.TException;

/**
 * TCompactProtocol is the Java implementation of the compact protocol specified
 * in THRIFT-110. The fundamental approach to reducing the overhead of
 * structures is a) use variable-length integers all over the place and b) make
 * use of unused bits wherever possible. Your savings will obviously vary
 * based on the specific makeup of your structs, but in general, the more
 * fields, nested structures, short strings and collections, and low-value i32
 * and i64 fields you have, the more benefit you'll see.
 *
 * <p>An instance keeps per-message state (last field id, pending boolean
 * field, reusable byte buffers) in instance fields, so a single instance
 * must not be used by several threads at once.
 */
public final class TCompactProtocol extends TProtocol {

  /** Struct descriptor returned by readStructBegin; struct names are not on the wire. */
  private final static TStruct ANONYMOUS_STRUCT = new TStruct("");

  /** Field descriptor returned by readFieldBegin when the STOP marker is read. */
  private final static TField TSTOP = new TField("", TType.STOP, (short)0);

  /** Lookup table from a TType constant (used as index) to its on-wire compact type code. */
  private final static byte[] ttypeToCompactType = new byte[16];

  static {
    ttypeToCompactType[TType.STOP] = TType.STOP;
    // Booleans map to BOOLEAN_TRUE here; writeBool picks the real value code.
    ttypeToCompactType[TType.BOOL] = Types.BOOLEAN_TRUE;
    ttypeToCompactType[TType.BYTE] = Types.BYTE;
    ttypeToCompactType[TType.I16] = Types.I16;
    ttypeToCompactType[TType.I32] = Types.I32;
    ttypeToCompactType[TType.I64] = Types.I64;
    ttypeToCompactType[TType.DOUBLE] = Types.DOUBLE;
    ttypeToCompactType[TType.STRING] = Types.BINARY;
    ttypeToCompactType[TType.LIST] = Types.LIST;
    ttypeToCompactType[TType.SET] = Types.SET;
    ttypeToCompactType[TType.MAP] = Types.MAP;
    ttypeToCompactType[TType.STRUCT] = Types.STRUCT;
  }

  /**
   * TProtocolFactory that produces TCompactProtocols.
   */
  public static class Factory implements TProtocolFactory {
    public Factory() {}

    /**
     * Create a TCompactProtocol bound to the given transport.
     *
     * @param trans the transport the new protocol reads from and writes to.
     * @return a new TCompactProtocol instance.
     */
    public TProtocol getProtocol(TTransport trans) {
      return new TCompactProtocol(trans);
    }
  }

  private static final byte PROTOCOL_ID = (byte)0x82;
  private static final byte VERSION = 1;
  private static final byte VERSION_MASK = 0x1f; // 0001 1111
  private static final byte TYPE_MASK = (byte)0xE0; // 1110 0000
  // Mask applied AFTER shifting the type bits down; must cover all three bits
  // that TYPE_MASK lets the writer store.
  private static final byte TYPE_BITS = 0x07; // 0000 0111
  private static final int  TYPE_SHIFT_AMOUNT = 5;

  /**
   * All of the on-wire type codes.
   */
  private static class Types {
    public static final byte BOOLEAN_TRUE   = 0x01;
    public static final byte BOOLEAN_FALSE  = 0x02;
    public static final byte BYTE           = 0x03;
    public static final byte I16            = 0x04;
    public static final byte I32            = 0x05;
    public static final byte I64            = 0x06;
    public static final byte DOUBLE         = 0x07;
    public static final byte BINARY         = 0x08;
    public static final byte LIST           = 0x09;
    public static final byte SET            = 0x0A;
    public static final byte MAP            = 0x0B;
    public static final byte STRUCT         = 0x0C;
  }

  /**
   * Used to keep track of the last field id of each enclosing struct, so the
   * delta encoding can resume when a nested struct ends.
   */
  private Stack<Short> lastField_ = new Stack<Short>();

  /** Id of the last field written or read in the struct currently being processed. */
  private short lastFieldId_ = 0;

  /**
   * If we encounter a boolean field begin, save the TField here so it can
   * have the value incorporated.
   */
  private TField booleanField_ = null;

  /**
   * If we read a field header, and it's a boolean field, save the boolean
   * value here so that readBool can use it.
   */
  private Boolean boolValue_ = null;

  /**
   * Create a TCompactProtocol.
   *
   * @param transport the TTransport object to read from or write to.
   */
  public TCompactProtocol(TTransport transport) {
    super(transport);
  }


  //
  // Public Writing methods.
  //

  /**
   * Write a message header to the wire. Compact Protocol messages contain the
   * protocol version so we can migrate forwards in the future if need be.
   * Layout: protocol id byte, one byte holding version (low 5 bits) and
   * message type (high 3 bits), the seqid as a varint, then the name.
   */
  public void writeMessageBegin(TMessage message) throws TException {
    writeByteDirect(PROTOCOL_ID);
    writeByteDirect((VERSION & VERSION_MASK) | ((message.type << TYPE_SHIFT_AMOUNT) & TYPE_MASK));
    writeVarint32(message.seqid);
    writeString(message.name);
  }

  /**
   * Write a struct begin. This doesn't actually put anything on the wire. We
   * use it as an opportunity to save the enclosing struct's last field id on
   * the stack and restart the delta base at zero.
   */
  public void writeStructBegin(TStruct struct) throws TException {
    lastField_.push(lastFieldId_);
    lastFieldId_ = 0;
  }

  /**
   * Write a struct end. This doesn't actually put anything on the wire. We use
   * this as an opportunity to restore the enclosing struct's last field id
   * from the field stack.
   */
  public void writeStructEnd() throws TException {
    lastFieldId_ = lastField_.pop();
  }

  /**
   * Write a field header containing the field id and field type. If the
   * difference between the current field id and the last one is small
   * (between 1 and 15 inclusive), then the field id will be encoded in the
   * 4 MSB as a delta. Otherwise, the field id will follow the type header as
   * a zigzag varint.
   *
   * <p>Boolean fields are deferred: the header is written by writeBool so the
   * value can be folded into the type code.
   */
  public void writeFieldBegin(TField field) throws TException {
    if (field.type == TType.BOOL) {
      // we want to possibly include the value, so we'll wait.
      booleanField_ = field;
    } else {
      writeFieldBeginInternal(field, (byte)-1);
    }
  }

  /**
   * The workhorse of writeFieldBegin. It has the option of doing a
   * 'type override' of the type header. This is used specifically in the
   * boolean field case.
   *
   * @param field the field whose header is written.
   * @param typeOverride compact type code to write instead of the one derived
   *        from field.type, or -1 for no override.
   */
  private void writeFieldBeginInternal(TField field, byte typeOverride) throws TException {
    // if there's a type override, use that.
    byte typeToWrite = typeOverride == -1 ? getCompactType(field.type) : typeOverride;

    // check if we can use delta encoding for the field id
    if (field.id > lastFieldId_ && field.id - lastFieldId_ <= 15) {
      // write them together
      writeByteDirect((field.id - lastFieldId_) << 4 | typeToWrite);
    } else {
      // write them separate
      writeByteDirect(typeToWrite);
      writeI16(field.id);
    }

    lastFieldId_ = field.id;
  }

  /**
   * Write the STOP symbol so we know there are no more fields in this struct.
   */
  public void writeFieldStop() throws TException {
    writeByteDirect(TType.STOP);
  }

  /**
   * Write a map header. If the map is empty, omit the key and value type
   * headers, as we don't need any additional information to skip it.
   */
  public void writeMapBegin(TMap map) throws TException {
    if (map.size == 0) {
      writeByteDirect(0);
    } else {
      writeVarint32(map.size);
      writeByteDirect(getCompactType(map.keyType) << 4 | getCompactType(map.valueType));
    }
  }

  /**
   * Write a list header.
   */
  public void writeListBegin(TList list) throws TException {
    writeCollectionBegin(list.elemType, list.size);
  }

  /**
   * Write a set header.
   */
  public void writeSetBegin(TSet set) throws TException {
    writeCollectionBegin(set.elemType, set.size);
  }

  /**
   * Write a boolean value. Potentially, this could be a boolean field, in
   * which case the field header info isn't written yet. If so, decide what the
   * right type header is for the value and then write the field header.
   * Otherwise, write a single byte.
   */
  public void writeBool(boolean b) throws TException {
    if (booleanField_ != null) {
      // we haven't written the field header yet
      writeFieldBeginInternal(booleanField_, b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
      booleanField_ = null;
    } else {
      // we're not part of a field, so just write the value.
      writeByteDirect(b ? Types.BOOLEAN_TRUE : Types.BOOLEAN_FALSE);
    }
  }

  /**
   * Write a byte. Nothing to see here!
   */
  public void writeByte(byte b) throws TException {
    writeByteDirect(b);
  }

  /**
   * Write an I16 as a zigzag varint.
   */
  public void writeI16(short i16) throws TException {
    writeVarint32(intToZigZag(i16));
  }

  /**
   * Write an i32 as a zigzag varint.
   */
  public void writeI32(int i32) throws TException {
    writeVarint32(intToZigZag(i32));
  }

  /**
   * Write an i64 as a zigzag varint.
   */
  public void writeI64(long i64) throws TException {
    writeVarint64(longToZigzag(i64));
  }

  /**
   * Write a double to the wire as 8 bytes (little-endian, see fixedLongToBytes).
   */
  public void writeDouble(double dub) throws TException {
    byte[] data = new byte[]{0, 0, 0, 0, 0, 0, 0, 0};
    fixedLongToBytes(Double.doubleToLongBits(dub), data, 0);
    trans_.write(data);
  }

  /**
   * Write a string to the wire as UTF-8 bytes with a varint size preceding.
   */
  public void writeString(String str) throws TException {
    try {
      writeBinary(str.getBytes("UTF-8"));
    } catch (UnsupportedEncodingException e) {
      throw new TException("UTF-8 not supported!");
    }
  }

  /**
   * Write a byte array, using a varint for the size.
   */
  public void writeBinary(byte[] bin) throws TException {
    writeVarint32(bin.length);
    trans_.write(bin);
  }

  //
  // These methods are called by structs, but don't actually have any wire
  // output or purpose.
  //

  public void writeMessageEnd() throws TException {}
  public void writeMapEnd() throws TException {}
  public void writeListEnd() throws TException {}
  public void writeSetEnd() throws TException {}
  public void writeFieldEnd() throws TException {}

  //
  // Internal writing methods
  //

  /**
   * Shared implementation for writing the start of lists and sets. List and
   * sets on the wire differ only by the type indicator. Sizes up to 14 are
   * packed into the high nibble of the header byte; larger sizes use 0xF in
   * the high nibble followed by the size as a varint.
   */
  protected void writeCollectionBegin(byte elemType, int size) throws TException {
    if (size <= 14) {
      writeByteDirect(size << 4 | getCompactType(elemType));
    } else {
      writeByteDirect(0xf0 | getCompactType(elemType));
      writeVarint32(size);
    }
  }

  /** Reusable scratch buffer for writeVarint32 (a 32-bit varint needs at most 5 bytes). */
  byte[] i32buf = new byte[5];

  /**
   * Write an i32 as a varint. Results in 1-5 bytes on the wire.
   */
  private void writeVarint32(int n) throws TException {
    int idx = 0;
    while (true) {
      if ((n & ~0x7F) == 0) {
        // last group: continuation bit clear
        i32buf[idx++] = (byte)n;
        break;
      } else {
        // low 7 bits with the continuation bit set
        i32buf[idx++] = (byte)((n & 0x7F) | 0x80);
        n >>>= 7;
      }
    }
    trans_.write(i32buf, 0, idx);
  }

  /** Reusable scratch buffer for writeVarint64 (a 64-bit varint needs at most 10 bytes). */
  byte[] varint64out = new byte[10];

  /**
   * Write an i64 as a varint. Results in 1-10 bytes on the wire.
   */
  private void writeVarint64(long n) throws TException {
    int idx = 0;
    while (true) {
      if ((n & ~0x7FL) == 0) {
        varint64out[idx++] = (byte)n;
        break;
      } else {
        varint64out[idx++] = ((byte)((n & 0x7F) | 0x80));
        n >>>= 7;
      }
    }
    trans_.write(varint64out, 0, idx);
  }

  /**
   * Convert l into a zigzag long. This allows negative numbers to be
   * represented compactly as a varint.
   */
  private long longToZigzag(long l) {
    return (l << 1) ^ (l >> 63);
  }

  /**
   * Convert n into a zigzag int. This allows negative numbers to be
   * represented compactly as a varint.
   */
  private int intToZigZag(int n) {
    return (n << 1) ^ (n >> 31);
  }

  /**
   * Convert a long into little-endian bytes in buf starting at off and going
   * until off+7.
   */
  private void fixedLongToBytes(long n, byte[] buf, int off) {
    buf[off+0] = (byte)( n        & 0xff);
    buf[off+1] = (byte)((n >> 8 ) & 0xff);
    buf[off+2] = (byte)((n >> 16) & 0xff);
    buf[off+3] = (byte)((n >> 24) & 0xff);
    buf[off+4] = (byte)((n >> 32) & 0xff);
    buf[off+5] = (byte)((n >> 40) & 0xff);
    buf[off+6] = (byte)((n >> 48) & 0xff);
    buf[off+7] = (byte)((n >> 56) & 0xff);
  }

  /** Reusable one-byte buffer for writeByteDirect. */
  private byte[] byteDirectBuffer = new byte[1];

  /**
   * Writes a byte without any possibility of all that field header nonsense.
   * Used internally by other writing methods that know they need to write a byte.
   */
  private void writeByteDirect(byte b) throws TException {
    byteDirectBuffer[0] = b;
    trans_.write(byteDirectBuffer);
  }

  /**
   * Writes the low 8 bits of n as a byte without any possibility of all that
   * field header nonsense.
   */
  private void writeByteDirect(int n) throws TException {
    writeByteDirect((byte)n);
  }


  //
  // Reading methods.
  //

  /**
   * Read a message header.
   *
   * @throws TProtocolException if the protocol id or version read from the
   *         wire does not match this implementation.
   */
  public TMessage readMessageBegin() throws TException {
    byte protocolId = readByte();
    if (protocolId != PROTOCOL_ID) {
      // mask to 0..255 so negative bytes are not printed sign-extended
      throw new TProtocolException("Expected protocol id " + Integer.toHexString(PROTOCOL_ID & 0xff) + " but got " + Integer.toHexString(protocolId & 0xff));
    }
    byte versionAndType = readByte();
    byte version = (byte)(versionAndType & VERSION_MASK);
    if (version != VERSION) {
      throw new TProtocolException("Expected version " + VERSION + " but got " + version);
    }
    // The writer stores three type bits (TYPE_MASK), so all three must be
    // recovered here; the mask also strips the sign extension from >>.
    byte type = (byte)((versionAndType >> TYPE_SHIFT_AMOUNT) & TYPE_BITS);
    int seqid = readVarint32();
    String messageName = readString();
    return new TMessage(messageName, type, seqid);
  }

  /**
   * Read a struct begin. There's nothing on the wire for this, but it is our
   * opportunity to save the enclosing struct's last field id on the field
   * stack and restart the delta base at zero.
   */
  public TStruct readStructBegin() throws TException {
    lastField_.push(lastFieldId_);
    lastFieldId_ = 0;
    return ANONYMOUS_STRUCT;
  }

  /**
   * Doesn't actually consume any wire data, just restores the enclosing
   * struct's last field id from the field stack.
   */
  public void readStructEnd() throws TException {
    lastFieldId_ = lastField_.pop();
  }

  /**
   * Read a field header off the wire.
   *
   * @return the field descriptor (with an empty name), or TSTOP when the
   *         struct's STOP marker is read.
   */
  public TField readFieldBegin() throws TException {
    byte type = readByte();

    // if it's a stop, then we can return immediately, as the struct is over.
    if ((type & 0x0f) == TType.STOP) {
      return TSTOP;
    }

    short fieldId;

    // mask off the 4 MSB of the type header. it could contain a field id delta.
    short modifier = (short)((type & 0xf0) >> 4);
    if (modifier == 0) {
      // not a delta. look ahead for the zigzag varint field id.
      fieldId = readI16();
    } else {
      // has a delta. add the delta to the last read field id.
      fieldId = (short)(lastFieldId_ + modifier);
    }

    TField field = new TField("", getTType((byte)(type & 0x0f)), fieldId);

    // if this happens to be a boolean field, the value is encoded in the type
    if (isBoolType(type)) {
      // save the boolean value in a special instance variable.
      boolValue_ = (byte)(type & 0x0f) == Types.BOOLEAN_TRUE ? Boolean.TRUE : Boolean.FALSE;
    }

    // remember this field id so the next header's delta can be resolved.
    lastFieldId_ = field.id;
    return field;
  }

  /**
   * Read a map header off the wire. If the size is zero, skip reading the key
   * and value type. This means that 0-length maps will yield TMaps without the
   * "correct" types.
   */
  public TMap readMapBegin() throws TException {
    int size = readVarint32();
    byte keyAndValueType = size == 0 ? 0 : readByte();
    // getTType masks to the low nibble, so sign extension from >> is harmless.
    return new TMap(getTType((byte)(keyAndValueType >> 4)), getTType((byte)(keyAndValueType & 0xf)), size);
  }

  /**
   * Read a list header off the wire. If the list size is 0-14, the size will
   * be packed into the element type header. If it's a longer list, the 4 MSB
   * of the element type header will be 0xF, and a varint will follow with the
   * true size.
   */
  public TList readListBegin() throws TException {
    byte size_and_type = readByte();
    int size = (size_and_type >> 4) & 0x0f;
    if (size == 15) {
      size = readVarint32();
    }
    byte type = getTType(size_and_type);
    return new TList(type, size);
  }

  /**
   * Read a set header off the wire. If the set size is 0-14, the size will
   * be packed into the element type header. If it's a longer set, the 4 MSB
   * of the element type header will be 0xF, and a varint will follow with the
   * true size.
   */
  public TSet readSetBegin() throws TException {
    return new TSet(readListBegin());
  }

  /**
   * Read a boolean off the wire. If this is a boolean field, the value should
   * already have been read during readFieldBegin, so we'll just consume the
   * pre-stored value. Otherwise, read a byte.
   */
  public boolean readBool() throws TException {
    if (boolValue_ != null) {
      boolean result = boolValue_.booleanValue();
      boolValue_ = null;
      return result;
    }
    return readByte() == Types.BOOLEAN_TRUE;
  }

  /** Reusable one-byte buffer for readByte. */
  byte[] byteRawBuf = new byte[1];

  /**
   * Read a single byte off the wire. Nothing interesting here.
   */
  public byte readByte() throws TException {
    trans_.readAll(byteRawBuf, 0, 1);
    return byteRawBuf[0];
  }

  /**
   * Read an i16 from the wire as a zigzag varint.
   */
  public short readI16() throws TException {
    return (short)zigzagToInt(readVarint32());
  }

  /**
   * Read an i32 from the wire as a zigzag varint.
   */
  public int readI32() throws TException {
    return zigzagToInt(readVarint32());
  }

  /**
   * Read an i64 from the wire as a zigzag varint.
   */
  public long readI64() throws TException {
    return zigzagToLong(readVarint64());
  }

  /**
   * No magic here - just read a double off the wire (8 bytes, little-endian).
   */
  public double readDouble() throws TException {
    byte[] longBits = new byte[8];
    trans_.readAll(longBits, 0, 8);
    return Double.longBitsToDouble(bytesToLong(longBits));
  }

  /**
   * Reads a byte[] (via readBinary), and then UTF-8 decodes it.
   */
  public String readString() throws TException {
    try {
      return new String(readBinary(), "UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new TException("UTF-8 not supported!");
    }
  }

  /**
   * Read a byte[] from the wire: a varint length followed by that many bytes.
   *
   * @throws TProtocolException if the length read from the wire is negative.
   */
  public byte[] readBinary() throws TException {
    int length = readVarint32();
    if (length < 0) {
      // a corrupt or hostile length must not surface as NegativeArraySizeException
      throw new TProtocolException("Negative length: " + length);
    }
    if (length == 0) return new byte[0];

    byte[] buf = new byte[length];
    trans_.readAll(buf, 0, length);
    return buf;
  }


  //
  // These methods are here for the struct to call, but don't have any wire
  // encoding.
  //
  public void readMessageEnd() throws TException {}
  public void readFieldEnd() throws TException {}
  public void readMapEnd() throws TException {}
  public void readListEnd() throws TException {}
  public void readSetEnd() throws TException {}

  //
  // Internal reading methods
  //

  /**
   * Read an i32 from the wire as a varint. The MSB of each byte is set
   * if there is another byte to follow. Implemented by reading a 64-bit
   * varint and truncating it to 32 bits.
   */
  private int readVarint32() throws TException {
    // if the wire contains the right stuff, this will just truncate the i64 we
    // read and get us the right sign.
    return (int)readVarint64();
  }

  /**
   * Read an i64 from the wire as a proper varint. The MSB of each byte is set
   * if there is another byte to follow. This can read up to 10 bytes.
   *
   * @throws TProtocolException if an 11th byte would be required, since such
   *         a value cannot fit in 64 bits.
   */
  private long readVarint64() throws TException {
    int shift = 0;
    long result = 0;
    while (true) {
      byte b = readByte();
      result |= (long) (b & 0x7f) << shift;
      if ((b & 0x80) != 0x80) break;
      shift += 7;
      if (shift > 63) {
        // Java shifts longs modulo 64, so continuing would silently corrupt
        // the low bits of the result instead of failing.
        throw new TProtocolException("Variable-length integer is longer than 10 bytes");
      }
    }
    return result;
  }

  //
  // encoding helpers
  //

  /**
   * Convert from zigzag int to int.
   */
  private int zigzagToInt(int n) {
    return (n >>> 1) ^ -(n & 1);
  }

  /**
   * Convert from zigzag long to long.
   */
  private long zigzagToLong(long n) {
    return (n >>> 1) ^ -(n & 1);
  }

  /**
   * Assemble a long from 8 little-endian bytes.
   *
   * Note that it's important that the mask bytes are long literals,
   * otherwise they'll default to ints, and when you shift an int left 56 bits,
   * you just get a messed up int.
   */
  private long bytesToLong(byte[] bytes) {
    return
      ((bytes[7] & 0xffL) << 56) |
      ((bytes[6] & 0xffL) << 48) |
      ((bytes[5] & 0xffL) << 40) |
      ((bytes[4] & 0xffL) << 32) |
      ((bytes[3] & 0xffL) << 24) |
      ((bytes[2] & 0xffL) << 16) |
      ((bytes[1] & 0xffL) <<  8) |
      ((bytes[0] & 0xffL));
  }

  //
  // type testing and converting
  //

  /**
   * Tell whether the low nibble of a field header byte is one of the two
   * boolean type codes.
   */
  private boolean isBoolType(byte b) {
    return (b & 0x0f) == Types.BOOLEAN_TRUE || (b & 0x0f) == Types.BOOLEAN_FALSE;
  }

  /**
   * Given a TCompactProtocol.Types constant, convert it to its corresponding
   * TType value. Only the low nibble of the argument is considered.
   *
   * @throws RuntimeException if the low nibble is not a known type code.
   */
  private byte getTType(byte type) {
    switch ((byte)(type & 0x0f)) {
      case TType.STOP:
        return TType.STOP;
      case Types.BOOLEAN_FALSE:
      case Types.BOOLEAN_TRUE:
        return TType.BOOL;
      case Types.BYTE:
        return TType.BYTE;
      case Types.I16:
        return TType.I16;
      case Types.I32:
        return TType.I32;
      case Types.I64:
        return TType.I64;
      case Types.DOUBLE:
        return TType.DOUBLE;
      case Types.BINARY:
        return TType.STRING;
      case Types.LIST:
        return TType.LIST;
      case Types.SET:
        return TType.SET;
      case Types.MAP:
        return TType.MAP;
      case Types.STRUCT:
        return TType.STRUCT;
      default:
        throw new RuntimeException("don't know what type: " + (byte)(type & 0x0f));
    }
  }

  /**
   * Given a TType value, find the appropriate TCompactProtocol.Types constant.
   */
  private byte getCompactType(byte ttype) {
    return ttypeToCompactType[ttype];
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy