xapi.model.impl.PrimitiveSerializerDefault Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of xapi-dev Show documentation
Everything needed to run a comprehensive dev environment. Just type X_ and pick a service from autocomplete; new dev modules will be added as they are built. The only dev service not included in the uber jar is xapi-dev-maven, as it includes all runtime dependencies of maven, adding ~4 seconds to build time, and 6 megabytes to the final output jar size (without xapi-dev-maven, it's ~1MB).
The newest version!
/**
 *
 */
package xapi.model.impl;

import xapi.annotation.inject.InstanceDefault;
import xapi.model.api.PrimitiveSerializer;
import xapi.source.api.CharIterator;
import xapi.util.X_Debug;

/**
 * @author James X. Nelson ([email protected], @james)
 *
 */
@InstanceDefault(implFor=PrimitiveSerializer.class)
public class PrimitiveSerializerDefault implements PrimitiveSerializer {

  /**
   * The upper boundary of the negative termination digits.
   * A termination digit at or below this boundary signifies that the number is negative;
   * the boundary character itself is a member of {@link #NEGATIVE_NUM_ENDING}, and the
   * deserializers treat only characters strictly greater than it as positive endings.
   */
  private static final char NEGATIVE_VALUE_BOUNDARY = '=';

  /**
   * The boundary at and above which all continuation digits occur.
   * Any character strictly below this value is a termination digit, which signifies that the
   * current number is complete; the boundary character itself is a member of
   * {@link #CONTINUATION_NUM_SECTION}.
   */
  private static final char END_VALUE_BOUNDARY = '^';

  /**
   * This continuation group of numbers is used to encode base 32 digits in a serialized number.
   * When serializing a number, the continuation digits are used to signify that there are still more
   * digits to serialize.  These are the top 33 printable ascii characters.
   * <p>
   * Note that there are 33 digits in this section and 32 in all others in order for us to handle
   * Integer.MIN_VALUE and Long.MIN_VALUE, both of which would normally overflow a positive value.
   * <p>
   * The information of whether a number is negative or positive is encoded in the final digit
   * (the first and only non-continuation digit), so in order to handle the fact that
   * Math.abs(MIN_VALUE) == Math.abs(MAX_VALUE) + 1, we allow the first continuation digit to
   * reach a value of 32 instead of 31, like all other base 32 digits.  Once we encounter the
   * final digit, we will negate the accumulated and current values to avoid negative integer overflows.
   * <p>
   * Also note that digits in this section are ordered according to their likely frequency in
   * English language text; this is to help improve GZipping of response bodies, as we are
   * far more likely to encounter the numbers 1 or 0 than 30 or 31.  The order chosen was
   * based upon http://en.wikipedia.org/wiki/Letter_frequency and other Google searches for
   * frequency of punctuation occurrence.
   */
  private static final char[] CONTINUATION_NUM_SECTION = new char[] {
    'e', 't', 'a', 'o', 'i', 'n', 's', 'h', 'r', 'd',
    'l', 'c', 'u', 'm', 'w', 'f', 'g', 'y', 'p', 'b',
    'v', 'k', 'j', 'x', 'q', 'z', '_', '{', '}', '|',
    '~', '^', '`',
  };

  /**
   * These digits are used to denote the end of an encoded positive number.
   * See {@link #CONTINUATION_NUM_SECTION} for a more detailed breakdown of our
   * integer serialization policy.
   * <p>
   * Note that the characters in this section are all strictly less than those in the
   * {@link #CONTINUATION_NUM_SECTION} and strictly greater than those of
   * {@link #NEGATIVE_NUM_ENDING}; within the section, however, they are sorted by
   * probabilistic frequency in English language text, to aid in the optimization of GZip.
   */
  private static final char[] POSITIVE_NUM_ENDING = new char[] {
    'E', 'T', 'A', 'O', 'I', 'N', 'S', 'H', 'R', 'D',
    'L', 'C', 'U', 'M', 'W', 'F', 'G', 'Y', 'P', 'B',
    'V', 'K', 'J', 'X', 'Q', 'Z', '?', '@', '[',  ']',
    '>', '\\'
  };

  /**
   * The negative number endings encompass the sequentially lowest group of digits,
   * ordered in likely probability of occurrence in regular text Strings, to encourage
   * fewer unique digits in payload and help optimize GZip.
   * <p>
   * The lowest characters were chosen for negative values because negative numbers
   * will be less likely to occur than positive ones, so they are assigned the least
   * common characters (number digits and punctuation symbols).
   * <p>
   * Even then, number digits are prioritized so common values like -1 will result
   * in commonly encountered symbols in text.  -1 will be the space character instead
   * of the '1' character, as space is the most common symbol in written text.
   * <p>
   * Punctuation ordering loosely influenced by:
   * http://mdickens.me/typing/theory-of-letter-frequency.html
   * and the fact that we expect markdown symbols to be used more frequently.
   * <p>
   * See {@link #CONTINUATION_NUM_SECTION} for a detailed description of our Integer
   * serialization policies.
   */
  private static final char[] NEGATIVE_NUM_ENDING = new char[] {
    // Note the first digit is '\0'; it is never used because we never have a -0 ending.
    // However, a value in the 0 position must be included for indexing to work correctly.
    // We never have a -0 due to how we pack numbers; a negative number's final digit
    // will always have a value of one or more; the only value capable of ending
    // in 0 is +0 itself.
   '\0', ' ', '2', '3', '4', '5', '6', '7',  '8', '9',
    '0', '1', '.', ',', '-', '\'', '"', '/', '*', '(',
    ')',  ':', ';', '!', '+', '=', '#', '$', '%', '&',
    '<', '\t'
  };

  /**
   * See {@link #CONTINUATION_NUM_SECTION} for a detailed explanation of our integer serialization policy.
   * <p>
   * This lookup table is used to deserialize our base32 serialized integers by addressing this array
   * with the integer value of each character.  This is used to convert a serialized character back into the
   * base 32 number which sourced it.
   * <p>
   * This table is not ordered in increasing or decreasing order; rather we maintain three ranges of digits,
   * which are, from highest to lowest, {@link #CONTINUATION_NUM_SECTION}, {@link #POSITIVE_NUM_ENDING} and
   * {@link #NEGATIVE_NUM_ENDING}.  This is used so that we can deserialize a number without having to explicitly
   * encode its length.  Instead, we encode continuation digits until we encounter a termination digit.
   * The range of the termination digit will be used to determine if the serialized number was positive or negative.
   * <p>
   * An entry of 0 belongs either to a character that is not part of the encoding, or to one of the
   * zero digits ('e', 'E', and the never-emitted '\0').  The table has 127 entries, so it can only be
   * indexed by characters below 127.
   * <p>
   * This serialization scheme was designed to be as GZip-friendly as possible, while also minimizing encoded
   * payload size.  It is also designed to be fast, as browsers like Chrome can serialize ascii char[] to string
   * much faster than they can handle UTF-8 encoded Strings.  (We use String.valueOf(char[]) as it skips any
   * UTF-8 encoding in GWT; we don't need it as we ensure all our serialized chars are < 127).
   */
  private static final int[] VALUE_TO_NUM = new int[] {
    0,  0,  0,  0,  0,   0,  0,  0,  0, 31,  // 0 - 10
//                                      \t

    0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  // 10 - 20
//

    0,  0,  0,  0,  0,   0,  0,  0,  0,  0,  // 20 - 30
//

    0,  0,  1, 23, 16,  26, 27, 28, 29, 15,  // 30 - 40
//         ' '  !   "    #   $   %   &   '

    19, 20, 18, 24, 13,  14, 12, 17, 10, 11,  // 40 - 50
//   (   )   *   +   ,    -   .   /   0   1

    2,  3,  4,  5,  6,   7,  8,  9, 21, 22,  // 50 - 60
//  2   3   4   5   6    7   8   9   :   ;

    30, 25, 30, 26, 27,   2, 19, 11,  9,  0,  // 60 - 70
//   <   =   >   ?   @    A   B   C   D   E

    15, 16,  7,  4, 22,  21, 10, 13,  5,  3,  // 70 - 80
//   F   G   H   I   J    K   L   M   N   O

    18, 24,  8,  6,  1,  12, 20, 14, 23, 17,  // 80 - 90
//   P   Q   R   S   T    U   V   W   X   Y

//                           Note this value of 32. It is used to handle integer MIN_VALUEs
    25, 28, 31, 29, 31,  26, 32,  2, 19, 11,  // 90 - 100
//   Z   [   \   ]   ^    _   `   a   b   c

    9,  0, 15, 16,  7,   4, 22, 21, 10, 13,  // 100 - 110
//  d   e   f   g   h    i   j   k   l   m

    5,  3, 18, 24,  8,   6,  1, 12, 20, 14,  // 110 - 120
//  n   o   p   q   r    s   t   u   v   w

    23, 17, 25, 27, 29,  28, 30,              // 120 - 130
//   x   y   z   {   |    }   ~
  };


  /**
   * Rebuilds the character-to-digit lookup table from {@link #CONTINUATION_NUM_SECTION},
   * {@link #POSITIVE_NUM_ENDING} and {@link #NEGATIVE_NUM_ENDING}, and prints it to
   * {@link System#out} as the text of an int[] initializer (ten values per row, each row
   * followed by a comment line naming the characters) so it can be pasted back into this file.
   *
   * @return the freshly computed 127-entry table, indexed by character value
   */
  @SuppressWarnings("unused")
  // We only use this method if we update any of the ordering of serialization char->int mappings
  private static int[] computeValueToNum(){
    // Note: this local deliberately shadows the static VALUE_TO_NUM field.
    final int[] VALUE_TO_NUM = new int[127];
    // Filled alongside the table below, but never read back within this method.
    final char[] lookup = new char[127];
    for (int i = CONTINUATION_NUM_SECTION.length; i-->0; ) {
      VALUE_TO_NUM[CONTINUATION_NUM_SECTION[i]] = i;
      lookup[CONTINUATION_NUM_SECTION[i]] = CONTINUATION_NUM_SECTION[i];
    }
    for (int i = POSITIVE_NUM_ENDING.length; i-->0; ) {
      VALUE_TO_NUM[POSITIVE_NUM_ENDING[i]] = i;
      lookup[POSITIVE_NUM_ENDING[i]] = POSITIVE_NUM_ENDING[i];
    }
    for (int i = NEGATIVE_NUM_ENDING.length; i-->0; ) {
      VALUE_TO_NUM[NEGATIVE_NUM_ENDING[i]] = i;
      lookup[NEGATIVE_NUM_ENDING[i]] = NEGATIVE_NUM_ENDING[i];
    }
    // b accumulates the whole printed output; l accumulates the comment line for the current row.
    final StringBuilder b = new StringBuilder("= new int[] {\n"), l = new StringBuilder();
    b.append("\n   ");
    l.append("\n// ");
    for (int i = 0; i < VALUE_TO_NUM.length; i ++) {
      final int pos = VALUE_TO_NUM[i];
      final int val = (char)i;
      // Right-align single digit values so the columns line up.
      String num = Integer.toString(pos);
      if (num.length() == 1) {
        num = " "+num;
      }
      // Characters whose digit value is 0 (unused characters and the zero digits) get no label.
      if (pos == 0) {
        l.append("    ");
      } else if (val == '\t') {
        l.append("\\t  ");
      } else if (val == ' ') {
        l.append("' ' ");
      } else {
        l.append(" "+((char)val)+"  ");
      }
      b.append(num).append(", ");
      if (i == VALUE_TO_NUM.length-1) {
        // The final row is short (the table ends at index 126): pad it, then bump i to 129 so the
        // end-of-row branch below fires and flushes the last comment line.  This also ends the loop.
        b.append("            ");
        i += 3;
      }
      if (i%10 == 9) {
        // End of a row of ten: emit the index range label, then the row's comment line.
        b.append(" // "+(i-9)+" - "+(i+1));
        b.append(l);
        l.setLength(0);
        l.append("\n// ");
        b.append("\n\n   ");
      }
      else if (i%5 == 4) {
        // Extra gap in the middle of each row, in both the values and the labels.
        b.append(" ");
        l.append(" ");
      }
    }
    // Print out the value so we can hard-code it instead of compute it;
    // this method should be unused.
    System.out.println(b);
    return VALUE_TO_NUM;
  }

  /**
   * Single-bit masks indexed by bit position; entry n is 2 to the power n.
   * Used when packing and unpacking booleans into base 32 digits.
   */
  private static final int[] BIT_MASKS = new int[] {
    1, 2, 4, 8, 16, 32, 64, 128, 256
  };

  /**
   * This is a class that we will use so that we can determine the number of characters needed to
   * represent a number, while also collecting up the character we want for each position,
   * so that we can allocate a char[] of the correct size, without having to examine a number twice.
   * <p>
   * As we examine the base 32 length of a number, we collect the chars needed in this singly-linked list.
   *
   * @author James X. Nelson ([email protected], @james)
   *
   */
  protected static class CharacterBuffer {
    // The next buffer, if any
    protected CharacterBuffer next;
    // The index of the current slot; the head node will contain the total count so we can alloc a char[]
    // (callers reset the head's slot to 0 before copying the chars out)
    protected int slot;
    // The char of the current node
    protected char c;
  }

  /**
   * Reads one serialized int from the supplied {@link CharIterator}.
   * <p>
   * Characters in the {@link #CONTINUATION_NUM_SECTION} range are accumulated as base 32 digits,
   * least significant digit first.  The first character below {@link #END_VALUE_BOUNDARY} is the
   * termination digit: it contributes the final (most significant) digit, and whether it lies in
   * {@link #POSITIVE_NUM_ENDING} or {@link #NEGATIVE_NUM_ENDING} decides the sign of the result.
   *
   * @param i the source of serialized characters; consumed up to and including the termination digit
   * @return the decoded value
   */
  @Override
  public int deserializeInt(final CharIterator i) {
    int total = 0;
    int scale = 1;
    while (i.hasNext()) {
      final char digit = i.next();
      final int amount = scale * VALUE_TO_NUM[digit];
      assert amount >= 0 : "Unexpected Integer overlow" ;
      if (digit >= END_VALUE_BOUNDARY) {
        // Continuation digit: add it in and move on to the next power of 32.
        total += amount;
        scale <<= 5;
        continue;
      }
      // Termination digit.  For negatives, both parts are negated separately before being
      // combined, so that Integer.MIN_VALUE can be produced without overflowing.
      final boolean positive = digit > NEGATIVE_VALUE_BOUNDARY;
      return positive ? total + amount : -total - amount;
    }
    // Ran out of characters before seeing a termination digit.
    assert false : "Malformed encoded number: "+i;
    return total;
  }

  /**
   * Reads one serialized long from the supplied {@link CharIterator}.
   * <p>
   * Characters in the {@link #CONTINUATION_NUM_SECTION} range are accumulated as base 32 digits,
   * least significant digit first.  The first character below {@link #END_VALUE_BOUNDARY} is the
   * termination digit: it contributes the final (most significant) digit, and whether it lies in
   * {@link #POSITIVE_NUM_ENDING} or {@link #NEGATIVE_NUM_ENDING} decides the sign of the result.
   *
   * @param l the source of serialized characters; consumed up to and including the termination digit
   * @return the decoded value
   */
  @Override
  public long deserializeLong(final CharIterator l) {
    long total = 0;
    long scale = 1;
    while (l.hasNext()) {
      final char digit = l.next();
      final long amount = VALUE_TO_NUM[digit] * scale;
      assert amount >= 0 : "Unexpected Long overlow" ;
      if (digit >= END_VALUE_BOUNDARY) {
        // Continuation digit: add it in and move on to the next power of 32.
        total += amount;
        scale <<= 5;
        continue;
      }
      // Termination digit.  For negatives, both parts are negated separately before being
      // combined, so that Long.MIN_VALUE can be produced without overflowing.
      final boolean positive = digit > NEGATIVE_VALUE_BOUNDARY;
      return positive ? total + amount : -total - amount;
    }
    // Ran out of characters before seeing a termination digit.
    assert false : "Malformed encoded number: "+l;
    return total;
  }

  /**
   * Serializes an int according to the serialization policy defined in {@link #CONTINUATION_NUM_SECTION}.
   *
   * @param i the value to encode
   * @return the encoded characters, least significant digit first
   */
  @Override
  public String serializeInt(final int i) {
    final CharacterBuffer head = computeSerialization(i);
    // The head node carries the total character count in its slot field.
    final char[] chars = new char[head.slot];
    // Restore the head to its real position (index 0) before walking the list.
    head.slot = 0;
    CharacterBuffer node = head;
    while (node != null) {
      chars[node.slot] = node.c;
      node = node.next;
    }
    return String.valueOf(chars);
  }

  /**
   * Serializes a long according to the serialization policy defined in {@link #CONTINUATION_NUM_SECTION}.
   *
   * @param l the value to encode
   * @return the encoded characters, least significant digit first
   */
  @Override
  public String serializeLong(final long l) {
    final CharacterBuffer head = computeSerialization(l);
    // The head node carries the total character count in its slot field.
    final char[] chars = new char[head.slot];
    // Restore the head to its real position (index 0) before walking the list.
    head.slot = 0;
    CharacterBuffer node = head;
    while (node != null) {
      chars[node.slot] = node.c;
      node = node.next;
    }
    return String.valueOf(chars);
  }

  /**
   * Computes a linked list of serialization results for the supplied integer.
   * <p>
   * On return, the head node's {@code slot} holds the total number of characters; every other
   * node's {@code slot} holds its index in the output.
   * <p>
   * See {@link #CONTINUATION_NUM_SECTION} for a detailed description of the serialization policy.
   *
   * @param i the value to encode
   * @return the head of the list of encoded characters
   */
  protected CharacterBuffer computeSerialization(int i) {
    final CharacterBuffer head = new CharacterBuffer();
    CharacterBuffer tail = head;
    final boolean negative = i < 0;
    if (negative) {
      if (i == Integer.MIN_VALUE) {
        // Math.abs(MIN_VALUE) == Math.abs(MAX_VALUE) + 1, so -i would overflow.  Instead we
        // emit a first digit of 32 (one more than a base 32 digit normally holds); the
        // deserializer accumulates it and only negates once it sees the negative ending.
        tail = pushItem(1, 32, head, tail);
        // Take one extra off the remainder, so the remaining digits are all ones instead of zeros.
        i = i / -32 - 1;
      } else {
        i = -i;
      }
      assert i >= 0;
    }
    int pos = head.slot + 1;
    int digit = i % 32;
    // Every digit except the most significant one is written as a continuation digit.
    for (i /= 32; i != 0; i /= 32) {
      tail = pushItem(pos++, digit, head, tail);
      digit = i % 32;
    }
    // The most significant digit terminates the number and carries the sign.
    head.slot++;
    final char[] endings = negative ? NEGATIVE_NUM_ENDING : POSITIVE_NUM_ENDING;
    tail.c = endings[digit];
    return head;
  }

  /**
   * Stores a continuation digit in the current tail node and appends a fresh, empty node after it.
   *
   * @param slot the output index to assign to the newly appended node
   * @param value the digit to write into the current tail, as an index into {@link #CONTINUATION_NUM_SECTION}
   * @param head the head of the list, whose slot field counts the characters written so far
   * @param tail the node that receives the digit
   * @return the newly appended node, which becomes the new tail
   */
  private CharacterBuffer pushItem(final int slot, final int value, final CharacterBuffer head, CharacterBuffer tail) {
    final char digit = CONTINUATION_NUM_SECTION[value];
    final CharacterBuffer appended = new CharacterBuffer();
    appended.slot = slot;
    tail.c = digit;
    head.slot++;
    tail.next = appended;
    return appended;
  }

  /**
   * Computes a linked list of serialization results for the supplied long.
   * <p>
   * On return, the head node's {@code slot} holds the total number of characters; every other
   * node's {@code slot} holds its index in the output.
   * <p>
   * See {@link #CONTINUATION_NUM_SECTION} for a detailed description of the serialization policy.
   *
   * @param i the value to encode
   * @return the head of the list of encoded characters
   */
  private CharacterBuffer computeSerialization(long i) {
    final CharacterBuffer head = new CharacterBuffer();
    CharacterBuffer tail = head;
    final boolean negative = i < 0;
    if (negative) {
      if (i == Long.MIN_VALUE) {
        // Math.abs(MIN_VALUE) == Math.abs(MAX_VALUE) + 1, so -i would overflow.  Instead we
        // emit a first digit of 32 (one more than a base 32 digit normally holds); the
        // deserializer accumulates it and only negates once it sees the negative ending.
        tail = pushItem(1, 32, head, tail);
        // Take one extra off the remainder, so the remaining digits are all ones instead of zeros.
        i = i / -32L - 1;
      } else {
        i = -i;
      }
      assert i >= 0;
    }
    int pos = head.slot + 1;
    int digit = (int) (i % 32L);
    // Every digit except the most significant one is written as a continuation digit.
    for (i /= 32L; i != 0; i /= 32L) {
      tail = pushItem(pos++, digit, head, tail);
      digit = (int) (i % 32L);
    }
    // The most significant digit terminates the number and carries the sign.
    head.slot++;
    final char[] endings = negative ? NEGATIVE_NUM_ENDING : POSITIVE_NUM_ENDING;
    tail.c = endings[digit];
    return head;
  }

  /**
   * Serializes a boolean as a single character.
   *
   * @param z the value to encode
   * @return "1" for true, "0" for false
   */
  @Override
  public String serializeBoolean(final boolean z) {
    if (z) {
      return "1";
    }
    return "0";
  }

  /**
   * Serializes a boolean array as its length followed by base 32 digits that each pack five booleans.
   * <p>
   * The length is written first, using the integer policy described on
   * {@link #CONTINUATION_NUM_SECTION}.  Each following character is a {@link #POSITIVE_NUM_ENDING}
   * digit whose bits 1, 2, 4, 8 and 16 hold up to five consecutive booleans, in array order.
   * <p>
   * Exactly {@code z.length / 5 + 1} digits are written, so an array whose length is a multiple
   * of five (including the empty array) ends with one all-zero padding digit.
   *
   * @param z the booleans to encode
   * @return the encoded length followed by the packed digits
   */
  @Override
  public String serializeBooleanArray(final boolean ... z) {
    final int chunkCount = z.length / 5 + 1;
    // We will write a full large int using as many base 32 values as we need.
    final CharacterBuffer lengthHead = computeSerialization(z.length);
    // The head node carries the number of characters used by the encoded length.
    final int offset = lengthHead.slot;
    final char[] buffer = new char[chunkCount + offset];
    lengthHead.slot = 0;
    for (CharacterBuffer node = lengthHead; node != null; node = node.next) {
      buffer[node.slot] = node.c;
    }
    for (int chunk = 0; chunk < chunkCount; chunk++) {
      // Each digit packs five booleans, so chunk n must start reading at array index n * 5.
      // Starting at index n instead would make consecutive digits overlap and corrupt every
      // array longer than five elements.
      final int value = boolsToBase32(z, chunk * 5);
      assert value < 32;
      buffer[chunk + offset] = POSITIVE_NUM_ENDING[value % 32];
    }
    return String.valueOf(buffer);
  }

  /**
   * Packs up to five booleans, starting at {@code pos}, into a single value between 0 and 31.
   * The boolean at {@code pos} becomes bit 1, the next one bit 2, and so on up to bit 16.
   *
   * @param z the source array
   * @param pos the index of the first boolean to read
   * @return the packed value; fewer than five bits are used when the array ends early
   */
  private int boolsToBase32(final boolean[] z, int pos) {
    // When five or fewer elements remain, read only what is left; otherwise read exactly five.
    final boolean lastChunk = pos + 5 >= z.length;
    final int count = lastChunk ? z.length - pos : 5;
    int packed = 0;
    for (int bit = 0; bit < count; bit++) {
      if (z[pos + bit]) {
        packed |= BIT_MASKS[bit];
      }
    }
    return packed;
  }

  /**
   * Serializes a byte by widening it to an int and delegating to {@link #serializeInt(int)}.
   */
  @Override
  public String serializeByte(final byte b) {
    final int widened = b;
    return serializeInt(widened);
  }

  /**
   * Serializes a short by widening it to an int and delegating to {@link #serializeInt(int)}.
   */
  @Override
  public String serializeShort(final short s) {
    final int widened = s;
    return serializeInt(widened);
  }

  /**
   * Serializes a char by widening it to its int code unit value and delegating to
   * {@link #serializeInt(int)}.
   */
  @Override
  public String serializeChar(final char c) {
    final int codeUnit = c;
    return serializeInt(codeUnit);
  }

  /**
   * Serializes a float by encoding the int returned by {@link Float#floatToIntBits(float)}.
   */
  @Override
  public String serializeFloat(final float f) {
    final int bits = Float.floatToIntBits(f);
    return serializeInt(bits);
  }

  /**
   * Serializes a double by encoding the long returned by {@link Double#doubleToLongBits(double)}.
   */
  @Override
  public String serializeDouble(final double d) {
    final long bits = Double.doubleToLongBits(d);
    return serializeLong(bits);
  }

  /**
   * Reads a single character and interprets it as a boolean.
   *
   * @return true if the character read is '1'; false for any other character
   */
  @Override
  public boolean deserializeBoolean(final CharIterator z) {
    final char flag = z.next();
    return flag == '1';
  }

  /**
   * Reads a boolean array: first its length, then a series of digits that each hold up to
   * five booleans in bits 1, 2, 4, 8 and 16.
   * <p>
   * One digit is read for every full group of five booleans, plus one final digit for the
   * remainder; that final digit is read even when the remainder is empty.
   *
   * @param z the source of serialized characters
   * @return the decoded array
   */
  @Override
  public boolean[] deserializeBooleanArray(final CharIterator z) {
    final int size = deserializeInt(z);
    final boolean[] result = new boolean[size];
    int filled = 0;
    int packed = deserializeInt(z);
    // Unpack full groups of five for as long as five more slots remain.
    while (filled + 5 <= size) {
      for (int bit = 0; bit < 5; bit++) {
        result[filled + bit] = (packed & BIT_MASKS[bit]) != 0;
      }
      filled += 5;
      packed = deserializeInt(z);
    }
    // The last digit may hold fewer than five booleans (possibly none).
    for (int bit = 0; filled + bit < size; bit++) {
      result[filled + bit] = (packed & BIT_MASKS[bit]) != 0;
    }
    return result;
  }

  /**
   * Reads a serialized int and narrows it to a byte.
   */
  @Override
  public byte deserializeByte(final CharIterator b) {
    final int wide = deserializeInt(b);
    return (byte) wide;
  }

  /**
   * Reads a serialized int and narrows it to a short.
   */
  @Override
  public short deserializeShort(final CharIterator s) {
    final int wide = deserializeInt(s);
    return (short) wide;
  }

  /**
   * Reads a serialized int and narrows it to a char.
   */
  @Override
  public char deserializeChar(final CharIterator c) {
    final int codeUnit = deserializeInt(c);
    return (char) codeUnit;
  }

  /**
   * Reads a serialized int and reinterprets its bits as a float via
   * {@link Float#intBitsToFloat(int)}.
   */
  @Override
  public float deserializeFloat(final CharIterator f) {
    /*
     * For javascript, we will use a native function to get our int bits:
     * function FloatToIEEE(f)
     * {
     *   var buf = new ArrayBuffer(4);
     *   (new Float32Array(buf))[0] = f;
     *   return (new Uint32Array(buf))[0];
     * }
     */
    return Float.intBitsToFloat(deserializeInt(f));
  }

  /**
   * Reads a serialized long and reinterprets its bits as a double via
   * {@link Double#longBitsToDouble(long)}.
   */
  @Override
  public double deserializeDouble(final CharIterator d) {
    /*
     * For javascript, we will use a native function to get our long bits:
     * function DoubleToIEEE(f)
     * {
     *   var buf = new ArrayBuffer(8);
     *   (new Float64Array(buf))[0] = f;
     *   // We will also process these bits as ints to avoid long emulation.
     *   // Thus, we do not bother with a doubleToLongBits method, as long emulation sucks
     *   return [ (new Uint32Array(buf))[0] ,(new Uint32Array(buf))[1] ];
     * }
     */
    return Double.longBitsToDouble(deserializeLong(d));
  }

  /**
   * Reads a length-prefixed String.  A length of -1 decodes to null and a length of 0 to the
   * empty String; otherwise the iterator is asked to consume that many characters.
   *
   * @see xapi.model.api.PrimitiveSerializer#deserializeString(xapi.source.api.CharIterator)
   */
  @Override
  public String deserializeString(final CharIterator s) {
    final int size = deserializeInt(s);
    switch (size) {
      case -1:
        return null;
      case 0:
        return "";
      default:
        return s.consume(size).toString();
    }
  }

  /**
   * Serializes a String as its serialized length followed by its characters, unmodified.
   * A null String is written as the serialized int -1, with nothing after it.
   */
  @Override
  public String serializeString(final String s) {
    if (s != null) {
      final String lengthPrefix = serializeInt(s.length());
      return lengthPrefix + s;
    }
    return serializeInt(-1);
  }

  /**
   * Serializes a class as the serialized form of its {@link Class#getName() name}.
   *
   * @see xapi.model.api.PrimitiveSerializer#serializeClass(java.lang.Class)
   */
  @Override
  public String serializeClass(final Class c) {
    final String name = c.getName();
    return serializeString(name);
  }

  /**
   * Reads a serialized class name and resolves it with {@link #loadClass(String)}.
   *
   * @see xapi.model.api.PrimitiveSerializer#deserializeClass(xapi.source.api.CharIterator)
   */
  @Override
  @SuppressWarnings("unchecked")
  public Class deserializeClass(final CharIterator c) {
    return loadClass(deserializeString(c));
  }

  /**
   * Resolves a class name to a {@link Class}.  The eight primitive type names and "void" map to
   * their class literals; anything else is looked up with {@link Class#forName(String)}.
   *
   * @param cls the class name, or null
   * @return the resolved class, or null when {@code cls} is null
   */
  @Override
  @SuppressWarnings("unchecked")
  public Class loadClass(final String cls) {
    if (cls == null) {
      return null;
    }
    // Class.forName cannot resolve primitive names, so those are handled by hand.
    if ("boolean".equals(cls)) {
      return boolean.class;
    }
    if ("byte".equals(cls)) {
      return byte.class;
    }
    if ("short".equals(cls)) {
      return short.class;
    }
    if ("char".equals(cls)) {
      return char.class;
    }
    if ("int".equals(cls)) {
      return int.class;
    }
    if ("long".equals(cls)) {
      return long.class;
    }
    if ("float".equals(cls)) {
      return float.class;
    }
    if ("double".equals(cls)) {
      return double.class;
    }
    if ("void".equals(cls)) {
      return void.class;
    }
    try {
      return Class.forName(cls);
    } catch (final ClassNotFoundException notFound) {
      throw X_Debug.rethrow(notFound);
    }
  }

}