All Downloads are FREE. Search and download functionalities are using the official Maven repository.

xapi.model.impl.PrimitiveSerializerDefault Maven / Gradle / Ivy

Go to download

Everything needed to run a comprehensive dev environment. Just type X_ and pick a service from autocomplete; new dev modules will be added as they are built. The only dev service not included in the uber jar is xapi-dev-maven, as it includes all runtime dependencies of maven, adding ~4 seconds to build time, and 6 megabytes to the final output jar size (without xapi-dev-maven, it's ~1MB).

The newest version!
/**
 *
 */
package xapi.model.impl;

import xapi.annotation.inject.InstanceDefault;
import xapi.model.api.PrimitiveSerializer;
import xapi.source.api.CharIterator;
import xapi.util.X_Debug;

/**
 * @author James X. Nelson ([email protected], @james)
 *
 */
@InstanceDefault(implFor=PrimitiveSerializer.class)
public class PrimitiveSerializerDefault implements PrimitiveSerializer {

  /**
   * The boundary which all negative ending numbers will be below.
   * A character below this bounadary is a termination digit that signifies the number is negative
   */
  private static final char NEGATIVE_VALUE_BOUNDARY = '=';

  /**
   * The boundary above which all continuation digits will occur.
   * And number below this value is a termination digit which signifies that the current number is complete.
   */
  private static final char END_VALUE_BOUNDARY = '^';

  /**
   * This continuation group of numbers is used to encode base 32 digits in a serialized number.
   * When serializing a number, the continuation bits are used to signify that there are still more
   * digits to serialize.  These are the top 33 printable ascii digits.
   * 

* Note that there are 33 digits in this section and 32 in all others in order for us to handle * Integer.MIN_VALUE and Long.MIN_VALUE, both of which would normally overflow a positive value. *

* The information of whether a number is negative or positive is encoded in the final digit * (the first and only non-continuation digit), so in order to handle the fact that * Math.abs(MIN_VALUE) == Math.abs(MAX_VALUE) + 1, we allow the first continuation digit to * reach a value of 32 instead of 31, like all other base 32 digits. Once we encounter the * final digit, we will negate the accumulated and current values to avoid negative integer overflows. *

* Also note that digits in this section are ordered according to their likely frequency in * English language text; this is to help improve GZipping of response bodies, as we are * far more likely to encounter the numbers 1 or 0 than 30 or 31. The order chosen was * based upon http://en.wikipedia.org/wiki/Letter_frequency and other Google searchs for * frequency of punctuation occurrence. */ private static final char[] CONTINUATION_NUM_SECTION = new char[] { 'e', 't', 'a', 'o', 'i', 'n', 's', 'h', 'r', 'd', 'l', 'c', 'u', 'm', 'w', 'f', 'g', 'y', 'p', 'b', 'v', 'k', 'j', 'x', 'q', 'z', '_', '{', '}', '|', '~', '^', '`', }; /** * These numbers are used to denote the end of an encoded positive number. * See {@link #CONTINUATION_NUM_SECTION} for a more detailed breakdown of our * integer serialization policy. *

* Note that the values in this section are all strictly less than those in the * {@link #CONTINUATION_NUM_SECTION} and less than those of {@link #NEGATIVE_NUM_ENDING}, * however, they are sorted by probabilistic frequency in English language text, * to aid in the optimization of the GZip protocol. */ private static final char[] POSITIVE_NUM_ENDING = new char[] { 'E', 'T', 'A', 'O', 'I', 'N', 'S', 'H', 'R', 'D', 'L', 'C', 'U', 'M', 'W', 'F', 'G', 'Y', 'P', 'B', 'V', 'K', 'J', 'X', 'Q', 'Z', '?', '@', '[', ']', '>', '\\' }; /** * The negative number endings encompass the sequentially lowest group of digits, * ordered in likely probability of occurrence in regular text Strings, to encourage * fewer unique digits in payload and help optimize GZip. *

* The lowest characters were chosen for negative values because negative numbers * will be less likely to occur than positive ones, so they are assigned the least * common characters (number digits and punctuation symbols). *

* Even then, number digits are prioritized so common values like -1 will result * in commonly encountered symbols in text. -1 will be the space character instead * of the '1' character, as space is the most common symbol in written text. *

* Punctuation ordering loosely influenced by: * http://mdickens.me/typing/theory-of-letter-frequency.html * and the fact that we expect markdown symbols to be used more frequently. *

* See {@link #CONTINUATION_NUM_SECTION} for a detailed description of our Integer * serialization policies. * */ private static final char[] NEGATIVE_NUM_ENDING = new char[] { // Note the first digit is '\0'; it is never used because we never have a -0 ending. // However, a value in the 0 position must be included for indexing to work correctly. // We never have a -0 due to how we pack numbers; a negative number's final digit // will always have a value of one or more; the only value capable of ending // in 0 is +0 itself. '\0', ' ', '2', '3', '4', '5', '6', '7', '8', '9', '0', '1', '.', ',', '-', '\'', '"', '/', '*', '(', ')', ':', ';', '!', '+', '=', '#', '$', '%', '&', '<', '\t' }; /** * See {@link #CONTINUATION_NUM_SECTION} for a detailed explanation of our integer serialization policy. *

* This lookup table is used to deserialize our base32 serialized integers by addressing this array * with the integer value of each character. This is used to convert a serialized character back into the * base 32 number which sourced it. *

* This table is not ordered in increasing or decreasing order; rather we maintain three ranges of digits, * which are, from highest to lowest, {@link #CONTINUATION_NUM_SECTION}, {@link #POSITIVE_NUM_ENDING} and * {@link #NEGATIVE_NUM_ENDING}. This is used so that we can deserialize a number without having to explicitly * encode its length. Instead, we encode Continutation digits until we encounter a terminiation digit. * The range of the termination digit will be used to determine if the serialized number was positive or negative. *

* This serialization scheme was designed to be as GZip-friendly as possible, while also minimizing encoded * payload size. It is also designed to be fast, as browsers like Chrome can serialize ascii char[] to string * much faster than they can handle UTF-8 encoded Strings. (We use String.valueOf(char[]) as it skips any * UTF-8 encoding in GWT; we don't need it as we ensure all our serialized chars are < 127). */ private static final int[] VALUE_TO_NUM = new int[] { 0, 0, 0, 0, 0, 0, 0, 0, 0, 31, // 0 - 10 // \t 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 10 - 20 // 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, // 20 - 30 // 0, 0, 1, 23, 16, 26, 27, 28, 29, 15, // 30 - 40 // ' ' ! " # $ % & ' 19, 20, 18, 24, 13, 14, 12, 17, 10, 11, // 40 - 50 // ( ) * + , - . / 0 1 2, 3, 4, 5, 6, 7, 8, 9, 21, 22, // 50 - 60 // 2 3 4 5 6 7 8 9 : ; 30, 25, 30, 26, 27, 2, 19, 11, 9, 0, // 60 - 70 // < = > ? @ A B C D 15, 16, 7, 4, 22, 21, 10, 13, 5, 3, // 70 - 80 // F G H I J K L M N O 18, 24, 8, 6, 1, 12, 20, 14, 23, 17, // 80 - 90 // P Q R S T U V W X Y // Note this value of 32. It is used to handle integer MIN_VALUEs 25, 28, 31, 29, 31, 26, 32, 2, 19, 11, // 90 - 100 // Z [ \ ] ^ _ ` a b c 9, 0, 15, 16, 7, 4, 22, 21, 10, 13, // 100 - 110 // d f g h i j k l m 5, 3, 18, 24, 8, 6, 1, 12, 20, 14, // 110 - 120 // n o p q r s t u v w 23, 17, 25, 27, 29, 28, 30, // 120 - 130 // x y z { | } ~ }; @SuppressWarnings("unused") // We only use this method if we update any of the ordering of serialization char->int mappings private static int[] computeValueToNum(){ final int[] VALUE_TO_NUM = new int[127]; final char[] lookup = new char[127]; for (int i = CONTINUATION_NUM_SECTION.length; i-->0; ) { VALUE_TO_NUM[CONTINUATION_NUM_SECTION[i]] = i; lookup[CONTINUATION_NUM_SECTION[i]] = CONTINUATION_NUM_SECTION[i]; } for (int i = POSITIVE_NUM_ENDING.length; i-->0; ) { VALUE_TO_NUM[POSITIVE_NUM_ENDING[i]] = i; lookup[POSITIVE_NUM_ENDING[i]] = POSITIVE_NUM_ENDING[i]; } for (int i = NEGATIVE_NUM_ENDING.length; i-->0; ) { VALUE_TO_NUM[NEGATIVE_NUM_ENDING[i]] = i; lookup[NEGATIVE_NUM_ENDING[i]] = NEGATIVE_NUM_ENDING[i]; } final StringBuilder b = new StringBuilder("= new int[] {\n"), l = new StringBuilder(); b.append("\n "); l.append("\n// "); for (int i = 0; i < VALUE_TO_NUM.length; i ++) { final int pos = VALUE_TO_NUM[i]; final int val = (char)i; String num = Integer.toString(pos); if (num.length() == 1) { num = " "+num; } if (pos == 0) { l.append(" "); } else if (val == '\t') { l.append("\\t "); } else if (val == ' ') { l.append("' ' "); } else { l.append(" "+((char)val)+" "); } b.append(num).append(", "); if (i == VALUE_TO_NUM.length-1) { b.append(" "); i += 3; } if (i%10 == 9) { b.append(" // "+(i-9)+" - "+(i+1)); b.append(l); l.setLength(0); l.append("\n// "); b.append("\n\n "); } else if (i%5 == 4) { b.append(" "); l.append(" "); } } // Print out the value so we can hard-code it instead of compute it; // this method should be unused. System.out.println(b); return VALUE_TO_NUM; } private static final int[] BIT_MASKS = new int[] { 1, 2, 4, 8, 16, 32, 64, 128, 256 }; /** * This is a class that we will use so that we can determine the size of characters needed to * represent a number, while also collecting up the character we want for each position, * so that we can allocate a char[] of the correct size, without having to examine a number twice. *

* As we examine the base 32 length of a number, we collect the chars needed in this single-linked list. * * @author James X. Nelson ([email protected], @james) * */ protected static class CharacterBuffer { // The next buffer, if any protected CharacterBuffer next; // The index of the current slot; the head node will contain the total count so we can alloc a char[] protected int slot; // The char of the current node protected char c; } /** * Consume characters from the supplied {@link CharIterator} to reassemble a serialized int value. *

* This will read in chars that are in the range of {@link #CONTINUATION_NUM_SECTION} as base 32 * digits, until a termination digit from {@link #POSITIVE_NUM_ENDING} or {@link #NEGATIVE_NUM_ENDING} * are encountered, at which time deserialization will terminate, and the value will be returned. */ @Override public int deserializeInt(final CharIterator i) { int value = 0, multi = 1; for (; i.hasNext();) { final char c = i.next(); final int delta = multi * VALUE_TO_NUM[c]; assert delta >= 0 : "Unexpected Integer overlow" ; if (c < END_VALUE_BOUNDARY) { // We hit the end of this number if (c > NEGATIVE_VALUE_BOUNDARY) { // And the number was not negative; just return the sum return value + delta; } // Note that we negate the value and the delta, as this will prevent // an integer overflow of Integer.MIN_VALUE. return -value - delta; } // For continuation digits, just accumulate the sum of each base 32 digit. value += delta; multi <<= 5; // multiply by 32 } assert false : "Malformed encoded number: "+i; return value; } /** * Consume characters from the supplied {@link CharIterator} to reassemble a serialized long value. *

* This will read in chars that are in the range of {@link #CONTINUATION_NUM_SECTION} as base 32 * digits, until a termination digit from {@link #POSITIVE_NUM_ENDING} or {@link #NEGATIVE_NUM_ENDING} * are encountered, at which time deserialization will terminate, and the value will be returned. */ @Override public long deserializeLong(final CharIterator l) { long value = 0, multi = 1; for (; l.hasNext();) { final char c = l.next(); final long delta = (VALUE_TO_NUM[c]*multi); assert delta >= 0 : "Unexpected Long overlow" ; if (c < END_VALUE_BOUNDARY) { // We hit the end of this number if (c > NEGATIVE_VALUE_BOUNDARY) { // And the number was not negative; just return the sum return value + delta; } // Note that we negate the value and the delta, as this will prevent // an integer overflow of Integer.MIN_VALUE. return -value - delta; } // For continuation digits, just accumulate the sum of each base 32 digit. value += delta; multi <<= 5; // multiply by 32 } assert false : "Malformed encoded number: "+l; return value; } /** * Serializes an int according to the serialization policy defined in {@link #CONTINUATION_NUM_SECTION}. */ @Override public String serializeInt(final int i) { CharacterBuffer buffer = computeSerialization(i); // The very first buffer will have its slot set to size, since we know it will always exist and be in slot 0 final char[] data = new char[buffer.slot]; // Reset the head slot to zero buffer.slot = 0; for (;buffer != null; buffer = buffer.next) { // Assemble the char[] computed as a linked list data[buffer.slot] = buffer.c; } return String.valueOf(data); } /** * Serializes a long according to the serialization policy defined in {@link #CONTINUATION_NUM_SECTION}. */ @Override public String serializeLong(final long l) { CharacterBuffer buffer = computeSerialization(l); // The very first buffer will have its slot set to size, since we know it will always exist and be in slot 0 final char[] data = new char[buffer.slot]; // Reset the slot to zero for our loop buffer.slot = 0; for (;buffer != null; buffer = buffer.next) { // Assemble the char[] computed as a linked list data[buffer.slot] = buffer.c; } return String.valueOf(data); } /** * Computes a linked list of serialization results for the supplied integer. *

* See {@link #CONTINUATION_NUM_SECTION} for a detailed description of the serialization policy. */ protected CharacterBuffer computeSerialization(int i) { boolean negative; final CharacterBuffer head = new CharacterBuffer(); CharacterBuffer tail = head; if (i < 0) { negative = true; if (i == Integer.MIN_VALUE) { // This is a sneaky trick to add the extra +1 for the fact that // Math.abs(MIN_VALUE) = Math.abs(MAX_VALUE) +1. // We set the value of the first digit to 32, which technically overflows our // otherwise base 32 numbering system. // When we deserialize, we will accumulate this extra +1, and then right // at the end of the int deserialization, when we realize the number is negative, // then we will switch the accumulated value and the last digit to // negatives, so they won't overflow. tail = pushItem(1, 32, head, tail); // We remove an extra 1 here as well, so all the remaining bits will be 1s instead of 0s i = i/-32 - 1; } else { i = -i; } assert i >= 0; } else { negative = false; } for ( int pos = head.slot + 1; ; pos++ ) { final int chunk = i%32; i = i/32; if (i == 0) { head.slot ++; if (negative) { tail.c = NEGATIVE_NUM_ENDING[chunk]; } else { tail.c = POSITIVE_NUM_ENDING[chunk]; } return head; } tail = pushItem(pos, chunk, head, tail); } } private CharacterBuffer pushItem(final int slot, final int value, final CharacterBuffer head, CharacterBuffer tail) { tail.c = CONTINUATION_NUM_SECTION[value]; head.slot ++; final CharacterBuffer next = new CharacterBuffer(); next.slot = slot; tail.next = next; tail = next; return next; } /** * Computes a linked list of serialization results for the supplied long. *

* See {@link #CONTINUATION_NUM_SECTION} for a detailed description of the serialization policy. */ private CharacterBuffer computeSerialization(long i) { boolean negative; final CharacterBuffer head = new CharacterBuffer(); CharacterBuffer tail = head; if (i < 0) { negative = true; if (i == Long.MIN_VALUE) { // This is a sneaky trick to add the extra +1 for the fact that // Math.abs(MIN_VALUE) = Math.abs(MAX_VALUE) +1. // We set the value of the first digit to 32, which technically overflows our // otherwise base 32 numbering system. // When we deserialize, we will accumulate this extra +1, and then right // at the end of the int deserialization, when we realize the number is negative, // then we will switch the accumulated value and the last digit to // negatives, so they won't overflow. tail = pushItem(1, 32, head, tail); // We remove an extra 1 here as well, so all the remaining bits will be 1s instead of 0s i = i/-32L - 1; } else { i = -i; } assert i >= 0; } else { negative = false; } for ( int pos = head.slot + 1 ; ; pos++ ) { final int chunk = (int)(i%32L); i = i/32L; if (i == 0) { head.slot ++; if (negative) { tail.c = NEGATIVE_NUM_ENDING[chunk]; } else { tail.c = POSITIVE_NUM_ENDING[chunk]; } return head; } tail = pushItem(pos, chunk, head, tail); } } @Override public String serializeBoolean(final boolean z) { return z ? "1" : "0"; } @Override public String serializeBooleanArray(final boolean ... z) { final int size = z.length / 5 + 1; // We will write a full large int using as many base 32 values as we need. CharacterBuffer sizeChunk = computeSerialization(z.length); final int offset = sizeChunk.slot; final char[] buffer = new char[size + offset]; sizeChunk.slot = 0; while (sizeChunk != null) { buffer[sizeChunk.slot] = sizeChunk.c; sizeChunk = sizeChunk.next; } for (int i = 0; i < size; i++) { final int value = boolsToBase32(z, i); assert value < 32; buffer[i+offset] = POSITIVE_NUM_ENDING[value%32]; } return String.valueOf(buffer); } private int boolsToBase32(final boolean[] z, int pos) { int value = 0; if (pos+5 >= z.length) { // This value is less than five booleans. Use a loop final int start = pos; for (;pos < z.length; pos++ ) { if (z[pos]) { value += BIT_MASKS[pos-start]; } } return value; } else { // We know we have at least five booleans we can read if (z[pos++]) { value += 1; } if (z[pos++]) { value += 2; } if (z[pos++]) { value += 4; } if (z[pos++]) { value += 8; } if (z[pos++]) { value += 16; } return value; } } @Override public String serializeByte(final byte b) { return serializeInt(b); } @Override public String serializeShort(final short s) { return serializeInt(s); } @Override public String serializeChar(final char c) { return serializeInt(c); } @Override public String serializeFloat(final float f) { return serializeInt(Float.floatToIntBits(f)); } @Override public String serializeDouble(final double d) { return serializeLong(Double.doubleToLongBits(d)); } @Override public boolean deserializeBoolean(final CharIterator z) { return z.next() == '1'; } @Override public boolean[] deserializeBooleanArray(final CharIterator z) { final int size = deserializeInt(z); final boolean[] result = new boolean[size]; for (int i = 0; ; ) { final int value = deserializeInt(z); if (i + 5 > size) { // The last value; may not have all five booleans; use a loop final int start = i; for (;i < size; i++) { result[i] = (value & BIT_MASKS[i-start]) != 0; } break; } else { // Write five more booleans result[i++] = (value & 1) != 0; result[i++] = (value & 2) != 0; result[i++] = (value & 4) != 0; result[i++] = (value & 8) != 0; result[i++] = (value & 16) != 0; } } return result; } @Override public byte deserializeByte(final CharIterator b) { return (byte)deserializeInt(b); } @Override public short deserializeShort(final CharIterator s) { return (short)deserializeInt(s); } @Override public char deserializeChar(final CharIterator c) { return (char)deserializeInt(c); } @Override public float deserializeFloat(final CharIterator f) { final int asInt = deserializeInt(f); return Float.intBitsToFloat(asInt); /** For javascript, we will use a native function to get our int bits: function FloatToIEEE(f) { var buf = new ArrayBuffer(4); (new Float32Array(buf))[0] = f; return (new Uint32Array(buf))[0]; } */ } @Override public double deserializeDouble(final CharIterator d) { final long asLong = deserializeLong(d); return Double.longBitsToDouble(asLong); /** For javascript, we wil use a native function to get our long bits: function DoubleToIEEE(f) { var buf = new ArrayBuffer(8); (new Float64Array(buf))[0] = f; // We will also process these bits as ints to avoid long emulation. // Thus, we do not bother with a doubleToLongBits method, as long emulation sucks return [ (new Uint32Array(buf))[0] ,(new Uint32Array(buf))[1] ]; } */ } /** * @see xapi.model.api.PrimitiveSerializer#deserializeString(xapi.source.api.CharIterator) */ @Override public String deserializeString(final CharIterator s) { final int size = deserializeInt(s); if (size == -1) { return null; } if (size == 0) { return ""; } return s.consume(size).toString(); } @Override public String serializeString(final String s) { if (s == null) { return serializeInt(-1); } return serializeInt(s.length()) + s; } /** * @see xapi.model.api.PrimitiveSerializer#serializeClass(java.lang.Class) */ @Override public String serializeClass(final Class c) { return serializeString(c.getName()); } /** * @see xapi.model.api.PrimitiveSerializer#deserializeClass(xapi.source.api.CharIterator) */ @Override @SuppressWarnings("unchecked") public Class deserializeClass(final CharIterator c) { final String cls = deserializeString(c); return loadClass(cls); } @Override @SuppressWarnings("unchecked") public Class loadClass(final String cls) { if (cls == null) { return null; } switch (cls) { case "boolean": return boolean.class; case "byte": return byte.class; case "short": return short.class; case "char": return char.class; case "int": return int.class; case "long": return long.class; case "float": return float.class; case "double": return double.class; case "void": return void.class; } try { return Class.forName(cls); } catch (final ClassNotFoundException e) { throw X_Debug.rethrow(e); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy