com.amazon.ion.impl.lite.ReverseBinaryEncoder Maven / Gradle / Ivy
Show all versions of ion-java Show documentation
/*
* Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/
package com.amazon.ion.impl.lite;
import static com.amazon.ion.SymbolTable.UNKNOWN_SYMBOL_ID;
import static com.amazon.ion.SystemSymbols.IMPORTS_SID;
import static com.amazon.ion.SystemSymbols.ION_SYMBOL_TABLE_SID;
import static com.amazon.ion.SystemSymbols.MAX_ID_SID;
import static com.amazon.ion.SystemSymbols.NAME_SID;
import static com.amazon.ion.SystemSymbols.SYMBOLS_SID;
import static com.amazon.ion.SystemSymbols.VERSION_SID;
import static com.amazon.ion.impl._Private_IonConstants.BINARY_VERSION_MARKER_1_0;
import static com.amazon.ion.impl._Private_IonConstants.lnBooleanFalse;
import static com.amazon.ion.impl._Private_IonConstants.lnBooleanTrue;
import static com.amazon.ion.impl._Private_IonConstants.lnIsNull;
import static com.amazon.ion.impl._Private_IonConstants.lnIsVarLen;
import static com.amazon.ion.impl._Private_IonConstants.tidBlob;
import static com.amazon.ion.impl._Private_IonConstants.tidBoolean;
import static com.amazon.ion.impl._Private_IonConstants.tidClob;
import static com.amazon.ion.impl._Private_IonConstants.tidDecimal;
import static com.amazon.ion.impl._Private_IonConstants.tidFloat;
import static com.amazon.ion.impl._Private_IonConstants.tidList;
import static com.amazon.ion.impl._Private_IonConstants.tidNegInt;
import static com.amazon.ion.impl._Private_IonConstants.tidNull;
import static com.amazon.ion.impl._Private_IonConstants.tidPosInt;
import static com.amazon.ion.impl._Private_IonConstants.tidSexp;
import static com.amazon.ion.impl._Private_IonConstants.tidString;
import static com.amazon.ion.impl._Private_IonConstants.tidStruct;
import static com.amazon.ion.impl._Private_IonConstants.tidSymbol;
import static com.amazon.ion.impl._Private_IonConstants.tidTimestamp;
import static com.amazon.ion.impl._Private_IonConstants.tidTypedecl;
import com.amazon.ion.Decimal;
import com.amazon.ion.IonBlob;
import com.amazon.ion.IonBool;
import com.amazon.ion.IonClob;
import com.amazon.ion.IonDatagram;
import com.amazon.ion.IonDecimal;
import com.amazon.ion.IonException;
import com.amazon.ion.IonFloat;
import com.amazon.ion.IonInt;
import com.amazon.ion.IonList;
import com.amazon.ion.IonSequence;
import com.amazon.ion.IonSexp;
import com.amazon.ion.IonString;
import com.amazon.ion.IonStruct;
import com.amazon.ion.IonSymbol;
import com.amazon.ion.IonSystem;
import com.amazon.ion.IonTimestamp;
import com.amazon.ion.IonValue;
import com.amazon.ion.SymbolTable;
import com.amazon.ion.SymbolToken;
import com.amazon.ion.Timestamp;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.ListIterator;
/**
* Encoder implementation that encodes a IonDatagram into binary format using a
* reverse encoding algorithm instead of the default pre-order (left-to-right)
* two-pass algorithm.
*
* This reverse encoding algorithm requires a fully materialized IonDatagram
* DOM to qualify for use. It uses a single buffer, {@link #myBuffer}, to hold
* the entire binary-encoded data, with an integer, {@link #myOffset}, to index
* the current position to write the bytes.
*
* The algorithm begins by traversing from the last top-level value to the
* first top-level value. During this traversal, it recursively goes into the
* nested values of the top-level value being traversed in a similar
* last-to-first (right-to-left) order.
*/
class ReverseBinaryEncoder
{
private static final BigInteger MAX_LONG_VALUE =
BigInteger.valueOf(Long.MAX_VALUE);
private static final int NULL_LENGTH_MASK = lnIsNull;
private static final int TYPE_NULL = tidNull << 4;
private static final int TYPE_BOOL = tidBoolean << 4;
private static final int TYPE_POS_INT = tidPosInt << 4;
private static final int TYPE_NEG_INT = tidNegInt << 4;
private static final int TYPE_FLOAT = tidFloat << 4;
private static final int TYPE_DECIMAL = tidDecimal << 4;
private static final int TYPE_TIMESTAMP = tidTimestamp << 4;
private static final int TYPE_SYMBOL = tidSymbol << 4;
private static final int TYPE_STRING = tidString << 4;
private static final int TYPE_CLOB = tidClob << 4;
private static final int TYPE_BLOB = tidBlob << 4;
private static final int TYPE_LIST = tidList << 4;
private static final int TYPE_SEXP = tidSexp << 4;
private static final int TYPE_STRUCT = tidStruct << 4;
private static final int TYPE_ANNOTATIONS = tidTypedecl << 4;
/**
* Holds the entire binary encoded data. When IonDatagram is fully encoded
* into binary data, this byte array will hold that data.
*/
private byte[] myBuffer;
/**
* Index onto the position where the bytes are last written to the buffer.
* That means that if you want to write 1 more byte to the buffer, you have
* to decrease the index by 1 (myOffset - 1).
*/
private int myOffset;
/**
* The symbol table attached to the IonValue (and its nested values)
* that the encoder is currently traversing on.
*/
private SymbolTable mySymbolTable;
private IonSystem myIonSystem;
ReverseBinaryEncoder(int initialSize)
{
myBuffer = new byte[initialSize];
myOffset = initialSize;
}
/**
* Returns the size of the Ion binary-encoded byte array.
*
* This makes an unchecked assumption that {{@link #serialize(IonDatagram)}
* is already called.
*
* @return the number of bytes of the byte array
*/
int byteSize()
{
return myBuffer.length - myOffset;
}
/**
* Copies the current contents of the Ion binary-encoded byte array into a
* new byte array. The allocates an array of the size needed to exactly hold
* the output and copies the entire byte array to it.
*
* This makes an unchecked assumption that {{@link #serialize(IonDatagram)}
* is already called.
*
* @return the newly allocated byte array
*/
byte[] toNewByteArray()
{
int length = myBuffer.length - myOffset;
byte[] bytes = new byte[length];
System.arraycopy(myBuffer, myOffset, bytes, 0, length);
return bytes;
}
/**
* Copies the current contents of the Ion binary-encoded byte array into a
* a given byte array.
*
* The given array must be large enough to contain all the bytes of the
* Ion binary-encoded byte array.
*
* This makes an unchecked assumption that {{@link #serialize(IonDatagram)}
* is already called.
*
* TODO To be deprecated along with {@link IonDatagram#getBytes(byte[])}
*
* @param dst the byte array into which bytes are to be written
*
* @return the number of bytes copied into {@code dst}
*/
int toNewByteArray(byte[] dst)
{
int length = myBuffer.length - myOffset;
System.arraycopy(myBuffer, myOffset, dst, 0, length);
return length;
}
/**
* Copies the current contents of the Ion binary-encoded byte array into a
* a given byte sub-array.
*
* The given sub-array must be large enough to contain all the bytes of the
* Ion binary-encoded byte array.
*
* This makes an unchecked assumption that {{@link #serialize(IonDatagram)}
* is already called.
*
* TODO To be deprecated along with {@link IonDatagram#getBytes(byte[], int)}
*
* @param dst the byte sub-array into which bytes are to be written
* @param offset the offset within the sub-array of the first byte to be
* written; must be non-negative and no larger
* than {@code dst.length}
*
* @return the number of bytes copied into {@code dst}
*/
int toNewByteArray(byte[] dst, int offset)
{
int length = myBuffer.length - myOffset;
System.arraycopy(myBuffer, myOffset, dst, offset, length);
return length;
}
/**
* Copies the current contents of the Ion binary-encoded byte array to a
* specified stream.
*
* This makes an unchecked assumption that {{@link #serialize(IonDatagram)}
* is already called.
*
* @return the number of bytes written into {@code out}
*
* @throws IOException
*/
int writeBytes(OutputStream out)
throws IOException
{
int length = myBuffer.length - myOffset;
byte[] bytes = new byte[length];
System.arraycopy(myBuffer, myOffset, bytes, 0, length);
out.write(bytes);
return length;
}
/**
* Serialize the IonDatagram into Ion binary-encoding, to the internal
* byte array buffer of the encoder.
*
* If the IonDatagram has been modified after this method call, you
* must call this method again to correctly reflect the
* modifications.
*
* @throws IonException
*/
void serialize(IonDatagram dg)
throws IonException
{
myIonSystem = dg.getSystem();
mySymbolTable = null;
// Write all top-level values in reverse
writeIonValue(dg);
// After all top-level values are written, write the local symbol table
// that is attached to the top-level value that has just been written,
// if it exists.
if (mySymbolTable != null && mySymbolTable.isLocalTable()) {
writeLocalSymbolTable(mySymbolTable);
}
// Write IVM
writeBytes(BINARY_VERSION_MARKER_1_0);
}
void serialize(SymbolTable symTab)
throws IonException
{
writeLocalSymbolTable(symTab);
}
/**
* Grows the current buffer and returns the updated offset.
*
* @param offset the original offset
* @return the updated offset
*/
private int growBuffer(int offset)
{
assert offset < 0;
byte[] oldBuf = myBuffer;
int oldLen = oldBuf.length;
byte[] newBuf = new byte[(-offset + oldLen) << 1]; // Double the buffer
int oldBegin = newBuf.length - oldLen;
System.arraycopy(oldBuf, 0, newBuf, oldBegin, oldLen);
myBuffer = newBuf;
myOffset += oldBegin;
return offset + oldBegin;
}
/**
* Writes the IonValue and its nested values recursively, including
* annotations.
*
* @param value
* @throws IonException
*/
private void writeIonValue(IonValue value)
throws IonException
{
final int valueOffset = myBuffer.length - myOffset;
switch (value.getType())
{
// scalars
case BLOB: writeIonBlobContent((IonBlob) value); break;
case BOOL: writeIonBoolContent((IonBool) value); break;
case CLOB: writeIonClobContent((IonClob) value); break;
case DECIMAL: writeIonDecimalContent((IonDecimal) value); break;
case FLOAT: writeIonFloatContent((IonFloat) value); break;
case INT: writeIonIntContent((IonInt) value); break;
case NULL: writeIonNullContent(); break;
case STRING: writeIonStringContent((IonString) value); break;
case SYMBOL: writeIonSymbolContent((IonSymbol) value); break;
case TIMESTAMP: writeIonTimestampContent((IonTimestamp) value); break;
// containers
case LIST: writeIonListContent((IonList) value); break;
case SEXP: writeIonSexpContent((IonSexp) value); break;
case STRUCT: writeIonStructContent((IonStruct) value); break;
// IonDatagram
case DATAGRAM: writeIonDatagramContent((IonDatagram) value); break;
default:
throw new IonException("IonType is unknown: " + value.getType());
}
writeAnnotations(value, valueOffset);
}
// =========================================================================
// Basic Field Formats (Primitive Fields)
// =========================================================================
private void writeByte(int b)
{
int offset = myOffset;
if (--offset < 0) {
offset = growBuffer(offset);
}
// Using narrowing primitive conversion from int to byte
myBuffer[offset] = (byte) b;
myOffset = offset;
}
private void writeBytes(byte[] bytes)
{
int length = bytes.length;
int offset = myOffset;
if ((offset -= length) < 0) {
offset = growBuffer(offset);
}
System.arraycopy(bytes, 0, myBuffer, offset, length);
myOffset = offset;
}
private void writeUInt(long v)
{
int offset = myOffset;
if (v < (1L << (8 * 1)))
{
if (--offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) v;
}
else if (v < (1L << (8 * 2)))
{
offset -= 2;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 1));
myBuffer[offset+1] = (byte) v;
}
else if (v < (1L << (8 * 3)))
{
offset -= 3;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 2));
myBuffer[offset+1] = (byte) (v >>> (8 * 1));
myBuffer[offset+2] = (byte) v;
}
else if (v < (1L << (8 * 4)))
{
offset -= 4;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 3));
myBuffer[offset+1] = (byte) (v >>> (8 * 2));
myBuffer[offset+2] = (byte) (v >>> (8 * 1));
myBuffer[offset+3] = (byte) v;
}
else if (v < (1L << (8 * 5)))
{
offset -= 5;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 4));
myBuffer[offset+1] = (byte) (v >>> (8 * 3));
myBuffer[offset+2] = (byte) (v >>> (8 * 2));
myBuffer[offset+3] = (byte) (v >>> (8 * 1));
myBuffer[offset+4] = (byte) v;
}
else if (v < (1L << (8 * 6)))
{
offset -= 6;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 5));
myBuffer[offset+1] = (byte) (v >>> (8 * 4));
myBuffer[offset+2] = (byte) (v >>> (8 * 3));
myBuffer[offset+3] = (byte) (v >>> (8 * 2));
myBuffer[offset+4] = (byte) (v >>> (8 * 1));
myBuffer[offset+5] = (byte) v;
}
else if (v < (1L << (8 * 7)))
{
offset -= 7;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 6));
myBuffer[offset+1] = (byte) (v >>> (8 * 5));
myBuffer[offset+2] = (byte) (v >>> (8 * 4));
myBuffer[offset+3] = (byte) (v >>> (8 * 3));
myBuffer[offset+4] = (byte) (v >>> (8 * 2));
myBuffer[offset+5] = (byte) (v >>> (8 * 1));
myBuffer[offset+6] = (byte) v;
}
else
{
offset -= 8;
if (offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (8 * 7));
myBuffer[offset+1] = (byte) (v >>> (8 * 6));
myBuffer[offset+2] = (byte) (v >>> (8 * 5));
myBuffer[offset+3] = (byte) (v >>> (8 * 4));
myBuffer[offset+4] = (byte) (v >>> (8 * 3));
myBuffer[offset+5] = (byte) (v >>> (8 * 2));
myBuffer[offset+6] = (byte) (v >>> (8 * 1));
myBuffer[offset+7] = (byte) v;
}
myOffset = offset;
}
/**
* Write a VarUInt field. VarUInts are sequence of bytes. The high-order
* bit of the last octet is one, indicating the end of the sequence. All
* other high-order bits must be zero.
*
* Writes at least one byte, even for zero values. int parameter is enough
* as the scalar and container writers do not have APIs that return long or
* BigInteger representations.
*
* @param v
*/
private void writeVarUInt(int v)
{
int offset = myOffset;
if (v < (1 << (7 * 1))) // 1 byte - 7 bits used - 0x7f max
{
if (--offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v | 0x80 );
}
else if (v < (1 << (7 * 2))) // 2 bytes - 14 bits used - 0x3fff max
{
if ((offset -= 2) < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (v >>> (7 * 1));
myBuffer[offset + 1] = (byte) (v | 0x80);
}
else if (v < (1 << (7 * 3))) // 3 bytes - 21 bits used - 0x1fffff max
{
if ((offset -= 3) < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) ( v >>> (7 * 2));
myBuffer[offset + 1] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 2] = (byte) ( v | 0x80);
}
else if (v < (1 << (7 * 4))) // 4 bytes - 28 bits used - 0xfffffff max
{
if ((offset -= 4) < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) ( v >>> (7 * 3));
myBuffer[offset + 1] = (byte) ((v >>> (7 * 2)) & 0x7f);
myBuffer[offset + 2] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 3] = (byte) ( v | 0x80);
}
else // 5 bytes - 32 bits used - 0x7fffffff max (Integer.MAX_VALUE)
{
if ((offset -= 5) < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) ( v >>> (7 * 4));
myBuffer[offset + 1] = (byte) ((v >>> (7 * 3)) & 0x7f);
myBuffer[offset + 2] = (byte) ((v >>> (7 * 2)) & 0x7f);
myBuffer[offset + 3] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 4] = (byte) ( v | 0x80);
}
myOffset = offset;
}
/**
* Write a VarInt field. VarInts are sequence of bytes. The high-order bit
* of the last octet is one, indicating the end of the sequence. All other
* high-order bits must be zero. The second-highest order bit (0x40) is a
* sign flag in the first octet of the representation, but part of the
* extension bits for all other octets.
*
* Writes at least one byte, even for zero values. int parameter is enough
* as the scalar and container writers do not have APIs that return long or
* BigInteger representations.
*
* @param v
*/
private void writeVarInt(int v)
{
if (v == 0)
{
writeByte(0x80);
}
else
{
int offset = myOffset;
boolean is_negative = (v < 0);
if (is_negative)
{
// note that for Integer.MIN_VALUE (0x80000000) the negative
// is the same, but that's also the bit pattern we need to
// write out - so no worries
v = -v;
}
if (v < (1 << (7 * 1 - 1))) // 1 byte - 6 bits used - 0x3f max
{
if (--offset < 0) {
offset = growBuffer(offset);
}
if (is_negative)
v |= 0x40;
myBuffer[offset] = (byte) (v | 0x80);
}
else if (v < (1 << (7 * 2 - 1))) // 2 bytes - 13 bits used - 0x1fff max
{
if ((offset -= 2) < 0) {
offset = growBuffer(offset);
}
if (is_negative)
v |= 0x2000;
myBuffer[offset] = (byte) (v >>> (7 * 1));
myBuffer[offset + 1] = (byte) (v | 0x80);
}
else if (v < (1 << (7 * 3 - 1))) // 3 bytes - 20 bits used - 0xfffff max
{
if ((offset -= 3) < 0) {
offset = growBuffer(offset);
}
if (is_negative)
v |= 0x100000;
myBuffer[offset] = (byte) ( v >>> (7 * 2));
myBuffer[offset + 1] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 2] = (byte) ( v | 0x80);
}
else if (v < (1 << (7 * 4 - 1))) // 4 bytes - 27 bits used - 0x7ffffff max
{
if ((offset -= 4) < 0) {
offset = growBuffer(offset);
}
if (is_negative)
v |= 0x8000000;
myBuffer[offset] = (byte) ( v >>> (7 * 3));
myBuffer[offset + 1] = (byte) ((v >>> (7 * 2)) & 0x7f);
myBuffer[offset + 2] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 3] = (byte) ( v | 0x80);
}
else // 5 bytes - 31 bits used - 0x7fffffff max (Integer.MAX_VALUE)
{
if ((offset -= 5) < 0) {
offset = growBuffer(offset);
}
// This is different from the previous if-blocks because we
// cannot represent a int with more than 32 bits to perform
// the "OR-assignment" (|=).
myBuffer[offset] = (byte) ((v >>> (7 * 4)) & 0x7f);
if (is_negative) {
myBuffer[offset] |= 0x40;
}
myBuffer[offset + 1] = (byte) ((v >>> (7 * 3)) & 0x7f);
myBuffer[offset + 2] = (byte) ((v >>> (7 * 2)) & 0x7f);
myBuffer[offset + 3] = (byte) ((v >>> (7 * 1)) & 0x7f);
myBuffer[offset + 4] = (byte) ( v | 0x80);
}
myOffset = offset;
}
}
// =========================================================================
// Type Descriptors
// =========================================================================
/**
* Writes the prefix (type and length) preceding the body of an encoded
* value. This method is only called after a value's body is
* written to the buffer.
*
* @param type
* the value's type, a four-bit high-nibble mask
* @param length
* the number of bytes (octets) in the body, excluding the prefix
* itself
*/
private void writePrefix(int type, int length)
{
if (length >= lnIsVarLen)
{
writeVarUInt(length);
length = lnIsVarLen;
}
int offset = myOffset;
if (--offset < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (type | length);
myOffset = offset;
}
private void writeAnnotations(IonValue value, int endOfValueOffset)
{
SymbolToken[] annotationSymTokens = value.getTypeAnnotationSymbols();
if (annotationSymTokens.length > 0)
{
final int annotatedValueOffset = myBuffer.length - myOffset;
int sid;
for (int i = annotationSymTokens.length; --i >= 0;)
{
sid = findSid(annotationSymTokens[i]);
writeVarUInt(sid);
}
writeVarUInt(myBuffer.length - myOffset - annotatedValueOffset);
writePrefix(TYPE_ANNOTATIONS,
myBuffer.length - myOffset - endOfValueOffset);
}
}
// =========================================================================
// Scalars
// =========================================================================
private void writeIonNullContent()
{
// null.null
int encoded = TYPE_NULL | NULL_LENGTH_MASK;
writeByte(encoded);
}
private void writeIonBoolContent(IonBool val)
{
int encoded;
if (val.isNullValue())
{
encoded = TYPE_BOOL | NULL_LENGTH_MASK;
}
else
{
boolean b = val.booleanValue();
encoded = b ? (TYPE_BOOL | lnBooleanTrue) :
(TYPE_BOOL | lnBooleanFalse);
}
writeByte(encoded);
}
/**
* @see IonBinary#writeUIntValue(BigInteger value, int len)
* @param val
*/
private void writeIonIntContent(IonInt val)
{
if (val.isNullValue())
{
// NOTE: We are only writing the positive binary representation of
// null value here.
writeByte((byte) (TYPE_POS_INT | NULL_LENGTH_MASK));
}
else
{
BigInteger bigInt = val.bigIntegerValue();
int signum = bigInt.signum();
int type;
final int originalOffset = myBuffer.length - myOffset;
if (signum == 0)
{
// Zero has no bytes of data at all
writeByte((byte) TYPE_POS_INT);
return; // Finished writing IonInt as zero.
}
else if (signum < 0)
{
type = TYPE_NEG_INT;
bigInt = bigInt.negate();
}
else
{
type = TYPE_POS_INT;
}
// Check the value if it's smaller than a long, if so we can use a
// simpler routine to write the BigInteger value.
if (bigInt.compareTo(MAX_LONG_VALUE) < 0)
{
long lvalue = bigInt.longValue();
writeUInt(lvalue);
}
else
{
// BigInteger.toByteArray() returns a two's complement
// representation byte array. However, we have negated all
// negative BigInts to become positive BigInts, so essentially
// we don't have to convert the two's complement representation
// to sign-magnitude UInt.
byte[] bits = bigInt.toByteArray();
// BigInteger will pad this with a null byte sometimes
// for negative numbers. Let's skip past any leading null bytes.
int offset = 0;
while (offset < bits.length && bits[offset] == 0)
{
offset++;
}
int actualBitLength = bits.length - offset;
int bufferOffset = myOffset - actualBitLength;
if (bufferOffset < 0) {
bufferOffset = growBuffer(bufferOffset);
}
System.arraycopy(bits, offset, myBuffer, bufferOffset,
actualBitLength);
myOffset = bufferOffset;
}
writePrefix(type, myBuffer.length - myOffset - originalOffset);
}
}
private void writeIonFloatContent(IonFloat val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_FLOAT | NULL_LENGTH_MASK));
}
else
{
// Write a 64-bit value in IEE-754 standard. This format happens to
// match the 8-byte UInt encoding.
long bits = Double.doubleToRawLongBits(val.doubleValue());
int offset = myOffset;
if ((offset -= 8) < 0) {
offset = growBuffer(offset);
}
myBuffer[offset] = (byte) (bits >>> (8 * 7));
myBuffer[offset + 1] = (byte) (bits >>> (8 * 6));
myBuffer[offset + 2] = (byte) (bits >>> (8 * 5));
myBuffer[offset + 3] = (byte) (bits >>> (8 * 4));
myBuffer[offset + 4] = (byte) (bits >>> (8 * 3));
myBuffer[offset + 5] = (byte) (bits >>> (8 * 2));
myBuffer[offset + 6] = (byte) (bits >>> (8 * 1));
myBuffer[offset + 7] = (byte) bits;
myOffset = offset;
writePrefix(TYPE_FLOAT, 8); // 64-bit IEE-754
}
}
private static final byte[] negativeZeroBitArray = new byte[] { (byte) 0x80 };
private static final byte[] positiveZeroBitArray = new byte[0];
/**
* @see com.amazon.ion.impl.IonBinary.Writer#writeDecimalContent
*/
private void writeIonDecimalContent(BigDecimal bd)
{
BigInteger mantissa = bd.unscaledValue();
byte[] mantissaBits;
switch (mantissa.signum())
{
case 0:
if (Decimal.isNegativeZero(bd))
{
mantissaBits = negativeZeroBitArray;
}
else
{
mantissaBits = positiveZeroBitArray;
}
break;
case -1:
// Obtain the unsigned value of the BigInteger
// We cannot use the twos complement representation of a
// negative BigInteger as this is different from the encoding
// of basic field Int.
mantissaBits = mantissa.negate().toByteArray();
// Set the sign on the highest order bit of the first octet
mantissaBits[0] |= 0x80;
break;
case 1:
mantissaBits = mantissa.toByteArray();
break;
default:
throw new IllegalStateException("mantissa signum out of range");
}
writeBytes(mantissaBits);
// Ion stores exponent, BigDecimal uses the negation 'scale' instead
int exponent = -bd.scale();
writeVarInt(exponent);
}
private void writeIonDecimalContent(IonDecimal val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_DECIMAL | NULL_LENGTH_MASK));
}
else
{
final int originalOffset = myBuffer.length - myOffset;
writeIonDecimalContent(val.decimalValue());
writePrefix(TYPE_DECIMAL,
myBuffer.length - myOffset - originalOffset);
}
}
private void writeIonTimestampContent(IonTimestamp val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_TIMESTAMP | NULL_LENGTH_MASK));
}
else
{
final int originalOffset = myBuffer.length - myOffset;
Timestamp t = val.timestampValue();
// Time and date portion
switch (t.getPrecision())
{
// Fall through each case - by design
case FRACTION:
case SECOND:
{
BigDecimal fraction = t.getZFractionalSecond();
if (fraction != null)
{
assert (fraction.signum() >= 0
&& ! fraction.equals(BigDecimal.ZERO))
: "Bad timestamp fraction: " + fraction;
writeIonDecimalContent(fraction);
}
writeVarUInt(t.getZSecond());
}
case MINUTE:
writeVarUInt(t.getZMinute());
writeVarUInt(t.getZHour());
case DAY:
writeVarUInt(t.getZDay());
case MONTH:
writeVarUInt(t.getZMonth());
case YEAR:
writeVarUInt(t.getZYear());
break;
default:
throw new IllegalStateException(
"unrecognized Timestamp precision: " +
t.getPrecision());
}
// Offset portion
Integer offset = t.getLocalOffset();
if (offset == null)
{
writeByte((byte) (0x80 | 0x40)); // Negative 0 (no timezone)
}
else
{
writeVarInt(offset.intValue());
}
writePrefix(TYPE_TIMESTAMP,
myBuffer.length - myOffset - originalOffset);
}
}
private void writeIonSymbolContent(IonSymbol val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_SYMBOL | NULL_LENGTH_MASK));
}
else
{
final int originalOffset = myBuffer.length - myOffset;
SymbolToken symToken = val.symbolValue();
int sid = findSid(symToken);
writeUInt(sid);
writePrefix(TYPE_SYMBOL,
myBuffer.length - myOffset - originalOffset);
}
}
private void writeIonStringContent(IonString val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_STRING | NULL_LENGTH_MASK));
}
else
{
writeIonStringContent(val.stringValue());
}
}
private void writeIonStringContent(String str)
{
int strlen = str.length();
byte[] buffer = myBuffer;
int offset = myOffset;
// The number of UTF-8 code units (bytes) we will write is at least as
// large as the number of UTF-16 code units (ints) that are in the
// input string. Ensure we have at least that much capacity, to reduce
// the number of times we need to grow the buffer.
offset -= strlen;
if (offset < 0)
{
offset = growBuffer(offset);
buffer = myBuffer;
}
offset += strlen;
// Optimize for ASCII, under the assumption that it happens a lot.
// This fits within the capacity allocated above, so we don't have to
// grow the buffer within this loop.
int i = strlen - 1;
for (; i >= 0; --i)
{
int c = str.charAt(i);
if (!(c <= 0x7f))
break;
buffer[--offset] = (byte) c;
}
for (; i >= 0; --i)
{
int c = str.charAt(i);
if (c <= 0x7f) // U+0000 to U+007f codepoints
{
if (--offset < 0)
{
offset = growBuffer(offset);
buffer = myBuffer;
}
buffer[offset] = (byte) c;
}
else if (c <= 0x7ff) // U+0080 to U+07ff codepoints
{
if ((offset -= 2) < 0)
{
offset = growBuffer(offset);
buffer = myBuffer;
}
buffer[offset] = (byte) (0xc0 | ((c >> 6) & 0x1f));
buffer[offset + 1] = (byte) (0x80 | (c & 0x3f));
}
else if (c >= 0xd800 && c <= 0xdfff) // Surrogate!
{
// high surrogate not followed by low surrogate
if (c <= 0xdbff)
{
throw new IonException("invalid string, unpaired high surrogate character");
}
// string starts with low surrogate
if (i == 0)
{
throw new IonException("invalid string, unpaired low surrogate character");
}
// low surrogate not preceded by high surrogate
// charAt(--i) is never out of bounds as i == 0 is asserted to
// be false in previous if-block
int c2 = str.charAt(--i);
if (!(c2 >= 0xd800 && c2 <= 0xdbff))
{
throw new IonException("invalid string, unpaired low surrogate character");
}
// valid surrogate pair: (c2, c)
int codepoint = 0x10000 + (((c2 & 0x3ff) << 10) | (c & 0x3ff));
if ((offset -= 4) < 0)
{
offset = growBuffer(offset);
buffer = myBuffer;
}
buffer[offset] = (byte) (0xF0 | ((codepoint >> 18) & 0x07));
buffer[offset + 1] = (byte) (0x80 | ((codepoint >> 12) & 0x3F));
buffer[offset + 2] = (byte) (0x80 | ((codepoint >> 6) & 0x3F));
buffer[offset + 3] = (byte) (0x80 | ((codepoint >> 0) & 0x3F));
}
else // U+0800 to U+D7FF and U+E000 to U+FFFF codepoints
{
if ((offset -= 3) < 0)
{
offset = growBuffer(offset);
buffer = myBuffer;
}
buffer[offset] = (byte) (0xE0 | ((c >> 12) & 0x0F));
buffer[offset + 1] = (byte) (0x80 | ((c >> 6) & 0x3F));
buffer[offset + 2] = (byte) (0x80 | (c & 0x3F));
}
}
int length = myOffset - offset;
myOffset = offset;
writePrefix(TYPE_STRING, length);
}
private void writeIonClobContent(IonClob val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_CLOB | NULL_LENGTH_MASK));
}
else
{
byte[] lob = val.getBytes();
writeLobContent(lob);
writePrefix(TYPE_CLOB, lob.length);
}
}
private void writeIonBlobContent(IonBlob val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_BLOB | NULL_LENGTH_MASK));
}
else
{
byte[] lob = val.getBytes();
writeLobContent(lob);
writePrefix(TYPE_BLOB, lob.length);
}
}
private void writeLobContent(byte[] lob)
{
int length = lob.length;
int offset = myOffset - length;
if (offset < 0) {
offset = growBuffer(offset);
}
System.arraycopy(lob, 0, myBuffer, offset, length);
myOffset = offset;
}
// =========================================================================
// Containers
// =========================================================================
private void writeIonListContent(IonList val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_LIST | NULL_LENGTH_MASK));
}
else
{
writeIonSequenceContent(val);
}
}
private void writeIonSexpContent(IonSexp val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_SEXP | NULL_LENGTH_MASK));
}
else
{
writeIonSequenceContent(val);
}
}
private void writeIonSequenceContent(IonSequence seq)
{
final int originalOffset = myBuffer.length - myOffset;
IonValue[] values = seq.toArray();
for (int i = values.length; --i >= 0;)
{
writeIonValue(values[i]);
}
switch (seq.getType())
{
case LIST:
writePrefix(TYPE_LIST,
myBuffer.length - myOffset - originalOffset);
break;
case SEXP:
writePrefix(TYPE_SEXP,
myBuffer.length - myOffset - originalOffset);
break;
default:
throw new IonException(
"cannot identify instance of IonSequence");
}
}
private void writeIonStructContent(IonStruct val)
{
if (val.isNullValue())
{
writeByte((byte) (TYPE_STRUCT | NULL_LENGTH_MASK));
}
else
{
final int originalOffset = myBuffer.length - myOffset;
// TODO amazon-ion/ion-java/issues/31 should not preserve the ordering of fields
ArrayList values = new ArrayList();
// Fill ArrayList with IonValues, the add() just copies the
// references of the IonValues
for (IonValue curr : val)
{
values.add(curr);
}
for (int i = values.size(); --i >= 0; )
{
IonValue v = values.get(i);
SymbolToken symToken = v.getFieldNameSymbol();
writeIonValue(v);
int sid = findSid(symToken);
writeVarUInt(sid);
}
// TODO amazon-ion/ion-java/issues/41 Detect if the struct fields are sorted in ascending
// order of Sids. If so, 1 should be written into 'length' field.
// Note that this 'length' field is not the same as the four-bit
// length L in the type descriptor octet.
writePrefix(TYPE_STRUCT,
myBuffer.length - myOffset - originalOffset);
}
}
private void writeIonDatagramContent(IonDatagram dg)
{
ListIterator reverseIter = dg.listIterator(dg.size());
while (reverseIter.hasPrevious())
{
IonValue currentTopLevelValue = reverseIter.previous();
checkLocalSymbolTablePlacement(currentTopLevelValue);
writeIonValue(currentTopLevelValue);
}
}
// =========================================================================
// Symbol Tables
// =========================================================================
private int findSid(SymbolToken symToken)
{
int sid = symToken.getSid();
String text = symToken.getText();
if (sid != UNKNOWN_SYMBOL_ID) // sid is assigned
{
assert text == null ||
text.equals(mySymbolTable.findKnownSymbol(sid));
}
else // sid is not assigned
{
if (mySymbolTable.isSystemTable())
{
// Replace current symtab with a new local symbol table
// using the default system symtab
mySymbolTable = myIonSystem.newLocalSymbolTable();
}
// Intern the new symbol and get its assigned sid
sid = mySymbolTable.intern(text).getSid();
}
return sid;
}
/**
* Determine if the local symbol table attached to the previous top-level
* value (TLV), {@link #mySymbolTable}, needs to be encoded before the
* next TLV is encoded. This is called before encoding each
* TLV by {@link #writeIonValue(IonValue)}.
*
* Connotations of "Previous TLV" and "Next TLV" in this method are
* different from those defined outside of this method.
* This is done on purpose within this method to facilitate a clear
* understanding of what is going on within this method.
*
* - "Previous top-level value" refers to the top-level IonValue that
* has already been encoded into the buffer.
*
- "Next top-level value" refers to the top-level IonValue that
* is about to be encoded to the buffer. Its contents are not
* traversed yet.
*
- "Previous symbol table" refers to previous TLV's symbol table.
*
- "Next symbol table" refers to next TLV's symbol table.
*
*
* Local symbol tables and IVMs can be interspersed within an IonDatagram.
* This method checks for such cases by looking at the next symtab and
* previous symtab.
*
* The following 4 cases define the scenarios where a LST/IVM is
* written to the buffer:
*
* Next symtab is a local table:
*
* - Previous symtab is a local table - write LST if the two symtabs
* are different references
*
- Previous symtab is a system table - write IVM always
*
*
* Next symtab is a system table:
*
* - Previous symtab is a local table - propagate LST upwards
*
- Previous symtab is a system table - write IVM if the two symtabs
* have different Ion versions.
*
*
* TODO amazon-ion/ion-java/issues/25 Currently, {@link IonDatagram#systemIterator()} doesn't
* retain information about interspersed IVMs within the IonDatagram.
* As such, we cannot obtain the location of interspersed IVMs, if any.
*
* @param nextTopLevelValue the next top-level IonValue to be encoded
*/
private void checkLocalSymbolTablePlacement(IonValue nextTopLevelValue)
{
// Check that nextTopLevelValue is indeed a top-level value
assert nextTopLevelValue == nextTopLevelValue.topLevelValue();
SymbolTable nextSymTab = nextTopLevelValue.getSymbolTable();
if (nextSymTab == null) {
throw new IllegalStateException(
"Binary reverse encoder isn't using LiteImpl");
}
if (mySymbolTable == null) {
// There is no current symtab, i.e. there wasn't any TLV encoded
// before this, return and continue encoding next TLV.
mySymbolTable = nextSymTab;
return;
}
assert nextSymTab.isLocalTable() || nextSymTab.isSystemTable();
if (nextSymTab.isLocalTable())
{
if (mySymbolTable.isSystemTable())
{
writeBytes(BINARY_VERSION_MARKER_1_0);
mySymbolTable = nextSymTab;
}
// mySymbolTable is local
else if (nextSymTab != mySymbolTable)
{
writeLocalSymbolTable(mySymbolTable);
mySymbolTable = nextSymTab;
}
}
// nextSymTab is system
else if (mySymbolTable.isSystemTable() &&
!mySymbolTable.getIonVersionId().equals(nextSymTab.getIonVersionId()))
{
writeBytes(BINARY_VERSION_MARKER_1_0);
mySymbolTable = nextSymTab;
}
}
/**
* Write contents of a local symbol table as a struct.
* The contents are the IST:: annotation, declared symbols, and import
* declarations (that refer to shared symtabs) if they exist.
*
* @param symTab the local symbol table, not shared, not system
*/
private void writeLocalSymbolTable(SymbolTable symTab)
{
assert symTab.isLocalTable();
final int originalOffset = myBuffer.length - myOffset;
// Write declared local symbol strings if any exists
writeSymbolsField(symTab);
// Write import declarations if any exists
writeImportsField(symTab);
// Write the struct prefix
writePrefix(TYPE_STRUCT, myBuffer.length - myOffset - originalOffset);
// Write the $ion_symbol_table annotation
byte[] ionSymbolTableByteArray = {
(byte) (0x80 | 1), /* annot-length */
(byte) (0x80 | ION_SYMBOL_TABLE_SID) /* annot */
};
writeBytes(ionSymbolTableByteArray);
writePrefix(TYPE_ANNOTATIONS,
myBuffer.length - myOffset - originalOffset);
}
/**
* Write a single import declaration (which refers to a shared SymbolTable).
*
* @param symTab the shared symbol table, not local, not system
*/
private void writeImport(SymbolTable symTab)
{
assert symTab.isSharedTable();
final int originalOffset = myBuffer.length - myOffset;
// Write the maxId as int
int maxId = symTab.getMaxId();
if (maxId == 0) {
writeByte((byte) TYPE_POS_INT);
} else {
writeUInt(maxId);
writePrefix(TYPE_POS_INT,
myBuffer.length - myOffset - originalOffset);
}
// Write the "max_id" field name
writeByte((byte) (0x80 | MAX_ID_SID));
final int maxIdOffset = myBuffer.length - myOffset;
// Write the version as int (version will be at least one)
int version = symTab.getVersion();
writeUInt(version);
writePrefix(TYPE_POS_INT, myBuffer.length - myOffset - maxIdOffset);
// Write the "version" field name
writeByte((byte) (0x80 | VERSION_SID));
// Write the name as string
String name = symTab.getName();
writeIonStringContent(name);
// Write the "name" field name
writeByte((byte) (0x80 | NAME_SID));
// Write the struct prefix
writePrefix(TYPE_STRUCT, myBuffer.length - myOffset - originalOffset);
}
/**
* Write import declarations (which refer to shared symbol tables) if any
* exists.
*
* @param symTab the local symbol table, not shared, not system
*/
private void writeImportsField(SymbolTable symTab)
{
// SymbolTable[] holds accurate information, i.e. it contains the
// actual import declaration information, through the means of
// substitute tables if an exact match was not found by the catalog.
SymbolTable[] sharedSymTabs = symTab.getImportedTables();
if (sharedSymTabs.length == 0)
{
return;
}
final int importsOffset = myBuffer.length - myOffset;
for (int i = sharedSymTabs.length; --i >= 0;)
{
writeImport(sharedSymTabs[i]);
}
writePrefix(TYPE_LIST, myBuffer.length - myOffset - importsOffset);
writeByte((byte) (0x80 | IMPORTS_SID));
}
/**
* Write declared local symbol names if any exists.
*
* @param symTab the local symbol table, not shared, not system
*/
private void writeSymbolsField(SymbolTable symTab)
{
// SymbolTable's APIs doesn't expose an Iterator to traverse declared
// symbol strings in reverse order. As such, we utilize these two
// indexes to traverse the strings in reverse.
int importedMaxId = symTab.getImportedMaxId();
int maxId = symTab.getMaxId();
if (importedMaxId == maxId) {
// There are no declared local symbols
return;
}
final int originalOffset = myBuffer.length - myOffset;
for (int i = maxId; i > importedMaxId; i--)
{
String str = symTab.findKnownSymbol(i);
if (str == null) {
writeByte((byte) (TYPE_STRING | NULL_LENGTH_MASK));
} else {
writeIonStringContent(str);
}
}
writePrefix(TYPE_LIST, myBuffer.length - myOffset - originalOffset);
writeByte((byte) (0x80 | SYMBOLS_SID));
}
}