All downloads are free. Search and download functionality uses the official Maven repository.

com.amazon.ion.impl._Private_IonTextAppender Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2007-2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package com.amazon.ion.impl;

import static com.amazon.ion.impl._Private_IonConstants.MAX_LONG_TEXT_SIZE;
import static com.amazon.ion.impl._Private_IonConstants.isHighSurrogate;
import static com.amazon.ion.impl._Private_IonConstants.isLowSurrogate;
import static com.amazon.ion.impl._Private_IonConstants.makeUnicodeScalar;

import com.amazon.ion.Decimal;
import com.amazon.ion.impl.Base64Encoder.TextStream;
import com.amazon.ion.system.IonTextWriterBuilder;
import com.amazon.ion.util._Private_FastAppendable;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.Flushable;
import java.io.IOException;
import java.io.OutputStream;
import java.math.BigDecimal;
import java.math.BigInteger;
import java.nio.CharBuffer;
import java.nio.charset.Charset;


/**
 * NOT FOR APPLICATION USE!
 * 

* Generic text sink that enables optimized output of both ASCII and UTF-16. */ public final class _Private_IonTextAppender implements Closeable, Flushable { private static boolean is8bitValue(int v) { return (v & ~0xff) == 0; } private static boolean isDecimalDigit(int codePoint) { return (codePoint >= '0' && codePoint <= '9'); } private static final boolean[] IDENTIFIER_START_CHAR_FLAGS; private static final boolean[] IDENTIFIER_FOLLOW_CHAR_FLAGS; static { IDENTIFIER_START_CHAR_FLAGS = new boolean[256]; IDENTIFIER_FOLLOW_CHAR_FLAGS = new boolean[256]; for (int ii='a'; ii<='z'; ii++) { IDENTIFIER_START_CHAR_FLAGS[ii] = true; IDENTIFIER_FOLLOW_CHAR_FLAGS[ii] = true; } for (int ii='A'; ii<='Z'; ii++) { IDENTIFIER_START_CHAR_FLAGS[ii] = true; IDENTIFIER_FOLLOW_CHAR_FLAGS[ii] = true; } IDENTIFIER_START_CHAR_FLAGS ['_'] = true; IDENTIFIER_FOLLOW_CHAR_FLAGS['_'] = true; IDENTIFIER_START_CHAR_FLAGS ['$'] = true; IDENTIFIER_FOLLOW_CHAR_FLAGS['$'] = true; for (int ii='0'; ii<='9'; ii++) { IDENTIFIER_FOLLOW_CHAR_FLAGS[ii] = true; } } public static boolean isIdentifierStart(int codePoint) { return IDENTIFIER_START_CHAR_FLAGS[codePoint & 0xff] && is8bitValue(codePoint); } public static boolean isIdentifierPart(int codePoint) { return IDENTIFIER_FOLLOW_CHAR_FLAGS[codePoint & 0xff] && is8bitValue(codePoint); } public static final boolean[] OPERATOR_CHAR_FLAGS; static { final char[] operatorChars = { '<', '>', '=', '+', '-', '*', '&', '^', '%', '~', '/', '?', '.', ';', '!', '|', '@', '`', '#' }; OPERATOR_CHAR_FLAGS = new boolean[256]; for (int ii=0; iinull, true, or false. *

* This does not check for non-identifier keywords such as * null.int. * * @param text the symbol to check. * @return true if the text is an identifier keyword. */ public static boolean isIdentifierKeyword(CharSequence text) { int pos = 0; int valuelen = text.length(); if(valuelen == 0) { return false; } boolean keyword = false; // there has to be at least 1 character or we wouldn't be here switch (text.charAt(pos++)) { case '$': if (valuelen == 1) return false; while (pos < valuelen) { char c = text.charAt(pos++); if (! isDecimalDigit(c)) return false; } return true; case 'f': if (valuelen == 5 // 'f' && text.charAt(pos++) == 'a' && text.charAt(pos++) == 'l' && text.charAt(pos++) == 's' && text.charAt(pos++) == 'e' ) { keyword = true; } break; case 'n': if (valuelen == 4 // 'n' && text.charAt(pos++) == 'u' && text.charAt(pos++) == 'l' && text.charAt(pos++) == 'l' ) { keyword = true; } else if (valuelen == 3 // 'n' && text.charAt(pos++) == 'a' && text.charAt(pos++) == 'n' ) { keyword = true; } break; case 't': if (valuelen == 4 // 't' && text.charAt(pos++) == 'r' && text.charAt(pos++) == 'u' && text.charAt(pos++) == 'e' ) { keyword = true; } break; } return keyword; } /** * Determines whether the text of a symbol requires (single) quotes. * * @param symbol must be a non-empty string. * @param quoteOperators indicates whether the caller wants operators to be * quoted; if true then operator symbols like != * will return true. * has looser quoting requirements than other containers. * @return true if the given symbol requires quoting. * * @throws NullPointerException * if symbol is null. */ public static boolean symbolNeedsQuoting(CharSequence symbol, boolean quoteOperators) { int length = symbol.length(); // If the symbol's text matches an Ion keyword or it's an empty symbol, we must quote it. // Eg, the symbol 'false' and '' must be rendered quoted. 
if(length == 0 || isIdentifierKeyword(symbol)) { return true; } char c = symbol.charAt(0); // Surrogates are neither identifierStart nor operatorPart, so the // first one we hit will fall through and return true. // TODO test that if (!quoteOperators && isOperatorPart(c)) { for (int ii = 0; ii < length; ii++) { c = symbol.charAt(ii); // We don't need to look for escapes since all // operator characters are ASCII. if (!isOperatorPart(c)) { return true; } } return false; } else if (isIdentifierStart(c)) { for (int ii = 0; ii < length; ii++) { c = symbol.charAt(ii); if ((c == '\'' || c < 32 || c > 126) || !isIdentifierPart(c)) { return true; } } return false; } // quote by default return true; } /** * Print an Ion Symbol type. This method will check if symbol needs quoting * @param text * @throws IOException */ public final void printSymbol(CharSequence text) throws IOException { if (text == null) { appendAscii("null.symbol"); } else if (symbolNeedsQuoting(text, true)) { appendAscii('\''); printCodePoints(text, SYMBOL_ESCAPE_CODES); appendAscii('\''); } else { appendAscii(text); } } /** * Print single-quoted Ion Symbol type * @param text * @throws IOException */ public final void printQuotedSymbol(CharSequence text) throws IOException { if (text == null) { appendAscii("null.symbol"); } else { appendAscii('\''); printCodePoints(text, SYMBOL_ESCAPE_CODES); appendAscii('\''); } } private final void printCodePoints(CharSequence text, String[] escapes) throws IOException { int len = text.length(); for (int i = 0; i < len; ++i) { // Find a span of non-escaped ASCII code points so we can write // them as quickly as possible. char c = 0; int j; for (j = i; j < len; ++j) { c = text.charAt(j); // The escapes array always includes U+80 through U+FF. if (c >= 0x100 || escapes[c] != null) { // c is escaped and/or outside ASCII range. 
if (j > i) { appendAscii(text, i, j); i = j; } break; } } if (j == len) { // we've reached the end of sequence; append it and break appendAscii(text, i, j); break; } // We've found a code point that's escaped and/or non-ASCII. if (c < 0x80) { // An escaped ASCII character. assert escapes[c] != null; appendAscii(escapes[c]); } else if (c < 0x100) { // Non-ASCII LATIN-1; we will have an escape sequence but may // not use it. assert escapes[c] != null; // Always escape the C1 control codes U+80 through U+9F. if (escapeNonAscii || c <= 0x9F) { appendAscii(escapes[c]); } else { appendUtf16(c); } } else if (c < 0xD800 || c >= 0xE000) { // Not LATIN-1, but still in the BMP. String s = Integer.toHexString(c); if (escapeNonAscii) { appendAscii(HEX_4_PREFIX); appendAscii(ZERO_PADDING[4 - s.length()]); appendAscii(s); } else { appendUtf16(c); } } else if (isHighSurrogate(c)) { // Outside the BMP! High surrogate must be followed by low. char c2; if (++i == len || !isLowSurrogate(c2 = text.charAt(i))) { String message = "text is invalid UTF-16. It contains an unmatched " + "leading surrogate 0x" + Integer.toHexString(c) + " at index " + (i-1); throw new IllegalArgumentException(message); } if (escapeNonAscii) { int cp = makeUnicodeScalar(c, c2); String s = Integer.toHexString(cp); appendAscii(HEX_8_PREFIX); appendAscii(ZERO_PADDING[8 - s.length()]); appendAscii(s); } else { appendUtf16Surrogate(c, c2); } } else { // unmatched low surrogate assert isLowSurrogate(c); String message = "text is invalid UTF-16. It contains an unmatched " + "trailing surrogate 0x" + Integer.toHexString(c) + " at index " + i; throw new IllegalArgumentException(message); } } } //========================================================================= // Numeric scalars /** ONLY FOR USE BY {@link #printInt(long)}. 
*/ private final char[] _fixedIntBuffer = new char[MAX_LONG_TEXT_SIZE]; public void printInt(long value) throws IOException { int j = _fixedIntBuffer.length; if (value == 0) { _fixedIntBuffer[--j] = '0'; } else { if (value < 0) { while (value != 0) { _fixedIntBuffer[--j] = (char)(0x30 - value % 10); value /= 10; } _fixedIntBuffer[--j] = '-'; } else { while (value != 0) { _fixedIntBuffer[--j] = (char)(0x30 + value % 10); value /= 10; } } } // Using CharBuffer avoids copying the _fixedIntBuffer into a String appendAscii(CharBuffer.wrap(_fixedIntBuffer), j, _fixedIntBuffer.length); } public void printInt(BigInteger value) throws IOException { if (value == null) { appendAscii("null.int"); return; } appendAscii(bigIntegerToString(value)); } public void printDecimal(_Private_IonTextWriterBuilder _options, BigDecimal value) throws IOException { if (value == null) { appendAscii("null.decimal"); return; } BigInteger unscaled = value.unscaledValue(); int signum = value.signum(); if (signum < 0) { appendAscii('-'); unscaled = unscaled.negate(); } else if (value instanceof Decimal && ((Decimal)value).isNegativeZero()) { // for the various forms of negative zero we have to // write the sign ourselves, since neither BigInteger // nor BigDecimal recognize negative zero, but Ion does. 
appendAscii('-'); } final String unscaledText = bigIntegerToString(unscaled); final int significantDigits = unscaledText.length(); final int scale = value.scale(); final int exponent = -scale; if (_options._decimal_as_float) { appendAscii(unscaledText); appendAscii('e'); appendAscii(Integer.toString(exponent)); } else if (exponent == 0) { appendAscii(unscaledText); appendAscii('.'); } else if (exponent < 0) { // Avoid printing small negative exponents using a heuristic // adapted from http://speleotrove.com/decimal/daconvs.html final int adjustedExponent = significantDigits - 1 - scale; if (adjustedExponent >= 0) { int wholeDigits = significantDigits - scale; appendAscii(unscaledText, 0, wholeDigits); appendAscii('.'); appendAscii(unscaledText, wholeDigits, significantDigits); } else if (adjustedExponent >= -6) { appendAscii("0."); appendAscii("00000", 0, scale - significantDigits); appendAscii(unscaledText); } else { appendAscii(unscaledText); appendAscii("d-"); appendAscii(Integer.toString(scale)); } } else // (exponent > 0) { // We cannot move the decimal point to the right, adding // rightmost zeros, because that would alter the precision. appendAscii(unscaledText); appendAscii('d'); appendAscii(Integer.toString(exponent)); } } public void printFloat(_Private_IonTextWriterBuilder _options, double value) throws IOException { // shortcut zero cases if (value == 0.0) { if (Double.compare(value, 0d) == 0) // Only matches positive zero { appendAscii("0e0"); } else { appendAscii("-0e0"); } } else if (Double.isNaN(value)) { if (_options._float_nan_and_inf_as_null) { appendAscii("null"); } else { appendAscii("nan"); } } else if (value == Double.POSITIVE_INFINITY) { if (_options._float_nan_and_inf_as_null) { appendAscii("null"); } else { appendAscii("+inf"); } } else if (value == Double.NEGATIVE_INFINITY) { if (_options._float_nan_and_inf_as_null) { appendAscii("null"); } else { appendAscii("-inf"); } } else { // Double.toString() forces a digit after the decimal point. 
// Remove it when it's not meaningful. String str = Double.toString(value); if (str.endsWith(".0")) { appendAscii(str, 0, str.length() - 2); appendAscii("e0"); } else { appendAscii(str); if (str.indexOf('E') == -1) { appendAscii("e0"); } } } } public void printFloat(_Private_IonTextWriterBuilder _options, Double value) throws IOException { if (value == null) { appendAscii("null.float"); } else { printFloat(_options, value.doubleValue()); } } //========================================================================= // LOBs public void printBlob(_Private_IonTextWriterBuilder _options, byte[] value, int start, int len) throws IOException { if (value == null) { appendAscii("null.blob"); return; } @SuppressWarnings("resource") TextStream ts = new TextStream(new ByteArrayInputStream(value, start, len)); // base64 encoding is 6 bits per char so // it evens out at 3 bytes in 4 characters char[] buf = new char[_options.isPrettyPrintOn() ? 80 : 400]; CharBuffer cb = CharBuffer.wrap(buf); if (_options._blob_as_string) { appendAscii('"'); } else { appendAscii("{{"); if (_options.isPrettyPrintOn()) { appendAscii(' '); } } for (;;) { // TODO is it better to fill up the CharBuffer before outputting? 
int clen = ts.read(buf, 0, buf.length); if (clen < 1) break; appendAscii(cb, 0, clen); } if (_options._blob_as_string) { appendAscii('"'); } else { if (_options.isPrettyPrintOn()) { appendAscii(' '); } appendAscii("}}"); } } private void printClobBytes(byte[] value, int start, int end, String[] escapes) throws IOException { for (int i = start; i < end; i++) { char c = (char)(value[i] & 0xff); String escapedByte = escapes[c]; if (escapedByte != null) { appendAscii(escapedByte); } else { appendAscii(c); } } } public void printClob(_Private_IonTextWriterBuilder _options, byte[] value, int start, int len) throws IOException { if (value == null) { appendAscii("null.clob"); return; } final boolean json = _options._clob_as_string && _options._string_as_json; final int threshold = _options.getLongStringThreshold(); final boolean longString = (0 < threshold && threshold < value.length); if (!_options._clob_as_string) { appendAscii("{{"); if (_options.isPrettyPrintOn()) { appendAscii(' '); } } if (json) { appendAscii('"'); printClobBytes(value, start, start + len, JSON_ESCAPE_CODES); appendAscii('"'); } else if (longString) { // This may escape more often than is necessary, but doing it // minimally is very tricky. Must be sure to account for // quotes at the end of the content. // TODO Account for NL versus CR+NL streams appendAscii(TRIPLE_QUOTES); printClobBytes(value, start, start + len, LONG_STRING_ESCAPE_CODES); appendAscii(TRIPLE_QUOTES); } else { appendAscii('"'); printClobBytes(value, start, start + len, STRING_ESCAPE_CODES); appendAscii('"'); } if (! _options._clob_as_string) { if (_options.isPrettyPrintOn()) { appendAscii(' '); } appendAscii("}}"); } } /** * Convert {@link BigInteger} to a {@link String}. *

* The current implementation of {@link BigInteger#toString()} is suboptimal * speed-wise, as it uses non-native division for arbitrary-sized integers * to convert the binary representation to a string. * This is inefficient for integers that can fit into a {@link Long}, where * native division can be used to convert the long to its string representation, * which is currently implemented in {@link Long#toString()}. *

* This method delegates to {@link Long#toString()} if it's possible to * do so, which results in a speedup from 2x to 5x. * * @param value the integer to convert * @return the string representation in base 10 */ private String bigIntegerToString(final BigInteger value) { if (value.bitLength() >= 64) { // if it's out of long range, the only way is through toString() return value.toString(); } else { return Long.toString(value.longValue()); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy