All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.fasterxml.jackson.core.io.JsonStringEncoder Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
package com.fasterxml.jackson.core.io;

import java.io.IOException;
import java.util.Arrays;

import com.fasterxml.jackson.core.util.ByteArrayBuilder;
import com.fasterxml.jackson.core.util.TextBuffer;

/**
 * Helper class used for efficient encoding of JSON String values (including
 * JSON field names) into Strings or UTF-8 byte arrays.
 *

* Note that methods in here are somewhat optimized, but not ridiculously so. * Reason is that conversion method results are expected to be cached so that * these methods will not be hot spots during normal operation. */ public final class JsonStringEncoder { /* /********************************************************************** /* Constants /********************************************************************** */ private final static char[] HC = CharTypes.copyHexChars(true); private final static byte[] HB = CharTypes.copyHexBytes(true); private final static int SURR1_FIRST = 0xD800; private final static int SURR1_LAST = 0xDBFF; private final static int SURR2_FIRST = 0xDC00; private final static int SURR2_LAST = 0xDFFF; // 18-Aug-2021, tatu: [core#712] Change to more dynamic allocation; try // to estimate ok initial encoding buffer, switch to segmented for // possible (but rare) big content final static int MIN_CHAR_BUFFER_SIZE = 16; final static int MAX_CHAR_BUFFER_SIZE = 32000; // use segments beyond final static int MIN_BYTE_BUFFER_SIZE = 24; final static int MAX_BYTE_BUFFER_SIZE = 32000; // use segments beyond /* /********************************************************************** /* Construction, instance access /********************************************************************** */ // Since 2.10 we have stateless singleton and NO fancy ThreadLocal/SofRef caching!!! private final static JsonStringEncoder instance = new JsonStringEncoder(); public JsonStringEncoder() { } /** * Factory method for getting an instance; this is either recycled per-thread instance, * or a newly constructed one. * * @return Static stateless encoder instance */ public static JsonStringEncoder getInstance() { return instance; } /* /********************************************************************** /* Public API /********************************************************************** */ /** * Method that will escape text contents using JSON standard escaping, * and return results as a character array. * * @param input Value String to process * * @return JSON-escaped String matching {@code input} */ public char[] quoteAsString(String input) { final int inputLen = input.length(); char[] outputBuffer = new char[_initialCharBufSize(inputLen)]; final int[] escCodes = CharTypes.get7BitOutputEscapes(); final int escCodeCount = escCodes.length; int inPtr = 0; TextBuffer textBuffer = null; int outPtr = 0; char[] qbuf = null; outer: while (inPtr < inputLen) { tight_loop: while (true) { char c = input.charAt(inPtr); if (c < escCodeCount && escCodes[c] != 0) { break tight_loop; } if (outPtr >= outputBuffer.length) { if (textBuffer == null) { textBuffer = TextBuffer.fromInitial(outputBuffer); } try { outputBuffer = textBuffer.finishCurrentSegment(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } outPtr = 0; } outputBuffer[outPtr++] = c; if (++inPtr >= inputLen) { break outer; } } // something to escape; 2 or 6-char variant? if (qbuf == null) { qbuf = _qbuf(); } char d = input.charAt(inPtr++); int escCode = escCodes[d]; int length = (escCode < 0) ? _appendNumeric(d, qbuf) : _appendNamed(escCode, qbuf); if ((outPtr + length) > outputBuffer.length) { int first = outputBuffer.length - outPtr; if (first > 0) { System.arraycopy(qbuf, 0, outputBuffer, outPtr, first); } if (textBuffer == null) { textBuffer = TextBuffer.fromInitial(outputBuffer); } try { outputBuffer = textBuffer.finishCurrentSegment(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } int second = length - first; System.arraycopy(qbuf, first, outputBuffer, 0, second); outPtr = second; } else { System.arraycopy(qbuf, 0, outputBuffer, outPtr, length); outPtr += length; } } if (textBuffer == null) { return Arrays.copyOfRange(outputBuffer, 0, outPtr); } textBuffer.setCurrentLength(outPtr); try { return textBuffer.contentsAsArray(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } } /** * Overloaded variant of {@link #quoteAsString(String)}. * * @param input Value {@link CharSequence} to process * * @return JSON-escaped String matching {@code input} * * @since 2.10 */ public char[] quoteAsString(CharSequence input) { // 15-Aug-2019, tatu: Optimize common case as JIT can't get rid of overhead otherwise if (input instanceof String) { return quoteAsString((String) input); } TextBuffer textBuffer = null; final int inputLen = input.length(); char[] outputBuffer = new char[_initialCharBufSize(inputLen)]; final int[] escCodes = CharTypes.get7BitOutputEscapes(); final int escCodeCount = escCodes.length; int inPtr = 0; int outPtr = 0; char[] qbuf = null; outer: while (inPtr < inputLen) { tight_loop: while (true) { char c = input.charAt(inPtr); if (c < escCodeCount && escCodes[c] != 0) { break tight_loop; } if (outPtr >= outputBuffer.length) { if (textBuffer == null) { textBuffer = TextBuffer.fromInitial(outputBuffer); } try { outputBuffer = textBuffer.finishCurrentSegment(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } outPtr = 0; } outputBuffer[outPtr++] = c; if (++inPtr >= inputLen) { break outer; } } // something to escape; 2 or 6-char variant? if (qbuf == null) { qbuf = _qbuf(); } char d = input.charAt(inPtr++); int escCode = escCodes[d]; int length = (escCode < 0) ? _appendNumeric(d, qbuf) : _appendNamed(escCode, qbuf); if ((outPtr + length) > outputBuffer.length) { int first = outputBuffer.length - outPtr; if (first > 0) { System.arraycopy(qbuf, 0, outputBuffer, outPtr, first); } if (textBuffer == null) { textBuffer = TextBuffer.fromInitial(outputBuffer); } try { outputBuffer = textBuffer.finishCurrentSegment(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } int second = length - first; System.arraycopy(qbuf, first, outputBuffer, 0, second); outPtr = second; } else { System.arraycopy(qbuf, 0, outputBuffer, outPtr, length); outPtr += length; } } if (textBuffer == null) { return Arrays.copyOfRange(outputBuffer, 0, outPtr); } textBuffer.setCurrentLength(outPtr); try { return textBuffer.contentsAsArray(); } catch (IOException e) { // IOException won't happen here, can only occur when ReadConstrainedTextBuffer is used throw new IllegalStateException(e); } } /** * Method that will quote text contents using JSON standard quoting, * and append results to a supplied {@link StringBuilder}. * Use this variant if you have e.g. a {@link StringBuilder} and want to avoid superfluous copying of it. * * @param input Value {@link CharSequence} to process * @param output {@link StringBuilder} to append escaped contents to * * @since 2.8 */ public void quoteAsString(CharSequence input, StringBuilder output) { final int[] escCodes = CharTypes.get7BitOutputEscapes(); final int escCodeCount = escCodes.length; int inPtr = 0; final int inputLen = input.length(); char[] qbuf = null; outer: while (inPtr < inputLen) { tight_loop: while (true) { char c = input.charAt(inPtr); if (c < escCodeCount && escCodes[c] != 0) { break tight_loop; } output.append(c); if (++inPtr >= inputLen) { break outer; } } // something to escape; 2 or 6-char variant? if (qbuf == null) { qbuf = _qbuf(); } char d = input.charAt(inPtr++); int escCode = escCodes[d]; int length = (escCode < 0) ? _appendNumeric(d, qbuf) : _appendNamed(escCode, qbuf); output.append(qbuf, 0, length); } } /** * Method that will escape text contents using JSON standard escaping, * encode resulting String as UTF-8 bytes * and return results as a byte array. * * @param text Value {@link String} to process * * @return UTF-8 encoded bytes of JSON-escaped {@code text} */ @SuppressWarnings("resource") public byte[] quoteAsUTF8(String text) { int inputPtr = 0; int inputEnd = text.length(); int outputPtr = 0; byte[] outputBuffer = new byte[_initialByteBufSize(inputEnd)]; ByteArrayBuilder bb = null; main: while (inputPtr < inputEnd) { final int[] escCodes = CharTypes.get7BitOutputEscapes(); inner_loop: // ASCII and escapes while (true) { int ch = text.charAt(inputPtr); if (ch > 0x7F || escCodes[ch] != 0) { break inner_loop; } if (outputPtr >= outputBuffer.length) { if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } outputBuffer[outputPtr++] = (byte) ch; if (++inputPtr >= inputEnd) { break main; } } if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } if (outputPtr >= outputBuffer.length) { outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } // Ok, so what did we hit? int ch = text.charAt(inputPtr++); if (ch <= 0x7F) { // needs quoting int escape = escCodes[ch]; // ctrl-char, 6-byte escape... outputPtr = _appendByte(ch, escape, bb, outputPtr); outputBuffer = bb.getCurrentSegment(); continue main; } if (ch <= 0x7FF) { // fine, just needs 2 byte output outputBuffer[outputPtr++] = (byte) (0xc0 | (ch >> 6)); ch = (0x80 | (ch & 0x3f)); } else { // 3 or 4 bytes // Surrogates? if (ch < SURR1_FIRST || ch > SURR2_LAST) { // nope outputBuffer[outputPtr++] = (byte) (0xe0 | (ch >> 12)); if (outputPtr >= outputBuffer.length) { outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f)); ch = (0x80 | (ch & 0x3f)); } else { // yes, surrogate pair if (ch > SURR1_LAST) { // must be from first range _illegal(ch); } // and if so, followed by another from next range if (inputPtr >= inputEnd) { _illegal(ch); } ch = _convert(ch, text.charAt(inputPtr++)); if (ch > 0x10FFFF) { // illegal, as per RFC 4627 _illegal(ch); } outputBuffer[outputPtr++] = (byte) (0xf0 | (ch >> 18)); if (outputPtr >= outputBuffer.length) { outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 12) & 0x3f)); if (outputPtr >= outputBuffer.length) { outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((ch >> 6) & 0x3f)); ch = (0x80 | (ch & 0x3f)); } } if (outputPtr >= outputBuffer.length) { outputBuffer = bb.finishCurrentSegment(); outputPtr = 0; } outputBuffer[outputPtr++] = (byte) ch; } if (bb == null) { return Arrays.copyOfRange(outputBuffer, 0, outputPtr); } return bb.completeAndCoalesce(outputPtr); } /** * Will encode given String as UTF-8 (without any escaping) and return * the resulting byte array. * * @param text Value {@link String} to process * * @return UTF-8 encoded bytes of {@code text} (without any escaping) */ @SuppressWarnings("resource") public byte[] encodeAsUTF8(String text) { int inputPtr = 0; int inputEnd = text.length(); int outputPtr = 0; byte[] outputBuffer = new byte[_initialByteBufSize(inputEnd)]; int outputEnd = outputBuffer.length; ByteArrayBuilder bb = null; main_loop: while (inputPtr < inputEnd) { int c = text.charAt(inputPtr++); // first tight loop for ascii while (c <= 0x7F) { if (outputPtr >= outputEnd) { if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) c; if (inputPtr >= inputEnd) { break main_loop; } c = text.charAt(inputPtr++); } // then multi-byte... if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } if (c < 0x800) { // 2-byte outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6)); } else { // 3 or 4 bytes // Surrogates? if (c < SURR1_FIRST || c > SURR2_LAST) { // nope outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); } else { // yes, surrogate pair if (c > SURR1_LAST) { // must be from first range _illegal(c); } // and if so, followed by another from next range if (inputPtr >= inputEnd) { _illegal(c); } c = _convert(c, text.charAt(inputPtr++)); if (c > 0x10FFFF) { // illegal, as per RFC 4627 _illegal(c); } outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); } } if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f)); } if (bb == null) { return Arrays.copyOfRange(outputBuffer, 0, outputPtr); } return bb.completeAndCoalesce(outputPtr); } /** * Overloaded variant of {@link #encodeAsUTF8(String)}. * * @param text Value {@link CharSequence} to process * * @return UTF-8 encoded bytes of {@code text} (without any escaping) * * @since 2.11 */ @SuppressWarnings("resource") public byte[] encodeAsUTF8(CharSequence text) { int inputPtr = 0; int inputEnd = text.length(); int outputPtr = 0; byte[] outputBuffer = new byte[_initialByteBufSize(inputEnd)]; int outputEnd = outputBuffer.length; ByteArrayBuilder bb = null; main_loop: while (inputPtr < inputEnd) { int c = text.charAt(inputPtr++); // first tight loop for ascii while (c <= 0x7F) { if (outputPtr >= outputEnd) { if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) c; if (inputPtr >= inputEnd) { break main_loop; } c = text.charAt(inputPtr++); } // then multi-byte... if (bb == null) { bb = ByteArrayBuilder.fromInitial(outputBuffer, outputPtr); } if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } if (c < 0x800) { // 2-byte outputBuffer[outputPtr++] = (byte) (0xc0 | (c >> 6)); } else { // 3 or 4 bytes // Surrogates? if (c < SURR1_FIRST || c > SURR2_LAST) { // nope outputBuffer[outputPtr++] = (byte) (0xe0 | (c >> 12)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); } else { // yes, surrogate pair if (c > SURR1_LAST) { // must be from first range _illegal(c); } // and if so, followed by another from next range if (inputPtr >= inputEnd) { _illegal(c); } c = _convert(c, text.charAt(inputPtr++)); if (c > 0x10FFFF) { // illegal, as per RFC 4627 _illegal(c); } outputBuffer[outputPtr++] = (byte) (0xf0 | (c >> 18)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 12) & 0x3f)); if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | ((c >> 6) & 0x3f)); } } if (outputPtr >= outputEnd) { outputBuffer = bb.finishCurrentSegment(); outputEnd = outputBuffer.length; outputPtr = 0; } outputBuffer[outputPtr++] = (byte) (0x80 | (c & 0x3f)); } if (bb == null) { return Arrays.copyOfRange(outputBuffer, 0, outputPtr); } return bb.completeAndCoalesce(outputPtr); } /* /********************************************************************** /* Internal methods /********************************************************************** */ private char[] _qbuf() { char[] qbuf = new char[6]; qbuf[0] = '\\'; qbuf[2] = '0'; qbuf[3] = '0'; return qbuf; } private int _appendNumeric(int value, char[] qbuf) { qbuf[1] = 'u'; // We know it's a control char, so only the last 2 chars are non-0 qbuf[4] = HC[value >> 4]; qbuf[5] = HC[value & 0xF]; return 6; } private int _appendNamed(int esc, char[] qbuf) { qbuf[1] = (char) esc; return 2; } private int _appendByte(int ch, int esc, ByteArrayBuilder bb, int ptr) { bb.setCurrentSegmentLength(ptr); bb.append('\\'); if (esc < 0) { // standard escape bb.append('u'); if (ch > 0xFF) { int hi = (ch >> 8); bb.append(HB[hi >> 4]); bb.append(HB[hi & 0xF]); ch &= 0xFF; } else { bb.append('0'); bb.append('0'); } bb.append(HB[ch >> 4]); bb.append(HB[ch & 0xF]); } else { // 2-char simple escape bb.append((byte) esc); } return bb.getCurrentSegmentLength(); } private static int _convert(int p1, int p2) { // Ok, then, is the second part valid? if (p2 < SURR2_FIRST || p2 > SURR2_LAST) { throw new IllegalArgumentException("Broken surrogate pair: first char 0x"+Integer.toHexString(p1)+", second 0x"+Integer.toHexString(p2)+"; illegal combination"); } return (p1 << 10) + p2 + UTF8Writer.SURROGATE_BASE; } private static void _illegal(int c) { throw new IllegalArgumentException(UTF8Writer.illegalSurrogateDesc(c)); } // non-private for unit test access static int _initialCharBufSize(int strLen) { // char->char won't expand but we need to give some room for escaping // like 1/8 (12.5% expansion) but cap addition to something modest final int estimated = Math.max(MIN_CHAR_BUFFER_SIZE, strLen + Math.min(6 + (strLen >> 3), 1000)); return Math.min(estimated, MAX_CHAR_BUFFER_SIZE); } // non-private for unit test access static int _initialByteBufSize(int strLen) { // char->byte for UTF-8 can expand size by x3 itself, and escaping // more... but let's use lower factor of 1.5 final int doubled = Math.max(MIN_BYTE_BUFFER_SIZE, strLen + 6 + (strLen>>1)); // but use upper bound for humongous cases (segmented) return Math.min(doubled, MAX_BYTE_BUFFER_SIZE); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy