org.elasticsearch.hadoop.serialization.json.BackportedJsonStringEncoder Maven / Gradle / Ivy
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.hadoop.serialization.json;
import java.lang.ref.SoftReference;
import org.codehaus.jackson.util.BufferRecycler;
import org.codehaus.jackson.util.TextBuffer;
/**
* Backported class from Jackson 1.8.8 for Jackson 1.5.2
*
* Helper class used for efficient encoding of JSON String values (including
* JSON field names) into Strings or UTF-8 byte arrays.
*
* Note that methods in here are somewhat optimized, but not ridiculously so.
* Reason is that conversion method results are expected to be cached so that
* these methods will not be hot spots during normal operation.
*
* @since 1.6
*/
public class BackportedJsonStringEncoder {
private final static char[] HEX_CHARS_SOURCE = "0123456789ABCDEF".toCharArray();
private final static char[] HEX_CHARS = HEX_CHARS_SOURCE.clone();
/**
* Lookup table used for determining which output characters in
* 7-bit ASCII range need to be quoted.
*/
final static int[] sOutputEscapes128;
static {
int[] table = new int[128];
// Control chars need generic escape sequence
for (int i = 0; i < 32; ++i) {
// 04-Mar-2011, tatu: Used to use "-(i + 1)", replaced with constant
table[i] = -1;
}
/* Others (and some within that range too) have explicit shorter
* sequences
*/
table['"'] = '"';
table['\\'] = '\\';
// Escaping of slash is optional, so let's not add it
table[0x08] = 'b';
table[0x09] = 't';
table[0x0C] = 'f';
table[0x0A] = 'n';
table[0x0D] = 'r';
sOutputEscapes128 = table;
}
/**
* This ThreadLocal
contains a {@link java.lang.ref.SoftRerefence}
* to a {@link BufferRecycler} used to provide a low-cost
* buffer recycling between reader and writer instances.
*/
final protected static ThreadLocal> _threadEncoder = new ThreadLocal>();
/**
* Lazily constructed text buffer used to produce JSON encoded Strings
* as characters (without UTF-8 encoding)
*/
protected TextBuffer _textBuffer;
/**
* Temporary buffer used for composing quote/escape sequences
*/
protected final char[] _quoteBuffer;
public BackportedJsonStringEncoder() {
_quoteBuffer = new char[6];
_quoteBuffer[0] = '\\';
_quoteBuffer[2] = '0';
_quoteBuffer[3] = '0';
}
/**
* Factory method for getting an instance; this is either recycled per-thread instance,
* or a newly constructed one.
*/
public static BackportedJsonStringEncoder getInstance() {
SoftReference ref = _threadEncoder.get();
BackportedJsonStringEncoder enc = (ref == null) ? null : ref.get();
if (enc == null) {
enc = new BackportedJsonStringEncoder();
_threadEncoder.set(new SoftReference(enc));
}
return enc;
}
/**
* Method that will quote text contents using JSON standard quoting,
* and return results as a character array
*/
public char[] quoteAsString(String input) {
TextBuffer textBuffer = _textBuffer;
if (textBuffer == null) {
// no allocator; can add if we must, shouldn't need to
_textBuffer = textBuffer = new TextBuffer(null);
}
char[] outputBuffer = textBuffer.emptyAndGetCurrentSegment();
final int[] escCodes = sOutputEscapes128;
final int escCodeCount = escCodes.length;
int inPtr = 0;
final int inputLen = input.length();
int outPtr = 0;
outer_loop: while (inPtr < inputLen) {
tight_loop: while (true) {
char c = input.charAt(inPtr);
if (c < escCodeCount && escCodes[c] != 0) {
break tight_loop;
}
if (outPtr >= outputBuffer.length) {
outputBuffer = textBuffer.finishCurrentSegment();
outPtr = 0;
}
outputBuffer[outPtr++] = c;
if (++inPtr >= inputLen) {
break outer_loop;
}
}
// something to escape; 2 or 6-char variant?
int escCode = escCodes[input.charAt(inPtr++)];
int length = _appendSingleEscape(escCode, _quoteBuffer);
if ((outPtr + length) > outputBuffer.length) {
int first = outputBuffer.length - outPtr;
if (first > 0) {
System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, first);
}
outputBuffer = textBuffer.finishCurrentSegment();
int second = length - first;
System.arraycopy(_quoteBuffer, first, outputBuffer, outPtr, second);
outPtr += second;
}
else {
System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, length);
outPtr += length;
}
}
textBuffer.setCurrentLength(outPtr);
return textBuffer.contentsAsArray();
}
private int _appendSingleEscape(int escCode, char[] quoteBuffer) {
if (escCode < 0) { // control char, value -(char + 1)
int value = -(escCode + 1);
quoteBuffer[1] = 'u';
// We know it's a control char, so only the last 2 chars are non-0
quoteBuffer[4] = HEX_CHARS[value >> 4];
quoteBuffer[5] = HEX_CHARS[value & 0xF];
return 6;
}
quoteBuffer[1] = (char) escCode;
return 2;
}
}