All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.hadoop.serialization.json.BackportedJsonStringEncoder Maven / Gradle / Ivy

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.json;

import java.lang.ref.SoftReference;

import org.codehaus.jackson.util.BufferRecycler;
import org.codehaus.jackson.util.TextBuffer;

/**
 * Backported class from Jackson 1.8.8 for Jackson 1.5.2
 *
 * Helper class used for efficient encoding of JSON String values (including
 * JSON field names) into Strings or UTF-8 byte arrays.
 *

* Note that methods in here are somewhat optimized, but not ridiculously so. * Reason is that conversion method results are expected to be cached so that * these methods will not be hot spots during normal operation. * * @since 1.6 */ public class BackportedJsonStringEncoder { private final static char[] HEX_CHARS_SOURCE = "0123456789ABCDEF".toCharArray(); private final static char[] HEX_CHARS = HEX_CHARS_SOURCE.clone(); /** * Lookup table used for determining which output characters in * 7-bit ASCII range need to be quoted. */ final static int[] sOutputEscapes128; static { int[] table = new int[128]; // Control chars need generic escape sequence for (int i = 0; i < 32; ++i) { // 04-Mar-2011, tatu: Used to use "-(i + 1)", replaced with constant table[i] = -1; } /* Others (and some within that range too) have explicit shorter * sequences */ table['"'] = '"'; table['\\'] = '\\'; // Escaping of slash is optional, so let's not add it table[0x08] = 'b'; table[0x09] = 't'; table[0x0C] = 'f'; table[0x0A] = 'n'; table[0x0D] = 'r'; sOutputEscapes128 = table; } /** * This ThreadLocal contains a {@link java.lang.ref.SoftRerefence} * to a {@link BufferRecycler} used to provide a low-cost * buffer recycling between reader and writer instances. */ final protected static ThreadLocal> _threadEncoder = new ThreadLocal>(); /** * Lazily constructed text buffer used to produce JSON encoded Strings * as characters (without UTF-8 encoding) */ protected TextBuffer _textBuffer; /** * Temporary buffer used for composing quote/escape sequences */ protected final char[] _quoteBuffer; public BackportedJsonStringEncoder() { _quoteBuffer = new char[6]; _quoteBuffer[0] = '\\'; _quoteBuffer[2] = '0'; _quoteBuffer[3] = '0'; } /** * Factory method for getting an instance; this is either recycled per-thread instance, * or a newly constructed one. */ public static BackportedJsonStringEncoder getInstance() { SoftReference ref = _threadEncoder.get(); BackportedJsonStringEncoder enc = (ref == null) ? null : ref.get(); if (enc == null) { enc = new BackportedJsonStringEncoder(); _threadEncoder.set(new SoftReference(enc)); } return enc; } /** * Method that will quote text contents using JSON standard quoting, * and return results as a character array */ public char[] quoteAsString(String input) { TextBuffer textBuffer = _textBuffer; if (textBuffer == null) { // no allocator; can add if we must, shouldn't need to _textBuffer = textBuffer = new TextBuffer(null); } char[] outputBuffer = textBuffer.emptyAndGetCurrentSegment(); final int[] escCodes = sOutputEscapes128; final int escCodeCount = escCodes.length; int inPtr = 0; final int inputLen = input.length(); int outPtr = 0; outer_loop: while (inPtr < inputLen) { tight_loop: while (true) { char c = input.charAt(inPtr); if (c < escCodeCount && escCodes[c] != 0) { break tight_loop; } if (outPtr >= outputBuffer.length) { outputBuffer = textBuffer.finishCurrentSegment(); outPtr = 0; } outputBuffer[outPtr++] = c; if (++inPtr >= inputLen) { break outer_loop; } } // something to escape; 2 or 6-char variant? int escCode = escCodes[input.charAt(inPtr++)]; int length = _appendSingleEscape(escCode, _quoteBuffer); if ((outPtr + length) > outputBuffer.length) { int first = outputBuffer.length - outPtr; if (first > 0) { System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, first); } outputBuffer = textBuffer.finishCurrentSegment(); int second = length - first; System.arraycopy(_quoteBuffer, first, outputBuffer, outPtr, second); outPtr += second; } else { System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, length); outPtr += length; } } textBuffer.setCurrentLength(outPtr); return textBuffer.contentsAsArray(); } private int _appendSingleEscape(int escCode, char[] quoteBuffer) { if (escCode < 0) { // control char, value -(char + 1) int value = -(escCode + 1); quoteBuffer[1] = 'u'; // We know it's a control char, so only the last 2 chars are non-0 quoteBuffer[4] = HEX_CHARS[value >> 4]; quoteBuffer[5] = HEX_CHARS[value & 0xF]; return 6; } quoteBuffer[1] = (char) escCode; return 2; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy