org.elasticsearch.hadoop.serialization.json.BackportedJsonStringEncoder Maven / Gradle / Ivy
Show all versions of elasticsearch-hadoop-mr Show documentation
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.elasticsearch.hadoop.serialization.json;
import java.lang.ref.SoftReference;
import org.codehaus.jackson.util.BufferRecycler;
import org.codehaus.jackson.util.TextBuffer;
/**
 * Backported class from Jackson 1.8.8 for Jackson 1.5.2
 *
 * Helper class used for efficient encoding of JSON String values (including
 * JSON field names) into Strings or UTF-8 byte arrays.
 *
 * Note that methods in here are somewhat optimized, but not ridiculously so.
 * Reason is that conversion method results are expected to be cached so that
 * these methods will not be hot spots during normal operation.
 *
 * @since 1.6
 */
public class BackportedJsonStringEncoder {
    private final static char[] HEX_CHARS_SOURCE = "0123456789ABCDEF".toCharArray();
    private final static char[] HEX_CHARS = HEX_CHARS_SOURCE.clone();
    /**
     * Lookup table used for determining which output characters in
     * 7-bit ASCII range need to be quoted.
     */
    final static int[] sOutputEscapes128;
    static {
        int[] table = new int[128];
        // Control chars need generic escape sequence
        for (int i = 0; i < 32; ++i) {
            // 04-Mar-2011, tatu: Used to use "-(i + 1)", replaced with constant
            table[i] = -1;
        }
        /* Others (and some within that range too) have explicit shorter
         * sequences
         */
        table['"'] = '"';
        table['\\'] = '\\';
        // Escaping of slash is optional, so let's not add it
        table[0x08] = 'b';
        table[0x09] = 't';
        table[0x0C] = 'f';
        table[0x0A] = 'n';
        table[0x0D] = 'r';
        sOutputEscapes128 = table;
    }
    /**
     * This ThreadLocal contains a {@link java.lang.ref.SoftRerefence}
     * to a {@link BufferRecycler} used to provide a low-cost
     * buffer recycling between reader and writer instances.
     */
    final protected static ThreadLocal> _threadEncoder = new ThreadLocal>();
    /**
     * Lazily constructed text buffer used to produce JSON encoded Strings
     * as characters (without UTF-8 encoding)
     */
    protected TextBuffer _textBuffer;
    /**
     * Temporary buffer used for composing quote/escape sequences
     */
    protected final char[] _quoteBuffer;
    public BackportedJsonStringEncoder() {
        _quoteBuffer = new char[6];
        _quoteBuffer[0] = '\\';
        _quoteBuffer[2] = '0';
        _quoteBuffer[3] = '0';
    }
    /**
     * Factory method for getting an instance; this is either recycled per-thread instance,
     * or a newly constructed one.
     */
    public static BackportedJsonStringEncoder getInstance() {
        SoftReference ref = _threadEncoder.get();
        BackportedJsonStringEncoder enc = (ref == null) ? null : ref.get();
        if (enc == null) {
            enc = new BackportedJsonStringEncoder();
            _threadEncoder.set(new SoftReference(enc));
        }
        return enc;
    }
    /**
     * Method that will quote text contents using JSON standard quoting,
     * and return results as a character array
     */
    public char[] quoteAsString(String input) {
        TextBuffer textBuffer = _textBuffer;
        if (textBuffer == null) {
            // no allocator; can add if we must, shouldn't need to
            _textBuffer = textBuffer = new TextBuffer(null);
        }
        char[] outputBuffer = textBuffer.emptyAndGetCurrentSegment();
        final int[] escCodes = sOutputEscapes128;
        final int escCodeCount = escCodes.length;
        int inPtr = 0;
        final int inputLen = input.length();
        int outPtr = 0;
        outer_loop: while (inPtr < inputLen) {
            tight_loop: while (true) {
                char c = input.charAt(inPtr);
                if (c < escCodeCount && escCodes[c] != 0) {
                    break tight_loop;
                }
                if (outPtr >= outputBuffer.length) {
                    outputBuffer = textBuffer.finishCurrentSegment();
                    outPtr = 0;
                }
                outputBuffer[outPtr++] = c;
                if (++inPtr >= inputLen) {
                    break outer_loop;
                }
            }
        // something to escape; 2 or 6-char variant?
        int escCode = escCodes[input.charAt(inPtr++)];
        int length = _appendSingleEscape(escCode, _quoteBuffer);
        if ((outPtr + length) > outputBuffer.length) {
            int first = outputBuffer.length - outPtr;
            if (first > 0) {
                System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, first);
            }
            outputBuffer = textBuffer.finishCurrentSegment();
            int second = length - first;
            System.arraycopy(_quoteBuffer, first, outputBuffer, outPtr, second);
            outPtr += second;
        }
        else {
            System.arraycopy(_quoteBuffer, 0, outputBuffer, outPtr, length);
            outPtr += length;
        }
        }
        textBuffer.setCurrentLength(outPtr);
        return textBuffer.contentsAsArray();
    }
    private int _appendSingleEscape(int escCode, char[] quoteBuffer) {
        if (escCode < 0) { // control char, value -(char + 1)
            int value = -(escCode + 1);
            quoteBuffer[1] = 'u';
            // We know it's a control char, so only the last 2 chars are non-0
            quoteBuffer[4] = HEX_CHARS[value >> 4];
            quoteBuffer[5] = HEX_CHARS[value & 0xF];
            return 6;
        }
        quoteBuffer[1] = (char) escCode;
        return 2;
    }
}