org.owasp.encoder.JavaEncoder Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of aem-sdk-api Show documentation
The Adobe Experience Manager SDK
There is a newer version: 2024.11.18751.20241128T090041Z-241100
// Copyright (c) 2012 Jeff Ichnowski
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
//
//     * Redistributions of source code must retain the above
//       copyright notice, this list of conditions and the following
//       disclaimer.
//
//     * Redistributions in binary form must reproduce the above
//       copyright notice, this list of conditions and the following
//       disclaimer in the documentation and/or other materials
//       provided with the distribution.
//
//     * Neither the name of the OWASP nor the names of its
//       contributors may be used to endorse or promote products
//       derived from this software without specific prior written
//       permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
// FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
// COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT,
// INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
// HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
// OF THE POSSIBILITY OF SUCH DAMAGE.
package org.owasp.encoder;

import java.nio.CharBuffer;
import java.nio.charset.CoderResult;

/**
 * JavaEncoder -- Encoder for Java based strings. Intended for Java code
 * generators that need to emit compactly encoded strings for arbitrary data.
 * Each character is written using the shortest escape available for it:
 * the standard backslash escapes (e.g. "\n", "\\", "\'"), octal escapes, or
 * unicode escapes. This encoder does NOT check UTF-16 surrogate pair
 * sequences; every char is escaped on its own. The target output context
 * supports mismatched UTF-16 pairs (e.g. it will compile, run, etc... with
 * them).
 *
 * @author Jeff Ichnowski
 */
class JavaEncoder extends Encoder {

    /**
     * The length of a Unicode escape, e.g. "\\u1234".
     */
    static final int U_ESCAPE_LENGTH = 6;
    /**
     * The length of an octal escape sequence, e.g. "\377".
     */
    static final int OCT_ESCAPE_LENGTH = 4;
    /**
     * Number of bits to shift for each octal unit.
     */
    static final int OCT_SHIFT = 3;
    /**
     * The bit-mask for an octal unit.
     */
    static final int OCT_MASK = 7;

    /**
     * Sentinel returned by {@link #backslashEscapeFor(char)} when the
     * character has no two-character backslash escape.
     */
    private static final char NO_BACKSLASH_ESCAPE = '\0';

    /**
     * Tells whether a character is copied to the output unchanged, i.e. it
     * lies in the range ' ' to '~' and is not a backslash, single quote or
     * double quote.
     *
     * @param ch the character to test
     * @return true if the character needs no escaping
     */
    private static boolean passesThrough(char ch) {
        if (ch < ' ' || ch > '~') {
            return false;
        }
        return ch != '\\' && ch != '\'' && ch != '\"';
    }

    /**
     * Looks up the character that follows the backslash in a two-character
     * escape sequence.
     *
     * @param ch the character to escape
     * @return the second character of the escape, or
     *         {@link #NO_BACKSLASH_ESCAPE} if {@code ch} has no such escape
     */
    private static char backslashEscapeFor(char ch) {
        switch (ch) {
            case '\\':
            case '\'':
            case '\"':
                // these escape to themselves
                return ch;
            case '\b':
                return 'b';
            case '\t':
                return 't';
            case '\n':
                return 'n';
            case '\f':
                return 'f';
            case '\r':
                return 'r';
            default:
                return NO_BACKSLASH_ESCAPE;
        }
    }

    @Override
    protected int maxEncodedLength(int n) {
        // worst case is every character becoming "\\u####"
        return U_ESCAPE_LENGTH * n;
    }

    @Override
    protected int firstEncodedOffset(String input, int off, int len) {
        final int end = off + len;
        int pos = off;
        while (pos < end) {
            if (!passesThrough(input.charAt(pos))) {
                return pos;
            }
            pos++;
        }
        // nothing in the range requires encoding
        return end;
    }

    @Override
    protected CoderResult encodeArrays(CharBuffer input, CharBuffer output, boolean endOfInput) {
        final char[] src = input.array();
        final char[] dst = output.array();
        final int srcBase = input.arrayOffset();
        final int dstBase = output.arrayOffset();
        int r = srcBase + input.position();
        final int rEnd = srcBase + input.limit();
        int w = dstBase + output.position();
        final int wEnd = dstBase + output.limit();

        for (; r < rEnd; ++r) {
            final char ch = src[r];

            // Common case first: the character is emitted as-is.
            if (passesThrough(ch)) {
                if (w >= wEnd) {
                    return overflow(input, r, output, w);
                }
                dst[w++] = ch;
                continue;
            }

            // Two-character backslash escapes such as "\n" or "\\".
            final char escapeLetter = backslashEscapeFor(ch);
            if (escapeLetter != NO_BACKSLASH_ESCAPE) {
                if (wEnd - w < 2) {
                    return overflow(input, r, output, w);
                }
                dst[w++] = '\\';
                dst[w++] = escapeLetter;
                continue;
            }

            // Everything above '\377' needs the six-character unicode escape.
            if (ch > '\377') {
                if (wEnd - w < U_ESCAPE_LENGTH) {
                    return overflow(input, r, output, w);
                }
                dst[w++] = '\\';
                dst[w++] = 'u';
                dst[w++] = HEX[ch >>> (3 * HEX_SHIFT)];
                dst[w++] = HEX[(ch >>> (2 * HEX_SHIFT)) & HEX_MASK];
                dst[w++] = HEX[(ch >>> HEX_SHIFT) & HEX_MASK];
                dst[w++] = HEX[ch & HEX_MASK];
                continue;
            }

            if (ch <= '\37') {
                // "Short" octal escapes ('\0' to '\37') would absorb a
                // following '0' to '7' as part of the escape, so they may
                // only be used once the next character is known.
                boolean octalDigitFollows = false;
                if (r + 1 < rEnd) {
                    final char next = src[r + 1];
                    octalDigitFollows = '0' <= next && next <= '7';
                } else if (!endOfInput) {
                    // The next character has not arrived yet; stop here and
                    // leave this character unconsumed.
                    break;
                }

                if (!octalDigitFollows) {
                    if (ch <= '\7') {
                        // single octal digit
                        if (wEnd - w < 2) {
                            return overflow(input, r, output, w);
                        }
                        dst[w++] = '\\';
                        dst[w++] = (char) (ch + '0');
                    } else {
                        // two octal digits
                        if (wEnd - w < 3) {
                            return overflow(input, r, output, w);
                        }
                        dst[w++] = '\\';
                        dst[w++] = (char) ((ch >>> OCT_SHIFT) + '0');
                        dst[w++] = (char) ((ch & OCT_MASK) + '0');
                    }
                    continue;
                }
                // otherwise fall through to the full-length octal escape
            }

            // Full three-digit octal escape, "\000" to "\377".
            if (wEnd - w < OCT_ESCAPE_LENGTH) {
                return overflow(input, r, output, w);
            }
            dst[w++] = '\\';
            dst[w++] = (char) ((ch >>> (2 * OCT_SHIFT)) + '0');
            dst[w++] = (char) (((ch >>> OCT_SHIFT) & OCT_MASK) + '0');
            dst[w++] = (char) ((ch & OCT_MASK) + '0');
        }

        return underflow(input, r, output, w);
    }
}