com.sun.enterprise.admin.util.TokenizerImpl Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of payara-micro Show documentation
Micro Distribution of the Payara Project
There is a newer version: 7.2024.1.Alpha1
/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2012 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */
package com.sun.enterprise.admin.util;

import java.text.StringCharacterIterator;
import java.util.ArrayList;
import java.util.Iterator;

/**
 * Thrown while tokenizing when the character that follows the escape
 * character does not form a recognized escape sequence.
 */
class IllegalEscapeSequenceException extends TokenizerException {

    /**
     * Creates the exception with the given detail message.
     *
     * @param msg detail message, handed unchanged to the superclass constructor
     */
    public IllegalEscapeSequenceException(final String msg) {
        super(msg);
    }
}

/**
 * Thrown while tokenizing when the input ends while a quoted literal string
 * is still open.
 */
class UnterminatedLiteralStringException extends TokenizerException {

    /**
     * Creates the exception with the given detail message.
     *
     * @param msg detail message, handed unchanged to the superclass constructor
     */
    public UnterminatedLiteralStringException(final String msg) {
        super(msg);
    }
}

/**
 * Specialization of {@link IllegalEscapeSequenceException} thrown when a
 * unicode escape is not followed by four hexadecimal digits.
 */
class MalformedUnicodeSequenceException extends IllegalEscapeSequenceException {

    /**
     * Creates the exception with the given detail message.
     *
     * @param msg detail message, handed unchanged to the superclass constructor
     */
    public MalformedUnicodeSequenceException(final String msg) {
        super(msg);
    }
}

/**
 * {@link Tokenizer} implementation that splits an input string on a set of
 * delimiter characters while honoring escape sequences and quoted literal
 * strings. All parsing happens in the constructor; the resulting tokens are
 * then available from {@link #getTokens()}.
 */
public final class TokenizerImpl implements Tokenizer {

    // Not referenced anywhere inside this class.
    private static final char QUOTE_CHAR = '\"';

    final String[] mTokens;

    /**
     * Tokenizes {@code input}, treating a run of consecutive delimiters as a
     * single delimiter.
     *
     * @param input          the string to tokenize
     * @param delimiters     the characters that separate tokens
     * @param escapeChar     the character that introduces an escape sequence
     * @param escapableChars characters that stand for themselves when they
     *                       follow {@code escapeChar}
     * @throws TokenizerException if the input cannot be parsed
     */
    public TokenizerImpl(
            String input,
            String delimiters,
            char escapeChar,
            String escapableChars)
            throws TokenizerException {
        this(input, delimiters, true, escapeChar, escapableChars);
    }

    /**
     * Tokenizes {@code input}.
     *
     * @param input                    the string to tokenize
     * @param delimiters               the characters that separate tokens
     * @param multipleDelimsCountAsOne when {@code true}, consecutive delimiters
     *                                 are collapsed into one before the token
     *                                 list is interpreted
     * @param escapeChar               the character that introduces an escape
     *                                 sequence
     * @param escapableChars           characters that stand for themselves
     *                                 when they follow {@code escapeChar}
     * @throws TokenizerException if the input cannot be parsed
     */
    public TokenizerImpl(
            String input,
            String delimiters,
            boolean multipleDelimsCountAsOne,
            char escapeChar,
            String escapableChars)
            throws TokenizerException {
        final ArrayList parsed =
                new TokenizerInternal(input, delimiters, escapeChar, escapableChars).parseTokens();

        mTokens = interpretTokenList(
                multipleDelimsCountAsOne ? removeMultipleDelims(parsed) : parsed);
    }

    /**
     * Returns a copy of {@code list} in which every run of adjacent delimiter
     * markers is reduced to its first marker. Any element that is not a
     * {@code String} is considered a delimiter marker.
     *
     * @param list parsed token list of strings and delimiter markers
     * @return a new list; {@code list} itself is not modified
     */
    static ArrayList removeMultipleDelims(ArrayList list) {
        final ArrayList<Object> collapsed = new ArrayList<Object>();

        boolean previousWasDelim = false;
        for (final Object item : list) {
            final boolean itemIsDelim = !(item instanceof String);
            if (itemIsDelim && previousWasDelim) {
                // second or later delimiter of a run: drop it
                continue;
            }
            collapsed.add(item);
            previousWasDelim = itemIsDelim;
        }

        return collapsed;
    }

    /**
     * Converts the parsed list of strings and delimiter markers into the final
     * token array. A delimiter at the start of the list, at the end of the
     * list, or directly after another delimiter implies an empty token at that
     * position. Examples, written as the input that yields the list, with
     * {@code .} as the delimiter and no collapsing of repeated delimiters:
     * <pre>
     * ""    =&gt; { }
     * "."   =&gt; { "", "" }
     * "..." =&gt; { "", "", "", "" }
     * "x."  =&gt; { "x", "" }
     * ".x"  =&gt; { "", "x" }
     * "y.x" =&gt; { "y", "x" }
     * </pre>
     *
     * @param list parsed token list of strings and delimiter markers
     * @return the tokens, in order
     */
    static String[] interpretTokenList(ArrayList list) {
        final ArrayList<String> strings = new ArrayList<String>();

        // Starting out "after a delimiter" makes a leading delimiter emit "".
        boolean previousWasDelim = true;
        for (final Object item : list) {
            if (item instanceof String) {
                strings.add((String) item);
                previousWasDelim = false;
            } else if (previousWasDelim) {
                // delimiter with nothing before it: the implicit empty string
                strings.add("");
            } else {
                previousWasDelim = true;
            }
        }

        // a trailing delimiter implies an empty string after it
        if (previousWasDelim && !list.isEmpty()) {
            strings.add("");
        }

        return strings.toArray(new String[strings.size()]);
    }

    /**
     * Returns the tokens computed by the constructor. The array returned is
     * the one held by this object, not a copy.
     */
    @Override
    public String[] getTokens() {
        return mTokens;
    }
}

/**
 * Low-level scanner used by {@link TokenizerImpl}. It walks the input one
 * character at a time and produces a flat list whose elements are either a
 * {@code String} (token text) or the {@code DELIM} marker (one delimiter).
 * The instance holds a single iterator over the input that is never reset, so
 * a second call to {@link #parseTokens()} finds no characters left.
 */
final class TokenizerInternal {

    private static final char QUOTE_CHAR = '\"';
    private static final char TAB_CHAR = '\t';

    final String mDelimiters;
    final char mEscapeChar;
    final String mEscapableChars;
    final StringCharacterIterator mIter;

    // a distinct object used to denote a delimiter
    private static class Delim {

        private Delim() {
        }

        public static Delim getInstance() {
            return (new Delim());
        }

        @Override
        public String toString() {
            return ("");
        }
    }
    final static Delim DELIM = Delim.getInstance();

    /**
     * @param input          the string to scan
     * @param delimiters     the characters that separate tokens
     * @param escapeChar     the character that introduces an escape sequence
     * @param escapableChars characters that stand for themselves when they
     *                       follow {@code escapeChar}
     */
    public TokenizerInternal(
            String input,
            String delimiters,
            char escapeChar,
            String escapableChars) {
        mDelimiters = delimiters;
        mEscapeChar = escapeChar;
        mEscapableChars = escapableChars;
        mIter = new StringCharacterIterator(input);
    }

    /**
     * Tells whether {@code theChar} is one of the built-in escape letters
     * {@code n}, {@code r}, {@code t}, or the double quote. The letter
     * {@code u} is accepted by {@link #getEscapedChar(char)} but is not
     * reported here.
     */
    static boolean isSpecialEscapeChar(char theChar) {
        return (theChar == 'n' || theChar == 'r' || theChar == 't' || theChar == QUOTE_CHAR);
    }

    /**
     * Tells whether {@code theChar} is in the caller-supplied set of escapable
     * characters or is the escape character itself.
     */
    boolean isCallerProvidedEscapableChar(char theChar) {
        return (mEscapableChars.indexOf(theChar) >= 0 || theChar == mEscapeChar);
    }

    /**
     * Tells whether {@code theChar} is caller-escapable or one of the built-in
     * escape letters.
     */
    boolean isEscapableChar(char theChar) {
        return (isCallerProvidedEscapableChar(theChar) || isSpecialEscapeChar(theChar));
    }

    /**
     * Tells whether {@code theChar} occurs in {@code delims}. The iterator's
     * end-of-input sentinel is also reported as a delimiter.
     */
    boolean isDelim(String delims, char theChar) {
        return (delims.indexOf(theChar) >= 0 || theChar == StringCharacterIterator.DONE);
    }

    /** Tells whether {@code theChar} is an ASCII decimal digit. */
    static boolean isDigit(char theChar) {
        return ((theChar >= '0' && theChar <= '9'));
    }

    /** Tells whether {@code theChar} is one of 0-9, a-f or A-F. */
    static boolean isHexDigit(char theChar) {
        return (isDigit(theChar) || (theChar >= 'a' && theChar <= 'f') || isUpper(theChar));
    }

    /**
     * Tells whether {@code c} is an upper-case hex letter. Despite the name,
     * only the range A-F is tested, not the whole upper-case alphabet.
     */
    static boolean isUpper(char c) {
        return ((c >= 'A' && c <= 'F'));
    }

    /** Tells whether the iterator is positioned on a character of the input. */
    boolean hasMoreChars() {
        return (mIter.current() != StringCharacterIterator.DONE);
    }

    /**
     * Returns the character at the current position and advances by one. At
     * the end of the input the iterator's end-of-input sentinel is returned.
     */
    char nextChar() {
        final char theChar = mIter.current();
        mIter.next();

        return (theChar);
    }

    /**
     * Consumes the next four characters and combines them, most significant
     * digit first, into one 16-bit character.
     *
     * @return the decoded character
     * @throws MalformedUnicodeSequenceException if any of the four characters
     *         is not a hex digit, which includes running off the end of input
     */
    char decodeUnicodeSequence()
            throws MalformedUnicodeSequenceException {
        int value = 0;

        try {
            for (int i = 0; i < 4; ++i) {
                value = (value << 4) | hexValue(nextChar());
            }
        } catch (IllegalArgumentException e) {
            // hexValue() is the only call here that can throw
            throw new MalformedUnicodeSequenceException("");
        }

        return ((char) value);
    }

    /**
     * Returns the numeric value (0 to 15) of a hexadecimal digit.
     *
     * @param c one of 0-9, a-f or A-F
     * @return 0-9 for a decimal digit, 10-15 for a letter of either case
     * @throws IllegalArgumentException if {@code c} is not a hex digit
     */
    static int hexValue(char c) {
        if (isDigit(c)) {
            return c - '0';
        }
        if (isUpper(c)) {
            // letters start at ten, not zero
            return c - 'A' + 10;
        }
        if (c >= 'a' && c <= 'f') {
            return c - 'a' + 10;
        }
        throw new IllegalArgumentException("not a hexadecimal digit: " + c);
    }

    /**
     * Translates the character that followed the escape character into the
     * character it stands for. Caller-supplied escapable characters and the
     * escape character itself map to themselves. Otherwise {@code n},
     * {@code r} and {@code t} map to newline, carriage return and tab, the
     * double quote maps to itself, and {@code u} starts a four-hex-digit
     * sequence that is read from the input.
     *
     * @param inputChar the character immediately after the escape character
     * @return the character to append to the current token
     * @throws MalformedUnicodeSequenceException if a unicode sequence is not
     *         four hex digits
     * @throws IllegalEscapeSequenceException if {@code inputChar} does not
     *         begin any recognized escape sequence
     */
    char getEscapedChar(final char inputChar)
            throws MalformedUnicodeSequenceException, IllegalEscapeSequenceException {
        if (isCallerProvidedEscapableChar(inputChar)) {
            return inputChar;
        }

        switch (inputChar) {
            case 'n':
                return '\n';
            case 'r':
                return '\r';
            case 't':
                return TAB_CHAR;
            case QUOTE_CHAR:
                return QUOTE_CHAR;
            case 'u':
                return decodeUnicodeSequence();
            default:
                throw new IllegalEscapeSequenceException("" + inputChar);
        }
    }

    /**
     * Scans the whole input and returns the raw token list: a {@code String}
     * for each run of token text and the {@code DELIM} marker for each
     * delimiter found outside a quoted literal.
     *
     * @return the list of strings and delimiter markers, in input order
     * @throws UnterminatedLiteralStringException if the input ends inside a
     *         quoted literal
     * @throws MalformedUnicodeSequenceException if a unicode sequence is not
     *         four hex digits
     * @throws IllegalEscapeSequenceException if the escape character is
     *         followed by a character that starts no escape sequence
     */
    ArrayList parseTokens()
            throws UnterminatedLiteralStringException,
            MalformedUnicodeSequenceException, IllegalEscapeSequenceException {
        final StringBuilder tok = new StringBuilder();
        final ArrayList<Object> tokens = new ArrayList<Object>();
        boolean insideStringLiteral = false;

        /*
         * Escape sequences are always processed regardless of whether we're
         * inside a quoted string or not. A quoted string really only alters
         * whether delimiters are treated as literal characters, or not.
         */
        while (hasMoreChars()) {
            final char theChar = nextChar();

            if (theChar == mEscapeChar) {
                tok.append(getEscapedChar(nextChar()));
            } else if (theChar == Tokenizer.LITERAL_STRING_DELIM) {
                // special cases of "", """", """""", etc require forcing an empty string out
                // these cases have no delimiter or regular characters to cause a string to
                // be emitted
                if (insideStringLiteral && tok.length() == 0 && tokens.isEmpty()) {
                    tokens.add("");
                }

                insideStringLiteral = !insideStringLiteral;
            } else if (insideStringLiteral) {
                // inside quotes a delimiter is ordinary text
                tok.append(theChar);
            } else if (isDelim(mDelimiters, theChar)) {
                // we've hit a delimiter...if characters have accumulated, spit them out
                // then spit out the delimiter token.
                if (tok.length() != 0) {
                    tokens.add(tok.toString());
                    tok.setLength(0);
                }
                tokens.add(DELIM);
            } else {
                tok.append(theChar);
            }
        }

        if (insideStringLiteral) {
            throw new UnterminatedLiteralStringException(tok.toString());
        }

        // flush whatever text followed the last delimiter
        if (tok.length() != 0) {
            tokens.add(tok.toString());
        }

        return tokens;
    }
}