// org.wings.style.CSSParser Maven / Gradle / Ivy
//
// Show more of this group Show more artifacts with this name
// Show all versions of wings Show documentation
// The newest version!
/*
 * Copyright 2000,2005 wingS development team.
 *
 * This file is part of wingS (http://wingsframework.org).
 *
 * wingS is free software; you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License
 * as published by the Free Software Foundation; either version 2.1
 * of the License, or (at your option) any later version.
 *
 * Please see COPYING for the complete licence.
 */
package org.wings.style;

import java.io.IOException;
import java.io.Reader;

/**
 * A CSS parser. This works by way of a delegate that implements the
 * CSSParserCallback interface. The delegate is notified of the following
 * events:
 * <ul>
 *   <li>Import statement: <code>handleImport</code>
 *   <li>Selectors <code>handleSelector</code>. This is invoked for each
 *       string. For example if the Reader contained p, bar , a {}, the
 *       delegate would be notified 4 times, for 'p,' 'bar' ',' and 'a'.
 *   <li>When a rule starts, <code>startRule</code>
 *   <li>Properties in the rule via the <code>handleProperty</code>. This
 *       is invoked once per property/value key, eg font size: foo;, would
 *       cause the delegate to be notified once with a value of 'font size'.
 *   <li>Values in the rule via the <code>handleValue</code>, this is
 *       notified for the total value.
 *   <li>When a rule ends, <code>endRule</code>
 * </ul>
 * This will parse much more than CSS 1, and loosely implements the
 * recommendation for <i>Forward-compatible parsing</i> in section
 * 7.1 of the CSS spec found at:
 * <a href="http://www.w3.org/TR/REC-CSS1">http://www.w3.org/TR/REC-CSS1</a>.
 * If an error results in parsing, a RuntimeException will be thrown.
 * <p>
 * This will preserve case. If the callback wishes to treat certain portions
 * case insensitively (such as selectors), it should use toLowerCase, or
 * something similar.
 *
 * @author Scott Violet
 * @version 1.5 03/20/00
 */
class CSSParser {
    // Parsing something like the following:
    // (@rule | ruleset | block)*
    //
    // @rule       (block | identifier)*; (block with {} ends @rule)
    // block       matching [] () {} (that is, [()] is a block, [(){}{[]}]
    //                                is a block, ()[] is two blocks)
    // identifier  "*" | '*' | anything but a [](){} and whitespace
    //
    // ruleset     selector declblock
    // selector    (identifier | (block, except block '{}') )*
    // declblock   declaration* block*
    // declaration (identifier* stopping when identifier ends with :)
    //             (identifier* stopping when identifier ends with ;)
    //
    // comments /* */ can appear any where, and are stripped.


    // identifier - letters, digits, dashes and escaped characters
    // block starts with { ends with matching }, () [] and {} always occur
    //   in matching pairs, '' and "" also occur in pairs.


    // Indicates the type of token being parsed. The values of the six
    // bracket tokens double as indices into charMapping.
    private static final int IDENTIFIER = 1;
    private static final int BRACKET_OPEN = 2;
    private static final int BRACKET_CLOSE = 3;
    private static final int BRACE_OPEN = 4;
    private static final int BRACE_CLOSE = 5;
    private static final int PAREN_OPEN = 6;
    private static final int PAREN_CLOSE = 7;
    private static final int END = -1;

    /**
     * Maps a bracket token (used as the index) to the character it stands for.
     */
    private static final char[] charMapping = {0, 0, '[', ']', '{', '}', '(',
                                               ')', 0};


    /**
     * Set to true if one character has been read ahead.
     */
    private boolean didPushChar;
    /**
     * The read ahead character.
     */
    private int pushedChar;
    /**
     * Temporary place to hold identifiers.
     */
    private final StringBuilder unitBuffer;
    /**
     * Used to indicate blocks.
     */
    private int[] unitStack;
    /**
     * Number of valid blocks.
     */
    private int stackCount;
    /**
     * Holds the incoming CSS rules.
     */
    private Reader reader;
    /**
     * Set to true when the first non @ rule is encountered.
     */
    private boolean encounteredRuleSet;
    /**
     * Notified of state.
     */
    private CSSParserCallback callback;
    /**
     * nextToken() inserts the string here.
     */
    private char[] tokenBuffer;
    /**
     * Current number of valid chars in tokenBuffer.
     */
    private int tokenBufferLength;
    /**
     * Set to true if any whitespace is read.
     */
    private boolean readWS;


    // The delegate interface.
    static interface CSSParserCallback {
        /**
         * Called when an @import is encountered.
         */
        void handleImport(String importString);

        // There is currently no way to distinguish between '"foo,"' and
        // 'foo,'. But this generally isn't valid CSS. If it becomes
        // a problem, handleSelector will have to be told if the string is
        // quoted.
        void handleSelector(String selector);

        void startRule();

        // Property names are mapped to lower case before being passed to
        // the delegate.
        void handleProperty(String property);

        void handleValue(String value);

        void endRule();
    }

    CSSParser() {
        unitStack = new int[2];
        tokenBuffer = new char[80];
        unitBuffer = new StringBuilder();
    }

    /**
     * Parses the CSS supplied by <code>reader</code>, reporting everything
     * that is recognized to <code>callback</code>.
     *
     * @param reader   source of the CSS text
     * @param callback delegate notified of imports, selectors, properties
     *                 and values
     * @param inRule   if true the input is treated as the inside of a
     *                 declaration block, otherwise as a sequence of
     *                 statements (@rules and rulesets)
     * @throws IOException      if reading from <code>reader</code> fails
     * @throws RuntimeException if the input is malformed (unbalanced
     *                          blocks, unclosed strings or comments)
     */
    void parse(Reader reader, CSSParserCallback callback,
               boolean inRule) throws IOException {
        this.callback = callback;
        this.reader = reader;
        // Reset every piece of per-parse state. The look-ahead character in
        // particular may be left over from an earlier parse that was
        // aborted by an exception thrown from a callback.
        stackCount = tokenBufferLength = 0;
        didPushChar = false;
        readWS = false;
        encounteredRuleSet = false;
        try {
            if (inRule) {
                parseDeclarationBlock();
            } else {
                while (getNextStatement()) {
                    // Each call consumes one statement.
                }
            }
        } finally {
            // The parameters shadow the fields, so "this." is required to
            // actually release the references.
            this.callback = null;
            this.reader = null;
        }
    }

    /**
     * Gets the next statement, returning false if the end is reached. A
     * statement is either an @rule, or a ruleset.
     */
    private boolean getNextStatement() throws IOException {
        unitBuffer.setLength(0);

        int token = nextToken((char) 0);

        switch (token) {
            case IDENTIFIER:
                // An empty identifier is produced by a leading comment.
                if (tokenBufferLength > 0) {
                    if (tokenBuffer[0] == '@') {
                        parseAtRule();
                    } else {
                        encounteredRuleSet = true;
                        parseRuleSet();
                    }
                }
                return true;

            case BRACKET_OPEN:
            case BRACE_OPEN:
            case PAREN_OPEN:
                // A stray top level block is skipped as a whole.
                parseTillClosed(token);
                return true;

            case BRACKET_CLOSE:
            case BRACE_CLOSE:
            case PAREN_CLOSE:
                // Shouldn't happen...
                throw new RuntimeException("Unexpected top level block close");

            case END:
                return false;

            default:
                return true;
        }
    }

    /**
     * Parses an @ rule, stopping at a matching brace pair, or ;. The
     * delegate is told about it only if it is an @import that precedes
     * every ruleset.
     */
    private void parseAtRule() throws IOException {
        boolean isImport = tokenEquals("@import");

        unitBuffer.setLength(0);
        boolean done = false;
        while (!done) {
            int nextToken = nextToken(';');

            switch (nextToken) {
                case IDENTIFIER:
                    if (tokenBufferLength > 0 &&
                            tokenBuffer[tokenBufferLength - 1] == ';') {
                        // Drop the terminating ';', it ends the rule.
                        --tokenBufferLength;
                        done = true;
                    }
                    if (tokenBufferLength > 0) {
                        appendSeparatorIfNeeded();
                        unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                    }
                    break;

                case BRACE_OPEN:
                    appendSeparatorIfNeeded();
                    unitBuffer.append(charMapping[nextToken]);
                    parseTillClosed(nextToken);
                    done = true;
                    // Skip a trailing ';', not really to spec.
                    {
                        int nextChar = readWS();
                        if (nextChar != -1 && nextChar != ';') {
                            pushChar(nextChar);
                        }
                    }
                    break;

                case BRACKET_OPEN:
                case PAREN_OPEN:
                    unitBuffer.append(charMapping[nextToken]);
                    parseTillClosed(nextToken);
                    break;

                case BRACKET_CLOSE:
                case BRACE_CLOSE:
                case PAREN_CLOSE:
                    throw new RuntimeException("Unexpected close in @ rule");

                case END:
                    done = true;
                    break;

                default:
                    break;
            }
        }
        if (isImport && !encounteredRuleSet) {
            callback.handleImport(unitBuffer.toString());
        }
    }

    /**
     * Parses the next rule set, which is a selector followed by a
     * declaration block.
     */
    private void parseRuleSet() throws IOException {
        if (parseSelectors()) {
            callback.startRule();
            parseDeclarationBlock();
            callback.endRule();
        }
    }

    /**
     * Parses a set of selectors, returning false if the end of the stream
     * is reached. The token currently held in the token buffer is reported
     * as the first selector.
     */
    private boolean parseSelectors() throws IOException {
        reportSelector();

        unitBuffer.setLength(0);
        for (; ;) {
            int nextToken;
            while ((nextToken = nextToken((char) 0)) == IDENTIFIER) {
                reportSelector();
            }
            switch (nextToken) {
                case BRACE_OPEN:
                    return true;

                case BRACKET_OPEN:
                case PAREN_OPEN:
                    parseTillClosed(nextToken);
                    // Not too sure about this, how we handle this isn't very
                    // well spec'd.
                    unitBuffer.setLength(0);
                    break;

                case BRACKET_CLOSE:
                case BRACE_CLOSE:
                case PAREN_CLOSE:
                    throw new RuntimeException("Unexpected block close in selector");

                case END:
                    // Prematurely hit end.
                    return false;

                default:
                    break;
            }
        }
    }

    /**
     * Hands the current token to the delegate as a selector, unless the
     * token is empty.
     */
    private void reportSelector() {
        if (tokenBufferLength > 0) {
            callback.handleSelector(new String(tokenBuffer, 0,
                    tokenBufferLength));
        }
    }

    /**
     * Parses a declaration block, which is a number of declarations
     * followed by a }. Reaching the end of the input also ends the block.
     */
    private void parseDeclarationBlock() throws IOException {
        for (; ;) {
            int token = parseDeclaration();
            switch (token) {
                case END:
                case BRACE_CLOSE:
                    return;

                case BRACKET_CLOSE:
                case PAREN_CLOSE:
                    // Bail
                    throw new RuntimeException("Unexpected close in declaration block");

                case IDENTIFIER:
                default:
                    break;
            }
        }
    }

    /**
     * Parses a single declaration, which is an identifier a : and another
     * identifier. This returns the last token seen.
     */
    // identifier+: identifier* ;|}
    private int parseDeclaration() throws IOException {
        int token = parseIdentifiers(':', false);
        if (token != IDENTIFIER) {
            return token;
        }
        // Map the property name to lower case, one char at a time so the
        // result does not depend on the default locale.
        for (int counter = unitBuffer.length() - 1; counter >= 0; counter--) {
            unitBuffer.setCharAt(counter, Character.toLowerCase
                    (unitBuffer.charAt(counter)));
        }
        callback.handleProperty(unitBuffer.toString());

        // The value is reported even if the ';' is missing.
        token = parseIdentifiers(';', true);
        callback.handleValue(unitBuffer.toString());
        return token;
    }

    /**
     * Parses identifiers until <code>extraChar</code> is encountered,
     * returning the ending token, which will be IDENTIFIER if extraChar
     * is found. The identifiers are collected, blank separated, in the
     * unit buffer.
     *
     * @param extraChar   character that terminates the sequence
     * @param wantsBlocks if true nested blocks are kept in the unit
     *                    buffer, otherwise they are discarded
     */
    private int parseIdentifiers(char extraChar,
                                 boolean wantsBlocks) throws IOException {

        unitBuffer.setLength(0);
        for (; ;) {
            int nextToken = nextToken(extraChar);

            switch (nextToken) {
                case IDENTIFIER:
                    if (tokenBufferLength > 0) {
                        boolean terminated =
                                tokenBuffer[tokenBufferLength - 1] == extraChar;
                        if (terminated) {
                            // The terminator is not part of the text.
                            --tokenBufferLength;
                        }
                        if (tokenBufferLength > 0) {
                            appendSeparatorIfNeeded();
                            unitBuffer.append(tokenBuffer, 0,
                                    tokenBufferLength);
                        }
                        if (terminated) {
                            return IDENTIFIER;
                        }
                    }
                    break;

                case BRACKET_OPEN:
                case BRACE_OPEN:
                case PAREN_OPEN:
                    int lengthBeforeBlock = unitBuffer.length();
                    if (wantsBlocks) {
                        unitBuffer.append(charMapping[nextToken]);
                    }
                    parseTillClosed(nextToken);
                    if (!wantsBlocks) {
                        // Throw away whatever the block contributed.
                        unitBuffer.setLength(lengthBeforeBlock);
                    }
                    break;

                case BRACE_CLOSE:
                    // No need to throw for these two, we return token and
                    // caller can do whatever.
                case BRACKET_CLOSE:
                case PAREN_CLOSE:
                case END:
                    // Hit the end
                    return nextToken;

                default:
                    break;
            }
        }
    }

    /**
     * Parses till a matching block close is encountered. This is only
     * appropriate to be called at the top level (no nesting). Everything
     * read, including the closing character, is appended to the unit buffer.
     */
    private void parseTillClosed(int openToken) throws IOException {

        startBlock(openToken);
        boolean done = false;
        while (!done) {
            int nextToken = nextToken((char) 0);
            switch (nextToken) {
                case IDENTIFIER:
                    appendSeparatorIfNeeded();
                    if (tokenBufferLength > 0) {
                        unitBuffer.append(tokenBuffer, 0, tokenBufferLength);
                    }
                    break;

                case BRACKET_OPEN:
                case BRACE_OPEN:
                case PAREN_OPEN:
                    appendSeparatorIfNeeded();
                    unitBuffer.append(charMapping[nextToken]);
                    startBlock(nextToken);
                    break;

                case BRACKET_CLOSE:
                case BRACE_CLOSE:
                case PAREN_CLOSE:
                    appendSeparatorIfNeeded();
                    unitBuffer.append(charMapping[nextToken]);
                    endBlock(nextToken);
                    if (!inBlock()) {
                        done = true;
                    }
                    break;

                case END:
                    // Prematurely hit end.
                    throw new RuntimeException("Unclosed block");

                default:
                    break;
            }
        }
    }

    /**
     * Appends a single blank to the unit buffer if whitespace preceded the
     * current token and the buffer already has content.
     */
    private void appendSeparatorIfNeeded() {
        if (unitBuffer.length() > 0 && readWS) {
            unitBuffer.append(' ');
        }
    }

    /**
     * Returns true if the current token consists of exactly the characters
     * of <code>expected</code>.
     */
    private boolean tokenEquals(String expected) {
        if (tokenBufferLength != expected.length()) {
            return false;
        }
        for (int index = 0; index < tokenBufferLength; index++) {
            if (tokenBuffer[index] != expected.charAt(index)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Fetches the next token. For IDENTIFIER the text is left in the token
     * buffer; quoted strings are returned as identifiers without their
     * quotes.
     *
     * @param idChar character that, besides the usual delimiters, ends an
     *               unquoted identifier (it is kept as its last character)
     */
    private int nextToken(char idChar) throws IOException {
        readWS = false;

        int nextChar = readWS();

        switch (nextChar) {
            case '\'':
            case '"':
                readTill((char) nextChar);
                // readTill keeps the closing quote, strip it.
                if (tokenBufferLength > 0) {
                    tokenBufferLength--;
                }
                return IDENTIFIER;
            case '[':
                return BRACKET_OPEN;
            case ']':
                return BRACKET_CLOSE;
            case '{':
                return BRACE_OPEN;
            case '}':
                return BRACE_CLOSE;
            case '(':
                return PAREN_OPEN;
            case ')':
                return PAREN_CLOSE;
            case -1:
                return END;
            default:
                pushChar(nextChar);
                getIdentifier(idChar);
                return IDENTIFIER;
        }
    }

    /**
     * Returns the value of the hex digit <code>character</code>
     * ([0-9a-fA-F]), or -1 if it is not a hex digit.
     */
    private static int hexDigitValue(int character) {
        if (character >= '0' && character <= '9') {
            return character - '0';
        }
        if (character >= 'a' && character <= 'f') {
            return character - 'a' + 10;
        }
        if (character >= 'A' && character <= 'F') {
            return character - 'A' + 10;
        }
        return -1;
    }

    /**
     * Returns true if <code>character</code> ends an unquoted identifier:
     * a quote, one of {}()[], or blank, tab, newline, carriage return.
     */
    private static boolean endsIdentifier(int character) {
        switch (character) {
            case '\'':
            case '"':
            case '[':
            case ']':
            case '{':
            case '}':
            case '(':
            case ')':
            case ' ':
            case '\n':
            case '\t':
            case '\r':
                return true;
            default:
                return false;
        }
    }

    /**
     * Gets an identifier, returning true if the length of the string is greater than 0,
     * stopping when stopChar, whitespace, or one of {}()[] is
     * hit. A comment start also ends the identifier; the comment is
     * skipped and counted as whitespace.
     */
    // NOTE: this could be combined with readTill, as they contain somewhat
    // similar functionality.
    private boolean getIdentifier(char stopChar) throws IOException {
        int intStopChar = (int) stopChar;

        tokenBufferLength = 0;
        int escapeChar = 0;
        int escapeCount = 0;
        boolean done = false;
        boolean lastWasEscape = false;
        while (!done) {
            int nextChar = readChar();
            boolean atEnd = (nextChar == -1);
            if (atEnd) {
                // Reached the end
                done = true;
            }
            if (lastWasEscape) {
                int hexValue = hexDigitValue(nextChar);
                if (hexValue >= 0) {
                    // Continue with escape, at most four hex digits.
                    escapeChar = escapeChar * 16 + hexValue;
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char) escapeChar);
                    }
                } else {
                    // no longer escaped
                    lastWasEscape = false;
                    if (escapeCount > 0) {
                        append((char) escapeChar);
                        // Make this simpler, reprocess the character.
                        pushChar(nextChar);
                    } else if (!atEnd) {
                        // A backslash followed by a non hex character
                        // yields that character literally.
                        append((char) nextChar);
                    }
                }
            } else if (!atEnd) {
                if (nextChar == '\\') {
                    lastWasEscape = true;
                    escapeChar = escapeCount = 0;
                } else if (endsIdentifier(nextChar)) {
                    done = true;
                    pushChar(nextChar);
                } else if (nextChar == '/') {
                    // Potential comment
                    nextChar = readChar();
                    if (nextChar == '*') {
                        done = true;
                        readComment();
                        readWS = true;
                    } else {
                        append('/');
                        if (nextChar == -1) {
                            done = true;
                        } else {
                            pushChar(nextChar);
                        }
                    }
                } else {
                    append((char) nextChar);
                    if (nextChar == intStopChar) {
                        done = true;
                    }
                }
            }
        }
        return (tokenBufferLength > 0);
    }

    /**
     * Reads till a stopChar is encountered, escaping characters
     * as necessary. The stop character itself is the last character
     * placed in the token buffer.
     *
     * @throws RuntimeException if the input ends before stopChar is seen
     */
    private void readTill(char stopChar) throws IOException {
        int intStopChar = (int) stopChar;

        tokenBufferLength = 0;
        boolean done = false;
        int escapeChar = 0;
        int escapeCount = 0;
        boolean lastWasEscape = false;
        while (!done) {
            int nextChar = readChar();
            if (nextChar == -1) {
                // Prematurely reached the end!
                throw new RuntimeException("Unclosed " + stopChar);
            }
            boolean isBackslash = (nextChar == '\\');
            if (lastWasEscape) {
                int hexValue = hexDigitValue(nextChar);
                if (hexValue >= 0) {
                    // Continue with escape, at most four hex digits.
                    escapeChar = escapeChar * 16 + hexValue;
                    if (++escapeCount == 4) {
                        lastWasEscape = false;
                        append((char) escapeChar);
                    }
                } else if (escapeCount > 0) {
                    // The hex escape is complete, emit it and then deal
                    // with the character that ended it.
                    append((char) escapeChar);
                    if (isBackslash) {
                        lastWasEscape = true;
                        escapeChar = escapeCount = 0;
                    } else {
                        if (nextChar == intStopChar) {
                            done = true;
                        }
                        append((char) nextChar);
                        lastWasEscape = false;
                    }
                } else {
                    // A backslash followed by a non hex character yields
                    // that character literally (this covers escaped quotes).
                    append((char) nextChar);
                    lastWasEscape = false;
                }
            } else if (isBackslash) {
                lastWasEscape = true;
                escapeChar = escapeCount = 0;
            } else {
                if (nextChar == intStopChar) {
                    done = true;
                }
                append((char) nextChar);
            }
        }
    }

    /**
     * Appends <code>character</code> to the token buffer, doubling the
     * buffer when it is full.
     */
    private void append(char character) {
        if (tokenBufferLength == tokenBuffer.length) {
            char[] newBuffer = new char[tokenBuffer.length * 2];
            System.arraycopy(tokenBuffer, 0, newBuffer, 0, tokenBuffer.length);
            tokenBuffer = newBuffer;
        }
        tokenBuffer[tokenBufferLength++] = character;
    }

    /**
     * Skips a comment block; the opening slash and star have already been
     * consumed.
     *
     * @throws RuntimeException if the input ends inside the comment
     */
    private void readComment() throws IOException {

        for (; ;) {
            int nextChar = readChar();
            switch (nextChar) {
                case -1:
                    throw new RuntimeException("Unclosed comment");
                case '*':
                    nextChar = readChar();
                    if (nextChar == '/') {
                        return;
                    }
                    if (nextChar == -1) {
                        throw new RuntimeException("Unclosed comment");
                    }
                    // Might itself be a '*' that starts the terminator.
                    pushChar(nextChar);
                    break;
                default:
                    break;
            }
        }
    }

    /**
     * Called when a block start is encountered ({[.
     */
    private void startBlock(int startToken) {
        if (stackCount == unitStack.length) {
            int[] newUS = new int[stackCount * 2];

            System.arraycopy(unitStack, 0, newUS, 0, stackCount);
            unitStack = newUS;
        }
        unitStack[stackCount++] = startToken;
    }

    /**
     * Called when an end block is encountered )]}
     *
     * @throws RuntimeException if the innermost open block is of a
     *                          different kind, or no block is open
     */
    private void endBlock(int endToken) {
        int startToken;

        switch (endToken) {
            case BRACKET_CLOSE:
                startToken = BRACKET_OPEN;
                break;
            case BRACE_CLOSE:
                startToken = BRACE_OPEN;
                break;
            case PAREN_CLOSE:
                startToken = PAREN_OPEN;
                break;
            default:
                // Will never happen.
                startToken = -1;
                break;
        }
        if (stackCount > 0 && unitStack[stackCount - 1] == startToken) {
            stackCount--;
        } else {
            // Invalid state, should do something.
            throw new RuntimeException("Unmatched block");
        }
    }

    /**
     * @return true if currently in a block.
     */
    private boolean inBlock() {
        return (stackCount > 0);
    }

    /**
     * Skips any white space, returning the character after the white space.
     * Sets the readWS flag if at least one whitespace character was skipped.
     */
    private int readWS() throws IOException {
        int nextChar;
        while ((nextChar = readChar()) != -1 &&
                Character.isWhitespace((char) nextChar)) {
            readWS = true;
        }
        return nextChar;
    }

    /**
     * Reads a character from the stream, or returns the pushed back
     * character if there is one.
     */
    private int readChar() throws IOException {
        if (didPushChar) {
            didPushChar = false;
            return pushedChar;
        }
        return reader.read();
    }

    /**
     * Supports one character look ahead, this will throw if called twice
     * in a row.
     */
    private void pushChar(int tempChar) {
        if (didPushChar) {
            // Should never happen.
            throw new RuntimeException("Can not handle look ahead of more than one character");
        }
        didPushChar = true;
        pushedChar = tempChar;
    }
}