// net.sf.jett.parser.StyleScanner Maven / Gradle / Ivy

package net.sf.jett.parser;

/**
 * A <code>StyleScanner</code> scans CSS text and returns tokens.
 * <p>
 * Comments (<code>/* ... *&#47;</code>) are skipped silently by
 * {@link #getNextToken}; they are never reported as a separate token.
 *
 * @author Randy Gettman
 * @since 0.5.0
 */
public class StyleScanner
{
    /**
     * Enumeration for the different types of Tokens in "CSS".
     * Error tokens and the unknown token carry negative codes.
     */
    public enum Token
    {
        TOKEN_ERROR_EOI_IN_COMMENT(-3),
        TOKEN_ERROR_BUF_NULL(-2),
        TOKEN_UNKNOWN(-1),
        TOKEN_WHITESPACE(0),
        TOKEN_STRING(1),
        TOKEN_COLON(11),
        TOKEN_PERIOD(12),
        TOKEN_BEGIN_BRACE(13),
        TOKEN_END_BRACE(14),
        TOKEN_SEMICOLON(15),
        TOKEN_COMMENT(98),
        TOKEN_EOI(99);

        private final int myCode;

        // Create a token with a code.
        private Token(int code)
        {
            myCode = code;
        }

        /**
         * Returns the unique code associated with this <code>Token</code>.
         * @return The unique code.
         */
        public int getCode()
        {
            return myCode;
        }
    }

    // Punctuation characters that terminate (and can never be part of) a
    // string token.
    private static final String PUNCT_CHARS_NOT_AS_STRING = ":.{};/*";

    private String myCssText;
    private int myOffset;
    private String myCurrLexeme;

    /**
     * Construct a <code>StyleScanner</code> object, with empty input.
     */
    public StyleScanner()
    {
        this("");
    }

    /**
     * Construct a <code>StyleScanner</code> object, with the given input.
     * @param cssText The CSS text to scan.
     */
    public StyleScanner(String cssText)
    {
        setCssText(cssText);
    }

    /**
     * Returns the next <code>Token</code>.  After this call completes, the
     * current lexeme is available via a call to <code>getCurrLexeme</code>.
     * Starts looking at the current offset, and once the token is found, then
     * the offset is advanced to the start of the next token.
     * <ul>
     * <li>If the CSS text is <code>null</code>, returns
     *    <code>TOKEN_ERROR_BUF_NULL</code> and the current lexeme is
     *    <code>null</code>.</li>
     * <li>Any number of consecutive comments at the current offset are skipped
     *    before the token is scanned.  If a comment is not terminated, returns
     *    <code>TOKEN_ERROR_EOI_IN_COMMENT</code>, the current lexeme is
     *    <code>null</code>, and the offset stays at the start of the
     *    unterminated comment.</li>
     * <li>At the end of input, returns <code>TOKEN_EOI</code>; the current
     *    lexeme is left unchanged.</li>
     * <li>A character that cannot start any token (a <code>/</code> or
     *    <code>*</code> that is not part of a comment delimiter) is consumed
     *    and returned as a one-character <code>TOKEN_UNKNOWN</code>, so that
     *    the scanner always makes progress.</li>
     * </ul>
     * @return A <code>Token</code>.
     * @see #getCurrLexeme
     */
    public Token getNextToken()
    {
        if (myCssText == null)
        {
            myCurrLexeme = null;
            return Token.TOKEN_ERROR_BUF_NULL;
        }

        // Skip every comment that starts here, not just the first one, so
        // that back-to-back comments don't leave us stranded on a '/'.
        while (isCommentStart(myOffset))
        {
            int iEndOfComment = myCssText.indexOf("*/", myOffset + 2);
            if (iEndOfComment == -1)
            {
                myCurrLexeme = null;
                return Token.TOKEN_ERROR_EOI_IN_COMMENT;
            }
            myOffset = iEndOfComment + 2;
        }

        // EOI test.  This must come after the comment skipping, because a
        // comment may be the last thing in the input.
        int length = myCssText.length();
        if (myOffset >= length)
        {
            return Token.TOKEN_EOI;
        }

        int iStartOfToken = myOffset;
        // Every token is at least one character long; whitespace and string
        // tokens extend this below.
        int iEndOfToken = iStartOfToken + 1;
        char first = myCssText.charAt(iStartOfToken);
        Token tokenType;

        if (Character.isWhitespace(first))
        {
            // Whitespace: consume the whole run.
            while (iEndOfToken < length && Character.isWhitespace(myCssText.charAt(iEndOfToken)))
            {
                iEndOfToken++;
            }
            tokenType = Token.TOKEN_WHITESPACE;
        }
        else if (isStringChar(first))
        {
            // String mode: letters, numbers, and all but a few punctuation
            // characters.
            while (iEndOfToken < length && isStringChar(myCssText.charAt(iEndOfToken)))
            {
                iEndOfToken++;
            }
            tokenType = Token.TOKEN_STRING;
        }
        else
        {
            switch (first)
            {
            case ':':
                tokenType = Token.TOKEN_COLON;
                break;
            case '.':
                tokenType = Token.TOKEN_PERIOD;
                break;
            case '{':
                tokenType = Token.TOKEN_BEGIN_BRACE;
                break;
            case '}':
                tokenType = Token.TOKEN_END_BRACE;
                break;
            case ';':
                tokenType = Token.TOKEN_SEMICOLON;
                break;
            default:
                // A stray '/' or '*'.  Consume it so the caller can see the
                // offending character and the scanner cannot get stuck.
                tokenType = Token.TOKEN_UNKNOWN;
                break;
            }
        }

        // Note down lexeme for access later.
        myCurrLexeme = myCssText.substring(iStartOfToken, iEndOfToken);

        // Update the offset.
        myOffset = iEndOfToken;

        return tokenType;
    }

    // Determines whether the two characters at the given index are "/*".
    private boolean isCommentStart(int index)
    {
        return index + 1 < myCssText.length() &&
                myCssText.charAt(index) == '/' &&
                myCssText.charAt(index + 1) == '*';
    }

    // Determines whether the character may be part of a string token, i.e. it
    // is neither whitespace nor one of the reserved punctuation characters.
    private static boolean isStringChar(char c)
    {
        return !Character.isWhitespace(c) && PUNCT_CHARS_NOT_AS_STRING.indexOf(c) == -1;
    }

    /**
     * Returns the current lexeme after a call to <code>getNextToken</code>.
     * @return The current lexeme, or <code>null</code> if
     *    <code>getNextToken</code> hasn't been called yet after a reset, or if
     *    the last call returned an error token.
     * @see #getNextToken
     * @see #reset
     */
    public String getCurrLexeme()
    {
        return myCurrLexeme;
    }

    /**
     * Returns the current position of the next token.
     * @return The current position of the next token.
     */
    public int getNextPosition()
    {
        return myOffset;
    }

    /**
     * Resets the scanner to the beginning of the CSS text and clears the
     * current lexeme.
     */
    public void reset()
    {
        myOffset = 0;
        myCurrLexeme = null;
    }

    /**
     * Give the <code>StyleScanner</code> another CSS text to scan.
     * Resets to the beginning of the string.
     * @param cssText The css Text to scan.
     */
    public void setCssText(String cssText)
    {
        myCssText = cssText;
        reset();
    }
}