All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.gwt.dev.js.rhino.TokenStream Maven / Gradle / Ivy

There is a newer version: 2.1.0-Beta1
Show newest version
/* -*- Mode: java; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * The contents of this file are subject to the Netscape Public
 * License Version 1.1 (the "License"); you may not use this file
 * except in compliance with the License. You may obtain a copy of
 * the License at http://www.mozilla.org/NPL/
 *
 * Software distributed under the License is distributed on an "AS
 * IS" basis, WITHOUT WARRANTY OF ANY KIND, either express or
 * implied. See the License for the specific language governing
 * rights and limitations under the License.
 *
 * The Original Code is Rhino code, released
 * May 6, 1999.
 *
 * The Initial Developer of the Original Code is Netscape
 * Communications Corporation.  Portions created by Netscape are
 * Copyright (C) 1997-1999 Netscape Communications Corporation. All
 * Rights Reserved.
 *
 * Contributor(s):
 * Roger Lawrence
 * Mike McCabe
 *
 * Alternatively, the contents of this file may be used under the
 * terms of the GNU Public License (the "GPL"), in which case the
 * provisions of the GPL are applicable instead of those above.
 * If you wish to allow use of your version of this file only
 * under the terms of the GPL and not to allow others to use your
 * version of this file under the NPL, indicate your decision by
 * deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL.  If you do not delete
 * the provisions above, a recipient may use your version of this
 * file under either the NPL or the GPL.
 */
// Modified by Google

package com.google.gwt.dev.js.rhino;

import java.io.*;
import java.util.HashMap;
import java.util.Map;

/**
 * This class implements the JavaScript scanner.
 *
 * It is based on the C source files jsscan.c and jsscan.h
 * in the jsref package.
 */

public class TokenStream {

    /**
     * Reserved-word table.  Each value packs the token type in its low
     * 8 bits and the associated op (if any) in the bits above, which is
     * how stringToKeyword unpacks it.
     */
    private static final Map<String, Integer> KEYWORDS =
        new HashMap<String, Integer>();

    /*
     * Scanner state flags, mirroring the JSTokenStream flags in jsscan.h.
     * The parser sets and tests these bits to change or inspect what the
     * scanner is doing.  Each flag occupies its own bit.
     */

    /** Tokenize newlines instead of skipping them. */
    static final int TSF_NEWLINES = 1 << 0;

    /** Scanning inside a function body. */
    static final int TSF_FUNCTION = 1 << 1;

    /** The function has a 'return expr;'. */
    static final int TSF_RETURN_EXPR = 1 << 2;

    /** The function has a bare 'return;'. */
    static final int TSF_RETURN_VOID = 1 << 3;

    /** Looking for a regular expression. */
    static final int TSF_REGEXP = 1 << 4;

    /** Something other than whitespace has been seen since start of line. */
    static final int TSF_DIRTYLINE = 1 << 5;

    /*
     * Sentinel character value meaning "no more input".  It is
     * deliberately outside the range of real characters so it can be
     * tested with a plain comparison rather than by catching an
     * exception.  Do not confuse it with the EOF token type.
     */
    private static final int EOF_CHAR = -1;

    /**
     * Token types.  These values correspond to JSTokenType values in
     * jsscan.c.
     *
     * The keyword table packs a token type into the low 8 bits of an
     * int (see stringToKeyword), so any code used as a keyword's token
     * type must fit in 8 bits.
     */

    public final static int
    // start enum
        ERROR       = -1, // well-known as the only code < EOF
        EOF         = 0,  // end of file token - (not EOF_CHAR)
        EOL         = 1,  // end of line
        // Beginning here are interpreter bytecodes. Their values
        // must not exceed 127.
        POPV        = 2,
        ENTERWITH   = 3,
        LEAVEWITH   = 4,
        RETURN      = 5,
        GOTO        = 6,
        IFEQ        = 7,
        IFNE        = 8,
        DUP         = 9,
        SETNAME     = 10,
        BITOR       = 11,
        BITXOR      = 12,
        BITAND      = 13,
        EQ          = 14,
        NE          = 15,
        LT          = 16,
        LE          = 17,
        GT          = 18,
        GE          = 19,
        LSH         = 20,
        RSH         = 21,
        URSH        = 22,
        ADD         = 23,
        SUB         = 24,
        MUL         = 25,
        DIV         = 26,
        MOD         = 27,
        BITNOT      = 28,
        NEG         = 29,
        NEW         = 30,
        DELPROP     = 31,
        TYPEOF      = 32,
        NAMEINC     = 33,
        PROPINC     = 34,
        ELEMINC     = 35,
        NAMEDEC     = 36,
        PROPDEC     = 37,
        ELEMDEC     = 38,
        GETPROP     = 39,
        SETPROP     = 40,
        GETELEM     = 41,
        SETELEM     = 42,
        CALL        = 43,
        NAME        = 44,
        NUMBER      = 45,
        STRING      = 46,
        ZERO        = 47,
        ONE         = 48,
        NULL        = 49,
        THIS        = 50,
        FALSE       = 51,
        TRUE        = 52,
        SHEQ        = 53,   // shallow equality (===)
        SHNE        = 54,   // shallow inequality (!==)
        CLOSURE     = 55,
        REGEXP      = 56,
        POP         = 57,
        POS         = 58,
        VARINC      = 59,
        VARDEC      = 60,
        BINDNAME    = 61,
        THROW       = 62,
        IN          = 63,
        INSTANCEOF  = 64,
        GOSUB       = 65,
        RETSUB      = 66,
        CALLSPECIAL = 67,
        GETTHIS     = 68,
        NEWTEMP     = 69,
        USETEMP     = 70,
        GETBASE     = 71,
        GETVAR      = 72,
        SETVAR      = 73,
        UNDEFINED   = 74,
        TRY         = 75,
        ENDTRY      = 76,
        NEWSCOPE    = 77,
        TYPEOFNAME  = 78,
        ENUMINIT    = 79,
        ENUMNEXT    = 80,
        GETPROTO    = 81,
        GETPARENT   = 82,
        SETPROTO    = 83,
        SETPARENT   = 84,
        SCOPE       = 85,
        GETSCOPEPARENT = 86,
        THISFN      = 87,
        JTHROW      = 88,
        // End of interpreter bytecodes
        SEMI        = 89,  // semicolon
        LB          = 90,  // left and right brackets
        RB          = 91,
        LC          = 92,  // left and right curlies (braces)
        RC          = 93,
        LP          = 94,  // left and right parentheses
        // NOTE(review): this slot pairs with LP above, so it is presumably
        // the right-parenthesis token despite its name - confirm against
        // the callers before renaming.
        GWT          = 95,
        COMMA       = 96,  // comma operator
        ASSIGN      = 97, // assignment ops (= += -= etc.)
        HOOK        = 98, // conditional (?:)
        COLON       = 99,
        OR          = 100, // logical or (||)
        AND         = 101, // logical and (&&)
        EQOP        = 102, // equality ops (== !=)
        RELOP       = 103, // relational ops (< <= > >=)
        SHOP        = 104, // shift ops (<< >> >>>)
        UNARYOP     = 105, // unary prefix operator
        INC         = 106, // increment/decrement (++ --)
        DEC         = 107,
        DOT         = 108, // member operator (.)
        PRIMARY     = 109, // true, false, null, this
        FUNCTION    = 110, // function keyword
        EXPORT      = 111, // export keyword
        IMPORT      = 112, // import keyword
        IF          = 113, // if keyword
        ELSE        = 114, // else keyword
        SWITCH      = 115, // switch keyword
        CASE        = 116, // case keyword
        DEFAULT     = 117, // default keyword
        WHILE       = 118, // while keyword
        DO          = 119, // do keyword
        FOR         = 120, // for keyword
        BREAK       = 121, // break keyword
        CONTINUE    = 122, // continue keyword
        VAR         = 123, // var keyword
        WITH        = 124, // with keyword
        CATCH       = 125, // catch keyword
        FINALLY     = 126, // finally keyword

        /** Added by Mike - these are JSOPs in the jsref, but I
         * don't have them yet in the java implementation...
         * so they go here.  Also whatever I needed.

         * Most of these go in the 'op' field when returning
         * more general token types, eg. 'DIV' as the op of 'ASSIGN'.
         */
        NOP         = 128, // NOP
        NOT         = 129, // etc.
        PRE         = 130, // for INC, DEC nodes.
        POST        = 131,

        /**
         * For JSOPs associated with keywords...
         * eg. op = THIS; token = PRIMARY
         */

        VOID        = 132,

        /* types used for the parse tree - these never get returned
         * by the scanner.
         */
        BLOCK       = 133, // statement block
        ARRAYLIT    = 134, // array literal
        OBJLIT      = 135, // object literal
        LABEL       = 136, // label
        TARGET      = 137,
        LOOP        = 138,
        ENUMDONE    = 139,
        EXPRSTMT    = 140,
        PARENT      = 141,
        CONVERT     = 142,
        JSR         = 143,
        NEWLOCAL    = 144,
        USELOCAL    = 145,
        DEBUGGER    = 146, // token type stored for the 'debugger' keyword
        SCRIPT      = 147,   // top-level node for entire script

        LAST_TOKEN  = 147, // same value as SCRIPT
        NUMBER_INT  = 148, // numeric literal scanned without fraction or
                           // exponent; note it lies above LAST_TOKEN
    
        // This value is only used as a return value for getTokenHelper,
        // which is only called from getToken and exists to avoid an excessive
        // recursion problem if a number of lines in a row are comments.
        RETRY_TOKEN     = 65535;

    // end enum


    /**
     * Returns the lower-case debugging name of a token code.
     *
     * Names are only produced when Context.printTrees or
     * Context.printICode is enabled; otherwise the empty string is
     * returned.  Codes with no entry below also yield the empty string.
     *
     * @param token one of the token constants declared in this class
     * @return the token's debug name, or "" when names are disabled or
     *         the code is not recognized
     */
    public static String tokenToName(int token) {
        if (!Context.printTrees && !Context.printICode) {
            return "";
        }

        switch (token) {
            case ERROR:           return "error";
            case EOF:             return "eof";
            case EOL:             return "eol";
            case POPV:            return "popv";
            case ENTERWITH:       return "enterwith";
            case LEAVEWITH:       return "leavewith";
            case RETURN:          return "return";
            case GOTO:            return "goto";
            case IFEQ:            return "ifeq";
            case IFNE:            return "ifne";
            case DUP:             return "dup";
            case SETNAME:         return "setname";
            case BITOR:           return "bitor";
            case BITXOR:          return "bitxor";
            case BITAND:          return "bitand";
            case EQ:              return "eq";
            case NE:              return "ne";
            case LT:              return "lt";
            case LE:              return "le";
            case GT:              return "gt";
            case GE:              return "ge";
            case LSH:             return "lsh";
            case RSH:             return "rsh";
            case URSH:            return "ursh";
            case ADD:             return "add";
            case SUB:             return "sub";
            case MUL:             return "mul";
            case DIV:             return "div";
            case MOD:             return "mod";
            case BITNOT:          return "bitnot";
            case NEG:             return "neg";
            case NEW:             return "new";
            case DELPROP:         return "delprop";
            case TYPEOF:          return "typeof";
            case NAMEINC:         return "nameinc";
            case PROPINC:         return "propinc";
            case ELEMINC:         return "eleminc";
            case NAMEDEC:         return "namedec";
            case PROPDEC:         return "propdec";
            case ELEMDEC:         return "elemdec";
            case GETPROP:         return "getprop";
            case SETPROP:         return "setprop";
            case GETELEM:         return "getelem";
            case SETELEM:         return "setelem";
            case CALL:            return "call";
            case NAME:            return "name";
            case NUMBER_INT:      return "integer";
            case NUMBER:          return "double";
            case STRING:          return "string";
            case ZERO:            return "zero";
            case ONE:             return "one";
            case NULL:            return "null";
            case THIS:            return "this";
            case FALSE:           return "false";
            case TRUE:            return "true";
            case SHEQ:            return "sheq";
            case SHNE:            return "shne";
            case CLOSURE:         return "closure";
            // Historical name kept as-is so existing debug output
            // does not change.
            case REGEXP:          return "object";
            case POP:             return "pop";
            case POS:             return "pos";
            case VARINC:          return "varinc";
            case VARDEC:          return "vardec";
            case BINDNAME:        return "bindname";
            case THROW:           return "throw";
            case IN:              return "in";
            case INSTANCEOF:      return "instanceof";
            case GOSUB:           return "gosub";
            case RETSUB:          return "retsub";
            case CALLSPECIAL:     return "callspecial";
            case GETTHIS:         return "getthis";
            case NEWTEMP:         return "newtemp";
            case USETEMP:         return "usetemp";
            case GETBASE:         return "getbase";
            case GETVAR:          return "getvar";
            case SETVAR:          return "setvar";
            case UNDEFINED:       return "undefined";
            case TRY:             return "try";
            case ENDTRY:          return "endtry";
            case NEWSCOPE:        return "newscope";
            case TYPEOFNAME:      return "typeofname";
            case ENUMINIT:        return "enuminit";
            case ENUMNEXT:        return "enumnext";
            case GETPROTO:        return "getproto";
            case GETPARENT:       return "getparent";
            case SETPROTO:        return "setproto";
            case SETPARENT:       return "setparent";
            case SCOPE:           return "scope";
            case GETSCOPEPARENT:  return "getscopeparent";
            case THISFN:          return "thisfn";
            case JTHROW:          return "jthrow";
            case SEMI:            return "semi";
            case LB:              return "lb";
            case RB:              return "rb";
            case LC:              return "lc";
            case RC:              return "rc";
            case LP:              return "lp";
            case GWT:             return "gwt";
            case COMMA:           return "comma";
            case ASSIGN:          return "assign";
            case HOOK:            return "hook";
            case COLON:           return "colon";
            case OR:              return "or";
            case AND:             return "and";
            case EQOP:            return "eqop";
            case RELOP:           return "relop";
            case SHOP:            return "shop";
            case UNARYOP:         return "unaryop";
            case INC:             return "inc";
            case DEC:             return "dec";
            case DOT:             return "dot";
            case PRIMARY:         return "primary";
            case FUNCTION:        return "function";
            case EXPORT:          return "export";
            case IMPORT:          return "import";
            case IF:              return "if";
            case ELSE:            return "else";
            case SWITCH:          return "switch";
            case CASE:            return "case";
            case DEFAULT:         return "default";
            case WHILE:           return "while";
            case DO:              return "do";
            case FOR:             return "for";
            case BREAK:           return "break";
            case CONTINUE:        return "continue";
            case VAR:             return "var";
            case WITH:            return "with";
            case CATCH:           return "catch";
            case FINALLY:         return "finally";
            case NOP:             return "nop";
            case NOT:             return "not";
            case PRE:             return "pre";
            case POST:            return "post";
            case VOID:            return "void";
            case BLOCK:           return "block";
            case ARRAYLIT:        return "arraylit";
            case OBJLIT:          return "objlit";
            case LABEL:           return "label";
            case TARGET:          return "target";
            case LOOP:            return "loop";
            case ENUMDONE:        return "enumdone";
            case EXPRSTMT:        return "exprstmt";
            case PARENT:          return "parent";
            case CONVERT:         return "convert";
            case JSR:             return "jsr";
            case NEWLOCAL:        return "newlocal";
            case USELOCAL:        return "uselocal";
            // The keyword table maps "debugger" to DEBUGGER, so the
            // scanner can return it; give it a name like every other
            // keyword token instead of printing an empty string.
            case DEBUGGER:        return "debugger";
            case SCRIPT:          return "script";
            default:              return "";
        }
    }

    /**
     * Builds a debugging description of a token, including its cached
     * payload where one applies.
     *
     * The op, string and number come from the fields cached on this
     * TokenStream, so if getToken has been called since the passed
     * token was scanned, the payload printed may belong to a later
     * token.  Returns the empty string unless Context.printTrees is set.
     *
     * @param token the token code to describe
     * @return the description, or "" when tree printing is disabled
     */
    public String tokenToString(int token) {
        if (!Context.printTrees) {
            return "";
        }

        final String name = tokenToName(token);

        // Tokens that carry a secondary op.
        if (token == UNARYOP || token == ASSIGN || token == PRIMARY
                || token == EQOP || token == SHOP || token == RELOP) {
            return name + " " + tokenToName(this.op);
        }

        // Tokens that carry text.
        if (token == STRING || token == REGEXP || token == NAME) {
            return name + " `" + this.string + "'";
        }

        // Numeric literals.
        if (token == NUMBER_INT) {
            return "NUMBER_INT " + (int) this.number;
        }
        if (token == NUMBER) {
            return "NUMBER " + this.number;
        }

        return name;
    }

    // Populate the reserved-word table.  A value is either a bare token
    // type, or a token type in the low 8 bits combined with the op
    // shifted left by 8 (decoded again by stringToKeyword).
    static {
        // Keywords that are their own token type.
        KEYWORDS.put("break", BREAK);
        KEYWORDS.put("case", CASE);
        KEYWORDS.put("catch", CATCH);
        KEYWORDS.put("continue", CONTINUE);
        KEYWORDS.put("debugger", DEBUGGER);
        KEYWORDS.put("default", DEFAULT);
        KEYWORDS.put("delete", DELPROP);
        KEYWORDS.put("do", DO);
        KEYWORDS.put("else", ELSE);
        KEYWORDS.put("export", EXPORT);
        KEYWORDS.put("finally", FINALLY);
        KEYWORDS.put("for", FOR);
        KEYWORDS.put("function", FUNCTION);
        KEYWORDS.put("if", IF);
        KEYWORDS.put("import", IMPORT);
        KEYWORDS.put("new", NEW);
        KEYWORDS.put("return", RETURN);
        KEYWORDS.put("switch", SWITCH);
        KEYWORDS.put("throw", THROW);
        KEYWORDS.put("try", TRY);
        KEYWORDS.put("var", VAR);
        KEYWORDS.put("while", WHILE);
        KEYWORDS.put("with", WITH);

        // Literal keywords: PRIMARY token, specific op.
        KEYWORDS.put("false", (FALSE << 8) | PRIMARY);
        KEYWORDS.put("null", (NULL << 8) | PRIMARY);
        KEYWORDS.put("this", (THIS << 8) | PRIMARY);
        KEYWORDS.put("true", (TRUE << 8) | PRIMARY);

        // Relational keyword operators: RELOP token, specific op.
        KEYWORDS.put("in", (IN << 8) | RELOP);
        KEYWORDS.put("instanceof", (INSTANCEOF << 8) | RELOP);

        // Unary keyword operators: UNARYOP token, specific op.
        KEYWORDS.put("typeof", (TYPEOF << 8) | UNARYOP);
        KEYWORDS.put("void", (VOID << 8) | UNARYOP);
    }
    
    /**
     * Looks a word up in the reserved-word table.
     *
     * On a hit, the op packed above the low 8 bits is stored in
     * this.op and the token type from the low 8 bits is returned.
     *
     * @param name the scanned identifier text
     * @return the keyword's token type, or EOF if name is not a keyword
     */
    private int stringToKeyword(String name) {
        Integer packed = KEYWORDS.get(name);
        if (packed != null) {
            int code = packed;
            this.op = code >> 8;
            return code & 0xff;
        }
        return EOF;
    }

    /**
     * Creates a scanner over the given reader.
     *
     * @param in         source of the characters to scan
     * @param sourceName name recorded for the source being scanned
     * @param lineno     line number the input starts at
     */
    public TokenStream(Reader in, String sourceName, int lineno) {
        this.in = new LineBuffer(in, lineno);
        this.sourceName = sourceName;

        // Start with no flags set and nothing pushed back.
        this.flags = 0;
        this.pushbackToken = EOF;

        // Both tracked positions begin at column 0 of the first line.
        this.secondToLastPosition = new CodePosition(lineno, 0);
        this.lastPosition = new CodePosition(lineno, 0);
    }

    /**
     * Consumes the next token if it is the expected one.
     *
     * When the next token differs from toMatch it is pushed back, so
     * the stream is left as it was.
     *
     * @param toMatch the token code to look for
     * @return true if the next token matched and was consumed,
     *         false otherwise
     * @throws IOException if reading the next token fails
     */
    public boolean matchToken(int toMatch) throws IOException {
        final int next = getToken();
        if (next != toMatch) {
            // Not what the caller wanted: undo the read.
            this.pushbackToken = next;
            tokenno--;
            return false;
        }
        return true;
    }

    /**
     * Pushes a token back so the next getToken returns it again.
     *
     * Only one token can be held.  Pushing back while another token is
     * already held is rejected, unless the token being pushed is ERROR.
     *
     * @param tt the token code to push back
     * @throws RuntimeException if a token is already pushed back and
     *         tt is not ERROR
     */
    public void ungetToken(int tt) {
        boolean slotOccupied = this.pushbackToken != EOF;
        if (slotOccupied && tt != ERROR) {
            throw new RuntimeException(
                Context.getMessage2("msg.token.replaces.pushback",
                    tokenToString(tt), tokenToString(this.pushbackToken)));
        }
        tokenno--;
        this.pushbackToken = tt;
    }

    /**
     * Returns the next token without consuming it.
     *
     * The token is read and then immediately pushed back.
     *
     * @return the code of the upcoming token
     * @throws IOException if reading the token fails
     */
    public int peekToken() throws IOException {
        final int upcoming = getToken();
        tokenno--;
        this.pushbackToken = upcoming;
        return upcoming;
    }

    /**
     * Peeks at the next token with newline tokenization switched on, so
     * a line break before the next token is reported as EOL.
     *
     * An EOL seen this way is returned to the caller but is not left in
     * the pushback slot.
     *
     * @return the code of the upcoming token, or EOL
     * @throws IOException if reading the token fails
     */
    public int peekTokenSameLine() throws IOException {
        // SCAN_NEWLINES from jsscan.h
        flags = flags | TSF_NEWLINES;
        final int upcoming = peekToken();
        // HIDE_NEWLINES from jsscan.h
        flags = flags & ~TSF_NEWLINES;

        // Drop a pushed-back EOL so later reads do not see it.
        if (this.pushbackToken == EOL) {
            this.pushbackToken = EOF;
        }
        return upcoming;
    }

    /** Returns true if c is an ASCII letter, upper or lower case. */
    private static boolean isAlpha(int c) {
        if (c >= 'A' && c <= 'Z') {
            return true;
        }
        return c >= 'a' && c <= 'z';
    }

    /** Returns true if c is an ASCII decimal digit. */
    static boolean isDigit(int c) {
        return '0' <= c && c <= '9';
    }

    /**
     * Converts an ASCII hexadecimal digit to its numeric value.
     *
     * @param c the character to convert
     * @return 0 through 15 for a hex digit of either case, -1 otherwise
     */
    static int xDigitToInt(int c) {
        final int value;
        if (c >= '0' && c <= '9') {
            value = c - '0';
        } else if (c >= 'a' && c <= 'f') {
            value = 10 + (c - 'a');
        } else if (c >= 'A' && c <= 'F') {
            value = 10 + (c - 'A');
        } else {
            value = -1;
        }
        return value;
    }

    /**
     * Tests whether c is JavaScript whitespace as defined in ECMA:
     * space, tab, form feed, vertical tab, no-break space, or any
     * character of the Unicode space-separator category.
     *
     * jsscan.c uses C isspace() instead.  Line terminators are not
     * whitespace here; note that the code in in.read() implicitly
     * accepts carriage return (U+000D) as well.
     *
     * @param c the character to test
     * @return true if c is whitespace by the rule above
     */
    public static boolean isJSSpace(int c) {
        switch (c) {
            case ' ':
            case '\t':
            case '\f':
            case 0x0B:  // vertical tab
            case 0xA0:  // no-break space
                return true;
            default:
                return Character.getType((char) c)
                    == Character.SPACE_SEPARATOR;
        }
    }

    /**
     * Discards input up to the end of the current line.
     *
     * Reading stops at a newline or at end of input; the character that
     * stopped the loop is then unread so the caller sees it next.
     *
     * @throws IOException if reading from the input fails
     */
    private void skipLine() throws IOException {
        for (;;) {
            int ch = in.read();
            if (ch == EOF_CHAR || ch == '\n') {
                break;
            }
        }
        in.unread();
    }

    /**
     * Scans and returns the next token.
     *
     * getTokenHelper may answer RETRY_TOKEN to ask for another attempt;
     * looping here rather than recursing avoids deep recursion when many
     * consecutive lines are comments.  The position is updated once a
     * real token has been obtained.
     *
     * @return the code of the next token
     * @throws IOException if reading from the input fails
     */
    public int getToken() throws IOException {
        int token = getTokenHelper();
        while (token == RETRY_TOKEN) {
            token = getTokenHelper();
        }

        updatePosition();
        return token;
    }

    private int getTokenHelper() throws IOException {
        int c;
        tokenno++;

        // Check for pushed-back token
        if (this.pushbackToken != EOF) {
            int result = this.pushbackToken;
            this.pushbackToken = EOF;
            return result;
        }

        // Eat whitespace, possibly sensitive to newlines.
        do {
            c = in.read();
            if (c == '\n') {
                flags &= ~TSF_DIRTYLINE;
                if ((flags & TSF_NEWLINES) != 0)
                    break;
            }
        } while (isJSSpace(c) || c == '\n');

        if (c == EOF_CHAR)
            return EOF;
        if (c != '-' && c != '\n')
            flags |= TSF_DIRTYLINE;

        // identifier/keyword/instanceof?
        // watch out for starting with a backslash (Unicode escape)
        boolean identifierStart;
        boolean isUnicodeEscapeStart = false;
        if (c == '\\') {
            c = in.read();
            if (c == 'u') {
                identifierStart = true;
                isUnicodeEscapeStart = true;
                stringBufferTop = 0;
            } else {
                identifierStart = false;
                c = '\\';
                in.unread();
            }
        } else {
            identifierStart = Character.isJavaIdentifierStart((char)c);
            if (identifierStart) {
                stringBufferTop = 0;
                addToString(c);
            }
            
            // bruce: special handling of JSNI signatures
            // - it would be nice to handle Unicode escapes in the future
            //
            if (c == '@') {
              stringBufferTop = 0;
              addToString(c);
              return jsniMatchReference();
            }
        }

        if (identifierStart) {
            boolean containsEscape = isUnicodeEscapeStart;
            for (;;) {
                if (isUnicodeEscapeStart) {
                    // strictly speaking we should probably push-back
                    // all the bad characters if the uXXXX
                    // sequence is malformed. But since there isn't a
                    // correct context(is there?) for a bad Unicode
                    // escape sequence in an identifier, we can report
                    // an error here.
                    int escapeVal = 0;
                    for (int i = 0; i != 4; ++i) {
                        c = in.read();
                        escapeVal = (escapeVal << 4) | xDigitToInt(c);
                        // Next check takes care about c < 0 and bad escape
                        if (escapeVal < 0) { break; }
                    }
                    if (escapeVal < 0) {
                        reportTokenError("msg.invalid.escape", null);
                        return ERROR;
                    }
                    addToString(escapeVal);
                    isUnicodeEscapeStart = false;
                } else {
                    c = in.read();
                    if (c == '\\') {
                        c = in.read();
                        if (c == 'u') {
                            isUnicodeEscapeStart = true;
                            containsEscape = true;
                        } else {
                            reportTokenError("msg.illegal.character", null);
                            return ERROR;
                        }
                    } else {
                        if (!Character.isJavaIdentifierPart((char)c)) {
                            break;
                        }
                        addToString(c);
                    }
                }
            }
            in.unread();

               String str = getStringFromBuffer();
            if (!containsEscape) {
                // OPT we shouldn't have to make a string (object!) to
                // check if it's a keyword.

                // Return the corresponding token if it's a keyword
                int result = stringToKeyword(str);
                if (result != EOF) {
                    return result;
                }
            }
            this.string = str;
            return NAME;
        }

        // is it a number?
        if (isDigit(c) || (c == '.' && isDigit(in.peek()))) {

            stringBufferTop = 0;
            int base = 10;

            if (c == '0') {
                c = in.read();
                if (c == 'x' || c == 'X') {
                    base = 16;
                    c = in.read();
                } else if (isDigit(c)) {
                    base = 8;
                } else {
                    addToString('0');
                }
            }

            if (base == 16) {
                while (0 <= xDigitToInt(c)) {
                    addToString(c);
                    c = in.read();
                }
            } else {
                while ('0' <= c && c <= '9') {
                    /*
                     * We permit 08 and 09 as decimal numbers, which
                     * makes our behavior a superset of the ECMA
                     * numeric grammar.  We might not always be so
                     * permissive, so we warn about it.
                     */
                    if (base == 8 && c >= '8') {
                        Object[] errArgs = { c == '8' ? "8" : "9" };
                        reportTokenWarning("msg.bad.octal.literal", errArgs);
                        base = 10;
                    }
                    addToString(c);
                    c = in.read();
                }
            }

            boolean isInteger = true;

            if (base == 10 && (c == '.' || c == 'e' || c == 'E')) {
                isInteger = false;
                if (c == '.') {
                    do {
                        addToString(c);
                        c = in.read();
                    } while (isDigit(c));
                }
                if (c == 'e' || c == 'E') {
                    addToString(c);
                    c = in.read();
                    if (c == '+' || c == '-') {
                        addToString(c);
                        c = in.read();
                    }
                    if (!isDigit(c)) {
                        reportTokenError("msg.missing.exponent", null);
                        return ERROR;
                    }
                    do {
                        addToString(c);
                        c = in.read();
                    } while (isDigit(c));
                }
            }
            in.unread();
            String numString = getStringFromBuffer();

            double dval;
            if (base == 10 && !isInteger) {
                try {
                    // Use Java conversion to number from string...
                    dval = (Double.valueOf(numString)).doubleValue();
                }
                catch (NumberFormatException ex) {
                    Object[] errArgs = { ex.getMessage() };
                    reportTokenError("msg.caught.nfe", errArgs);
                    return ERROR;
                }
            } else {
                dval = ScriptRuntime.stringToNumber(numString, 0, base);
            }

            this.number = dval;

            if (isInteger) {
                return NUMBER_INT;
            }

            return NUMBER;
        }

        // is it a string?
        if (c == '"' || c == '\'') {
            // We attempt to accumulate a string the fast way, by
            // building it directly out of the reader.  But if there
            // are any escaped characters in the string, we revert to
            // building it out of a StringBuffer.

            int quoteChar = c;
            int val = 0;
            stringBufferTop = 0;

            c = in.read();
        strLoop: while (c != quoteChar) {
                if (c == '\n' || c == EOF_CHAR) {
                    in.unread();
                    reportTokenError("msg.unterminated.string.lit", null);
                    return ERROR;
                }

                if (c == '\\') {
                    // We've hit an escaped character

                    c = in.read();
                    switch (c) {
                        case 'b': c = '\b'; break;
                        case 'f': c = '\f'; break;
                        case 'n': c = '\n'; break;
                        case 'r': c = '\r'; break;
                        case 't': c = '\t'; break;

                        // \v a late addition to the ECMA spec,
                        // it is not in Java, so use 0xb
                        case 'v': c = 0xb; break;

                        case 'u': {
                            /*
                             * Get 4 hex digits; if the u escape is not
                             * followed by 4 hex digits, use 'u' + the literal
                             * character sequence that follows.
                             */
                            int escapeStart = stringBufferTop;
                            addToString('u');
                            int escapeVal = 0;
                            for (int i = 0; i != 4; ++i) {
                                c = in.read();
                                escapeVal = (escapeVal << 4) | xDigitToInt(c);
                                if (escapeVal < 0) {
                                    continue strLoop;
                                }
                                addToString(c);
                            }
                            // prepare for replace of stored 'u' sequence
                            // by escape value
                            stringBufferTop = escapeStart;
                            c = escapeVal;
                        } break;

                        case 'x': {
                            /* Get 2 hex digits, defaulting to 'x' + literal
                             * sequence, as above.
                             */
                            c = in.read();
                            int escapeVal = xDigitToInt(c);
                            if (escapeVal < 0) {
                                addToString('x');
                                continue strLoop;
                            } else {
                                int c1 = c;
                                c = in.read();
                                escapeVal = (escapeVal << 4) | xDigitToInt(c);
                                if (escapeVal < 0) {
                                    addToString('x');
                                    addToString(c1);
                                    continue strLoop;
                                } else {
                                    // got 2 hex digits
                                    c = escapeVal;
                                }
                            }
                        } break;

                        case '\n':
                            // Remove line terminator
                            c = in.read();
                            continue strLoop;

                        default: if ('0' <= c && c < '8') {
                            val = c - '0';
                            c = in.read();
                            if ('0' <= c && c < '8') {
                                val = 8 * val + c - '0';
                                c = in.read();
                                if ('0' <= c && c < '8' && val <= 037) {
                                    // c is 3rd char of octal sequence only if
                                    // the resulting val <= 0377
                                    val = 8 * val + c - '0';
                                    c = in.read();
                                }
                            }
                            in.unread();
                            c = val;
                        }
                    }
                }
                addToString(c);
                c = in.read();
            }

            this.string = getStringFromBuffer();
            return STRING;
        }

        switch (c)
        {
        case '\n': return EOL;
        case ';': return SEMI;
        case '[': return LB;
        case ']': return RB;
        case '{': return LC;
        case '}': return RC;
        case '(': return LP;
        case ')': return GWT;
        case ',': return COMMA;
        case '?': return HOOK;
        case ':': return COLON;
        case '.': return DOT;

        case '|':
            if (in.match('|')) {
                return OR;
            } else if (in.match('=')) {
                this.op = BITOR;
                return ASSIGN;
            } else {
                return BITOR;
            }

        case '^':
            if (in.match('=')) {
                this.op = BITXOR;
                return ASSIGN;
            } else {
                return BITXOR;
            }

        case '&':
            if (in.match('&')) {
                return AND;
            } else if (in.match('=')) {
                this.op = BITAND;
                return ASSIGN;
            } else {
                return BITAND;
            }

        case '=':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHEQ;
                else
                    this.op = EQ;
                return EQOP;
            } else {
                this.op = NOP;
                return ASSIGN;
            }

        case '!':
            if (in.match('=')) {
                if (in.match('='))
                    this.op = SHNE;
                else
                    this.op = NE;
                return EQOP;
            } else {
                this.op = NOT;
                return UNARYOP;
            }

        case '<':
            /* NB:treat HTML begin-comment as comment-till-eol */
            if (in.match('!')) {
                if (in.match('-')) {
                    if (in.match('-')) {
                        skipLine();
                        return RETRY_TOKEN;  // in place of 'goto retry'
                    }
                    in.unread();
                }
                in.unread();
            }
            if (in.match('<')) {
                if (in.match('=')) {
                    this.op = LSH;
                    return ASSIGN;
                } else {
                    this.op = LSH;
                    return SHOP;
                }
            } else {
                if (in.match('=')) {
                    this.op = LE;
                    return RELOP;
                } else {
                    this.op = LT;
                    return RELOP;
                }
            }

        case '>':
            if (in.match('>')) {
                if (in.match('>')) {
                    if (in.match('=')) {
                        this.op = URSH;
                        return ASSIGN;
                    } else {
                        this.op = URSH;
                        return SHOP;
                    }
                } else {
                    if (in.match('=')) {
                        this.op = RSH;
                        return ASSIGN;
                    } else {
                        this.op = RSH;
                        return SHOP;
                    }
                }
            } else {
                if (in.match('=')) {
                    this.op = GE;
                    return RELOP;
                } else {
                    this.op = GT;
                    return RELOP;
                }
            }

        case '*':
            if (in.match('=')) {
                this.op = MUL;
                return ASSIGN;
            } else {
                return MUL;
            }

        case '/':
            // is it a // comment?
            if (in.match('/')) {
                skipLine();
                return RETRY_TOKEN;
            }
            if (in.match('*')) {
                while ((c = in.read()) != -1 &&
                       !(c == '*' && in.match('/'))) {
                    ; // empty loop body
                }
                if (c == EOF_CHAR) {
                    reportTokenError("msg.unterminated.comment", null);
                    return ERROR;
                }
                return RETRY_TOKEN;  // `goto retry'
            }

            // is it a regexp?
            if ((flags & TSF_REGEXP) != 0) {
                stringBufferTop = 0;
                while ((c = in.read()) != '/') {
                    if (c == '\n' || c == EOF_CHAR) {
                        in.unread();
                        reportTokenError("msg.unterminated.re.lit", null);
                        return ERROR;
                    }
                    if (c == '\\') {
                        addToString(c);
                        c = in.read();
                    }

                    addToString(c);
                }
                int reEnd = stringBufferTop;

                while (true) {
                    if (in.match('g'))
                        addToString('g');
                    else if (in.match('i'))
                        addToString('i');
                    else if (in.match('m'))
                        addToString('m');
                    else
                        break;
                }

                if (isAlpha(in.peek())) {
                    reportTokenError("msg.invalid.re.flag", null);
                    return ERROR;
                }

                this.string = new String(stringBuffer, 0, reEnd);
                this.regExpFlags = new String(stringBuffer, reEnd,
                                              stringBufferTop - reEnd);
                return REGEXP;
            }


            if (in.match('=')) {
                this.op = DIV;
                return ASSIGN;
            } else {
                return DIV;
            }

        case '%':
            this.op = MOD;
            if (in.match('=')) {
                return ASSIGN;
            } else {
                return MOD;
            }

        case '~':
            this.op = BITNOT;
            return UNARYOP;

        case '+':
            if (in.match('=')) {
                this.op = ADD;
                return ASSIGN;
            } else if (in.match('+')) {
                return INC;
            } else {
                return ADD;
            }

        case '-':
            if (in.match('=')) {
                this.op = SUB;
                c = ASSIGN;
            } else if (in.match('-')) {
                if (0 == (flags & TSF_DIRTYLINE)) {
                    // treat HTML end-comment after possible whitespace
                    // after line start as comment-utill-eol
                    if (in.match('>')) {
                        skipLine();
                        return RETRY_TOKEN;
                    }
                }
                c = DEC;
            } else {
                c = SUB;
            }
            flags |= TSF_DIRTYLINE;
            return c;

        default:
            reportTokenError("msg.illegal.character", null);
            return ERROR;
        }
    }

    /**
     * Consumes characters for as long as they are JavaScript whitespace
     * (per isJSSpace) or newlines, then pushes back the one character that
     * ended the run so the next read returns it.
     */
    private void skipWhitespace() throws IOException {
      for (;;) {
        int ch = in.read();
        if (!isJSSpace(ch) && ch != '\n') {
          break;
        }
      }
      // The char that stopped the loop is not whitespace; hand it back.
      in.unread();
    }

    /**
     * Scans a JSNI member reference of the shape
     * <code>qualified.Type::member</code>, appending the accepted characters
     * to the string buffer.
     *
     * @return NAME, with the token string set to the whole buffer content, on
     *         success; ERROR if any part of the reference failed to match
     */
    private int jsniMatchReference() throws IOException {

      // The type name comes first; a successful match consumes up to and
      // including the first ':' when one is present.
      if (!jsniMatchQualifiedTypeName('.', ':')) {
        return ERROR;
      }

      // The second colon of "::" must follow immediately.
      int secondColon = in.read();
      if (secondColon == ':') {
        addToString(secondColon);

        // Whitespace is tolerated between "::" and the member.
        skipWhitespace();

        // The member is either a field name or a method signature.
        if (jsniMatchMethodSignatureOrFieldName()) {
          this.string = new String(stringBuffer, 0, stringBufferTop);
          return NAME;
        }
        return ERROR;
      }

      // Not a colon: push it back and complain.
      in.unread();
      reportTokenError("msg.jsni.expected.char", new String[] {":"});
      return ERROR;
    }

    /**
     * Matches the parameter list of a JSNI method signature, appending every
     * accepted character to the string buffer. The opening '(' must already
     * have been consumed by the caller.
     *
     * Two forms are accepted: the wildcard list <code>*)</code>, and a
     * sequence of parameter type signatures (as matched by
     * jsniMatchParamTypeSignature) terminated by ')'.
     *
     * @return true if the list was matched through its closing ')'; false if
     *         a syntax error was reported
     */
    private boolean jsniMatchParamListSignature() throws IOException {
      // Assume the opening '(' has already been read.
      // Read param type signatures until we see a closing ')'.

      skipWhitespace();

      // First check for the special case of * as the parameter list, indicating
      // a wildcard
      if (in.peek() == '*') {
        addToString(in.read());
        if (in.peek() != ')') {
          // The offending char was only peeked, so it is still unread. Fail
          // here rather than appending an arbitrary char and reporting success.
          reportTokenError("msg.jsni.expected.char", new String[] {")"});
          return false;
        }
        addToString(in.read());
        return true;
      }

      // Otherwise, loop through reading one param type at a time
      do {
        // Skip whitespace between parameters.
        skipWhitespace();

        int c = in.read();

        if (c == ')') {
          // Finished successfully.
          //
          addToString(c);
          return true;
        }

        // Not the end of the list: let the type matcher re-read this char.
        in.unread();
      } while (jsniMatchParamTypeSignature());

      // If we made it here, we can assume that there was an invalid type
      // signature that was already reported and that the offending char
      // was already unread.
      //
      return false;
    }

    /**
     * Matches a single parameter type signature and appends it to the string
     * buffer. Accepted forms are a one-letter primitive id (one of
     * Z B C S I J F D), an 'L' followed by a '/'-separated type name that
     * ends at ';', or a '[' followed by another type signature.
     *
     * @return true if a type signature was matched; false if the next char
     *         cannot start one (it is unread and an error is reported) or a
     *         nested match failed
     */
    private boolean jsniMatchParamTypeSignature() throws IOException {
      final int lead = in.read();

      if (lead == 'L') {
        // Class/Interface type prefix.
        addToString(lead);
        return jsniMatchQualifiedTypeName('/', ';');
      }

      if (lead == '[') {
        // Array type prefix.
        addToString(lead);
        return jsniMatchParamArrayTypeSignature();
      }

      if ("ZBCSIJFD".indexOf(lead) >= 0) {
        // Primitive type id.
        addToString(lead);
        return true;
      }

      // Anything else cannot begin a parameter type.
      in.unread();
      reportTokenError("msg.jsni.expected.param.type", null);
      return false;
    }

    /**
     * Matches the remainder of an array parameter type whose leading '[' has
     * already been consumed: whatever follows must itself be a parameter type
     * signature (possibly another array).
     *
     * @return the result of matching the element type signature
     */
    private boolean jsniMatchParamArrayTypeSignature() throws IOException {
      final boolean elementTypeMatched = jsniMatchParamTypeSignature();
      return elementTypeMatched;
    }

    /**
     * Matches the member part of a JSNI reference: a Java identifier,
     * optionally followed by a parenthesized parameter list. Accepted
     * characters are appended to the string buffer.
     *
     * @return true if a field name or a complete method signature was
     *         matched; false if no identifier was found or the parameter list
     *         was invalid (an error has been reported in both cases)
     */
    private boolean jsniMatchMethodSignatureOrFieldName() throws IOException {
      int first = in.read();

      // The member name has to begin with an identifier-start char.
      if (!Character.isJavaIdentifierStart((char)first)) {
        in.unread();
        reportTokenError("msg.jsni.expected.identifier", null);
        return false;
      }
      addToString(first);

      // Swallow the remaining identifier chars.
      int next = in.read();
      while (Character.isJavaIdentifierPart((char)next)) {
        addToString(next);
        next = in.read();
      }

      if (next != '(') {
        // Some other char: it ends the token and belongs to whatever follows.
        in.unread();
        return true;
      }

      // A '(' opens a JSNI method signature; the callee reports any error.
      addToString(next);
      return jsniMatchParamListSignature();
    }

    /**
     * Matches a qualified type name one identifier segment at a time, such as
     * the name that should appear after the '@' in a JSNI reference. Every
     * accepted character is appended to the string buffer. A segment may be
     * followed by any number of "[]" pairs.
     *
     * @param sepChar the character that separates the Java idents
     *        (either a '.' or '/')
     * @param endChar the character that indicates the end of the qualified
     *        type name; it is consumed and appended when encountered
     * @return false if a segment does not begin with an identifier-start char
     *         (the char is unread and an error is reported); true otherwise,
     *         including when some other char ends the name (that char is
     *         unread)
     */
    private boolean jsniMatchQualifiedTypeName(char sepChar, char endChar) 
        throws IOException {
      int ch = in.read();

      // Each segment, nested or not, must open with an ident start.
      if (!Character.isJavaIdentifierStart((char)ch)) {
        in.unread();
        reportTokenError("msg.jsni.expected.identifier", null);
        return false;
      }
      addToString(ch);

      // Take the rest of this identifier.
      ch = in.read();
      while (Character.isJavaIdentifierPart((char)ch)) {
        addToString(ch);
        ch = in.read();
      }

      // Array-type reference: accept "[]" pairs; a lone '[' is left for the
      // classification below.
      while (ch == '[' && ']' == in.peek()) {
        addToString('[');
        addToString(in.read());
        ch = in.read();
      }

      if (ch == sepChar) {
        // Another segment follows; the nested call reports its own errors.
        addToString(ch);
        return jsniMatchQualifiedTypeName(sepChar, endChar);
      }

      if (ch == endChar) {
        // Matched everything up to the specified end char.
        addToString(ch);
        return true;
      }

      // An unknown char finishes the token; leave it for the caller.
      in.unread();
      return true;
    }
    
    /**
     * Builds a String from the characters accumulated so far in the string
     * buffer (positions 0 up to, but excluding, stringBufferTop).
     */
    private String getStringFromBuffer() {
        final int length = stringBufferTop;
        return String.valueOf(stringBuffer, 0, length);
    }

    /**
     * Appends one character to the string buffer, doubling the backing array
     * first when it is full.
     *
     * @param c the character to append; it is narrowed to a char
     */
    private void addToString(int c) {
        final int slot = stringBufferTop;
        if (slot == stringBuffer.length) {
            // Buffer is full: move the contents into an array twice the size.
            char[] grown = new char[2 * stringBuffer.length];
            System.arraycopy(stringBuffer, 0, grown, 0, slot);
            stringBuffer = grown;
        }
        stringBufferTop = slot + 1;
        stringBuffer[slot] = (char)c;
    }

    /**
     * Reports a syntax error spanning secondToLastPosition to lastPosition.
     *
     * Each position holds the offset of the end of a token, so lastPosition
     * is the offset of the char right after the last token read. Errors
     * raised outside of TokenStream usually want to point at the beginning
     * of the erroneous token, which is the end of the second-to-last token
     * read; hence the range used here.
     *
     * @param messageProperty key of the message to look up
     * @param args arguments for the message, may be null
     */
    public void reportSyntaxError(String messageProperty, Object[] args) {
        Context.reportError(Context.getMessage(messageProperty, args),
                            secondToLastPosition,
                            lastPosition);
    }

    /**
     * Reports an error found while scanning a token. Such errors are raised
     * before the token has been completely read, so the reported range runs
     * from lastPosition to the scanner's current line and offset.
     *
     * @param messageProperty key of the message to look up
     * @param args arguments for the message, may be null
     * @see #reportSyntaxError
     */
    private void reportTokenError(String messageProperty, Object[] args) {
        final String text = Context.getMessage(messageProperty, args);
        final CodePosition from = lastPosition;
        final CodePosition to = new CodePosition(getLineno(), getOffset());
        Context.reportError(text, from, to);
    }

    /**
     * Reports a warning found while scanning a token, using the same range as
     * token errors: from lastPosition to the scanner's current line and
     * offset.
     *
     * @param messageProperty key of the message to look up
     * @param args arguments for the message, may be null
     */
    private void reportTokenWarning(String messageProperty, Object[] args) {
        final String text = Context.getMessage(messageProperty, args);
        final CodePosition from = lastPosition;
        final CodePosition to = new CodePosition(getLineno(), getOffset());
        Context.reportWarning(text, from, to);
    }

    /**
     * Records the scanner's current line and offset as the newest known
     * position (for error reporting), shifting the previous one into
     * secondToLastPosition. Nothing changes unless the current position
     * compares strictly greater than lastPosition.
     */
    private void updatePosition() {
        final CodePosition now = new CodePosition(getLineno(), getOffset());
        if (now.compareTo(lastPosition) <= 0) {
            // No forward progress since the last update.
            return;
        }
        secondToLastPosition = lastPosition;
        lastPosition = now;
    }

    /** Returns the source name stored in this stream. */
    public String getSourceName() {
        return sourceName;
    }

    /** Returns the line number reported by the underlying line buffer. */
    public int getLineno() {
        return in.getLineno();
    }

    /** Returns the operator code most recently stored by the scanner. */
    public int getOp() {
        return op;
    }

    /** Returns the string value most recently stored by the scanner. */
    public String getString() {
        return string;
    }

    /** Returns the value of the number field. */
    public double getNumber() {
        return number;
    }

    /** Returns the line text reported by the underlying line buffer. */
    public String getLine() {
        return in.getLine();
    }

    /** Returns the offset reported by the underlying line buffer. */
    public int getOffset() {
        return in.getOffset();
    }

    /** Returns the value of the tokenno field. */
    public int getTokenno() {
        return tokenno;
    }

    /** Returns whether the underlying line buffer reports end of input. */
    public boolean eof() {
        return in.eof();
    }

    // instance variables

    // Character source that the scanner reads from, peeks at and unreads to.
    private LineBuffer in;


    /* Bit flags tested and set by the scanner (TSF_REGEXP, TSF_DIRTYLINE,
     * etc.).
     * Should this be manipulated by getter/setter functions?
     * Should it be passed to getToken()?
     */
    int flags;
    // Flag characters (g, i, m) collected after the closing '/' of a regexp
    // literal.
    String regExpFlags;

    private String sourceName;
    private int pushbackToken;
    private int tokenno;

    // The two most recent positions recorded by updatePosition(); used as the
    // ranges for error and warning reports.
    CodePosition secondToLastPosition;
    CodePosition lastPosition;

    // Operator code that accompanies tokens such as ASSIGN, EQOP, RELOP,
    // SHOP and UNARYOP.
    private int op;

    // Set this to an initial non-null value so that the Parser has
    // something to retrieve even if an error has occurred and no
    // string is found.  Fosters one class of error, but saves lots of
    // code.
    private String string = "";
    private double number;

    // Scratch buffer in which token text is accumulated; addToString doubles
    // it when full. stringBufferTop is the number of chars currently in use.
    private char[] stringBuffer = new char[128];
    private int stringBufferTop;
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy