All Downloads are FREE. Search and download functionalities are using the official Maven repository.

opu.lljvm-python.0.2.0-EXPERIMENTAL.source-code.Scanner Maven / Gradle / Ivy

The newest version!
/* --- Copyright Jonathan Meyer 1996. All rights reserved. -----------------
 > File:        jasmin/src/jasmin/Scanner.java
 > Purpose:     Tokenizer for Jasmin
 > Author:      Jonathan Meyer, 10 July 1996
 */

/* Scanner.java - class for tokenizing Jasmin files. This is rather
 * cheap and cheerful.
*/

package jasmin;

import jas.*;
import java_cup.runtime.*;
import java.util.*;
import java.io.Reader;

//
// Tokenizer for Jasmin assembler source. Characters are pulled from a
// Reader one at a time through advance(), with next_char acting as a
// single character of lookahead; next_token() turns them into tokens.
//
class Scanner {
    Reader inp;

    // single lookahead character (-1 once end of input has been reached)
    int next_char;

    // temporary buffer holding the characters of the token being scanned;
    // it is grown on demand by chars_expand()
    char chars[];

    // Initial capacity of the token buffer. The current capacity is always
    // taken from chars.length (per instance) instead of a shared static
    // counter, so one scanner growing its buffer cannot invalidate the
    // bounds check of another scanner.
    private static final int INITIAL_CHARS_SIZE = 512;

    // Whitespace characters
    static final String WHITESPACE = " \n\t\r";

    // Separator characters
    static final String SEPARATORS = WHITESPACE + ":=";

    /*
    // Character can be present in signature
    static final String SIGCHARS = ";:()[/.^*+-<>@";
    */

    // used for error reporting to print out where an error is on the line
    public int line_num, token_line_num, char_num, int_char_num, int_line_num;
    public StringBuffer line;
    public String int_line;

    // used by the .set directive to define new variables.
    // next_token() looks up "$name" words here and returns the stored
    // value cast to token.
    public Hashtable dict = new Hashtable();

    //
    // returns true if a character code is a whitespace character
    //
    protected static boolean whitespace(int c) {
        return (WHITESPACE.indexOf(c) != -1);
    }

    //
    // returns true if a character code is a separator character
    // (end of input, i.e. -1, also counts as a separator)
    //
    protected static boolean separator(int c) {
        return (c == -1 || SEPARATORS.indexOf(c) != -1);
    }


    //
    // Advances the input by one character, updating next_char and the
    // line/column bookkeeping used for error reporting.
    //
    protected void advance() throws java.io.IOException
    {
        next_char = inp.read();
        switch (next_char) {
        case -1:  // EOF
            if (char_num == 0) {
                // the last line was terminated: report the real EOF
                char_num = -1;
                break;
            }
            // the last line has no EOL: hand out a synthetic newline first;
            // the following call lands in the branch above
            next_char = '\n';
            // fall through
        case '\n': // a new line
            line_num++;
            char_num = 0;
            break;
        default:
            line.append((char)next_char);
            char_num++;
            return;
        }
        // a line ended (or input ended): start collecting a fresh line
        line.setLength(0);
    }

    //
    // initialize the scanner and position it on the first character that
    // is not part of a leading blank line or comment
    //
    // throws jasError if the input holds nothing but blank lines/comments
    //
    public Scanner(Reader i) throws java.io.IOException, jasError
    {
        inp = i;
        line_num = 1;
        char_num = 0;
        line = new StringBuffer();
        chars = new char[INITIAL_CHARS_SIZE];
        next_char = 0;  // no start comment
        skip_empty_lines();
        if ( next_char == -1 )
            throw new jasError("empty source file");
    }

    //
    // doubles the capacity of the token buffer, keeping its contents
    //
    private void chars_expand()
    {
        char temp[] = new char[chars.length * 2];
        System.arraycopy(chars, 0, temp, 0, chars.length);
        chars = temp;
    }

    //
    // stores c at chars[pos], growing the buffer when it becomes full,
    // and returns the position for the next character
    //
    private int put_char(int pos, char c)
    {
        chars[pos] = c;
        pos++;
        if (pos == chars.length) chars_expand();
        return pos;
    }

    //
    // skips whitespace (including newlines) and ';' comments until
    // next_char is the first character of real content, or -1 at EOF
    //
    private void skip_empty_lines() throws java.io.IOException
    {
        for (;;) {
            if (next_char != ';') {
                do { advance(); } while (whitespace(next_char));
                if (next_char != ';')
                    return;
            }
            // inside a comment: discard the rest of the line
            do {
                advance();
                if (next_char == -1)
                   return;
            } while (next_char != '\n');
        }
    }

    //
    // reads the four hex digits of a '\\u' escape and returns the
    // character they encode; returns 0 with next_char == -1 if the input
    // ends inside the escape
    //
    // throws jasError on a non-hexadecimal digit
    //
    private char uniEscape()
                throws java.io.IOException, jasError
    {
        int res = 0;
        for(int i = 0; i < 4; i++) {
            advance();
            if(next_char == -1)
                return 0;

            int tmp = Character.digit((char)next_char, 16);
            if (tmp == -1)
                throw new jasError("Bad '\\u' escape sequence");
            res = (res << 4) | tmp;
        }
        return (char)res;
    }

    //
    // handles a backslash met inside a name: only '\\u' escapes are
    // accepted there. Returns the decoded character.
    //
    private char nameEscape()
                throws java.io.IOException, jasError
    {
        advance();
        if (next_char != 'u')
            throw new jasError("Only '\\u' escape sequence allowed in names");
        char chval = uniEscape();
        if (next_char == -1)
            throw new jasError("Left over '\\u' escape sequence");
        /*
        if (   SIGCHARS.indexOf(chval) == -1
            && (   !Character.isJavaIdentifierPart(chval)
                || Character.isIdentifierIgnorable(chval)))
        {
            throw new jasError("Invalid unicode char from name/signature");
        }
        */
        return chval;
    }

    //
    // recognize and return the next complete token
    //
    public token next_token()
                throws java.io.IOException, jasError
    {
        token_line_num = line_num;

        // only the whitespace case loops; every other case returns
        for (;;) switch (next_char) {
            case ';':  // a comment
            case '\n':
                // return single SEP token (skip multiple newlines
                // interspersed with whitespace or comments)
                skip_empty_lines();
                token_line_num = line_num;
                return new token(sym.SEP);

            case -1:                // EOF token
                char_num = -1;
                return new token(sym.EOF);

            case '-': case '+':
            case '0': case '1': case '2': case '3': case '4':
            case '5': case '6': case '7': case '8': case '9':
            case '.':               // a number (or a directive)
                return scan_number();

            case '"':               // quoted string
                return scan_string();

            case '\'':              // quotation for overloading reserved words
                return scan_quoted_name();

            case ' ':
            case '\t':
            case '\r':              // whitespace
                advance();
                break;

            case '=':               // EQUALS token
                advance();
                return new token(sym.EQ);

            case ':':               // COLON token
                advance();
                return new token(sym.COLON);

            default:
                return scan_word();
        } /* switch and for */
    }

    //
    // scans a token starting with a digit, sign or '.': either a reserved
    // word such as a directive, an integer, or another kind of number
    //
    private token scan_number()
                throws java.io.IOException, jasError
    {
        int pos = 0;
        do {
            pos = put_char(pos, (char)next_char);
            advance();
        } while (!separator(next_char));

        String str = new String(chars, 0, pos);

        // This catches directives like ".method"
        token tok = ReservedWords.get(str);
        if (tok != null)
            return tok;

        Number num;
        try {
            num = ScannerUtils.convertNumber(str);
        } catch (NumberFormatException e) {
            if (chars[0] != '.')
                throw new jasError("Badly formatted number");
            throw new jasError("Unknown directive or badly formed number.");
        }

        if (num instanceof Integer) {
            // remember where this integer was seen, for error reporting
            int_line     = line.toString();
            int_line_num = token_line_num;
            int_char_num = char_num;
            return new int_token(sym.Int, num.intValue());
        }
        return new num_token(sym.Num, num);
    }

    //
    // scans a double-quoted string, decoding backslash escapes
    // (\n \r \t \f \b \" \' \\, \\uXXXX and up to three octal digits)
    //
    // throws jasError on an unterminated string or an unknown escape
    //
    private token scan_string()
                throws java.io.IOException, jasError
    {
        // true when next_char already holds the character to look at,
        // so the loop must not advance before examining it
        boolean already = false;
        for (int pos = 0; ; ) {
            if (already) already = false;
            else advance();

            if (next_char == '"') {
                advance(); // skip close quote
                return new str_token(sym.Str, new String(chars, 0, pos));
            }

            if (next_char == -1)
                throw new jasError("Unterminated string");

            char chval = (char)next_char;

            if (chval == '\\') {
                advance();
                switch (next_char) {
                case -1: already = true; continue;
                case 'n':   chval = '\n'; break;
                case 'r':   chval = '\r'; break;
                case 't':   chval = '\t'; break;
                case 'f':   chval = '\f'; break;
                case 'b':   chval = '\b'; break;
                case '"' :  chval = '"';  break;
                case '\'' : chval = '\''; break;
                case '\\' : chval = '\\'; break;

                case 'u':
                    chval = uniEscape();
                    if (next_char == -1) {
                        already = true;
                        continue;
                    }
                    break;

                case '0': case '1': case '2': case '3':
                case '4': case '5': case '6': case '7':
                {
                    // one to three octal digits; a digit that is not
                    // consumed is left in next_char for the next round
                    int res = next_char&7;
                    advance();
                    if (next_char < '0' || next_char > '7')
                        already = true;
                    else {
                        res = res*8 + (next_char&7);
                        advance();
                        if (next_char < '0' || next_char > '7')
                            already = true;
                        else {
                            int val = res*8 + (next_char&7);
                            if (val >= 0x100)
                                already = true; // would not fit in 8 bits
                            else
                                res = val;
                        }
                    }
                    chval = (char)res;
                }
                break;

                default:
                    throw new jasError("Bad backslash escape sequence");
                }
            }
            pos = put_char(pos, chval);
        }
    }

    //
    // scans a ''-enclosed name, which lets a reserved word be used as an
    // ordinary Word token
    //
    private token scan_quoted_name()
                throws java.io.IOException, jasError
    {
        for (int pos = 0; ; ) {
            advance();
            if (separator(next_char))
                throw new jasError("Unterminated ''-enclosed name");
            if (next_char == '\'') {
                if (pos == 0)
                    throw new jasError("Empty ''-enclosed name");
                advance(); // skip close quote
                if (!separator(next_char))
                    throw new jasError("Not separator after ''-enclosed name");
                return new str_token(sym.Word, new String(chars, 0, pos));
            }
            char chval = (char)next_char;
            if (next_char == '\\')
                chval = nameEscape();
            pos = put_char(pos, chval);
        }
    }

    //
    // scans any other token: reads up to the next separator and
    // classifies the text as a reserved word, a JVM instruction, a
    // "$+n"/"$-n" relative offset, a "$name" variable or a plain word
    //
    private token scan_word()
                throws java.io.IOException, jasError
    {
        int pos = 0;
        // set once a '\\u' escape was seen: such text is always a plain word
        boolean only_name = false;

        do {
            char chval = (char)next_char;
            if (next_char == '\\') {
                chval = nameEscape();
                only_name = true;
            }
            pos = put_char(pos, chval);
            advance();
        } while (!separator(next_char));

        String str = new String(chars, 0, pos);

        if (!only_name) {
            // Jasmin keyword or directive ?
            token tok = ReservedWords.get(str);
            if (tok != null)
                return tok;

            // its a JVM instruction ?
            if (InsnInfo.contains(str))
                return new str_token(sym.Insn, str);

            if (str.charAt(0) == '$') {
                String s = str.substring(1);
                // a lone "$" leaves s empty; do not index into it
                char first = (s.length() == 0) ? '\0' : s.charAt(0);
                if (first == '-' || first == '+')
                    return relative_offset(s.substring(1), first == '-');

                // Perform variable substitution
                Object v = dict.get(s);
                if (v != null)
                    return (token)v;
            } // not begin from '$'
        } // !only_name

        // Unrecognized string token (e.g. a classname)
        return new str_token(sym.Word, str);
    }

    //
    // builds the token for a "$+n" / "$-n" relative offset. digits is the
    // text after the sign, decimal or hexadecimal with a "0x" prefix;
    // neg is true when the sign was '-'
    //
    private token relative_offset(String digits, boolean neg)
                throws jasError
    {
        int radix = 10;
        if (digits.startsWith("0x")) {
            radix = 16;
            digits = digits.substring(2);
        }
        int n;
        try {
            n = Integer.parseInt(digits, radix);
        } catch (NumberFormatException e) {
            throw new jasError("Badly relative offset number");
        }
        if (neg)
            n = -n;
        return new relative_num_token(sym.Relative, n);
    }

}

/* --- Revision History ---------------------------------------------------
--- Iouri Kharon, Mar 13 2006
    Added support for '\\u' escape sequence in name/signature
    Added '' enclosed names (overload of reserved words)
--- Iouri Kharon, Feb 17 2006
    Removed infinite loop when the last line in the source file does not have an EOL
--- Iouri Kharon, Dec 19 2005
    Added '\\u' escape sequence
    Change '\octal' escape sequence
    Added very long string support
--- Daniel Reynaud, Oct 19 2005
    Added '\\' escape sequence
--- Jonathan Meyer, Feb 8 1997
    Converted to be non-static
--- Jonathan Meyer, Oct 30 1996
    Added support for more \ escapes in quoted strings (including octals).
--- Jonathan Meyer, Oct 1 1996
    Added .interface and .implements
--- Jonathan Meyer, July 25 1996
    changed IN to IS. Added token_line_num, which is the line number of the
    last token returned by next_token().
--- Jonathan Meyer, July 24 1996 added mods to recognize '\r' as whitespace.
*/




© 2015 - 2024 Weber Informatics LLC | Privacy Policy