
// com.tangosol.dev.compiler.java.Tokenizer Maven / Gradle / Ivy
/*
* Copyright (c) 2000, 2020, Oracle and/or its affiliates.
*
* Licensed under the Universal Permissive License v 1.0 as shown at
* http://oss.oracle.com/licenses/upl.
*/
package com.tangosol.dev.compiler.java;
import com.tangosol.dev.compiler.CompilerException;
import com.tangosol.dev.compiler.CompilerErrorInfo;
import com.tangosol.dev.compiler.ParsePosition;
import com.tangosol.dev.compiler.Script;
import com.tangosol.util.Base;
import com.tangosol.util.Dequeue;
import com.tangosol.util.ErrorList;
import com.tangosol.util.HashHelper;
import java.io.EOFException;
import java.io.IOException;
import java.util.Hashtable;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.lang.Character;
/**
* A unicode script tokenizer for the java language, originally inspired by
* the java.util package's StringTokenizer class.
*
* TODO add support for comment parsing
*
* @version 1.00, 11/21/96
* @author Cameron Purdy
*/
public class Tokenizer
extends Base
implements com.tangosol.dev.compiler.Tokenizer, Constants
{
// ----- constructors ---------------------------------------------------
/**
 * Constructs a Tokenizer that uses the default option values and has no
 * script yet.
 * <p>
 * A script must be supplied through {@link #setScript(Script, ErrorList)}
 * before any tokens can be requested.
 */
public Tokenizer()
{
    // no Properties object was supplied, so only the defaults apply
    initOptions((Properties) null);
}
/**
 * Constructs a Tokenizer over the passed script text, using the default
 * option values.
 *
 * @param s        the string which contains the Java script to tokenize
 * @param iLine    the beginning line number
 * @param errlist  the error list which the tokenizer will log to
 *
 * @exception CompilerException  if a lexical error (such as an invalid
 *            unicode escape sequence) is hit while skipping to the first
 *            token
 */
public Tokenizer(String s, int iLine, ErrorList errlist)
        throws CompilerException
{
    // delegate to the full constructor without an options object
    this(s, iLine, errlist, (Properties) null);
}
/**
* Constructs a Tokenizer over the passed script text with the specified
* options. The beginning line number is 0; all work is delegated to the
* four-argument constructor.
*
* @param s the string which contains the Java script to tokenize
* @param errlist the error list which the tokenizer will log to
* @param propOpts a java.util.Properties object containing options; may be
* null, in which case the default option values are used
*
* @exception CompilerException If a lexical error (such as an invalid
* unicode escape sequence) is encountered while skipping to the
* first token
*/
public Tokenizer(String s, ErrorList errlist, Properties propOpts)
throws CompilerException
{
// line numbering starts at 0 when the caller does not specify a line
this(s, 0, errlist, propOpts);
}
/**
* Constructs a Tokenizer.
*
* @param s the string which contains the Java script to tokenize
* @param iLine the beginning line number
* @param errlist the error list which the tokenizer will log to
* @param propOpts a java.util.Properties object containing options
*
* @exception CompilerException If the script tokenizer encounters an
* invalid unicode escape sequence
*/
public Tokenizer(String s, int iLine, ErrorList errlist, Properties propOpts)
throws CompilerException
{
initOptions(propOpts);
setScript(new UnicodeScript(s, iLine), errlist);
}
// ----- Tokenizer options ----------------------------------------------
/**
 * Initialize the tokenizer options from an optional Properties object.
 * <p>
 * For backwards compatibility, proprietary extensions default to enabled
 * and comment parsing defaults to disabled; a property only overrides its
 * default when it carries a recognizable boolean value.
 *
 * @param propOpts  a java.util.Properties object containing options, or
 *                  null to use the defaults
 */
private void initOptions(Properties propOpts)
{
    final boolean fHasOpts = propOpts != null;

    m_fExtensionsEnabled = fHasOpts
            ? extractBooleanOption(propOpts, OPT_PARSE_EXTENSIONS, true)
            : true;

    m_fCommentsEnabled = fHasOpts
            ? extractBooleanOption(propOpts, OPT_PARSE_COMMENTS, false)
            : false;
}
/**
 * Extract a boolean option value from a Properties object.
 * <p>
 * Only the first non-whitespace character of the property value is
 * examined: 'T', 't', 'Y', 'y' and '1' mean true; 'F', 'f', 'N', 'n' and
 * '0' mean false. A missing, empty, whitespace-only or unrecognized value
 * yields the default.
 *
 * @param propOpts  the java.util.Properties object (must not be null)
 * @param sName     the option property name
 * @param fDefault  the default option value
 *
 * @return true or false if the value is specified, otherwise the value of
 *         the fDefault parameter
 */
private static boolean extractBooleanOption(Properties propOpts, String sName, boolean fDefault)
{
    String sValue = propOpts.getProperty(sName);
    if (sValue == null)
    {
        return fDefault;
    }

    // trim BEFORE testing for emptiness; a whitespace-only value would
    // otherwise pass the length test and then fail in charAt(0)
    sValue = sValue.trim();
    if (sValue.length() == 0)
    {
        return fDefault;
    }

    switch (sValue.charAt(0))
    {
    case 'T': // TRUE or True
    case 't': // true
    case 'Y': // YES or Yes
    case 'y': // yes or y
    case '1': // integer representation of true
        return true;

    case 'F': // FALSE or False
    case 'f': // false
    case 'N': // NO or No
    case 'n': // no
    case '0': // integer representation of false
        return false;

    default:
        // unrecognized value: behave as if the option was not specified
        return fDefault;
    }
}
/**
 * Report whether proprietary language extensions (for example the "as"
 * keyword) are recognized by this tokenizer.
 *
 * @return true if the tokenizer will parse proprietary extensions
 */
public boolean isExtensionsEnabled()
{
    return this.m_fExtensionsEnabled;
}
/**
 * Switch recognition of proprietary language extensions (for example the
 * "as" keyword) on or off.
 *
 * @param f  pass true to cause the tokenizer to parse proprietary
 *           extensions or pass false to follow the JLS strictly
 */
public void setExtensionsEnabled(boolean f)
{
    this.m_fExtensionsEnabled = f;
}
/**
 * Report the current value of the "parse comments" option, which is
 * meant to select between returning comments as tokens and discarding
 * them.
 *
 * @return true if the tokenizer will return comments as tokens
 */
public boolean isCommentsEnabled()
{
    return this.m_fCommentsEnabled;
}
/**
 * Set the "parse comments" option, which is meant to select between
 * returning comments as tokens and discarding them.
 *
 * @param f  pass true to cause the tokenizer to return comments as tokens
 *           or false to cause the tokenizer to discard comments
 */
public void setCommentsEnabled(boolean f)
{
    this.m_fCommentsEnabled = f;
}
// ----- Toker interface ------------------------------------------------
/**
 * Initializes the tokenizer with the script to tokenize. This method must
 * be called exactly one time per tokenizer; it also creates the name
 * table and the put-back queue and then skips ahead to the first token.
 *
 * @param script   the script to tokenize
 * @param errlist  the list to log errors to
 *
 * @exception IllegalStateException  if a script has already been set
 * @exception CompilerException  if a lexical error occurs while skipping
 *            to the first token that should stop the compilation process
 */
public void setScript(Script script, ErrorList errlist)
        throws CompilerException
{
    // the tokenizer is single-use: refuse a second script
    if (m_script != null)
    {
        throw new IllegalStateException();
    }

    m_script   = script;
    m_errlist  = errlist;
    m_tblNames = new Hashtable();
    m_dq       = new Dequeue();

    // position the script on the first character of the first token
    eatFluff();
}
/**
 * Checks for more tokens in the script.
 * <p>
 * A token is available if one has been put back and not yet re-read, or
 * if the script itself still has characters left.
 *
 * @return true if tokenizing of the script is incomplete
 */
public boolean hasMoreTokens()
{
    // put-back tokens take precedence; otherwise ask the script
    return m_dq.hasMoreElements() || m_script.hasMoreChars();
}
/**
 * Eats and returns the next token from the script.
 * <p>
 * Tokens that were put back are returned first. Otherwise a fresh token
 * is parsed from the script and any following whitespace and comments
 * are skipped.
 *
 * @return the next token
 *
 * @exception NoSuchElementException  If the tokens are exhausted
 * @exception CompilerException  If a lexical error occurs that should
 *            stop the compilation process
 */
public com.tangosol.dev.compiler.Token nextToken()
        throws CompilerException
{
    // previously "put back" tokens are served before parsing any further
    if (m_dq.hasMoreElements())
    {
        return (Token) m_dq.nextElement();
    }

    if (!m_script.hasMoreChars())
    {
        throw new NoSuchElementException();
    }

    Token tokNext = eatToken();
    eatFluff();
    return tokNext;
}
/**
* Regurgitates a token so that it will be returned again by a subsequent
* call to nextToken. The token is handed to the put-back queue, which
* nextToken consults before parsing further script text. (This method can
* be called more than once to regurgitate multiple tokens.)
*
* @param tok the token to put back; NOTE(review): presumably expected to be
* the token most recently returned by nextToken -- this method does
* not verify that
*
* @exception NoSuchElementException as thrown by the underlying dequeue;
* per the original documentation, on an attempt to back up past
* the beginning of the script -or- when the dequeue was not
* constructed large enough to hold the number of entries that
* have been put back (not verifiable from this method alone)
*/
public void putBackToken(com.tangosol.dev.compiler.Token tok)
{
m_dq.putBackElement(tok);
}
/**
 * Returns an object that can be used to restore the current position
 * in the script. This is similar to the mark method of the Java stream
 * classes, but because the position is returned as an object, multiple
 * positions can be saved and later returned to.
 * <p>
 * The snapshot holds a copy of the put-back queue and the script's own
 * saved position, and is tagged with this tokenizer as its owner.
 *
 * @return an object which identifies the current position within the
 *         script
 */
public ParsePosition savePosition()
{
    Dequeue  dqCopy    = (Dequeue) m_dq.clone();
    Position posSaved  = new Position();

    posSaved.dq        = dqCopy;
    posSaved.scriptpos = m_script.savePosition();
    posSaved.toker     = this;

    return posSaved;
}
/**
 * Restores the current parsing position that was returned from
 * the savePosition method.
 * <p>
 * The saved put-back queue is cloned, not installed directly, so that
 * subsequent token operations do not modify the saved position; the same
 * position object can therefore be restored more than once.
 *
 * @param parsepos  a position object previously returned by
 *                  {@link #savePosition()} of this tokenizer
 *
 * @exception IllegalArgumentException  if the position was created by a
 *            different tokenizer
 */
public void restorePosition(ParsePosition parsepos)
{
    Position pos = (Position) parsepos;
    if (pos.toker != this)
    {
        throw new IllegalArgumentException("Unknown ParsePosition object!");
    }

    m_script.restorePosition(pos.scriptpos);

    // work on a private copy: installing pos.dq itself would let later
    // nextToken/putBackToken calls corrupt the saved snapshot
    m_dq = (Dequeue) pos.dq.clone();
}
/**
 * Get the current line number, as reported by the underlying script.
 *
 * @return the line number of the next character in the script
 */
public int getLine()
{
    return this.m_script.getLine();
}
// ----- script parsing -------------------------------------------------
/**
 * Eats whitespace and comments until the next java token is encountered
 * or the end of the script is reached, whichever comes first. On return
 * the script is positioned on the first character of the next token.
 * <p>
 * Comments are always discarded here; this method does not consult the
 * "parse comments" option.
 *
 * @exception CompilerException  Thrown (after logging the cause) for an
 *            unexpected end-of-file, e.g. a non-terminated multi-line
 *            comment, for an invalid unicode escape, or for any other
 *            I/O failure reported by the script
 */
protected void eatFluff()
        throws CompilerException
{
    try
    {
        while (m_script.hasMoreChars())
        {
            char ch = m_script.nextChar();

            // LineTerminator (3.4), Sub (3.5) and WhiteSpace (3.6) are
            // all skipped: new line, end-of-file marker, h-tab, form
            // feed, carriage return and space
            if (ch == '\n' || ch == Script.SUB || ch == '\t'
                    || ch == '\f' || ch == '\r' || ch == ' ')
            {
                continue;
            }

            if (ch != '/')
            {
                // a token has been encountered
                m_script.putBackChar();
                return;
            }

            // Comments (3.7): the character after the slash decides
            char chNext = m_script.nextChar();
            if (chNext == '*')
            {
                eatMultiLineComment();
            }
            else if (chNext == '/')
            {
                eatSingleLineComment();
            }
            else
            {
                // not a comment after all: un-read both characters so
                // that the slash is parsed as the start of a token
                m_script.putBackChar();
                m_script.putBackChar();
                return;
            }
        }
    }
    catch (EOFException e)
    {
        logError(ERROR, ERR_UNEXPECTED_EOF, null,
                m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
    catch (UnicodeDataFormatException e)
    {
        logError(ERROR, ERR_UNICODE_ESCAPE, null,
                m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
    catch (IOException e)
    {
        logError(ERROR, ERR_UNEXPECTED_IO, new String[] {e.toString()},
                m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
}
/**
 * Eats everything up to and including the end of a multi-line comment.
 * Note that the opening portion of the comment ('/' + '*') has already
 * been eaten.
 * <p>
 * The loop has no end-of-script test of its own; it ends either at the
 * closing '*' + '/' or when the script object throws.
 *
 * @exception IOException  as thrown by the script object
 */
protected void eatMultiLineComment()
        throws IOException
{
    for (;;)
    {
        if (m_script.nextChar() != '*')
        {
            continue;
        }

        if (m_script.nextChar() == '/')
        {
            return;
        }

        // the character after the '*' may itself be a '*' that starts
        // the terminator, so it has to be examined again
        m_script.putBackChar();
    }
}
/**
 * Eats everything until an end-of-line character is encountered or
 * the end of the script is reached, whichever comes first. The
 * end-of-line character itself is left in the script. Note that the
 * opening portion of the comment ('/' + '/') has already been eaten.
 *
 * @exception IOException  as thrown by the script object
 */
protected void eatSingleLineComment()
        throws IOException
{
    while (m_script.hasMoreChars())
    {
        switch (m_script.nextChar())
        {
        case '\r':
        case '\n':
            // leave the line terminator for the caller
            m_script.putBackChar();
            return;

        default:
            // part of the comment; keep eating
            break;
        }
    }
}
/**
 * Eats the largest token possible, returning a Token which describes the
 * token.
 * <p>
 * The first character selects the token category: separators and
 * operators are built directly here (reading ahead as far as necessary
 * to find the longest operator), while literals, identifiers and
 * keywords are handed to the dedicated eat methods after the first
 * character has been put back. A character that cannot start any token
 * is logged as an error and skipped, and parsing resumes with the
 * following token.
 *
 * @return the token that was eaten
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation, like an unexpected end-of-file, an
 *            invalid unicode escape or an I/O failure
 */
protected Token eatToken()
        throws CompilerException
{
    try
    {
        while (true)
        {
            int  iLine    = m_script.getLine();
            int  ofInLine = m_script.getOffset();
            char ch       = m_script.nextChar();
            switch (ch)
            {
            // Separator (3.11) - the easiest tokens to parse
            case '(':
                return new Token(Token.TOK_LPARENTHESIS, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case ')':
                return new Token(Token.TOK_RPARENTHESIS, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '{':
                return new Token(Token.TOK_LCURLYBRACE, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '}':
                return new Token(Token.TOK_RCURLYBRACE, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '[':
                return new Token(Token.TOK_LBRACKET, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case ']':
                return new Token(Token.TOK_RBRACKET, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case ';':
                return new Token(Token.TOK_SEMICOLON, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case ',':
                return new Token(Token.TOK_COMMA, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '.':
                {
                // test for '.' as the start of a FloatingPointLiteral (3.10.2)
                boolean fNum = isDecimal(m_script.nextChar());
                m_script.putBackChar();
                // if it is a number, then put back the '.' and parse
                if (fNum)
                    {
                    m_script.putBackChar();
                    return eatFloatingPointLiteral();
                    }
                }
                return new Token(Token.TOK_DOT, iLine, ofInLine, m_script.getOffset() - ofInLine);

            // Operator (3.12) - the second easiest tokens to parse
            case '~':
                return new Token(Token.TOK_BITNOT, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '?':
                return new Token(Token.TOK_CONDITIONAL, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case ':':
                return new Token(Token.TOK_COLON, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '+':
                switch (m_script.nextChar())
                {
                case '+':
                    return new Token(Token.TOK_INCREMENT, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '=':
                    return new Token(Token.TOK_ASSIGN_ADD, iLine, ofInLine, m_script.getOffset() - ofInLine);
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_ADD, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            case '-':
                switch (m_script.nextChar())
                {
                case '-':
                    return new Token(Token.TOK_DECREMENT, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '=':
                    return new Token(Token.TOK_ASSIGN_SUB, iLine, ofInLine, m_script.getOffset() - ofInLine);
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_SUB, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            case '*':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_ASSIGN_MUL, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_MUL, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '/':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_ASSIGN_DIV, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_DIV, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '%':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_ASSIGN_REM, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_REM, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '^':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_ASSIGN_BITXOR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_BITXOR, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '=':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_TEST_EQ, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_ASSIGN, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '!':
                if (m_script.nextChar() == '=')
                {
                    return new Token(Token.TOK_TEST_NE, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                m_script.putBackChar();
                return new Token(Token.TOK_LOGICAL_NOT, iLine, ofInLine, m_script.getOffset() - ofInLine);
            case '&':
                switch (m_script.nextChar())
                {
                case '&':
                    return new Token(Token.TOK_LOGICAL_AND, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '=':
                    return new Token(Token.TOK_ASSIGN_BITAND, iLine, ofInLine, m_script.getOffset() - ofInLine);
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_BITAND, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            case '|':
                switch (m_script.nextChar())
                {
                case '|':
                    return new Token(Token.TOK_LOGICAL_OR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '=':
                    return new Token(Token.TOK_ASSIGN_BITOR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_BITOR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            case '<':
                switch (m_script.nextChar())
                {
                case '=':
                    return new Token(Token.TOK_TEST_LE, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '<':
                    if (m_script.nextChar() == '=')
                    {
                        return new Token(Token.TOK_ASSIGN_SHL, iLine, ofInLine, m_script.getOffset() - ofInLine);
                    }
                    m_script.putBackChar();
                    return new Token(Token.TOK_SHL, iLine, ofInLine, m_script.getOffset() - ofInLine);
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_TEST_LT, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            case '>':
                switch (m_script.nextChar())
                {
                case '=':
                    return new Token(Token.TOK_TEST_GE, iLine, ofInLine, m_script.getOffset() - ofInLine);
                case '>':
                    switch (m_script.nextChar())
                    {
                    case '>':
                        if (m_script.nextChar() == '=')
                        {
                            return new Token(Token.TOK_ASSIGN_USHR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                        }
                        m_script.putBackChar();
                        return new Token(Token.TOK_USHR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                    case '=':
                        return new Token(Token.TOK_ASSIGN_SHR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                    default:
                        m_script.putBackChar();
                        return new Token(Token.TOK_SHR, iLine, ofInLine, m_script.getOffset() - ofInLine);
                    }
                default:
                    m_script.putBackChar();
                    return new Token(Token.TOK_TEST_GT, iLine, ofInLine, m_script.getOffset() - ofInLine);
                }

            // CharacterLiteral (3.10.4)
            case '\'':
                m_script.putBackChar();
                return eatCharacterLiteral();

            // StringLiteral (3.10.5)
            case '\"':
            case '`': // non-standard
                m_script.putBackChar();
                return eatStringLiteral();

            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
            case '8':
            case '9':
                // IntegerLiteral (3.10.1)
                // FloatingPointLiteral (3.10.2)
                // (see also TOK_DOT in the Separator (3.11) section)
                m_script.putBackChar();
                return eatNumericLiteral();

            case '_': // ASCII underscore
            case '$': // ASCII dollar sign
            case 'A': case 'a': // ASCII latin A-Z and a-z
            case 'B': case 'b':
            case 'C': case 'c':
            case 'D': case 'd':
            case 'E': case 'e':
            case 'F': case 'f':
            case 'G': case 'g':
            case 'H': case 'h':
            case 'I': case 'i':
            case 'J': case 'j':
            case 'K': case 'k':
            case 'L': case 'l':
            case 'M': case 'm':
            case 'N': case 'n':
            case 'O': case 'o':
            case 'P': case 'p':
            case 'Q': case 'q':
            case 'R': case 'r':
            case 'S': case 's':
            case 'T': case 't':
            case 'U': case 'u':
            case 'V': case 'v':
            case 'W': case 'w':
            case 'X': case 'x':
            case 'Y': case 'y':
            case 'Z': case 'z':
                // identifier (3.8) or keyword (3.9)
                m_script.putBackChar();
                return eatIdentifierOrKeyword();

            default:
                {
                if (Character.isJavaIdentifierStart(ch))
                    {
                    // identifier (3.8) or keyword (3.9)
                    m_script.putBackChar();
                    return eatIdentifierOrKeyword();
                    }
                // lexical error -- the character is not a valid "lead"
                // character for any of the java tokens; log it, skip to
                // the next token and try again
                logError(ERROR, ERR_INVALID_CHAR,
                        new String[] {LiteralToken.printableChar(ch)},
                        iLine, ofInLine, m_script.getOffset() - ofInLine);
                eatFluff();
                }
            }
        }
    }
    catch (EOFException e)
    {
        logError(ERROR, ERR_UNEXPECTED_EOF, null, m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
    catch (UnicodeDataFormatException e)
    {
        // report an invalid unicode escape the same way eatFluff does
        // instead of letting it be logged as a generic I/O failure
        logError(ERROR, ERR_UNICODE_ESCAPE, null, m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
    catch (IOException e)
    {
        logError(ERROR, ERR_UNEXPECTED_IO, new String[] {e.toString()},
                m_script.getLine(), m_script.getOffset(), 0);
        throw new CompilerException();
    }
}
/**
 * Eats a character literal, ie. a single quote, a character or
 * escaped character, and a closing single quote.
 * <p>
 * Lexical problems (a line terminator inside the literal, an unescaped
 * single quote as the character, a missing closing quote) are logged and
 * a literal token is still returned so that parsing can continue.
 *
 * @return the character literal token
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
protected Token eatCharacterLiteral()
        throws CompilerException, IOException
{
    final int iLine   = m_script.getLine();
    final int ofStart = m_script.getOffset();

    // skip opening single quote
    m_script.nextChar();

    char chValue = m_script.nextChar();
    if (chValue == '\\')
    {
        // let the escape parser see the backslash as well
        m_script.putBackChar();
        chValue = eatEscapedCharacter();
    }
    else if (chValue == '\r' || chValue == '\n')
    {
        // an unexpected end-of-line was found; don't try to complete
        // parsing of the character literal
        m_script.putBackChar();
        logError(ERROR, ERR_NEWLINE_IN_LIT, null,
                iLine, m_script.getOffset(), 0);
        return new LiteralToken(' ', iLine, ofStart, m_script.getOffset() - ofStart);
    }
    else if (chValue == '\'')
    {
        // step back and forth over the quote to learn its exact offset
        m_script.putBackChar();
        int ofSQuote = m_script.getOffset();
        m_script.nextChar();

        // log the error, highlighting the unescaped single quote
        logError(ERROR, ERR_UNESC_S_QUOTE, null, iLine, ofSQuote, m_script.getOffset() - ofSQuote);
    }

    // closing single quote
    char chClose = m_script.nextChar();
    if (chClose != '\'')
    {
        m_script.putBackChar();
        logError(ERROR, ERR_S_QUOTE_EXP, null, iLine, m_script.getOffset(), 0);
    }

    return new LiteralToken(chValue, iLine, ofStart, m_script.getOffset() - ofStart);
}
/**
 * Eats a string literal. A string literal is composed of an opening
 * quote, a set of characters, and a matching closing quote. The quote is
 * whichever character the literal starts with; for the non-standard
 * back-quote form the collected text is passed through
 * {@code HashHelper.hash} before the token is built.
 * <p>
 * A line terminator inside the literal is logged as an error and ends
 * the literal at that point.
 *
 * @return the string literal token
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
protected Token eatStringLiteral()
        throws CompilerException, IOException
{
    final int iLine   = m_script.getLine();
    final int ofStart = m_script.getOffset();

    // the opening quote determines which character closes the literal
    final char chQuote = m_script.nextChar();

    StringBuilder sbText = new StringBuilder();
    for (char ch = m_script.nextChar(); ch != chQuote; ch = m_script.nextChar())
    {
        if (ch == '\r' || ch == '\n')
        {
            // unterminated literal: leave the line terminator in place
            m_script.putBackChar();
            logError(ERROR, ERR_NEWLINE_IN_LIT, null, iLine, m_script.getOffset(), 0);
            break;
        }

        if (ch == '\\')
        {
            // let the escape parser see the backslash as well
            m_script.putBackChar();
            ch = eatEscapedCharacter();
        }

        sbText.append(ch);
    }

    String sValue = sbText.toString();
    if (chQuote == '`')
    {
        sValue = HashHelper.hash(sValue);
    }

    return new LiteralToken(sValue, iLine, ofStart, m_script.getOffset() - ofStart);
}
/**
 * Eats a character escape sequence, starting at the backslash. See
 * EscapeSequence (Java Language Specification 3.10.6).
 * <p>
 * Octal escapes take up to three digits when the first digit is 0-3 and
 * up to two digits otherwise. A line terminator or any other unsupported
 * character after the backslash is logged as an error and a space is
 * returned in its place.
 *
 * @return the character that the escape sequence stands for
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
public char eatEscapedCharacter()
        throws CompilerException, IOException
{
    // skip the escape ('\')
    m_script.nextChar();

    // check the escaped character
    char ch = m_script.nextChar();

    // simple one-character escapes and the line terminator error
    switch (ch)
    {
    case 'b':
        return '\b';
    case 't':
        return '\t';
    case 'n':
        return '\n';
    case 'f':
        return '\f';
    case 'r':
        return '\r';
    case '\"':
    case '\'':
    case '\\':
        return ch;
    case '\r':
    case '\n':
        m_script.putBackChar();
        logError(ERROR, ERR_NEWLINE_IN_LIT, null, m_script.getLine(), m_script.getOffset(), 0);
        return ' ';
    default:
        break;
    }

    // octal escape
    if (ch >= '0' && ch <= '7')
    {
        int cDigitsLeft = (ch > '3' ? 2 : 3);
        int nValue      = 0;
        do
        {
            nValue = (nValue << 3) | (ch - '0');
            ch     = m_script.nextChar();
        }
        while (ch >= '0' && ch <= '7' && --cDigitsLeft > 0);

        // the loop always reads one character too many
        m_script.putBackChar();
        return (char) nValue;
    }

    // anything else is an invalid escape; back up temporarily to
    // determine the exact line number and position of the character
    int ofNext = m_script.getOffset();
    m_script.putBackChar();

    // log the error (including the parameter)
    logError(ERROR, ERR_CHAR_ESCAPE, new String[]
            {LiteralToken.printableChar(ch)}, m_script.getLine(),
            m_script.getOffset(), ofNext - m_script.getOffset());

    // step over the invalid character again
    m_script.nextChar();
    return ' ';
}
/**
 * Eats a numeric literal. Numeric literals are either integral (int or
 * long) or floating point (float or double). The literal is first parsed
 * as an integral number; if that parse reports that the literal is not
 * integral, the script is rewound to the start of the literal and it is
 * parsed again as a floating point number.
 *
 * @return the numeric literal token
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
protected Token eatNumericLiteral()
        throws CompilerException, IOException
{
    // remember where the literal starts in case we have to re-parse
    ParsePosition posStart = m_script.savePosition();

    Token tokIntegral = eatIntegralLiteral();
    if (tokIntegral != null)
    {
        return tokIntegral;
    }

    m_script.restorePosition(posStart);
    return eatFloatingPointLiteral();
}
/**
 * Eats an integral numeric literal (hexadecimal, octal or decimal, with
 * an optional 'L' or 'l' suffix for long). If the number is an integral
 * number but contains a lexical error, the error is logged and a token
 * representing the integral 0 is returned. If the number is determined
 * to not be an integral (ie. it appears to be a floating point number),
 * then null is returned; the script position is NOT rewound in that
 * case, so the caller has to restore it.
 * <p>
 * Hexadecimal and octal literals may use all 32 (int) or 64 (long) bits,
 * so for example 0xFFFFFFFF and 037777777777 both produce the int -1.
 *
 * @return Either an integral token or null if the number appears to be
 *         a floating point number
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
protected Token eatIntegralLiteral()
        throws CompilerException, IOException
{
    int iLine    = m_script.getLine();
    int ofInLine = m_script.getOffset();

    // get the first character of the literal; the first character
    // is the best indicator of the type of the number (integer vs.
    // float, and hex vs. octal vs. decimal for integers).
    char ch = m_script.nextChar();
    if (ch == '0')
    {
        // hex and octal numbers start with a leading 0, but a float can
        // also start with a leading zero and there is such a number as
        // the integral zero by itself
        ch = m_script.nextChar();
        switch (ch)
        {
        case 'X': case 'x':
            {
            // build a string containing the hex integer (without any
            // leading zeros)
            StringBuffer sb         = new StringBuffer();
            boolean      fLeadZero  = true;
            boolean      fHasDigits = false;
            ch = m_script.nextChar();
            while (isHex(ch))
            {
                if (ch != '0')
                {
                    fLeadZero = false;
                }
                if (!fLeadZero)
                {
                    sb.append(ch);
                }
                fHasDigits = true;
                ch = m_script.nextChar();
            }

            // the character following the hex integer could
            // specify that the number is a long integer
            boolean fLong = (ch == 'L' || ch == 'l');
            if (!fLong)
            {
                m_script.putBackChar();
            }

            // check for an error
            String s = sb.toString();
            if (!fHasDigits || s.length() > (fLong ? 16 : 8))
            {
                String sCode = (fHasDigits ? ERR_NUMERIC_RANGE : ERR_NUMERIC_FORMAT);
                logError(ERROR, sCode, new String[] {s}, iLine,
                        ofInLine, m_script.getOffset() - ofInLine);
            }
            else if (s.length() > 0)
            {
                // the string s contains a valid non-zero hex number;
                // convert it to an integer
                try
                {
                    char[] ach = s.toCharArray();
                    if (fLong)
                    {
                        long lValue = 0;
                        for (int of = 0, cch = ach.length; of < cch; ++of)
                        {
                            lValue = (lValue << 4) + hexValue(ach[of]);
                        }
                        return new LiteralToken(lValue, iLine,
                                ofInLine, m_script.getOffset() - ofInLine);
                    }
                    else
                    {
                        int nValue = 0;
                        for (int of = 0, cch = ach.length; of < cch; ++of)
                        {
                            nValue = (nValue << 4) + hexValue(ach[of]);
                        }
                        return new LiteralToken(nValue, iLine,
                                ofInLine, m_script.getOffset() - ofInLine);
                    }
                }
                catch (NumberFormatException e)
                {
                    internalError();
                }
            }

            // parsing error already logged (or the value is zero) ...
            // just return zero
            return new LiteralToken((fLong ? LIT_ZERO_L : LIT_ZERO),
                    iLine, ofInLine, m_script.getOffset() - ofInLine);
            }

        case '0':
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
            {
            // either an octal integer or a float; collect the octal
            // digits (without any leading zeros)
            StringBuffer sb        = new StringBuffer();
            boolean      fLong     = false;
            boolean      fLeadZero = true;
            while (ch >= '0' && ch <= '7')
            {
                if (ch != '0')
                {
                    fLeadZero = false;
                }
                if (!fLeadZero)
                {
                    sb.append(ch);
                }
                ch = m_script.nextChar();
            }

            switch (ch)
            {
            case '8': case '9':
            case 'E': case 'e':
            case 'F': case 'f':
            case 'D': case 'd':
            case '.':
                // the numeric literal is actually a float
                return null;

            case 'L': case 'l':
                // the suffix is part of the literal, so it stays eaten
                fLong = true;
                break;

            default:
                // the character belongs to whatever follows the literal
                m_script.putBackChar();
            }

            // check if the octal number exceeds the limits: an int has
            // 32 bits (11 octal digits, the first at most 3) and a long
            // has 64 bits (22 octal digits, the first at most 1)
            String s         = sb.toString();
            int    cchMax    = (fLong ? 22 : 11);
            char   chMaxLead = (fLong ? '1' : '3');
            if (s.length() >= cchMax &&
                    (s.length() > cchMax || s.charAt(0) > chMaxLead))
            {
                logError(ERROR, ERR_NUMERIC_RANGE, new String[] {s},
                        iLine, ofInLine, m_script.getOffset() - ofInLine);
            }
            else if (s.length() > 0)
            {
                // the string s contains a valid non-zero octal number;
                // accumulate it by shifting (as the hex branch does) so
                // that values using the sign bit convert correctly
                char[] ach = s.toCharArray();
                if (fLong)
                {
                    long lValue = 0;
                    for (int of = 0, cch = ach.length; of < cch; ++of)
                    {
                        lValue = (lValue << 3) + (ach[of] - '0');
                    }
                    return new LiteralToken(lValue, iLine,
                            ofInLine, m_script.getOffset() - ofInLine);
                }
                else
                {
                    int nValue = 0;
                    for (int of = 0, cch = ach.length; of < cch; ++of)
                    {
                        nValue = (nValue << 3) + (ach[of] - '0');
                    }
                    return new LiteralToken(nValue, iLine,
                            ofInLine, m_script.getOffset() - ofInLine);
                }
            }

            // parsing error already logged (or the value is zero) ...
            // just return zero
            return new LiteralToken((fLong ? LIT_ZERO_L : LIT_ZERO),
                    iLine, ofInLine, m_script.getOffset() - ofInLine);
            }

        case '8': case '9':
        case 'E': case 'e':
        case 'F': case 'f':
        case 'D': case 'd':
        case '.':
            // floating point number
            return null;

        case 'L': case 'l':
            // the long integer numeric literal "0"
            return new LiteralToken(LIT_ZERO_L, iLine, ofInLine,
                    m_script.getOffset() - ofInLine);

        default:
            // the integer numeric literal "0"
            m_script.putBackChar();
            return new LiteralToken(LIT_ZERO, iLine, ofInLine,
                    m_script.getOffset() - ofInLine);
        }
    }
    // either a decimal integer or a float
    else if (ch >= '1' && ch <= '9')
    {
        // decimal integers are constructed as negative numbers in case
        // of the MIN_VALUE problem
        // (see the Java language specification 3.10.1)
        StringBuffer sb = new StringBuffer();
        sb.append('-');
        while (ch >= '0' && ch <= '9')
        {
            sb.append(ch);
            ch = m_script.nextChar();
        }

        boolean fLong = false;
        switch (ch)
        {
        case 'E': case 'e':
        case 'F': case 'f':
        case 'D': case 'd':
        case '.':
            // the first non-decimal character shows that the
            // numeric literal is actually a float
            return null;

        case 'L': case 'l':
            // the decimal number is a long integer
            fLong = true;
            break;

        default:
            m_script.putBackChar();
        }

        // check if the decimal number exceeds the limits; for strings
        // of equal length the character comparison orders the numbers
        String s    = sb.toString();
        String sMax = (fLong ? MAX_LONG : MAX_INT);
        if (s.length() >= sMax.length() &&
                (s.length() > sMax.length() || s.compareTo(sMax) > 0))
        {
            logError(ERROR, ERR_NUMERIC_RANGE, new String[] {s},
                    iLine, ofInLine, m_script.getOffset() - ofInLine);
        }
        else
        {
            // the string s contains a valid non-zero decimal number;
            // convert it to an integer (and store it as a negative)
            try
            {
                if (fLong)
                {
                    return new LiteralToken(Long.parseLong(s, 10), true,
                            iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
                else
                {
                    return new LiteralToken(Integer.parseInt(s, 10), true,
                            iLine, ofInLine, m_script.getOffset() - ofInLine);
                }
            }
            catch (NumberFormatException e)
            {
                internalError();
            }
        }

        // parsing error already logged ... just return zero
        return new LiteralToken((fLong ? LIT_ZERO_L : LIT_ZERO),
                iLine, ofInLine, m_script.getOffset() - ofInLine);
    }

    // otherwise, assume that it is a floating point number
    return null;
}
/**
 * Eats a floating point literal. Floating point literals are composed of
 * five parts, most of which are optional: digits, dot, digits, exponent,
 * and suffix.
 * <p>
 * The literal is well formed if it has digits after the dot, or leading
 * digits together with at least one of dot, exponent or suffix, and, when
 * an exponent is present, the exponent has at least one digit. A
 * malformed literal is logged as a numeric format error and a floating
 * point zero is returned in its place.
 *
 * @return the floating point literal token
 *
 * @exception CompilerException  If a lexical error is encountered which
 *            should stop compilation (like a full error list)
 * @exception IOException  Unexpected end-of-file, etc.
 */
protected Token eatFloatingPointLiteral()
        throws CompilerException, IOException
{
    final int iLine   = m_script.getLine();
    final int ofStart = m_script.getOffset();

    StringBuilder sbNum = new StringBuilder();

    // (1) leading digits
    boolean fIntDigits = false;
    char    ch         = m_script.nextChar();
    for (; isDecimal(ch); ch = m_script.nextChar())
    {
        fIntDigits = true;
        sbNum.append(ch);
    }

    // (2) dot
    final boolean fDot = (ch == '.');
    if (fDot)
    {
        sbNum.append(ch);
        ch = m_script.nextChar();
    }

    // (3) trailing digits
    boolean fFracDigits = false;
    for (; isDecimal(ch); ch = m_script.nextChar())
    {
        fFracDigits = true;
        sbNum.append(ch);
    }

    // (4) exponent
    final boolean fExponent  = (ch == 'E' || ch == 'e');
    boolean       fExpDigits = false;
    if (fExponent)
    {
        sbNum.append(ch);
        ch = m_script.nextChar();

        // sign of the exponent
        if (ch == '+' || ch == '-')
        {
            sbNum.append(ch);
            ch = m_script.nextChar();
        }

        // value of the exponent
        for (; isDecimal(ch); ch = m_script.nextChar())
        {
            fExpDigits = true;
            sbNum.append(ch);
        }
    }

    // (5) suffix; it is eaten but not added to the text to convert
    final boolean fFloat  = (ch == 'F' || ch == 'f');
    final boolean fSuffix = (ch == 'D' || ch == 'd' || fFloat);
    if (!fSuffix)
    {
        // the character belongs to whatever follows the literal
        m_script.putBackChar();
    }

    // now verify that the correct pieces of the floating point
    // literal appeared in the string
    String  sNum        = sbNum.toString();
    boolean fHasNumber  = fFracDigits || (fIntDigits && (fDot || fExponent || fSuffix));
    boolean fExponentOk = !fExponent || fExpDigits;
    if (fHasNumber && fExponentOk)
    {
        try
        {
            if (fFloat)
            {
                return new LiteralToken(Float.parseFloat(sNum),
                        iLine, ofStart, m_script.getOffset() - ofStart);
            }
            return new LiteralToken(Double.parseDouble(sNum),
                    iLine, ofStart, m_script.getOffset() - ofStart);
        }
        catch (NumberFormatException e)
        {
            // deliberately not rethrown: the error is logged below
        }
    }

    // log numeric format error with the invalid number as the parameter
    logError(ERROR, ERR_NUMERIC_FORMAT, new String[] {sNum},
            iLine, ofStart, m_script.getOffset() - ofStart);

    // return a floating point zero
    return new LiteralToken((fFloat ? LIT_ZERO_F : LIT_ZERO_D),
            iLine, ofStart, m_script.getOffset() - ofStart);
}
/**
* Eats an identifier or a keyword. Note that this method assumes that
* it is only called if the next character is a "Java letter".
*
* @exception CompilerException If a lexical error is encountered which
* should stop compilation (like a full error list)
* @exception EOFException Unexpected end-of-file
*
* @see Character#isJavaIdentifierStart(char)
* @see Character#isJavaIdentifierPart(char)
*/
protected Token eatIdentifierOrKeyword()
        throws CompilerException, IOException
{
    // location of the first character of the name
    final int iStartLine = m_script.getLine();
    final int ofStart    = m_script.getOffset();

    // collect every character that may be part of a Java identifier; the
    // first character that may not belongs to the next token, so it is
    // handed back to the script
    StringBuilder sbName = new StringBuilder();
    for (char ch = m_script.nextChar();
         Character.isJavaIdentifierPart(ch);
         ch = m_script.nextChar())
    {
        sbName.append(ch);
    }
    m_script.putBackChar();

    // reserved names come from the shared table of reserved tokens
    String sName    = sbName.toString();
    Token  tokProto = (Token) Token.RESERVED.get(sName);

    // 2001.06.29 cp - the "as" keyword is a proprietary extension; with
    // extensions disabled it is handled like any other identifier
    boolean fDisabledExtension = tokProto != null
            && !m_fExtensionsEnabled
            && tokProto.getID() == Token.KEY_AS;

    if (tokProto == null || fDisabledExtension)
    {
        // an identifier:  re-use the token registered for this name, or
        // register a new one on first sight
        tokProto = (Token) m_tblNames.get(sName);
        if (tokProto == null)
        {
            tokProto = new Token(Token.IDENTIFIER, Token.NONE, Token.IDENT, null, sName);
            m_tblNames.put(sName, tokProto);
        }
    }

    // hand out a copy of the token that carries the location and length
    // of this particular occurrence
    return new Token(tokProto, iStartLine, ofStart, m_script.getOffset() - ofStart);
}
// ----- error handling -------------------------------------------------
/**
* Logs an internal error and throws a CompilerException.
*
* @exception CompilerException Thrown unconditionally
*/
protected void internalError()
        throws CompilerException
{
    // report the failure at the script's current position (line first,
    // then offset) with no parameters and a zero-length text span
    final int iLine    = m_script.getLine();
    final int ofInLine = m_script.getOffset();
    logError(FATAL, ERR_INTERNAL, null, iLine, ofInLine, 0);

    // an internal error always aborts
    throw new CompilerException();
}
/**
* Logs the passed error in the error list.
*
* @param nSeverity Severity of the error as defined by ErrorList.Constants
* @param sCode Error code, as defined by the class logging the error
* @param asParams Replaceable parameters for the error message
* @param iLine Line number where the error was detected
* @param ofInLine Offset of the error within the line
* @param cchText Length of the text which caused the error
*
* @exception CompilerException If the error list overflows.
*/
protected void logError(int nSeverity, String sCode, String[] asParams, int iLine, int ofInLine, int cchText)
        throws CompilerException
{
    try
    {
        // package the details of the error, using this tokenizer's
        // RESOURCES for the message text, and add it to the error list
        CompilerErrorInfo info = new CompilerErrorInfo(
                nSeverity, sCode, RESOURCES, asParams, iLine, ofInLine, cchText);
        m_errlist.add(info);
    }
    catch (ErrorList.OverflowException e)
    {
        // the error list overflowed; signal the caller to stop
        throw new CompilerException();
    }
}
// ----- inner classes --------------------------------------------------
/**
* Stores all information required to later restore the current position in
* the script.
*/
// Plain data holder: a package-private, non-static inner class that declares
// only fields and no methods of its own.
class Position implements ParsePosition
{
// NOTE(review): presumably the tokenizer that created this position, so that
// a restore can be matched to its originating tokenizer -- confirm against
// the save/restore methods
Tokenizer toker;
// NOTE(review): likely a snapshot of the tokenizer's "put back" queue
// (m_dq) taken when the position was saved -- confirm
Dequeue dq;
// NOTE(review): likely the position saved by the underlying script
// (m_script) -- confirm
ParsePosition scriptpos;
}
// ----- constants ------------------------------------------------------
// commonly used literal tokens: one zero-valued literal for each of int,
// long, float and double (the explicit casts spell out the value type of
// each). LIT_ZERO_F and LIT_ZERO_D are used by eatFloatingPointLiteral as
// the basis of the token returned after a malformed literal was logged.
// NOTE(review): the three -1 arguments are presumably line, offset and
// length, marking "no source location" -- confirm against LiteralToken
public static final LiteralToken LIT_ZERO = new LiteralToken((int) 0 , -1, -1, -1);
public static final LiteralToken LIT_ZERO_L = new LiteralToken((long) 0L , -1, -1, -1);
public static final LiteralToken LIT_ZERO_F = new LiteralToken((float) 0.0F, -1, -1, -1);
public static final LiteralToken LIT_ZERO_D = new LiteralToken((double) 0.0D, -1, -1, -1);
// maximum integral values: despite the MAX_ prefix, these strings are the
// decimal text of Integer.MIN_VALUE and Long.MIN_VALUE (sign included),
// i.e. the values with the largest magnitude of each type.
// NOTE(review): presumably used when range-checking integral literals,
// where this magnitude is only legal when negated -- confirm against the
// integral literal parsing code
public static final String MAX_INT = "-2147483648";
public static final String MAX_LONG = "-9223372036854775808";
// ----- data members ---------------------------------------------------
/**
* The script to parse. The tokenizer reads characters from it (nextChar),
* returns a character it read too far (putBackChar), and asks it for the
* current line and offset when building tokens and errors.
*/
private Script m_script;
/**
* Parsed names. Maps the text of each identifier seen so far to the Token
* registered for it, so that repeated occurrences of a name re-use the
* same registered token (see eatIdentifierOrKeyword).
*/
private Hashtable m_tblNames;
/**
* The "put back" queue.
*/
private Dequeue m_dq;
/**
* The error list to log to. Errors are added as CompilerErrorInfo
* instances by logError.
*/
private ErrorList m_errlist;
/**
* The option to parse proprietary keyword extensions. When false, the
* reserved word with the ID Token.KEY_AS is treated as an ordinary
* identifier (see eatIdentifierOrKeyword).
*/
private boolean m_fExtensionsEnabled;
/**
* The option to parse comments as tokens.
*/
private boolean m_fCommentsEnabled;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy