org.eclipse.ocl.lpg.AbstractParser Maven / Gradle / Ivy
/**
*
*
* Copyright (c) 2005, 2010 IBM Corporation and others.
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Public License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* Contributors:
* IBM - Initial API and implementation
* E.D.Willink - refactored to separate from OCLLPGParser
* - Bugs 243976, 295166, 259818
* Zeligsoft - Bug 243976
*
*
*
* $Id: AbstractParser.java,v 1.12 2010/05/03 09:32:32 ewillink Exp $
*/
package org.eclipse.ocl.lpg;
import java.text.StringCharacterIterator;
import lpg.runtime.ErrorToken;
import lpg.runtime.ILexStream;
import lpg.runtime.IPrsStream;
import lpg.runtime.IToken;
import lpg.runtime.LexStream;
import lpg.runtime.Monitor;
import lpg.runtime.ParseTable;
import org.eclipse.ocl.cst.CSTNode;
import org.eclipse.ocl.internal.l10n.OCLMessages;
import org.eclipse.ocl.options.ParsingOptions;
import org.eclipse.ocl.options.ProblemOption;
/**
* The AbstractParser
supports syntactic analysis of a tokens
* produced by an AbstractLexer
to create a corresponding CST, that
* may in turn be exploited by an AbstractAnalyzer
to create an
* AST.
*
* Derived classes should extend the abstract support for AST from CST from
* tokens from text parsing and analysis to support the AST and CST classes
* appropriate to a particular language.
*/
public abstract class AbstractParser {
private final BasicEnvironment environment;
private int defaultRepairCount = 0;
private AbstractLexer lexer;
public AbstractParser(BasicEnvironment environment) {
this.environment = environment;
environment.setParser(this);
}
public AbstractParser(AbstractLexer lexer) {
this.lexer = lexer;
this.environment = lexer.getEnvironment();
environment.setParser(this);
}
/**
* Returns a single line string representation of the input chars for the
* given range.
*
* @param left
* left most char index
* @param right
* right most char index
* @return a single line string representation of the input chars
*/
public String computeInputString(int left, int right) {
StringBuffer result = new StringBuffer(right - left + 1);
char[] chars = getIPrsStream().getInputChars();
if (chars.length > 0) {
for (int i = left; i <= right; i++) {
if (chars[i] == '\t') {
result.append(' ');
} else if (chars[i] == '\n' || chars[i] == '\r'
|| chars[i] == '\f') {
if (i > 0) {
if (!Character.isWhitespace(chars[i - 1])) {
result.append(' ');
}
}
} else {
result.append(chars[i]);
}
}
}
return result.toString();
}
public BasicEnvironment getEnvironment() {
return environment;
}
/**
* @since 1.3
*/
public int getDefaultRepairCount() {
return defaultRepairCount;
}
public AbstractLexer getLexer() {
return lexer;
}
/**
* @deprecated clients should invoke {@link #parser()}
*/
@Deprecated
public CSTNode parseTokensToCST() {
return parser(null, defaultRepairCount);
}
/**
* @param lexStream
* @deprecated To set/reset the lex stream use {@link #reset(ILexStream)}
*/
@Deprecated
public void resetLexStream(LexStream lexStream) {
getIPrsStream().resetLexStream(lexStream);
}
/**
* Sets the number of repairs to be performed by a parser capable of
* performing repairs (the BacktrackingParser) unless overridden on the
* specific parser invocation method.
*
* When repair count is 0, no Error token recoveries occur.
* When repair count is > 0, it limits the number of Error token recoveries.
* When repair count is < 0, the number of error token recoveries is unlimited.
*
* @param defaultRepairCount
* the default repair count
*
* @since 1.3
*/
public void setDefaultRepairCount(int defaultRepairCount) {
this.defaultRepairCount = defaultRepairCount;
}
/**
*
* @param lexer
* @deprecated To set/reset the lex stream use {@link #reset(ILexStream)}
*/
@Deprecated
public void setLexStream(AbstractLexer lexer) {
this.lexer = lexer;
getIPrsStream().resetLexStream(lexer.getILexStream());
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start and end offsets of the given IToken
*
* @param cstNode
* CSTNode
to set offsets
* @param startEnd
* IToken
to retrieve offsets from
*/
protected void setOffsets(CSTNode cstNode, IToken startEnd) {
cstNode.setStartToken(startEnd);
cstNode.setEndToken(startEnd);
int startOffset = startEnd.getStartOffset();
int endOffset = startEnd.getEndOffset();
if (endOffset < startOffset) {
cstNode.setStartOffset(endOffset);
cstNode.setEndOffset(startOffset);
} else {
cstNode.setStartOffset(startOffset);
cstNode.setEndOffset(endOffset);
}
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start and end offsets of the 2nd given CSTNode
*
* @param cstNode
* CSTNode
to set offsets
* @param startEnd
* CSTNode
to retrieve offsets from
*/
protected void setOffsets(CSTNode cstNode, CSTNode startEnd) {
cstNode.setStartToken(startEnd.getStartToken());
cstNode.setEndToken(startEnd.getEndToken());
cstNode.setStartOffset(startEnd.getStartOffset());
cstNode.setEndOffset(startEnd.getEndOffset());
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start offset of the 2nd given CSTNode
and the end offset of
* the 3rd given CSTNode
*
* @param cstNode
* CSTNode
to set offsets
* @param start
* CSTNode
to retrieve start offset from
* @param end
* CSTNode
to retrieve end offset from
*/
protected void setOffsets(CSTNode cstNode, CSTNode start, CSTNode end) {
cstNode.setStartToken(start.getStartToken());
cstNode.setEndToken(end.getEndToken());
cstNode.setStartOffset(start.getStartOffset());
cstNode.setEndOffset(end.getEndOffset());
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start offset of the 2nd given CSTNode
and the end offset of
* the given IToken
*
* @param cstNode
* CSTNode
to set offsets
* @param start
* CSTNode
to retrieve start offset from
* @param end
* IToken
to retrieve end offset from
*/
protected void setOffsets(CSTNode cstNode, CSTNode start, IToken end) {
cstNode.setStartToken(start.getStartToken());
cstNode.setEndToken(end);
cstNode.setStartOffset(start.getStartOffset());
cstNode.setEndOffset(end.getEndOffset());
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start offset of the given IToken
and the end offset of the
* 2nd given CSTNode
*
* @param cstNode
* CSTNode
to set offsets
* @param start
* IToken
to retrieve start offset from
* @param end
* CSTNode
to retrieve end offset from
*/
protected void setOffsets(CSTNode cstNode, IToken start, CSTNode end) {
cstNode.setStartToken(start);
cstNode.setEndToken(end.getEndToken());
cstNode.setStartOffset(start.getStartOffset());
cstNode.setEndOffset(end.getEndOffset());
}
/**
* Sets the start and end offsets of the given CSTNode
to the
* start offset of the 1std given IToken
and the end offset of
* the 2nd given IToken
*
* @param cstNode
* CSTNode
to set offsets
* @param start
* IToken
to retrieve start offset from
* @param end
* IToken
to retrieve end offset from
*/
protected void setOffsets(CSTNode cstNode, IToken start, IToken end) {
cstNode.setStartToken(start);
cstNode.setEndToken(end);
cstNode.setStartOffset(start.getStartOffset());
cstNode.setEndOffset(end.getEndOffset());
}
/**
*
* Initializes a concrete-syntax node's start and end offsets from the
* current token in the parser stream.
*
*
*
* Note: this method resided in the OCLEssential.g template since 1.2
* It has been incorporated in the abstract parser since 3.0
*
*
* @param cstNode a concrete-syntax node
*
* @since 3.0
*/
protected void setOffsets(CSTNode cstNode) {
IToken firstToken = getRhsIToken(1);
cstNode.setStartToken(firstToken);
cstNode.setEndToken(firstToken);
cstNode.setStartOffset(firstToken.getStartOffset());
cstNode.setEndOffset(firstToken.getEndOffset()-1);
}
/**
* Removes the "s surrounding a quoted string, if any.
*
* @param quoted
* a possibly quoted string
* @return quoted
without the surrounding quotes, or just
* quoted
verbatim if there were none
*
* @deprecated Use unDoubleQuote
*/
@Deprecated
protected String unquote(String quoted) {
String result = quoted;
if ((result != null) && (result.length() > 1)) {
int max = result.length() - 1;
if ((result.charAt(0) == '"') && (quoted.charAt(max) == '"')) {
result = result.substring(1, max);
}
// this is a regexp, so the backslash needs to be
// re-escaped, thus "\\" is rendered in a Java
// string literal as "\\\\"
result = result.replaceAll("\\\\\"", "\""); //$NON-NLS-2$//$NON-NLS-1$
if (!result.equals(quoted)) {
// check settings for using non-standard closure iterator
ProblemHandler.Severity sev = ProblemHandler.Severity.OK;
BasicEnvironment benv = getEnvironment();
if (benv != null) {
sev = benv
.getValue(ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE);
if ((sev != null) && (sev != ProblemHandler.Severity.OK)) {
benv.problem(sev, ProblemHandler.Phase.PARSER, OCLMessages
.bind(OCLMessages.NonStd_DQuote_Escape_, quoted),
"unquote", //$NON-NLS-1$
null);
}
}
}
}
return result;
}
/**
* Removes any "s surrounding a quoted string and decodes any escape sequences
* within it using {@link #decodeEscapeSequence}.
*
* For MDT/OCL 1.3.0 compatibility, a problem is reported if double
* quotes are present and the {@link #ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE}
* severity is not {@link #ProblemHandler.Severity.OK}.
*
* @param token containing string to be decoded
* @return string value of token
with escapes replaced
* @since 3.0
* @deprecated double quotes form no part of the OCL specification
*/
@Deprecated
protected String unDoubleQuote(IToken token) {
if (token == null) {
return null;
}
String quoted = token.toString();
if (quoted == null) {
return null;
}
int quotedLength = quoted.length();
if ((quotedLength < 2) || (quoted.charAt(0) != '"') || (quoted.charAt(quotedLength-1) != '"')) {
return quoted;
}
ProblemHandler.Severity sev = ProblemHandler.Severity.OK;
BasicEnvironment benv = getEnvironment();
if (benv != null) {
sev = benv
.getValue(ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE);
if ((sev != null) && (sev != ProblemHandler.Severity.OK)) {
benv.problem(sev, ProblemHandler.Phase.PARSER, OCLMessages
.bind(OCLMessages.NonStd_DQuote_Escape_, quoted),
"unquote", //$NON-NLS-1$
token);
}
}
return decodeString(token, quoted.substring(1, quotedLength-1));
}
/**
* Removes any quotes surrounding the string value of a token
* using {@link #unSingleQuote(String,int)} and optionally decode any
* escape sequences within it using {@link #decodeEscapeSequence}.
*
* For MDT/OCL 1.3.0 compatibility, escape sequences conversion can be disabled
* by resetting {@link #ParsingOption.USE_BACKSLASH_ESCAPE_PROCESSING}.
*
* @param token containing string to be decoded
* @return string value of token
with escapes replaced
* @since 3.0
*/
protected String unSingleQuote(IToken token) {
if (token == null) {
return null;
}
String quoted = token.toString();
if (quoted == null) {
return null;
}
String unquoted = unSingleQuote(quoted);
if (unquoted == null) {
return quoted;
}
Boolean backslashProcessingEnabled = null;
BasicEnvironment benv = getEnvironment();
if (benv != null) {
backslashProcessingEnabled = benv
.getValue(ParsingOptions.USE_BACKSLASH_ESCAPE_PROCESSING);
}
if ((backslashProcessingEnabled == null) || !backslashProcessingEnabled) {
return unquoted;
}
return decodeString(token, unquoted);
}
/**
* Removes any quotes surrounding a quoted string.
*
* The default implementation removes a leading/trailing single quote pair,
* or an underscore-prefixed leading/trailing single quote pair.
*
* @param quoted string to be decoded
* @return string content
* @since 3.0
*/
protected String unSingleQuote(String quoted) {
int quotedLength = quoted.length();
if ((quotedLength >= 2) && (quoted.charAt(0) == '\'') && (quoted.charAt(quotedLength-1) == '\'')) {
return quoted.substring(1, quotedLength-1);
}
else if ((quotedLength >= 3) && (quoted.charAt(0) == '_') && (quoted.charAt(1) == '\'') && (quoted.charAt(quotedLength-1) == '\'')) {
return quoted.substring(2, quotedLength-1);
}
else {
return null;
}
}
/**
* Returns a string with any escape sequences decoded by {@link #decodeEscapeSequence}.
*
* @param token the token from which the string originated
* @param string to be decoded
* @return string
without any escapes replaced
* @since 3.0
*/
protected String decodeString(IToken token, String string) {
if (string.indexOf('\\') < 0) {
return string;
}
StringBuffer s = new StringBuffer();
StringCharacterIterator i = new StringCharacterIterator(string);
for (char c = i.first(); c != StringCharacterIterator.DONE; c = i.next()) {
if (c != '\\') {
s.append(c);
}
else {
int iStart = i.getIndex();
char ch = decodeEscapeSequence(i);
if (ch != StringCharacterIterator.DONE) {
s.append(ch);
}
else {
BasicEnvironment benv = getEnvironment();
benv.problem(ProblemHandler.Severity.ERROR, ProblemHandler.Phase.PARSER, OCLMessages
.bind(OCLMessages.InvalidEscapeSequence_ERROR, string.substring(iStart, i.getIndex())),
"unquote", //$NON-NLS-1$
token);
return string;
}
}
}
return s.toString();
}
/**
* Decodes an escape sequence in accordance with the Issue 14357
* proposals for Concrete Syntaxes. For compatibility octal escape sequences
* are also supported.
*
*
* \b \u0008: backspace BS
* \t \u0009: horizontal tab HT
* \n \u000a: line feed LF
* \f \u000c: form feed FF
* \r \u000d: carriage return CR
* \" \u0022: double quote "
* \' \u0027: single quote '
* \\ \u005c: backslash \
* \xhh \u00hh: hex byte
* \uhhhh \uhhhh: hex code point
* \7$ \u000o: octal byte
* \77 \u00ff: octal byte
* \37$ \u00ff: octal byte
* \377 \u00ff: octal byte
*
* h hex digit (0-9, a-f, A-F)
* 7 octal digit (0-7)
* 3 octal prefix digit (0-3)
* $ end of string or non-octal digit next letter
*
*
*
* @param i character iterator pointing at first character after the introducer
*
* @return the decoded character and i updated to point to the first character following the
* escape sequence, or DONE and i unchanged if the escape sequence is unrecognised
* @since 3.0
*/
protected char decodeEscapeSequence(StringCharacterIterator i) {
int savedIndex = i.getIndex();
char c = i.next();
switch (c) {
case 'b' : return '\b';
case 'f' : return '\f';
case 't' : return '\t';
case 'n' : return '\n';
case 'r' : return '\r';
case '\\' : return '\\';
case '\'' : return '\'';
case '"' : return '\"';
case '0' :
case '1' :
case '2' :
case '3' : {
int c1 = c - '0';
int c2 = decodeOctalCharacter(i);
if (c2 < 0) {
return (char)(c1);
}
int c3 = decodeOctalCharacter(i);
if (c3 < 0) {
return (char)((c1 << 3) + c2);
}
return (char)((c1 << 6) + (c2 << 3) + c3);
}
case '4' :
case '5' :
case '6' :
case '7' : {
int c1 = c - '0';
int c2 = decodeOctalCharacter(i);
if (c2 < 0) {
i.previous();
return (char)(c1);
}
return (char)((c1 << 3) + c2);
}
case 'x' : {
int c1 = decodeHexCharacter(i.next());
int c2 = decodeHexCharacter(i.next());
if ((c1 < 0) || (c2 < 0)) {
break;
}
return (char)((c1 << 4) + c2);
}
case 'u' : {
int c1 = decodeHexCharacter(i.next());
int c2 = decodeHexCharacter(i.next());
int c3 = decodeHexCharacter(i.next());
int c4 = decodeHexCharacter(i.next());
if ((c1 < 0) || (c2 < 0) || (c3 < 0) || (c4 < 0)) {
break;
}
return (char)((c1 << 12) + (c2 << 8) + (c3 << 4) + c4);
}
}
i.setIndex(savedIndex); // Give derived augmentations the same starting point
return StringCharacterIterator.DONE;
}
/**
* Return the value of the next character if it is a hexadecimal character.
*
* @param c potentially hex character
* @return the hex value or -1 if the next character is not a hex character
* @since 3.0
*/
protected int decodeHexCharacter(char c) {
if (('0' <= c) && (c <= '9')) {
return c - '0';
}
if (('A' <= c) && (c <= 'F')) {
return 10 + c - 'A';
}
if (('a' <= c) && (c <= 'f')) {
return 10 + c - 'a';
}
return -1;
}
/**
* Return the value of the next character if it is an octal character.
*
* @param c potentially octal character
* @return the hex value or -1 if the next character is not a octal character
* @since 3.0
*/
protected int decodeOctalCharacter(StringCharacterIterator i) {
char c = i.next();
if (c == StringCharacterIterator.DONE) {
return -1;
}
if (('0' <= c) && (c <= '7')) {
return c - '0';
}
i.previous();
return -1;
}
// Some useful methods which will be implemented in the generated Parser
abstract public String[] orderedTerminalSymbols();
/**
* @return the number of different parser tokens
*
* @since 3.0
*/
abstract public int numTokenKinds();
/**
* @return the parser's {@link IPrsStream parseStream}
*
* @since 3.0
*/
abstract public DerivedPrsStream getIPrsStream();
/**
* Resets the parser's {@link ILexStream lexStream}
* @param lexStream
*
* @since 3.0
*/
abstract public void reset(ILexStream lexStream);
/**
* Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams
*
* @return the root {@link CSTNode} which results from the parsing process
* @since 3.0
*/
abstract public CSTNode parser();
/**
* Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams
* using the given {@link Monitor}
*
* @return the root {@link CSTNode} which results from the parsing process
* @since 3.0
*/
abstract public CSTNode parser(Monitor monitor);
/**
* Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams
* using a given error_repair_count (useful for a backtracking parser)
*
* @return the root {@link CSTNode} which results from the parsing process
* @since 3.0
*/
abstract public CSTNode parser(int error_repair_count);
/**
* Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams
* using the given {@link Monitor} and error_repair_count (useful for a backtracking parser)
*
* @return the root {@link CSTNode} which results from the parsing process
* @since 3.0
*/
abstract public CSTNode parser(Monitor monitor, int error_repair_count);
/**
* @return the {@link ParseTable} used by the parser
* @since 3.0
*/
abstract public ParseTable getParseTable();
/**
* @since 3.0
*/
abstract protected Object getRhsSym(int i);
/**
* @since 3.0
*/
abstract protected int getRhsTokenIndex(int i);
/**
* @since 3.0
*/
abstract protected IToken getRhsIToken(int i);
/**
* @since 3.0
*/
abstract protected int getRhsFirstTokenIndex(int i);
/**
* @since 3.0
*/
abstract protected IToken getRhsFirstIToken(int i);
/**
* @since 3.0
*/
abstract protected int getRhsLastTokenIndex(int i);
/**
* @since 3.0
*/
abstract protected IToken getRhsLastIToken(int i);
/**
* @since 3.0
*/
abstract protected int getLeftSpan();
/**
* @since 3.0
*/
abstract protected IToken getLeftIToken();
/**
* @since 3.0
*/
abstract protected int getRightSpan();
/**
* @since 3.0
*/
abstract protected IToken getRightIToken();
/**
* @since 3.0
*/
abstract protected int getRhsErrorTokenIndex(int i);
/**
* @since 3.0
*/
abstract protected ErrorToken getRhsErrorIToken(int i);
/**
* @since 3.0
*/
abstract protected void setResult(Object object);
}