All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.eclipse.ocl.lpg.AbstractParser Maven / Gradle / Ivy

/**
 * 
 *
 * Copyright (c) 2005, 2010 IBM Corporation and others.
 * All rights reserved.   This program and the accompanying materials
 * are made available under the terms of the Eclipse Public License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/legal/epl-v10.html
 * 
 * Contributors: 
 *   IBM - Initial API and implementation
 *   E.D.Willink - refactored to separate from OCLLPGParser
 *             - Bugs 243976, 295166, 259818
 *   Zeligsoft - Bug 243976
 *   
 * 
 *
 * $Id: AbstractParser.java,v 1.12 2010/05/03 09:32:32 ewillink Exp $
 */
package org.eclipse.ocl.lpg;

import java.text.StringCharacterIterator;

import lpg.runtime.ErrorToken;
import lpg.runtime.ILexStream;
import lpg.runtime.IPrsStream;
import lpg.runtime.IToken;
import lpg.runtime.LexStream;
import lpg.runtime.Monitor;
import lpg.runtime.ParseTable;

import org.eclipse.ocl.cst.CSTNode;
import org.eclipse.ocl.internal.l10n.OCLMessages;
import org.eclipse.ocl.options.ParsingOptions;
import org.eclipse.ocl.options.ProblemOption;

/**
 * The AbstractParser supports syntactic analysis of the tokens
 * produced by an AbstractLexer to create a corresponding CST, that
 * may in turn be exploited by an AbstractAnalyzer to create an
 * AST.
 * 
 * Derived classes should extend the abstract support for AST from CST from
 * tokens from text parsing and analysis to support the AST and CST classes
 * appropriate to a particular language.
 */
public abstract class AbstractParser {

	/** Environment this parser is registered with; assigned exactly once by each constructor. */
	private final BasicEnvironment environment;

	/**
	 * Repair count handed to the parser by {@link #parseTokensToCST()}.
	 * Starts at 0 and is changed through {@link #setDefaultRepairCount(int)}.
	 */
	private int defaultRepairCount = 0;

	/**
	 * Lexer given to the lexer-based constructor or to
	 * {@link #setLexStream(AbstractLexer)}; remains null when neither was used.
	 */
	private AbstractLexer lexer;

	/**
	 * Creates a parser for the given environment and registers the new parser
	 * with that environment through <code>setParser</code>. No lexer is
	 * associated with the parser by this constructor.
	 * 
	 * @param environment
	 *            the environment the parser belongs to
	 */
	public AbstractParser(BasicEnvironment environment) {
		this.environment = environment;
		// Register only after the field is set, so the environment sees an initialised parser.
		this.environment.setParser(this);
	}

	public AbstractParser(AbstractLexer lexer) {
		this.lexer = lexer;
		this.environment = lexer.getEnvironment();
		environment.setParser(this);
	}

	/**
	 * Returns a single line string representation of the input chars for the
	 * given range.
	 * <p>
	 * Each tab is replaced by a space. Each line feed, carriage return or form
	 * feed is replaced by a single space when the preceding input character is
	 * not whitespace, and is dropped otherwise. All other characters are
	 * copied unchanged.
	 * </p>
	 * <p>
	 * The range is clamped to the available input, so an empty, inverted or
	 * out-of-range request yields the (possibly empty) part that exists rather
	 * than an exception.
	 * </p>
	 * 
	 * @param left
	 *            left most char index (inclusive)
	 * @param right
	 *            right most char index (inclusive)
	 * @return a single line string representation of the input chars
	 */
	public String computeInputString(int left, int right) {
		char[] chars = getIPrsStream().getInputChars();
		// Clamp to the real input so that bad offsets cannot index outside the array.
		int first = Math.max(left, 0);
		int last = Math.min(right, chars.length - 1);
		// An inverted range gives a negative size; never pass that as a capacity.
		StringBuilder result = new StringBuilder(Math.max(last - first + 1, 0));
		for (int i = first; i <= last; i++) {
			char c = chars[i];
			if (c == '\t') {
				result.append(' ');
			} else if (c == '\n' || c == '\r' || c == '\f') {
				// Collapse a line break into one space, but only after a
				// non-whitespace character so that "\r\n" or indented
				// continuation lines do not produce runs of spaces.
				if ((i > 0) && !Character.isWhitespace(chars[i - 1])) {
					result.append(' ');
				}
			} else {
				result.append(c);
			}
		}
		return result.toString();
	}

	/**
	 * Returns the environment that was bound to this parser at construction.
	 * 
	 * @return the parser's environment
	 */
	public BasicEnvironment getEnvironment() {
		return this.environment;
	}

	/**
	 * Returns the repair count used when a parse is started without an
	 * explicit count.
	 * 
	 * @return the value last passed to {@link #setDefaultRepairCount(int)},
	 *         or 0 if it was never set
	 * 
	 * @since 1.3
	 */
	public int getDefaultRepairCount() {
		return this.defaultRepairCount;
	}

	/**
	 * Returns the lexer associated with this parser.
	 * 
	 * @return the lexer, or null if the parser was created from an
	 *         environment and no lexer has been set since
	 */
	public AbstractLexer getLexer() {
		return this.lexer;
	}

	/**
	 * Runs the parser without a monitor, using the default repair count.
	 * 
	 * @return the CST root returned by {@link #parser(Monitor, int)}
	 * 
	 * @deprecated clients should invoke {@link #parser()}
	 */
	@Deprecated
	public CSTNode parseTokensToCST() {
		final Monitor noMonitor = null;
		final int repairCount = this.defaultRepairCount;
		return parser(noMonitor, repairCount);
	}
	
	/**
	 * Passes the given lex stream to the parse stream's
	 * <code>resetLexStream</code>.
	 * 
	 * @param lexStream
	 *            the lex stream the parse stream should be reset with
	 * 
	 * @deprecated To set/reset the lex stream use {@link #reset(ILexStream)}
	 */
	@Deprecated
	public void resetLexStream(LexStream lexStream) {
		final DerivedPrsStream prsStream = getIPrsStream();
		prsStream.resetLexStream(lexStream);
	}


	
	/**
	 * Sets the number of repairs to be performed by a parser capable of
	 * performing repairs (the BacktrackingParser) unless overridden on the
	 * specific parser invocation method.
	 * <ul>
	 * <li>When repair count is 0, no Error token recoveries occur.</li>
	 * <li>When repair count is &gt; 0, it limits the number of Error token
	 * recoveries.</li>
	 * <li>When repair count is &lt; 0, the number of error token recoveries is
	 * unlimited.</li>
	 * </ul>
	 * 
	 * @param defaultRepairCount
	 *            the default repair count
	 * 
	 * @since 1.3
	 */
	public void setDefaultRepairCount(int defaultRepairCount) {
		this.defaultRepairCount = defaultRepairCount;
	}
	
	/**
	 * Remembers the given lexer as this parser's lexer and resets the parse
	 * stream with that lexer's lex stream.
	 * 
	 * @param lexer
	 *            the lexer providing the new lex stream
	 * 
	 * @deprecated To set/reset the lex stream use {@link #reset(ILexStream)}
	 */
	@Deprecated
	public void setLexStream(AbstractLexer lexer) {
		this.lexer = lexer;
		final DerivedPrsStream prsStream = getIPrsStream();
		prsStream.resetLexStream(lexer.getILexStream());
	}

	/**
	 * Makes the given token both the start and the end token of the CSTNode
	 * and copies the token's offsets to the node. Should the token report an
	 * end offset smaller than its start offset, the two values are exchanged
	 * so that the node's start offset is never greater than its end offset.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param startEnd
	 *            IToken to retrieve offsets from
	 */
	protected void setOffsets(CSTNode cstNode, IToken startEnd) {
		cstNode.setStartToken(startEnd);
		cstNode.setEndToken(startEnd);
		final int tokenStart = startEnd.getStartOffset();
		final int tokenEnd = startEnd.getEndOffset();
		// min/max performs the swap needed when the token's offsets are inverted.
		cstNode.setStartOffset(Math.min(tokenStart, tokenEnd));
		cstNode.setEndOffset(Math.max(tokenStart, tokenEnd));
	}

	/**
	 * Copies the start/end tokens and the start/end offsets of the 2nd given
	 * CSTNode to the 1st given CSTNode.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param startEnd
	 *            CSTNode to retrieve offsets from
	 */
	protected void setOffsets(CSTNode cstNode, CSTNode startEnd) {
		// Each value is read immediately before it is written, in the same
		// order as before, so the result is unchanged even if both arguments
		// are the same node.
		final IToken firstToken = startEnd.getStartToken();
		cstNode.setStartToken(firstToken);
		final IToken lastToken = startEnd.getEndToken();
		cstNode.setEndToken(lastToken);
		final int firstOffset = startEnd.getStartOffset();
		cstNode.setStartOffset(firstOffset);
		final int lastOffset = startEnd.getEndOffset();
		cstNode.setEndOffset(lastOffset);
	}

	/**
	 * Gives the 1st CSTNode the start token and start offset of the 2nd
	 * CSTNode and the end token and end offset of the 3rd CSTNode.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param start
	 *            CSTNode to retrieve start offset from
	 * @param end
	 *            CSTNode to retrieve end offset from
	 */
	protected void setOffsets(CSTNode cstNode, CSTNode start, CSTNode end) {
		final IToken firstToken = start.getStartToken();
		cstNode.setStartToken(firstToken);
		final IToken lastToken = end.getEndToken();
		cstNode.setEndToken(lastToken);
		final int firstOffset = start.getStartOffset();
		cstNode.setStartOffset(firstOffset);
		final int lastOffset = end.getEndOffset();
		cstNode.setEndOffset(lastOffset);
	}

	/**
	 * Gives the 1st CSTNode the start token and start offset of the 2nd
	 * CSTNode, and uses the given IToken as end token and as the source of
	 * the end offset.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param start
	 *            CSTNode to retrieve start offset from
	 * @param end
	 *            IToken to retrieve end offset from
	 */
	protected void setOffsets(CSTNode cstNode, CSTNode start, IToken end) {
		final IToken firstToken = start.getStartToken();
		cstNode.setStartToken(firstToken);
		cstNode.setEndToken(end);
		final int firstOffset = start.getStartOffset();
		cstNode.setStartOffset(firstOffset);
		final int lastOffset = end.getEndOffset();
		cstNode.setEndOffset(lastOffset);
	}

	/**
	 * Uses the given IToken as start token and as the source of the start
	 * offset of the 1st CSTNode, and gives it the end token and end offset of
	 * the 2nd CSTNode.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param start
	 *            IToken to retrieve start offset from
	 * @param end
	 *            CSTNode to retrieve end offset from
	 */
	protected void setOffsets(CSTNode cstNode, IToken start, CSTNode end) {
		cstNode.setStartToken(start);
		final IToken lastToken = end.getEndToken();
		cstNode.setEndToken(lastToken);
		final int firstOffset = start.getStartOffset();
		cstNode.setStartOffset(firstOffset);
		final int lastOffset = end.getEndOffset();
		cstNode.setEndOffset(lastOffset);
	}

	/**
	 * Sets the start token and start offset of the given CSTNode from the 1st
	 * given IToken, and its end token and end offset from the 2nd given
	 * IToken.
	 * 
	 * @param cstNode
	 *            CSTNode to set offsets
	 * @param start
	 *            IToken to retrieve start offset from
	 * @param end
	 *            IToken to retrieve end offset from
	 */
	protected void setOffsets(CSTNode cstNode, IToken start, IToken end) {
		cstNode.setStartToken(start);
		cstNode.setEndToken(end);
		final int firstOffset = start.getStartOffset();
		cstNode.setStartOffset(firstOffset);
		final int lastOffset = end.getEndOffset();
		cstNode.setEndOffset(lastOffset);
	}

    /**
     * 

* Initializes a concrete-syntax node's start and end offsets from the * current token in the parser stream. *

* *

* Note: this method resided in the OCLEssential.g template since 1.2 * It has been incorporated in the abstract parser since 3.0 *

* * @param cstNode a concrete-syntax node * * @since 3.0 */ protected void setOffsets(CSTNode cstNode) { IToken firstToken = getRhsIToken(1); cstNode.setStartToken(firstToken); cstNode.setEndToken(firstToken); cstNode.setStartOffset(firstToken.getStartOffset()); cstNode.setEndOffset(firstToken.getEndOffset()-1); } /** * Removes the "s surrounding a quoted string, if any. * * @param quoted * a possibly quoted string * @return quoted without the surrounding quotes, or just * quoted verbatim if there were none * * @deprecated Use unDoubleQuote */ @Deprecated protected String unquote(String quoted) { String result = quoted; if ((result != null) && (result.length() > 1)) { int max = result.length() - 1; if ((result.charAt(0) == '"') && (quoted.charAt(max) == '"')) { result = result.substring(1, max); } // this is a regexp, so the backslash needs to be // re-escaped, thus "\\" is rendered in a Java // string literal as "\\\\" result = result.replaceAll("\\\\\"", "\""); //$NON-NLS-2$//$NON-NLS-1$ if (!result.equals(quoted)) { // check settings for using non-standard closure iterator ProblemHandler.Severity sev = ProblemHandler.Severity.OK; BasicEnvironment benv = getEnvironment(); if (benv != null) { sev = benv .getValue(ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE); if ((sev != null) && (sev != ProblemHandler.Severity.OK)) { benv.problem(sev, ProblemHandler.Phase.PARSER, OCLMessages .bind(OCLMessages.NonStd_DQuote_Escape_, quoted), "unquote", //$NON-NLS-1$ null); } } } } return result; } /** * Removes any "s surrounding a quoted string and decodes any escape sequences * within it using {@link #decodeEscapeSequence}. *

* For MDT/OCL 1.3.0 compatibility, a problem is reported if double * quotes are present and the {@link #ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE} * severity is not {@link #ProblemHandler.Severity.OK}. * * @param token containing string to be decoded * @return string value of token with escapes replaced * @since 3.0 * @deprecated double quotes form no part of the OCL specification */ @Deprecated protected String unDoubleQuote(IToken token) { if (token == null) { return null; } String quoted = token.toString(); if (quoted == null) { return null; } int quotedLength = quoted.length(); if ((quotedLength < 2) || (quoted.charAt(0) != '"') || (quoted.charAt(quotedLength-1) != '"')) { return quoted; } ProblemHandler.Severity sev = ProblemHandler.Severity.OK; BasicEnvironment benv = getEnvironment(); if (benv != null) { sev = benv .getValue(ProblemOption.ELEMENT_NAME_QUOTE_ESCAPE); if ((sev != null) && (sev != ProblemHandler.Severity.OK)) { benv.problem(sev, ProblemHandler.Phase.PARSER, OCLMessages .bind(OCLMessages.NonStd_DQuote_Escape_, quoted), "unquote", //$NON-NLS-1$ token); } } return decodeString(token, quoted.substring(1, quotedLength-1)); } /** * Removes any quotes surrounding the string value of a token * using {@link #unSingleQuote(String,int)} and optionally decode any * escape sequences within it using {@link #decodeEscapeSequence}. *

* For MDT/OCL 1.3.0 compatibility, escape sequences conversion can be disabled * by resetting {@link #ParsingOption.USE_BACKSLASH_ESCAPE_PROCESSING}. * * @param token containing string to be decoded * @return string value of token with escapes replaced * @since 3.0 */ protected String unSingleQuote(IToken token) { if (token == null) { return null; } String quoted = token.toString(); if (quoted == null) { return null; } String unquoted = unSingleQuote(quoted); if (unquoted == null) { return quoted; } Boolean backslashProcessingEnabled = null; BasicEnvironment benv = getEnvironment(); if (benv != null) { backslashProcessingEnabled = benv .getValue(ParsingOptions.USE_BACKSLASH_ESCAPE_PROCESSING); } if ((backslashProcessingEnabled == null) || !backslashProcessingEnabled) { return unquoted; } return decodeString(token, unquoted); } /** * Removes any quotes surrounding a quoted string. *

* The default implementation removes a leading/trailing single quote pair, * or an underscore-prefixed leading/trailing single quote pair. * * @param quoted string to be decoded * @return string content * @since 3.0 */ protected String unSingleQuote(String quoted) { int quotedLength = quoted.length(); if ((quotedLength >= 2) && (quoted.charAt(0) == '\'') && (quoted.charAt(quotedLength-1) == '\'')) { return quoted.substring(1, quotedLength-1); } else if ((quotedLength >= 3) && (quoted.charAt(0) == '_') && (quoted.charAt(1) == '\'') && (quoted.charAt(quotedLength-1) == '\'')) { return quoted.substring(2, quotedLength-1); } else { return null; } } /** * Returns a string with any escape sequences decoded by {@link #decodeEscapeSequence}. * * @param token the token from which the string originated * @param string to be decoded * @return string without any escapes replaced * @since 3.0 */ protected String decodeString(IToken token, String string) { if (string.indexOf('\\') < 0) { return string; } StringBuffer s = new StringBuffer(); StringCharacterIterator i = new StringCharacterIterator(string); for (char c = i.first(); c != StringCharacterIterator.DONE; c = i.next()) { if (c != '\\') { s.append(c); } else { int iStart = i.getIndex(); char ch = decodeEscapeSequence(i); if (ch != StringCharacterIterator.DONE) { s.append(ch); } else { BasicEnvironment benv = getEnvironment(); benv.problem(ProblemHandler.Severity.ERROR, ProblemHandler.Phase.PARSER, OCLMessages .bind(OCLMessages.InvalidEscapeSequence_ERROR, string.substring(iStart, i.getIndex())), "unquote", //$NON-NLS-1$ token); return string; } } } return s.toString(); } /** * Decodes an escape sequence in accordance with the Issue 14357 * proposals for Concrete Syntaxes. For compatibility octal escape sequences * are also supported. *

*

* * * * * * * * * * * * * * * * * * * *
\b \u0008: backspace BS
\t \u0009: horizontal tab HT
\n \u000a: line feed LF
\f \u000c: form feed FF
\r \u000d: carriage return CR
\" \u0022: double quote "
\' \u0027: single quote '
\\ \u005c: backslash \
\xhh \u00hh: hex byte
\uhhhh \uhhhh: hex code point
\7$ \u000o: octal byte
\77 \u00ff: octal byte
\37$ \u00ff: octal byte
\377 \u00ff: octal byte
h hex digit (0-9, a-f, A-F)
7 octal digit (0-7)
3 octal prefix digit (0-3)
$ end of string or non-octal digit next letter
*



























*

* @param i character iterator pointing at first character after the introducer * * @return the decoded character and i updated to point to the first character following the * escape sequence, or DONE and i unchanged if the escape sequence is unrecognised * @since 3.0 */ protected char decodeEscapeSequence(StringCharacterIterator i) { int savedIndex = i.getIndex(); char c = i.next(); switch (c) { case 'b' : return '\b'; case 'f' : return '\f'; case 't' : return '\t'; case 'n' : return '\n'; case 'r' : return '\r'; case '\\' : return '\\'; case '\'' : return '\''; case '"' : return '\"'; case '0' : case '1' : case '2' : case '3' : { int c1 = c - '0'; int c2 = decodeOctalCharacter(i); if (c2 < 0) { return (char)(c1); } int c3 = decodeOctalCharacter(i); if (c3 < 0) { return (char)((c1 << 3) + c2); } return (char)((c1 << 6) + (c2 << 3) + c3); } case '4' : case '5' : case '6' : case '7' : { int c1 = c - '0'; int c2 = decodeOctalCharacter(i); if (c2 < 0) { i.previous(); return (char)(c1); } return (char)((c1 << 3) + c2); } case 'x' : { int c1 = decodeHexCharacter(i.next()); int c2 = decodeHexCharacter(i.next()); if ((c1 < 0) || (c2 < 0)) { break; } return (char)((c1 << 4) + c2); } case 'u' : { int c1 = decodeHexCharacter(i.next()); int c2 = decodeHexCharacter(i.next()); int c3 = decodeHexCharacter(i.next()); int c4 = decodeHexCharacter(i.next()); if ((c1 < 0) || (c2 < 0) || (c3 < 0) || (c4 < 0)) { break; } return (char)((c1 << 12) + (c2 << 8) + (c3 << 4) + c4); } } i.setIndex(savedIndex); // Give derived augmentations the same starting point return StringCharacterIterator.DONE; } /** * Return the value of the next character if it is a hexadecimal character. 
* * @param c potentially hex character * @return the hex value or -1 if the next character is not a hex character * @since 3.0 */ protected int decodeHexCharacter(char c) { if (('0' <= c) && (c <= '9')) { return c - '0'; } if (('A' <= c) && (c <= 'F')) { return 10 + c - 'A'; } if (('a' <= c) && (c <= 'f')) { return 10 + c - 'a'; } return -1; } /** * Return the value of the next character if it is an octal character. * * @param c potentially octal character * @return the hex value or -1 if the next character is not a octal character * @since 3.0 */ protected int decodeOctalCharacter(StringCharacterIterator i) { char c = i.next(); if (c == StringCharacterIterator.DONE) { return -1; } if (('0' <= c) && (c <= '7')) { return c - '0'; } i.previous(); return -1; } // Some useful methods which will be implemented in the generated Parser abstract public String[] orderedTerminalSymbols(); /** * @return the number of different parser tokens * * @since 3.0 */ abstract public int numTokenKinds(); /** * @return the parser's {@link IPrsStream parseStream} * * @since 3.0 */ abstract public DerivedPrsStream getIPrsStream(); /** * Resets the parser's {@link ILexStream lexStream} * @param lexStream * * @since 3.0 */ abstract public void reset(ILexStream lexStream); /** * Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams * * @return the root {@link CSTNode} which results from the parsing process * @since 3.0 */ abstract public CSTNode parser(); /** * Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams * using the given {@link Monitor} * * @return the root {@link CSTNode} which results from the parsing process * @since 3.0 */ abstract public CSTNode parser(Monitor monitor); /** * Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams * using a given error_repair_count (useful for a backtracking parser) * * @return the root {@link CSTNode} which results from the 
parsing process * @since 3.0 */ abstract public CSTNode parser(int error_repair_count); /** * Runs the parser with the current {@link ILexStream lex} and {@link IPrsStream parse} streams * using the given {@link Monitor} and error_repair_count (useful for a backtracking parser) * * @return the root {@link CSTNode} which results from the parsing process * @since 3.0 */ abstract public CSTNode parser(Monitor monitor, int error_repair_count); /** * @return the {@link ParseTable} used by the parser * @since 3.0 */ abstract public ParseTable getParseTable(); /** * @since 3.0 */ abstract protected Object getRhsSym(int i); /** * @since 3.0 */ abstract protected int getRhsTokenIndex(int i); /** * @since 3.0 */ abstract protected IToken getRhsIToken(int i); /** * @since 3.0 */ abstract protected int getRhsFirstTokenIndex(int i); /** * @since 3.0 */ abstract protected IToken getRhsFirstIToken(int i); /** * @since 3.0 */ abstract protected int getRhsLastTokenIndex(int i); /** * @since 3.0 */ abstract protected IToken getRhsLastIToken(int i); /** * @since 3.0 */ abstract protected int getLeftSpan(); /** * @since 3.0 */ abstract protected IToken getLeftIToken(); /** * @since 3.0 */ abstract protected int getRightSpan(); /** * @since 3.0 */ abstract protected IToken getRightIToken(); /** * @since 3.0 */ abstract protected int getRhsErrorTokenIndex(int i); /** * @since 3.0 */ abstract protected ErrorToken getRhsErrorIToken(int i); /** * @since 3.0 */ abstract protected void setResult(Object object); }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy