/*
 * Copyright (c) 2012 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD-3-Clause license that
 * can be found in the LICENSE.txt file in the project root.
 */

/** The definitive ANTLR v3 grammar to parse ANTLR v4 grammars.
 *  The grammar builds ASTs that are sniffed by subsequent stages.
 */
parser grammar ANTLRParser;

options {
	// Target language is Java, which is the default but being specific
	// here as this grammar is also meant as a good example grammar
	// for users.
	language      = Java;

	// The output of this grammar is going to be an AST upon which
	// we run a semantic checking phase, then the rest of the analysis
	// including final code generation.
	output        = AST;

	// The vocabulary (tokens and their int token types) we are using
	// for the parser. This is generated by the lexer. The vocab will be extended
	// to include the imaginary tokens below.
	tokenVocab    = ANTLRLexer;

	ASTLabelType  = GrammarAST;
}

// Imaginary Tokens
//
// Imaginary tokens do not exist as far as the lexer is concerned, and it cannot
// generate them. However, we sometimes need additional 'tokens' to use as root
// nodes for the AST we are generating. The tokens section is where we
// specify any such tokens.
tokens {
    RULE;
    PREC_RULE;        // flip to this if we find that it's left-recursive
    RULES;
    RULEMODIFIERS;
    RULEACTIONS;
    BLOCK;
    OPTIONAL;
    CLOSURE;
    POSITIVE_CLOSURE;
    RANGE;
    SET;
    CHAR_RANGE;
    EPSILON;
    ALT;
    ALTLIST;
    ID;
    ARG;
    ARGLIST;
    RET;
    COMBINED;
    INITACTION;
    LABEL;                // $x used in rewrite rules
    TEMPLATE;
    WILDCARD;
    // A generic node indicating a list of something when we don't
    // really need to distinguish what we have a list of as the AST
    // will 'know' by context.
    //
    LIST;
    ELEMENT_OPTIONS;      // TOKEN<options>
    RESULT;

    // lexer action stuff
    LEXER_ALT_ACTION;
    LEXER_ACTION_CALL; // ID(foo)
}

// Include the copyright in this source and also the generated source
//
@header {
/*
 * Copyright (c) 2012 The ANTLR Project. All rights reserved.
 * Use of this file is governed by the BSD-3-Clause license that
 * can be found in the LICENSE.txt file in the project root.
 */
package org.antlr.v4.parse;

import org.antlr.v4.tool.*;
import org.antlr.v4.tool.ast.*;

import java.util.ArrayDeque;
import java.util.Deque;
}

@members {
Deque<String> paraphrases = new ArrayDeque<String>();
public void grammarError(ErrorType etype, org.antlr.runtime.Token token, Object... args) { }
}

// The main entry point for parsing a V4 grammar from top to toe. This is
// the rule to invoke to obtain the AST for the parse.
//
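// As a quick orientation, a minimal combined grammar accepted here
// looks like this (names are illustrative):
//
//     grammar T;
//     r  : 'hello' ID ;
//     ID : [a-z]+ ;
//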
grammarSpec
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
	Grammar.setNodeOptions($tree, options);
}
}
    : // First we should see the type and name of the grammar file that
      // we are about to parse.
      //
      grammarType id SEMI

      // There now follows zero or more declaration sections that should
      // be given to us before the rules are declared
      //
// A number of things can be declared/stated before the grammar rules
// 'proper' are parsed. These include grammar imports (delegate), grammar
// options, imaginary token declarations, global scope declarations,
// and actions such as @header. In this rule we allow any number of
// these constructs in any order so that the grammar author is not
// constrained by some arbitrary order of declarations that nobody
// can remember. In the next phase of the parse, we verify that these
// constructs are valid, not repeated and so on.
      sync ( prequelConstruct sync )*

	  // We should now see at least one ANTLR EBNF style rule
	  // declaration. If the rules are missing we will let the
	  // semantic verification phase tell the user about it.
	  //
	  rules

	  modeSpec*

      // And we force ANTLR to process everything it finds in the input
      // stream by specifying the need to match End Of File before the
      // parse is complete.
      //
      EOF

      // Having parsed everything in the file and accumulated the relevant
      // subtrees, we can now rewrite everything into the main AST form
      // that our tree walkers are expecting.
      //

      -> ^(grammarType       // The grammar type is our root AST node
             id              // We need to identify the grammar of course
             prequelConstruct* // The set of declarations we accumulated
             rules           // And of course, we need the set of rules we discovered
             modeSpec*
         )
	;

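// This matches just the keyword(s) opening a header such as:
//
//     lexer grammar L;     // a standalone lexer
//     parser grammar P;    // a standalone parser
//     grammar G;           // a combined lexer and parser
//
// A v3-style tree grammar keyword is recognized only so that it can be
// reported as unsupported (see the @after action below).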
grammarType
@after {
	if ( $tg!=null ) throw new v3TreeGrammarException(tg);
	if ( $t!=null ) ((GrammarRootAST)$tree).grammarType = $t.type;
	else ((GrammarRootAST)$tree).grammarType=COMBINED;
}
    :	(	t=LEXER g=GRAMMAR  -> GRAMMAR[$g, "LEXER_GRAMMAR", getTokenStream()]
		| // A standalone parser specification
		  	t=PARSER g=GRAMMAR -> GRAMMAR[$g, "PARSER_GRAMMAR", getTokenStream()]

		// A combined lexer and parser specification
		| 	g=GRAMMAR          -> GRAMMAR[$g, "COMBINED_GRAMMAR", getTokenStream()]
		|   tg=TREE_GRAMMAR

		)
    ;

// This is the list of all constructs that can be declared before
// the set of rules that compose the grammar, and is matched 0..n
// times by the grammarSpec rule.
prequelConstruct
	: // A list of options that affect analysis and/or code generation
	  optionsSpec

    | // A list of grammars to which this grammar will delegate certain
      // parts of the parsing sequence - a set of imported grammars
      delegateGrammars

    | // The declaration of any token types we need that are not already
      // specified by a preceding grammar, such as when a parser declares
      // imaginary tokens with which to construct the AST, or a rewriting
      // tree parser adds further imaginary tokens to ones defined in a prior
      // {tree} parser.
      tokensSpec

	| // A list of custom channels used by the grammar
	  channelsSpec

    | // A declaration of constructs implemented in the target language. All
      // such action sections start with '@' and are given to the language target's
      // StringTemplate group. For instance @parser::header and @lexer::header
      // are gathered here.
      action
    ;

// A list of options that affect analysis and/or code generation
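//
// e.g. (option names are illustrative):
//
//     options { tokenVocab = MyLexer; superClass = MyBaseParser; }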
optionsSpec
	:	OPTIONS (option SEMI)* RBRACE -> ^(OPTIONS[$OPTIONS, "OPTIONS"] option*)
    ;

option
    :   id ASSIGN^ optionValue
    ;

// ------------
// Option Value
//
// The actual value of an option - Doh!
//
optionValue
    : // If the option value is a single word that conforms to the
      // lexical rules of token or rule names, then the user may skip quotes
      // and so on. Many option values meet this description.
      qid
    | STRING_LITERAL
	| ACTION
    | INT
    ;

// A list of grammars to which this grammar will delegate certain
// parts of the parsing sequence - a set of imported grammars
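//
// e.g. (grammar names are illustrative):
//
//     import CommonLexerRules, x = SomeGrammar;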
delegateGrammars
	: IMPORT delegateGrammar (COMMA delegateGrammar)* SEMI -> ^(IMPORT delegateGrammar+)
	;

// A possibly named grammar file that should be imported to this grammar
// and delegated to for the rules it specifies
delegateGrammar
    :   id ASSIGN^ id
    |   id
    ;

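// Matches the v4 style token declaration list, e.g.
//
//     tokens { INDENT, DEDENT }
//
// The old v3 style (e.g. tokens { T='text'; }) is also matched, but only
// so that it can be reported as an error.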
tokensSpec
	: TOKENS_SPEC id (COMMA id)* COMMA? RBRACE -> ^(TOKENS_SPEC id+)
    | TOKENS_SPEC RBRACE ->
    | TOKENS_SPEC^ v3tokenSpec+ RBRACE!
      {grammarError(ErrorType.V3_TOKENS_SYNTAX, $TOKENS_SPEC);}
	;

v3tokenSpec
	:	id
		(	ASSIGN lit=STRING_LITERAL
            {
            grammarError(ErrorType.V3_ASSIGN_IN_TOKENS, $id.start,
                         $id.text, $lit.getText());
            }
						            	-> id // ignore assignment
		|								-> id
		)
		SEMI
	;

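// e.g. (channel names are illustrative):
//
//     channels { WSCHANNEL, COMMENT_CHANNEL }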
channelsSpec
	:	CHANNELS^ (id (COMMA! id)* COMMA?)? RBRACE!
	;

// A declaration of a language target specific section,
// such as @header, @includes and so on. We do not verify these
// sections, they are just passed on to the language target.
/** Match stuff like @parser::members {int i;} */
action
	:	AT (actionScopeName COLONCOLON)? id ACTION -> ^(AT actionScopeName? id ACTION)
	;

/** Sometimes the scope names will collide with keywords; allow them as
 *  ids for action scopes.
 */
actionScopeName
	:	id
	|	LEXER	-> ID[$LEXER]
    |   PARSER	-> ID[$PARSER]
	;

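// Only meaningful in lexer grammars, e.g. (names are illustrative):
//
//     mode INSIDE;
//     CLOSE : ')' -> popMode ;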
modeSpec
    :	MODE id SEMI sync (lexerRule sync)*  -> ^(MODE id lexerRule*)
    ;

rules
    :	sync (rule sync)*
      // Rewrite with an enclosing node as this is good for counting
      // the number of rules and an easy marker for the walker to detect
      // that there are no rules.
      ->^(RULES rule*)
    ;

sync
@init {
	BitSet followSet = computeErrorRecoverySet();
	if ( input.LA(1)!=Token.EOF && !followSet.member(input.LA(1)) ) {
		reportError(new NoViableAltException("",0,0,input));
       	beginResync();
       	consumeUntil(input, followSet);
       	endResync();
	}
}	:
	;

rule:	parserRule
	|	lexerRule
	;

// The specification of an EBNF rule in ANTLR style, with all the
// rule level parameters, declarations, actions, rewrite specs and so
// on.
//
// Note that here we allow any number of rule declaration sections (such
// as scope, returns, etc) in any order and we let the upcoming semantic
// verification of the AST determine if things are repeated or if a
// particular functional element is not valid in the context of the
// grammar type, such as using returns in lexer rules and so on.
parserRule
@init { paraphrases.push("matching a rule"); }
@after {
	paraphrases.pop();
	GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
	if ( options!=null ) {
		Grammar.setNodeOptions($tree, options);
	}
}
    : // Start with the rule name. Here we do not distinguish between
	  // parser or lexer rules, the semantic verification phase will
	  // reject any rules that make no sense, such as lexer rules in
	  // a pure parser or tree parser.
	  RULE_REF

	  // Immediately following the rule name, there may be a specification
	  // of input parameters for the rule. We do not do anything with the
	  // parameters here except gather them for future phases such as
	  // semantic verification, type assignment etc. We require that
	  // the input parameters are the next syntactically significant element
	  // following the rule id.
	  ARG_ACTION?

	  ruleReturns?

	  throwsSpec?

	  localsSpec?

	  // Now, before the rule specification itself, which is introduced
	  // with a COLON, we may have zero or more configuration sections.
	  // As usual we just accept anything that is syntactically valid for
	  // one form of the rule or another and let the semantic verification
	  // phase throw out anything that is invalid.
// At the rule level, a programmer may specify a number of sections, such
// as scope declarations, rule return elements, @ sections (which may be
// language target specific) and so on. We allow any number of these in any
// order here and as usual rely on the semantic verification phase to reject
// anything invalid using its additional context information. Here we are
// context free and just accept anything that is a syntactically correct
// construct.
//
      rulePrequels

      COLON

      // The rule is, at the top level, just a list of alts, with
      // finer grained structure defined within the alts.
      ruleBlock

      SEMI

      exceptionGroup

      -> ^( RULE RULE_REF ARG_ACTION?
      		ruleReturns? throwsSpec? localsSpec? rulePrequels? ruleBlock exceptionGroup*
      	  )
    ;

// Many language targets support exceptions and the rule will
// generally be able to throw the language target equivalent
// of a recognition exception. The grammar programmer can
// specify a list of exceptions to catch or a generic catch all
// and the target language code generation template is
// responsible for generating code that makes sense.
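// e.g. (handler bodies are target language code; names are illustrative):
//
//     catch [RecognitionException e] { handleIt(e); }
//     finally { cleanUp(); }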
exceptionGroup
    : exceptionHandler* finallyClause?
    ;

// Specifies a handler for a particular type of exception
// thrown by a rule
exceptionHandler
	: CATCH ARG_ACTION ACTION -> ^(CATCH ARG_ACTION ACTION)
	;

finallyClause
	: FINALLY ACTION -> ^(FINALLY ACTION)
	;

rulePrequels
@init { paraphrases.push("matching rule preamble"); }
@after { paraphrases.pop(); }
	:	sync (rulePrequel sync)* -> rulePrequel*
	;

// An individual rule level configuration as referenced by the ruleActions
// rule above.
//
rulePrequel
    : optionsSpec
    | ruleAction
    ;

// A rule can return elements that it constructs as it executes.
// The return values are specified in a 'returns' prequel element,
// which contains COMMA separated declarations, where the declaration
// is target language specific. Here we see the returns declaration
// as a single lexical action element, to be processed later.
//
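// e.g. (declaration text is target language code, here Java-flavored):
//
//     returns [int amount, String name]
//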
ruleReturns
	: RETURNS^ ARG_ACTION
	;

// --------------
// Exception spec
//
// Some target languages, such as Java and C#, support exceptions
// and they are specified as a prequel element for each rule that
// wishes to throw its own exception type. Note that the name of the
// exception is just a single word, so the header section of the grammar
// must specify the correct import statements (or language equivalent).
// Target languages that do not support exceptions just safely ignore
// them.
//
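// e.g. (the custom exception name is illustrative):
//
//     throws RecognitionException, MyCustomException
//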
throwsSpec
    : THROWS qid (COMMA qid)* -> ^(THROWS qid+)
    ;

// locals [Cat x, float g]
localsSpec : LOCALS^ ARG_ACTION ;

// @ Sections are generally target language specific things
// such as local variable declarations, code to run before the
// rule starts and so on. For instance most targets support the
// @init {} section where declarations and code can be placed
// to run before the rule is entered. The C target also has
// an @declarations {} section, where local variables are declared
// in order that the generated code is C89 compliant.
//
/** Match stuff like @init {int i;} */
ruleAction
	:	AT id ACTION -> ^(AT id ACTION)
	;

// A set of alts, rewritten as a BLOCK for generic processing
// in tree walkers. Used by the rule 'rule' so that the list of
// alts for a rule appears as a BLOCK containing the alts and
// can be processed by the generic BLOCK rule. Note that we
// use a separate rule so that the BLOCK node has start and stop
// boundaries set correctly by rule post processing of rewrites.
ruleBlock
@init {Token colon = input.LT(-1);}
    :	ruleAltList -> ^(BLOCK[colon,"BLOCK"] ruleAltList)
    ;
    catch [ResyncToEndOfRuleBlock e] {
    	// just resyncing; ignore error
		retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
    }

ruleAltList
	:	labeledAlt (OR labeledAlt)* -> labeledAlt+
	;

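// An alternative optionally followed by '#' and an identifier naming
// that alternative, e.g.
//
//     expr '*' expr # Mult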
labeledAlt
	:	alternative
		(	POUND! id! {((AltAST)$alternative.tree).altLabel=$id.tree;}
		)?
	;

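// e.g. (rule names are illustrative):
//
//     fragment DIGIT : '0'..'9' ;
//     INT : DIGIT+ ;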
lexerRule
@init { paraphrases.push("matching a lexer rule"); }
@after {
	paraphrases.pop();
}
    : FRAGMENT?
	  TOKEN_REF COLON lexerRuleBlock SEMI
      -> ^( RULE TOKEN_REF
      		^(RULEMODIFIERS FRAGMENT)? lexerRuleBlock
      	  )
	;

lexerRuleBlock
@init {Token colon = input.LT(-1);}
    :	lexerAltList -> ^(BLOCK[colon,"BLOCK"] lexerAltList)
    ;
    catch [ResyncToEndOfRuleBlock e] {
    	// just resyncing; ignore error
		retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), null);
    }

lexerAltList
	:	lexerAlt (OR lexerAlt)* -> lexerAlt+
	;

lexerAlt
	:	lexerElements
		(	lexerCommands	-> ^(LEXER_ALT_ACTION lexerElements lexerCommands)
		|					-> lexerElements
		)
	;

lexerElements
    :	lexerElement+	-> ^(ALT lexerElement+)
	|					-> ^(ALT EPSILON) // empty alt
    ;

lexerElement
@init {
	paraphrases.push("looking for lexer rule element");
	int m = input.mark();
}
@after { paraphrases.pop(); }
	:	labeledLexerElement
		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK[$labeledLexerElement.start,"BLOCK"] ^(ALT labeledLexerElement) ) )
		|				-> labeledLexerElement
		)
	|	lexerAtom
		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK[$lexerAtom.start,"BLOCK"] ^(ALT lexerAtom) ) )
		|				-> lexerAtom
		)
	|	lexerBlock
		(	ebnfSuffix	-> ^(ebnfSuffix lexerBlock)
		|				-> lexerBlock
		)
	|	actionElement // actions only allowed at end of outer alt actually,
					  // but preds can be anywhere
	;
    catch [RecognitionException re] {
    	retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
    	int ttype = input.get(input.range()).getType(); // seems to be next token
	    // look for anything that really belongs at the start of the rule minus the initial ID
    	if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT || ttype==EOF ) {
			RecognitionException missingSemi =
				new v4ParserException("unterminated rule (missing ';') detected at '"+
									  input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
			reportError(missingSemi);
			if ( ttype==EOF ) {
				input.seek(input.index()+1);
			}
			else if ( ttype==CATCH || ttype==FINALLY ) {
				input.seek(input.range()); // ignore what's before rule trailer stuff
			}
			else if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
				int p = input.index();
				Token t = input.get(p);
				while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
					p--;
					t = input.get(p);
				}
				input.seek(p);
			}
			throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
		}
        reportError(re);
        recover(input,re);
	}

labeledLexerElement
	:	id (ass=ASSIGN|ass=PLUS_ASSIGN)
		(	lexerAtom	-> ^($ass id lexerAtom)
		|	lexerBlock	-> ^($ass id lexerBlock)
		)
	;


lexerBlock
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
	Grammar.setNodeOptions($tree, options);
}
}
 	:	LPAREN
        ( optionsSpec COLON )?
        lexerAltList
        RPAREN
      -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? lexerAltList )
    ;

// channel=HIDDEN, skip, more, mode(INSIDE), push(INSIDE), pop
lexerCommands
	:	RARROW lexerCommand (COMMA lexerCommand)* -> lexerCommand+
	;

lexerCommand
	:	lexerCommandName LPAREN lexerCommandExpr RPAREN -> ^(LEXER_ACTION_CALL lexerCommandName lexerCommandExpr)
	|	lexerCommandName
	;

lexerCommandExpr
	:	id
	|	INT
	;

lexerCommandName
        :       id
        |       MODE    ->ID[$MODE]
        ;

altList
    :	alternative (OR alternative)* -> alternative+
    ;

// An individual alt with an optional alt option like <assoc=right>
alternative
@init { paraphrases.push("matching alternative"); }
@after {
    paraphrases.pop();
    Grammar.setNodeOptions($tree, $o.tree);
}
	:	o=elementOptions?
		(	e+=element+                     -> ^(ALT elementOptions? $e+)
		|                                   -> ^(ALT elementOptions? EPSILON) // empty alt
		)
	;

element
@init {
	paraphrases.push("looking for rule element");
	int m = input.mark();
}
@after { paraphrases.pop(); }
	:	labeledElement
		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK[$labeledElement.start,"BLOCK"] ^(ALT labeledElement ) ))
		|				-> labeledElement
		)
	|	atom
		(	ebnfSuffix	-> ^( ebnfSuffix ^(BLOCK[$atom.start,"BLOCK"] ^(ALT atom) ) )
		|				-> atom
		)
	|	ebnf
	|	actionElement
	;
    catch [RecognitionException re] {
    	retval.tree = (GrammarAST)adaptor.errorNode(input, retval.start, input.LT(-1), re);
    	int ttype = input.get(input.range()).getType();
	    // look for anything that really belongs at the start of the rule minus the initial ID
    	if ( ttype==COLON || ttype==RETURNS || ttype==CATCH || ttype==FINALLY || ttype==AT ) {
			RecognitionException missingSemi =
				new v4ParserException("unterminated rule (missing ';') detected at '"+
									  input.LT(1).getText()+" "+input.LT(2).getText()+"'", input);
			reportError(missingSemi);
			if ( ttype==CATCH || ttype==FINALLY ) {
				input.seek(input.range()); // ignore what's before rule trailer stuff
			}
			if ( ttype==RETURNS || ttype==AT ) { // scan back looking for ID of rule header
				int p = input.index();
				Token t = input.get(p);
				while ( t.getType()!=RULE_REF && t.getType()!=TOKEN_REF ) {
					p--;
					t = input.get(p);
				}
				input.seek(p);
			}
			throw new ResyncToEndOfRuleBlock(); // make sure it goes back to rule block level to recover
		}
        reportError(re);
        recover(input,re);
	}

actionElement
@after {
	GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
	if ( options!=null ) {
		Grammar.setNodeOptions($tree, options);
	}
}
	:	ACTION
	|   ACTION elementOptions	-> ^(ACTION elementOptions)
	|   SEMPRED
	|   SEMPRED elementOptions	-> ^(SEMPRED elementOptions)
	;

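// e.g. x=ID labels a single match; xs+=ID accumulates matches into a list.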
labeledElement
	:	id (ass=ASSIGN|ass=PLUS_ASSIGN)
		(	atom						-> ^($ass id atom)
		|	block						-> ^($ass id block)
		)
	;

// A block of grammar structure optionally followed by standard EBNF
// notation, or ANTLR specific notation, i.e. ? + ^ and so on
ebnf
    : block
      // And now we see if we have any of the optional suffixs and rewrite
      // the AST for this rule accordingly
      (	blockSuffix	-> ^(blockSuffix block)
      |				-> block
      )
    ;

// The standard EBNF suffixes with additional components that make
// sense only to ANTLR, in the context of a grammar block.
blockSuffix
    : ebnfSuffix // Standard EBNF
    ;

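// e.g. X?, X* and X+; a trailing '?' makes the suffix nongreedy, as in X*?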
ebnfSuffix
	:	QUESTION nongreedy=QUESTION?	-> OPTIONAL[$start, $nongreedy]
  	|	STAR nongreedy=QUESTION?		-> CLOSURE[$start, $nongreedy]
   	|	PLUS nongreedy=QUESTION?		-> POSITIVE_CLOSURE[$start, $nongreedy]
	;

lexerAtom
	:	range
	|	terminal
    |   RULE_REF
    |	notSet
    |	wildcard
    |	LEXER_CHAR_SET
	;

atom
	:	// Qualified reference delegate.rule. This must be
	    // lexically contiguous (no spaces either side of the DOT)
	    // otherwise it is two references with a wildcard in between
	    // and not a qualified reference.
	    /*
	    {
	    	input.LT(1).getCharPositionInLine()+input.LT(1).getText().length()==
	        input.LT(2).getCharPositionInLine() &&
	        input.LT(2).getCharPositionInLine()+1==input.LT(3).getCharPositionInLine()
	    }?
	    id DOT ruleref -> ^(DOT id ruleref)

    |
    	*/
        range  // Range x..y - only valid in lexers
	|	terminal
    |   ruleref
    |	notSet
    |	wildcard
    ;
    catch [RecognitionException re] { throw re; } // pass upwards to element

wildcard
@after {
	GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
	if ( options!=null ) {
		Grammar.setNodeOptions($tree, options);
	}
}
	:	// Wildcard '.' means any character in a lexer, any
		// token in a parser and any node or subtree in a tree parser
		// Because the terminal rule is allowed to be the node
		// specification for the start of a tree rule, we must
		// later check that wildcard was not used for that.
	    DOT elementOptions?
	    -> ^(WILDCARD[$DOT] elementOptions?)
	;

// --------------------
// Inverted element set
//
// A set of characters (in a lexer) or terminal tokens (in a parser)
// that is then used to create the inverse set of them.
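// e.g. ~';' or ~(A | B)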
notSet
    : NOT setElement	-> ^(NOT[$NOT] ^(SET[$setElement.start,"SET"] setElement))
    | NOT blockSet		-> ^(NOT[$NOT] blockSet)
    ;

blockSet
@init {
	Token t;
	boolean ebnf = false;
}
    :	LPAREN setElement (OR setElement)* RPAREN
		-> ^(SET[$LPAREN,"SET"] setElement+ )
    ;

setElement
	:	TOKEN_REF^ elementOptions?
	|	STRING_LITERAL^ elementOptions?
	|	range
    |   LEXER_CHAR_SET
	;

// -------------
// Grammar Block
//
// Anywhere where an element is valid, the grammar may start a new block
// of alts by surrounding that block with ( ). A new block may also have a set
// of options, which apply only to that block.
//
block
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.OPTIONS);
if ( options!=null ) {
	Grammar.setNodeOptions($tree, options);
}
}
 	:	LPAREN
        ( optionsSpec? ra+=ruleAction* COLON )?
        altList
		RPAREN
      -> ^(BLOCK[$LPAREN,"BLOCK"] optionsSpec? $ra* altList )
    ;

// ----------------
// Parser rule ref
//
// Reference to a parser rule with optional arguments and optional
// directive to become the root node or ignore the tree produced
//
ruleref
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
	Grammar.setNodeOptions($tree, options);
}
}
    :	RULE_REF ARG_ACTION? elementOptions? -> ^(RULE_REF ARG_ACTION? elementOptions?)
    ;
    catch [RecognitionException re] { throw re; } // pass upwards to element

// ---------------
// Character Range
//
// Specifies a range of characters. Valid for lexer rules only, but
// we do not check that here; the tree walkers should do that.
// Note also that the parser allows through more than just
// character literals so that we can produce a much nicer semantic
// error about any abuse of the .. operator.
//
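// e.g. 'a'..'z'
//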
range
    : STRING_LITERAL RANGE^ STRING_LITERAL
    ;

terminal
@after {
GrammarAST options = (GrammarAST)$tree.getFirstChildWithType(ANTLRParser.ELEMENT_OPTIONS);
if ( options!=null ) {
	Grammar.setNodeOptions($tree, options);
}
}
	:   TOKEN_REF elementOptions?		-> ^(TOKEN_REF elementOptions?)
	|   STRING_LITERAL elementOptions?	-> ^(STRING_LITERAL elementOptions?)
	;

// Terminals may be adorned with certain options when
// referenced in the grammar: TOK<,,,>
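// e.g. '^'<assoc=right>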
elementOptions
    :   LT (elementOption (COMMA elementOption)*)? GT
            -> ^(ELEMENT_OPTIONS[$LT,"ELEMENT_OPTIONS"] elementOption*)
    ;

// When used with elements we can specify what the tree node type can
// be and also assign settings of various options (which we do not check here).
elementOption
    : // This format indicates the default element option
      qid
    | id ASSIGN^ optionValue
    ;

// The name of the grammar, and indeed some other grammar elements may
// come through to the parser looking like a rule reference or a token
// reference, hence this rule is used to pick up whichever it is and rewrite
// it as a generic ID token.
id
@init { paraphrases.push("looking for an identifier"); }
@after { paraphrases.pop(); }
    : RULE_REF  ->ID[$RULE_REF]
    | TOKEN_REF ->ID[$TOKEN_REF]
    ;

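// A dotted identifier, e.g. a.b.c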
qid
@init { paraphrases.push("looking for a qualified identifier"); }
@after { paraphrases.pop(); }
	:	id (DOT id)* -> ID[$qid.start, $text]
	;

alternativeEntry : alternative EOF ; // allow gunit to call alternative and see EOF afterwards
elementEntry : element EOF ;
ruleEntry : rule EOF ;
blockEntry : block EOF ;



