All Downloads are FREE. Search and download functionalities are using the official Maven repository.

bparser.2.13.5.source-code.PreParser.scc Maven / Gradle / Ivy

The newest version!
Package de.be4.classicalb.core.preparser;

/*******************************************************************
 * Helpers                                                         *
 *******************************************************************/
// Helpers are named characters / character sets / regular expressions
// that later helper and token definitions refer to by name.
// Plain numbers are decimal character codes, 0x... are hexadecimal ones.
Helpers
  // 39 is the character code of the apostrophe (').
  // triple_quote is three apostrophes in a row.
  triple_quote = 39 39 39;
  single_quote = 39;

  // Individual line terminator characters; they are combined into
  // the line_break set further down in this section.
  lf = 10;
  cr = 13;
  // 8232 = U+2028, 8233 = U+2029.
  // NOTE: "seperator" is a misspelling of "separator"; the names are kept
  // because line_break refers to them.
  unicode_line_seperator = 8232;
  unicode_paragraph_seperator = 8233;

  // ASCII letters (both cases) and ASCII decimal digits.
  letter = ['a' .. 'z'] | ['A' .. 'Z'];
  digit = ['0' .. '9'];

  // unicode_letter definition from java-1.7.sablecc by Etienne M. Gagnon, http://sablecc.org/java1.7/
  // but removed unicode lambda 0x03bb from [0x03a3..0x03ce]
  //
  // Reading guide: every row is one alternative ('|') made of a set union ('+')
  // of inclusive code point intervals [from..to]. A character is a unicode_letter
  // if it lies in any interval of any row. The row for the Greek block is split
  // into [0x03a3..0x03ba] and [0x03bc..0x03ce] so that 0x03bb (lambda) is left out;
  // the original, unsplit row is kept below it as a comment for reference.
  unicode_letter =
    [[[0x0041..0x005a] + [0x0061..0x007a]] + [[0x00aa..0x00aa] + [0x00b5..0x00b5]]] |
    [[[0x00ba..0x00ba] + [0x00c0..0x00d6]] + [[0x00d8..0x00f6] + [0x00f8..0x01f5]]] |
    [[[0x01fa..0x0217] + [0x0250..0x02a8]] + [[0x02b0..0x02b8] + [0x02bb..0x02c1]]] |
    [[[0x02d0..0x02d1] + [0x02e0..0x02e4]] + [[0x037a..0x037a] + [0x0386..0x0386]]] |
    [[[0x0388..0x038a] + [0x038c..0x038c]] + [[[0x038e..0x03a1] + [0x03a3..0x03ba]] + [0x03bc..0x03ce]]] |
    //[[[0x0388..0x038a] + [0x038c..0x038c]] + [[0x038e..0x03a1] + [0x03a3..0x03ce]]] | // removed unicode_lambda 0x03bb
    [[[0x03d0..0x03d6] + [0x03da..0x03da]] + [[0x03dc..0x03dc] + [0x03de..0x03de]]] |
    [[[0x03e0..0x03e0] + [0x03e2..0x03f3]] + [[0x0401..0x040c] + [0x040e..0x044f]]] |
    [[[0x0451..0x045c] + [0x045e..0x0481]] + [[0x0490..0x04c4] + [0x04c7..0x04c8]]] |
    [[[0x04cb..0x04cc] + [0x04d0..0x04eb]] + [[0x04ee..0x04f5] + [0x04f8..0x04f9]]] |
    [[[0x0531..0x0556] + [0x0559..0x0559]] + [[0x0561..0x0587] + [0x05d0..0x05ea]]] |
    [[[0x05f0..0x05f2] + [0x0621..0x063a]] + [[0x0640..0x064a] + [0x0671..0x06b7]]] |
    [[[0x06ba..0x06be] + [0x06c0..0x06ce]] + [[0x06d0..0x06d3] + [0x06d5..0x06d5]]] |
    [[[0x06e5..0x06e6] + [0x0905..0x0939]] + [[0x093d..0x093d] + [0x0958..0x0961]]] |
    [[[0x0985..0x098c] + [0x098f..0x0990]] + [[0x0993..0x09a8] + [0x09aa..0x09b0]]] |
    [[[0x09b2..0x09b2] + [0x09b6..0x09b9]] + [[0x09dc..0x09dd] + [0x09df..0x09e1]]] |
    [[[0x09f0..0x09f1] + [0x0a05..0x0a0a]] + [[0x0a0f..0x0a10] + [0x0a13..0x0a28]]] |
    [[[0x0a2a..0x0a30] + [0x0a32..0x0a33]] + [[0x0a35..0x0a36] + [0x0a38..0x0a39]]] |
    [[[0x0a59..0x0a5c] + [0x0a5e..0x0a5e]] + [[0x0a72..0x0a74] + [0x0a85..0x0a8b]]] |
    [[[0x0a8d..0x0a8d] + [0x0a8f..0x0a91]] + [[0x0a93..0x0aa8] + [0x0aaa..0x0ab0]]] |
    [[[0x0ab2..0x0ab3] + [0x0ab5..0x0ab9]] + [[0x0abd..0x0abd] + [0x0ae0..0x0ae0]]] |
    [[[0x0b05..0x0b0c] + [0x0b0f..0x0b10]] + [[0x0b13..0x0b28] + [0x0b2a..0x0b30]]] |
    [[[0x0b32..0x0b33] + [0x0b36..0x0b39]] + [[0x0b3d..0x0b3d] + [0x0b5c..0x0b5d]]] |
    [[[0x0b5f..0x0b61] + [0x0b85..0x0b8a]] + [[0x0b8e..0x0b90] + [0x0b92..0x0b95]]] |
    [[[0x0b99..0x0b9a] + [0x0b9c..0x0b9c]] + [[0x0b9e..0x0b9f] + [0x0ba3..0x0ba4]]] |
    [[[0x0ba8..0x0baa] + [0x0bae..0x0bb5]] + [[0x0bb7..0x0bb9] + [0x0c05..0x0c0c]]] |
    [[[0x0c0e..0x0c10] + [0x0c12..0x0c28]] + [[0x0c2a..0x0c33] + [0x0c35..0x0c39]]] |
    [[[0x0c60..0x0c61] + [0x0c85..0x0c8c]] + [[0x0c8e..0x0c90] + [0x0c92..0x0ca8]]] |
    [[[0x0caa..0x0cb3] + [0x0cb5..0x0cb9]] + [[0x0cde..0x0cde] + [0x0ce0..0x0ce1]]] |
    [[[0x0d05..0x0d0c] + [0x0d0e..0x0d10]] + [[0x0d12..0x0d28] + [0x0d2a..0x0d39]]] |
    [[[0x0d60..0x0d61] + [0x0e01..0x0e2e]] + [[0x0e30..0x0e30] + [0x0e32..0x0e33]]] |
    [[[0x0e40..0x0e46] + [0x0e81..0x0e82]] + [[0x0e84..0x0e84] + [0x0e87..0x0e88]]] |
    [[[0x0e8a..0x0e8a] + [0x0e8d..0x0e8d]] + [[0x0e94..0x0e97] + [0x0e99..0x0e9f]]] |
    [[[0x0ea1..0x0ea3] + [0x0ea5..0x0ea5]] + [[0x0ea7..0x0ea7] + [0x0eaa..0x0eab]]] |
    [[[0x0ead..0x0eae] + [0x0eb0..0x0eb0]] + [[0x0eb2..0x0eb3] + [0x0ebd..0x0ebd]]] |
    [[[0x0ec0..0x0ec4] + [0x0ec6..0x0ec6]] + [[0x0edc..0x0edd] + [0x0f40..0x0f47]]] |
    [[[0x0f49..0x0f69] + [0x10a0..0x10c5]] + [[0x10d0..0x10f6] + [0x1100..0x1159]]] |
    [[[0x115f..0x11a2] + [0x11a8..0x11f9]] + [[0x1e00..0x1e9b] + [0x1ea0..0x1ef9]]] |
    [[[0x1f00..0x1f15] + [0x1f18..0x1f1d]] + [[0x1f20..0x1f45] + [0x1f48..0x1f4d]]] |
    [[[0x1f50..0x1f57] + [0x1f59..0x1f59]] + [[0x1f5b..0x1f5b] + [0x1f5d..0x1f5d]]] |
    [[[0x1f5f..0x1f7d] + [0x1f80..0x1fb4]] + [[0x1fb6..0x1fbc] + [0x1fbe..0x1fbe]]] |
    [[[0x1fc2..0x1fc4] + [0x1fc6..0x1fcc]] + [[0x1fd0..0x1fd3] + [0x1fd6..0x1fdb]]] |
    [[[0x1fe0..0x1fec] + [0x1ff2..0x1ff4]] + [[0x1ff6..0x1ffc] + [0x207f..0x207f]]] |
    [[[0x2102..0x2102] + [0x2107..0x2107]] + [[0x210a..0x2113] + [0x2115..0x2115]]] |
    [[[0x2118..0x211d] + [0x2124..0x2124]] + [[0x2126..0x2126] + [0x2128..0x2128]]] |
    [[[0x212a..0x2131] + [0x2133..0x2138]] + [[0x3005..0x3005] + [0x3031..0x3035]]] |
    [[[0x3041..0x3094] + [0x309b..0x309e]] + [[0x30a1..0x30fa] + [0x30fc..0x30fe]]] |
    [[[0x3105..0x312c] + [0x3131..0x318e]] + [[0x4e00..0x9fa5] + [0xac00..0xd7a3]]] |
    [[[0xf900..0xfa2d] + [0xfb00..0xfb06]] + [[0xfb13..0xfb17] + [0xfb1f..0xfb28]]] |
    [[[0xfb2a..0xfb36] + [0xfb38..0xfb3c]] + [[0xfb3e..0xfb3e] + [0xfb40..0xfb41]]] |
    [[[0xfb43..0xfb44] + [0xfb46..0xfbb1]] + [[0xfbd3..0xfd3d] + [0xfd50..0xfd8f]]] |
    [[[0xfd92..0xfdc7] + [0xfdf0..0xfdfb]] + [[0xfe70..0xfe72] + [0xfe74..0xfe74]]] |
    [[[0xfe76..0xfefc] + [0xff21..0xff3a]] + [[0xff41..0xff5a] + [0xff66..0xffbe]]] |
    [[[0xffc2..0xffc7] + [0xffca..0xffcf]] + [[0xffd2..0xffd7] + [0xffda..0xffdc]]];

  // Lambda is not part of unicode_letter, so regular_identifier_literal (below)
  // accepts it inside an identifier but never as the first character.
  // NOTE(review): 0x1d706 is larger than 0xffff, the upper bound of all_chars below;
  // confirm that the lexer generator in use handles code points beyond 0xffff as intended.
  unicode_lambda = 0x03bb | 0x1d706;
  // U+2080 .. U+2089
  unicode_subscripts = [0x2080..0x2089];
  // U+2032 plus U+2018 and U+2019 (decimal 8216..8217)
  unicode_prime = [0x2032 + [8216..8217]]; // ' primes; we could also add 8219

  all_chars = [0 .. 0xffff];
  // Any single line terminator character (LF, CR, U+2028 or U+2029).
  line_break = [[[lf + cr] + unicode_line_seperator] + unicode_paragraph_seperator];
  // Horizontal layout: character codes 9 (tab), 32 (space) and 160 (no-break space).
  layout_char = [[9 + 32] + 160];

  // Strings on a single line: any character except a line terminator.
  // Inside the quotes a backslash consumes the following character as well,
  // so an escaped closing quote does not end the literal.
  single_line_string_char = [all_chars - line_break];
  string_literal = '"' ([single_line_string_char - ['"' + '\']] | '\' single_line_string_char)* '"';
  // backquote to generate identifiers with special symbols or which clash with keywords
  quoted_identifier_literal = '`' ([single_line_string_char - ['`' + '\']] | '\' single_line_string_char)* '`';
  // Shape of a regular identifier, in this order:
  //   one unicode_letter,
  //   then any mix of unicode_letter, unicode_lambda, digit and '_',
  //   then any number of subscript characters,
  //   then any number of prime characters.
  regular_identifier_literal = (unicode_letter) (unicode_letter | unicode_lambda | digit | '_')* (unicode_subscripts)* (unicode_prime)*;

  // Punctuation that the 'something' token is allowed to swallow after its first element.
  // The set contains no letter and none of '/', '"', ''' or '`'.
  safe_to_skip = ',' | '{' | '}' | '-' | '>'  | '+' | ':' |
                 '<' | '.' | '|' | '(' | ')' | '[' | ']' | '%'; // safe to skip in normal mode


/*******************************************************************
 * States                                                          *
 *******************************************************************/
// Lexer states. Each token in the Tokens section lists the states in which
// it is active and, optionally, the state to switch to after it was matched.
States
  start, // special initial state in which we accept #PREDICATE, ...
  // Reached from start by every token except 'DEFINITIONS' and kw_prefix;
  // also re-entered from definitions when a clause keyword or 'END' is seen.
  normal,
  // Entered when the 'DEFINITIONS' keyword is matched (from start, normal or definitions).
  definitions,
  // Entered from definitions when begin_def_body ('==' or U+225C) is matched.
  // According to the comment on the semicolon token, the switch back to
  // definitions is done by PreLexer.
  definitions_rhs,
  // Entered from start after kw_prefix (#EXPRESSION, #PREDICATE, #SUBSTITUTION);
  // the only token of this state is no_def_something.
  no_definitions,
  // Multiline constructs (block comments, multiline strings) have their own lexer states
  // to provide better errors when the closing delimiter is missing.
  // If e. g. block comments were parsed as a single token,
  // then a long unclosed block comment would cause a pushback buffer overflow:
  // the lexer first tries to lex the entire remaining text as a block comment,
  // but once EOF is reached,
  // it tries to backtrack all the way to the start of the comment.
  // This is not an issue for line comments,
  // because EOF is a valid line comment terminator.
  //
  // None of the three states below is the target of a '->' transition in this grammar;
  // per the comments in the Tokens section, entering and leaving them is done in PreLexer.java.
  block_comment,
  multiline_string,
  multiline_template;

/*******************************************************************
 * Tokens                                                          *
 *******************************************************************/

// Token syntax: {state list} name = regular expression;
// "a -> b" inside the state list means: active in state a, switch to state b after a match.
Tokens
  // Block comments are lexed as: comment, then any number of comment_body / star, then comment_end.
  {start->normal, normal, definitions, definitions_rhs} comment = '/*'; // switch to block_comment state is done in PreLexer.java
  {block_comment} comment_end = '*/'; // switch back to previous state is done in PreLexer.java
  // comment_body never contains '*', so a '*' inside a comment is either
  // the start of comment_end ('*/', two characters) or a lone star token.
  {block_comment} comment_body = [all_chars - '*']*;
  {block_comment} star = '*';

  // Multiline strings are delimited by three apostrophes (triple_quote).
  // The start token has no state transition here; the state comments in this file
  // attribute the switch into and out of multiline_string to PreLexer.java.
  {start->normal, normal, definitions_rhs} multiline_string_start = triple_quote;
  {multiline_string} multiline_string_end = triple_quote; // switch back to previous state is done in PreLexer.java
  // One or two single quotes followed by a different character don't terminate the string.
  // A backslash consumes the next character, whatever it is (including a quote or a line break).
  {multiline_string} multiline_string_content = ((single_quote single_quote?)? [all_chars - [single_quote + '\']] | '\' all_chars)*;

  // Multiline templates follow the same scheme, with three backquotes as delimiter.
  {start->normal, normal, definitions_rhs} multiline_template_start = '```';
  {multiline_template} multiline_template_end = '```'; // switch back to previous state is done in PreLexer.java
  // One or two backquotes followed by a different character don't terminate the string.
  {multiline_template} multiline_template_content = (('`' '`'?)? [all_chars - ['`' + '\']] | '\' all_chars)*;

  // '//' up to the end of the line. The trailing line_break is optional, so a
  // line comment may also be ended by the end of input. line_break is a single
  // character, so at most one terminator character becomes part of the token.
  {start -> normal, normal, definitions, definitions_rhs} line_comment = '//' [all_chars - line_break]* line_break?;

  // The 'DEFINITIONS' keyword switches the lexer into the definitions state.
  // It is not active in definitions_rhs.
  {start -> definitions, normal -> definitions, definitions -> definitions} pre_parser_definitions = 'DEFINITIONS';
   // definitions -> definitions is an error, which will be caught later,
   // generating Clause 'DEFINITIONS' is used more than once error message

  // Keywords that start a clause other than DEFINITIONS. In the definitions state
  // such a keyword switches back to normal; in definitions_rhs it is recognised
  // but this grammar declares no state change for it.
  {start -> normal, normal, definitions -> normal, definitions_rhs} other_clause_begin =
    'MACHINE' | 'MODEL' | 'SYSTEM' | 'RULES_MACHINE' |
    'ABSTRACT_CONSTANTS' | 'ABSTRACT_VARIABLES' | 'ASSERTIONS' | 'CONCRETE_CONSTANTS' |
    'CONCRETE_VARIABLES' | 'CONSTANTS' | 'CONSTRAINTS' | 'IMPLEMENTATION' |
    'IMPORTS' | 'INCLUDES' | 'INITIALISATION' | 'INITIALIZATION' | 'INVARIANT' | 'LOCAL_OPERATIONS' |
    'OPERATIONS' | 'EVENTS' | 'PROMOTES' | 'PROPERTIES' | 'REFINES' | 'REFINEMENT' | 'EXTENDS' |
    'FREETYPES' |
    'SEES' | 'SETS' | 'USES' | 'VALUES' | 'VARIABLES';

  // 'END' outside a definition body. It is not active in definitions_rhs,
  // where 'END' is lexed as end_nesting instead.
  {start -> normal, normal, definitions -> normal} end_machine = 'END';

  // A '#...' prefix is only accepted in the start state. After it, the lexer is
  // in no_definitions, where no_def_something matches all remaining characters.
  {start -> no_definitions} kw_prefix = '#EXPRESSION' | '#PREDICATE' | '#SUBSTITUTION';
  {no_definitions} no_def_something = all_chars*;

  // Tokens of the left-hand side of a definition: name, optional parameter list.
  {definitions} pre_parser_identifier = regular_identifier_literal | quoted_identifier_literal;
  {definitions, definitions_rhs} left_par = '(';
  {definitions, definitions_rhs} right_par = ')';
  // rhs_body excludes '/', so a '/' that does not start '/*' or '//' is lexed as slash.
  {definitions, definitions_rhs} slash = '/';
  {definitions} comma = ',';

  // '==' (or U+225C) separates the definition head from its body and switches to definitions_rhs.
  {definitions -> definitions_rhs} begin_def_body = '==' | 0x225c; // Unicode DELTA EQUAL TO
  // One single character of a definition body: anything that is not
  // a layout_char, ';', '(', ')', '/' or '"'.
  {definitions_rhs} rhs_body = [[[[[[all_chars - layout_char] - ';'] - '('] - ')'] - '/'] - '"'];
  // Keywords that open / close an END-terminated construct inside a definition body.
  // NOTE(review): these words also match rhs_identifier with the same length; they are
  // declared before it, which presumably gives them priority — confirm against the
  // lexer generator's tie-breaking rule before reordering anything here.
  {definitions_rhs} begin_nesting =
    'BEGIN' | 'PRE' | 'ASSERT' | 'CHOICE' | 'IF' | 'WITNESS' |
    'SELECT' | 'CASE' | 'ANY' | 'VAR' | 'WHILE' | 'LET';
  {definitions_rhs} end_nesting = 'END';

  // Lex identifiers as complete words to ignore keywords within identifiers.
  // This only needs letter and not unicode_letter, because none of the keywords contain Unicode letters.
  {definitions_rhs} rhs_identifier = letter (letter | digit | '_')*;

  {definitions, definitions_rhs} semicolon = ';'; // returning to state definitions is done by PreLexer

  // A double-quoted single-line string; in the productions it is used as the
  // file name of a definition file.
  {definitions, definitions_rhs} pre_parser_string = string_literal;

  // Either exactly one line terminator character or a run of layout characters.
  {definitions, definitions_rhs} white_space = line_break | layout_char+;

  // An ASCII word (letter followed by letters, digits, underscores) in start / normal state.
  // Both tokens below are listed under Ignored Tokens.
  {start -> normal, normal} some_value = // could actually be renamed to some_identifier
    (letter (letter | digit | '_')*)
     // if we allow whitespace after identifiers, maximal munch rule means Lexer will miss DEFINITIONS token
   ;

  // we want to detect things like { "abc" |-> 10, "ef" |-> 20, ... } as a single chunk

  // other transition rules in normal mode we need to be careful about: line_comment
  // something could in principle match more than line_comment, but as we match only safe characters
  // we will not miss start of DEFINITIONS or '/' or '"' or ''' or ` (backquote) or machine clause
  // we are thus safe to skip over any char that is not a letter, a / or a " or '
  //
  // Structure of the token: one leading element (any single character other than '"',
  // or a complete string literal, or a complete quoted identifier), followed by
  // any number of layout characters, digits and safe_to_skip characters.
  {start -> normal, normal} something =
     //[[all_chars - '"'] - layout_char] // could be backslash !
     ( [all_chars - '"'] // could be backslash !
       | string_literal
       | quoted_identifier_literal
      )
      (layout_char |
       digit |
       safe_to_skip
       // string_literal |  // otherwise testStringLiteralNotClosedLongString fails with Pushback buffer overflow
       ) *;  // make token as large as possible to avoid creating too many TSomething objects


/*******************************************************************
 * Ignored Tokens                                                  *
 *******************************************************************/
// Tokens in this list are recognised by the lexer but are not handed to the parser.
// The list is a set; it is grouped by purpose, one token per line.
Ignored Tokens
  // layout
  white_space,

  // line and block comments
  line_comment,
  comment,
  comment_body,
  star,
  comment_end,

  // multiline strings (''' ... ''')
  multiline_string_start,
  multiline_string_content,
  multiline_string_end,

  // multiline templates (``` ... ```)
  multiline_template_start,
  multiline_template_content,
  multiline_template_end,

  // clause keywords and skipped machine text
  other_clause_begin,
  end_machine,
  some_value,
  something;

/*******************************************************************
 * Productions                                                     *
 *******************************************************************/
// Each production has the form
//   name {-> ast_type} = alternatives {-> AST construction};
// i.e. the concrete syntax is mapped directly onto the nodes declared in the
// Abstract Syntax Tree section.
Productions

// Root production. Three shapes of input are accepted:
//   defs_clause - a DEFINITIONS clause; its definitions become the children of the unit
//   something   - a '#...' prefix followed by the rest of the input; yields an empty unit
//   nothing     - no non-ignored tokens at all; yields an empty unit
parse_unit {-> pre_parse_unit} =
  {defs_clause} [def_clause]:def_clause {-> New pre_parse_unit([def_clause.pre_parser_definition])} |
  {something} kw_prefix no_def_something {-> New pre_parse_unit([])} |
  {nothing} {-> New pre_parse_unit([])};

// 'DEFINITIONS' keyword, a non-empty list of definitions, and an optional trailing semicolon.
def_clause {-> pre_parser_definition*} = pre_parser_definitions [list]:definition_list semicolon? {-> [list.pre_parser_definition]};

// One or more definitions separated by semicolons (left-recursive);
// the resulting list keeps the definitions in source order.
definition_list {-> pre_parser_definition*} =
  {single} [def]:definition {-> [def.pre_parser_definition]} |
  {multi} [rest]:definition_list semicolon [def]:definition {-> [rest.pre_parser_definition, def.pre_parser_definition]};

// A single entry of the DEFINITIONS clause. Two forms:
//   (unnamed) - name, optional parameter list, '==' (begin_def_body), right-hand side
//   file      - a string literal, stored as the filename of a 'file' definition node
// NOTE(review): the rhs_body token as declared in the Tokens section matches one
// character, and tokens such as left_par or rhs_identifier are not ignored, yet exactly
// one rhs_body is expected here. Presumably PreLexer.java merges the whole right-hand
// side into a single rhs_body token — confirm there.
definition {-> pre_parser_definition} =
  [def_name]:pre_parser_identifier [parameters]:def_parameters? begin_def_body [rhs]:rhs_body {-> New pre_parser_definition(def_name, [parameters.pre_parser_identifier], rhs)} |
  {file} [filename]:pre_parser_string {-> New pre_parser_definition.file(filename)};

// Parenthesised, non-empty parameter list of a definition; the parentheses are dropped
// and only the identifiers are passed on.
def_parameters {-> pre_parser_identifier*} = left_par [parameters]:identifier_list right_par {-> [parameters.pre_parser_identifier]};

// One or more identifiers separated by commas (left-recursive), kept in source order.
identifier_list {-> pre_parser_identifier*} =
  {single} [parameter]:pre_parser_identifier {-> [parameter]} |
  {multi} [rest]:identifier_list comma [parameter]:pre_parser_identifier {-> [rest.pre_parser_identifier, parameter]};

/*******************************************************************
 * Abstract Syntax Tree                                            *
 *******************************************************************/
// Node types that the productions above construct with 'New ...'.
Abstract Syntax Tree

// Root node: the list of all definitions found (possibly empty).
pre_parse_unit = pre_parser_definition*;

// A definition is either
//   (unnamed) - a name, zero or more parameter identifiers and the right-hand side token, or
//   file      - a string token holding a file name.
pre_parser_definition =
  [def_name]:pre_parser_identifier [parameters]:pre_parser_identifier* [rhs]:rhs_body |
  {file} [filename]:pre_parser_string;




© 2015 - 2024 Weber Informatics LLC | Privacy Policy