grammars.antlr.tree_format.TreeFormat.g4 Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of Treepat Show documentation
Treepat is a language for recognizing patterns in trees, much as regular expressions recognize patterns in strings. Treepat provides operators analogous to the regex union, concatenation, and closure operators, extended to the notion of trees.
There is a newer version: 2.0.0
Show newest version
// Combined (lexer + parser) grammar named TreeFormat.
grammar TreeFormat;

// INDENT and DEDENT have no lexer rule of their own: they are declared here
// and created programmatically by the lexer members and the NEWLINE rule
// action below whenever the leading whitespace of a line grows or shrinks.
tokens { INDENT, DEDENT }
@lexer::members {
  // A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
  // It is parameterized with Token because nextToken() calls getType() on its
  // elements and returns them as Token, which a raw LinkedList does not allow.
  private java.util.LinkedList<Token> tokens = new java.util.LinkedList<>();
  // The stack that keeps track of the indentation level: one entry per open
  // level, holding the column count computed by getIndentationCount().
  // It is parameterized with Integer because the NEWLINE rule unboxes peek()
  // into an int and compares it numerically.
  private java.util.Stack<Integer> indents = new java.util.Stack<>();
  // The amount of opened braces, brackets and parenthesis. While it is
  // greater than zero the NEWLINE rule skips line breaks.
  private int opened = 0;
  // The most recently produced token on the default channel; createDedent()
  // reads its line number.
  private Token lastToken = null;
  // Appends the token to the pending queue drained by nextToken() and
  // registers it as the current token of the lexer.
  @Override
  public void emit(Token t) {
    tokens.add(t);
    super.setToken(t);
  }

  // Returns the next token for the parser. When the end of the input is
  // reached while indentation levels are still open, a NEWLINE, one DEDENT per
  // open level and a closing EOF are queued first, so that every INDENT is
  // matched before the stream ends.
  @Override
  public Token nextToken() {
    boolean dedentsPendingAtEof = _input.LA(1) == EOF && !this.indents.isEmpty();
    if (dedentsPendingAtEof) {
      // Drop every EOF token already waiting in the queue. Walking backwards
      // keeps the remaining indices valid while elements are removed.
      int index = tokens.size();
      while (--index >= 0) {
        if (tokens.get(index).getType() == EOF) {
          tokens.remove(index);
        }
      }

      // A synthetic line break serves as the end of the last statement.
      this.emit(commonToken(TreeFormatParser.NEWLINE, "\n"));

      // Close each indentation level that is still open, innermost first.
      for (; !indents.isEmpty(); indents.pop()) {
        this.emit(createDedent());
      }

      // Append the EOF again, now behind the synthetic tokens.
      this.emit(commonToken(TreeFormatParser.EOF, ""));
    }

    Token lexed = super.nextToken();

    // Remember the latest default-channel token; createDedent() uses its line.
    if (lexed.getChannel() == Token.DEFAULT_CHANNEL) {
      this.lastToken = lexed;
    }

    // Queued tokens are handed out before the token that was just lexed.
    if (tokens.isEmpty()) {
      return lexed;
    }
    return tokens.poll();
  }

  // Builds a DEDENT token located at the most recently consumed character and
  // stamps it with the line number of the last default-channel token.
  private Token createDedent() {
    int line = this.lastToken.getLine();
    CommonToken token = commonToken(TreeFormatParser.DEDENT, "");
    token.setLine(line);
    return token;
  }

  // Creates a default-channel token of the given type. The stop index is
  // getCharIndex() - 1 and the start index lies text.length() - 1 characters
  // before it; for an empty text both indices coincide. Only the length of
  // text is used, the string itself is not stored on the token.
  private CommonToken commonToken(int type, String text) {
    final int stop = this.getCharIndex() - 1;
    final int start;
    if (text.isEmpty()) {
      start = stop;
    } else {
      start = stop - text.length() + 1;
    }
    return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
  }

  // Computes the indentation width, in columns, of a run of leading
  // whitespace. The tab rule follows the Python reference:
  //
  // "Tabs are replaced (from left to right) by one to eight spaces
  //  such that the total number of characters up to and including
  //  the replacement is a multiple of eight [...]"
  //
  //  -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
  //
  // Every character other than a tab counts as exactly one column.
  static int getIndentationCount(String spaces) {
    int width = 0;
    for (int i = 0; i < spaces.length(); i++) {
      if (spaces.charAt(i) == '\t') {
        // Jump to the next multiple of eight.
        width += 8 - (width % 8);
      } else {
        // A normal space char.
        width++;
      }
    }
    return width;
  }

  // Reports whether the lexer currently stands at column 0 of line 1. The
  // NEWLINE rule uses this as the predicate for leading indentation.
  boolean atStartOfInput() {
    boolean onFirstLine = super.getLine() == 1;
    boolean atFirstColumn = super.getCharPositionInLine() == 0;
    return atFirstColumn && onFirstLine;
  }
}

// No custom members are added to the generated parser; this action is empty.
@parser::members{

}

// A subtree is a node followed by any number of NEWLINE tokens and,
// optionally, an indented block holding its children.
subtree
    :   node NEWLINE* child
    |   node NEWLINE*
    ;

// The children of a node: a line break, then one deeper indentation level
// (INDENT) containing the child subtrees, closed by the matching DEDENT.
child
    :   NEWLINE INDENT sibling DEDENT
    ;

// One or more consecutive subtrees; used as the content of an indented block.
sibling
    :   subtree+
    ;

// A single tree node written as name:tag. Both sides are labelled (name and
// tag) so that they can be told apart in the parse tree.
node
    :   name=information COLON tag=information
    ;

// The text of a name or a tag: either a double-quoted STRING or an unquoted
// MARKLESS_STRING.
information
    :   STRING
    |   MARKLESS_STRING
    ;

// Separator between the name and the tag of a node.
COLON
    :   ':'
    ;

// Double-quoted text. Any character other than a double quote may appear
// between the quotes (line breaks included); there is no escape sequence for
// an embedded double quote.
STRING
    :   '"' ~'"'* '"'
    ;

// Unquoted text made of one or more ASCII letters, digits or underscores.
MARKLESS_STRING
    :   [A-Za-z0-9_]+
    ;

// Matches a line break together with the indentation that follows it (or, at
// the very start of the input, just the leading indentation) and converts the
// change in indentation into NEWLINE, INDENT and DEDENT tokens.
NEWLINE
    : ( {atStartOfInput()}?   SPACES
    | ( '\r'? '\n' | '\r' | '\f' ) SPACES? )
    {
       // Split the matched text into its line-break part and its indentation
       // part. The form feed is accepted as a line terminator by this rule, so
       // it is stripped with the other terminators; otherwise it would stay in
       // the indentation text and be counted as one extra column.
       String newLine = getText().replaceAll("[^\r\n\f]+", "");
       String spaces = getText().replaceAll("[\r\n\f]+", "");
       int next = _input.LA(1);
       if (opened > 0 || next == '\r' || next == '\n' || next == '\f' || next == '#') {
         // If we are inside a list or on a blank line (the next character is
         // another line terminator or a hash sign), ignore all indents,
         // dedents and line breaks.
         skip();
       }
       else {
         emit(commonToken(NEWLINE, newLine));
         int indent = getIndentationCount(spaces);
         int previous = indents.isEmpty() ? 0 : indents.peek();
         if (indent == previous) {
           // Same indentation as the current level: only the NEWLINE queued
           // above is produced, no INDENT or DEDENT.
           skip();
         }
         else if (indent > previous) {
           // Deeper indentation opens exactly one new level.
           indents.push(indent);
           emit(commonToken(TreeFormatParser.INDENT, spaces));
         }
         else {
           // Shallower indentation closes every level deeper than the new
           // one, so possibly more than one DEDENT token is emitted.
           while(!indents.isEmpty() && indents.peek() > indent) {
             this.emit(createDedent());
             indents.pop();
           }
         }
       }
     }
    ;

// Blanks and tabs that are not part of a NEWLINE match are discarded.
WS
    : SPACES -> skip
    ;

// One or more blanks or tabs. Being a fragment, it never becomes a token on
// its own; it is only referenced from the NEWLINE and WS rules.
fragment
SPACES
    :   [ \t]+
    ;