grammars.antlr.tree_format.TreeFormat.g4 Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of Treepat Show documentation
Show all versions of Treepat Show documentation
Treepat is a language to recognise patterns in trees in a similar way as regular expressions recognize patterns in strings. Treepat includes analogous operators to regex union, concatenation, and closure, which are extended to the notion of trees.
// Define a grammar called TreeFormat.
grammar TreeFormat;
// Here starts the implementation for INDENT and DEDENT tokens.
tokens { INDENT, DEDENT }
@lexer::members {
// A queue where extra tokens are pushed on (see the NEWLINE lexer rule).
private java.util.LinkedList tokens = new java.util.LinkedList<>();
// The stack that keeps track of the indentation level.
private java.util.Stack indents = new java.util.Stack<>();
// The amount of opened braces, brackets and parenthesis.
private int opened = 0;
// The most recently produced token.
private Token lastToken = null;
@Override
public void emit(Token t) {
super.setToken(t);
tokens.offer(t);
}
@Override
public Token nextToken() {
// Check if the end-of-file is ahead and there are still some DEDENTS expected.
if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
// Remove any trailing EOF tokens from our buffer.
for (int i = tokens.size() - 1; i >= 0; i--) {
if (tokens.get(i).getType() == EOF) {
tokens.remove(i);
}
}
// First emit an extra line break that serves as the end of the statement.
this.emit(commonToken(TreeFormatParser.NEWLINE, "\n"));
// Now emit as much DEDENT tokens as needed.
while (!indents.isEmpty()) {
this.emit(createDedent());
indents.pop();
}
// Put the EOF back on the token stream.
this.emit(commonToken(TreeFormatParser.EOF, ""));
}
Token next = super.nextToken();
if (next.getChannel() == Token.DEFAULT_CHANNEL) {
// Keep track of the last token on the default channel.
this.lastToken = next;
}
return tokens.isEmpty() ? next : tokens.poll();
}
private Token createDedent() {
CommonToken dedent = commonToken(TreeFormatParser.DEDENT, "");
dedent.setLine(this.lastToken.getLine());
return dedent;
}
private CommonToken commonToken(int type, String text) {
int stop = this.getCharIndex() - 1;
int start = text.isEmpty() ? stop : stop - text.length() + 1;
return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
}
// Calculates the indentation of the provided spaces, taking the
// following rules into account:
//
// "Tabs are replaced (from left to right) by one to eight spaces
// such that the total number of characters up to and including
// the replacement is a multiple of eight [...]"
//
// -- https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
static int getIndentationCount(String spaces) {
int count = 0;
for (char ch : spaces.toCharArray()) {
switch (ch) {
case '\t':
count += 8 - (count % 8);
break;
default:
// A normal space char.
count++;
}
}
return count;
}
boolean atStartOfInput() {
return super.getCharPositionInLine() == 0 && super.getLine() == 1;
}
}
@parser::members{
}
subtree
: node NEWLINE* child
| node NEWLINE*
;
child
: NEWLINE INDENT sibling DEDENT
;
sibling
: subtree+
;
node
: name=information COLON tag=information
;
information
: STRING
| MARKLESS_STRING
;
COLON
: ':'
;
STRING
: '"' ~'"'* '"'
;
MARKLESS_STRING
: [A-Za-z0-9_]+
;
NEWLINE
: ( {atStartOfInput()}? SPACES
| ( '\r'? '\n' | '\r' | '\f' ) SPACES? )
{
String newLine = getText().replaceAll("[^\r\n]+", "");
String spaces = getText().replaceAll("[\r\n]+", "");
int next = _input.LA(1);
if (opened > 0 || next == '\r' || next == '\n' || next == '#') {
// If we're inside a list or on a blank line, ignore all indents,
// dedents and line breaks.
skip();
}
else {
emit(commonToken(NEWLINE, newLine));
int indent = getIndentationCount(spaces);
int previous = indents.isEmpty() ? 0 : indents.peek();
if (indent == previous) {
// skip indents of the same size as the present indent-size
skip();
}
else if (indent > previous) {
indents.push(indent);
emit(commonToken(TreeFormatParser.INDENT, spaces));
}
else {
// Possibly emit more than 1 DEDENT token.
while(!indents.isEmpty() && indents.peek() > indent) {
this.emit(createDedent());
indents.pop();
}
}
}
}
;
WS
: SPACES+ -> skip
;
fragment
SPACES
: [ \t]+
;