// Source listing: org.sfj.PegLegParser, from the single-file-java artifact (Maven / Gradle / Ivy).
/*
* Copyright 2020 C. Schanck
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.sfj;
import java.util.AbstractMap;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Supplier;
import static java.util.stream.Collectors.toList;
/**
* This class implements a complete PEG parser, a la https://en.wikipedia.org/wiki/Parsing_expression_grammar
*
* <p>I have long been a big fan of the Parboiled parser framework ( https://github.com/sirthias/parboiled/wiki ),
* especially for quick and dirty things. But I also always disliked the proxying/byte code manipulation
* in it. A looong while ago I looked at building one myself with just anon classes, around Java 6
* timeframe, but it was super clunky. For a while now I have wanted to take another swing at it
* using lambdas; it seemed like a way to do all of what Parboiled (and Rats!, etc) did without
* needing anything too exotic.
*
* <p>Turns out, yup, works pretty well. An approach like this will never be the quickest parser
* to run, I mean, there is no packrat processing, no memoization, etc. So it is not a speed
* demon. But the intention was to make it super expressive and a speed demon to write parsers.
*
* <p>See the PegLegParser.adoc file for an overview on how it works, and see the unit test
* class for a set of example grammars.
* @param <V> Value stack type.
* @author cschanck
*/
public class PegLegParser implements Supplier> {
// Input being parsed; not assigned at declaration (assignment is outside this view).
private Source source;
// NOTE(review): presumably the characters treated as white space; its use is not visible here.
private String whiteSpace = " \t\r\n";
// Defaults to the platform line separator; its use is not visible here.
private String lineSep = System.lineSeparator();
// Stack of source frames, newest first: pushFrame() pushes the current source position.
private LinkedList frame = new LinkedList<>();
// The parser's value stack; saveValues()/restoreValues() snapshot and roll it back.
private Values values = new Values<>();
// Result of the most recent ruleReturn() call, matched or not.
private RuleReturn lastReturn = null;
// Matched-and-consumed result with the greatest start offset seen so far (maintained by ruleReturn()).
private RuleReturn lastSuccessfulReturn = null;
// Farthest end position reached by a matched, consuming rule; ruleReturn() updates it in place.
public SourcePosition farthestSuccessfulPos = new SourcePosition();
/**
* Random holder class for intra rule parser data manipulation. Used within sibling rules.
* @param
*/
static class Ref {
private final Supplier init;
private LinkedList stack = new LinkedList<>();
public Ref(V value) { this.init = () -> value; }
public Ref(Supplier init) { this.init = init == null ? () -> null : init; }
public V get() { return stack.peekFirst(); }
public void set(V value) { stack.set(0, value); }
public String toString() { return "Ref{" + get() + '}'; }
void enterRef() { stack.push(init.get()); }
void exitRef() { stack.pop(); }
}
/**
 * A Rule. Core to parsing is defining your own rules.
 *
 * @param <V> Value type
 */
@FunctionalInterface
interface PegLegRule<V> {
  /**
   * Evaluates the rule.
   *
   * @return the outcome of the evaluation
   */
  RuleReturn<V> rule();
}
/**
 * An action run during parsing.
 * Its result tells the parser whether to go on: {@code true} continues parsing,
 * {@code false} stops it.
 */
@FunctionalInterface
interface Exec {
  /**
   * Runs the action.
   *
   * @return true to continue parsing, false to stop
   */
  boolean exec();
}
/**
 * Terminal rule, with optional ignoring of case.
 *
 * @param <V> value type
 */
@FunctionalInterface
interface TerminalRule<V> {
  /**
   * Evaluates the terminal.
   *
   * @param ignore true to ignore case while matching
   * @return the outcome of the evaluation
   */
  RuleReturn<V> rule(boolean ignore);
}
class ParentRule implements PegLegRule {
private PegLegRule rule;
private Ref>[] refs = null;
public ParentRule(PegLegRule rule) {
this.rule = rule;
}
public PegLegRule refs(Ref>... refs) {
this.refs = refs;
return this;
}
@Override
public RuleReturn rule() {
if (refs == null) { return rule.rule(); }
for (Ref> r : refs) { r.enterRef(); }
try {
return rule.rule();
} finally {
for (Ref> r : refs) { r.exitRef(); }
}
}
}
/**
 * Mutable position within the input: absolute char offset plus line and offset in that line.
 * A freshly created position is offset 0 on line 1, line offset 0.
 */
static class SourcePosition {
  int line;
  int linePos;
  int srcPos;

  /** Creates the start-of-input position. */
  public SourcePosition() {
    this(0, 1, 0);
  }

  /**
   * @param srcPos absolute char offset
   * @param line line number
   * @param linePos offset within the line
   */
  public SourcePosition(int srcPos, int line, int linePos) {
    this.srcPos = srcPos;
    this.line = line;
    this.linePos = linePos;
  }

  /**
   * @return an independent copy of this position
   */
  public SourcePosition dup() {
    return new SourcePosition(this.srcPos, this.line, this.linePos);
  }

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("Source @");
    text.append(srcPos);
    text.append("(line=").append(line);
    text.append(":linePos=").append(linePos);
    text.append(")");
    return text.toString();
  }
}
/**
 * A source position tagged with an optional name. The position values are copied from
 * the position given at construction.
 */
static class SourceFrame extends SourcePosition {
  final String name;

  /**
   * @param name label for this frame; may be null
   * @param pos position whose values are copied into this frame
   */
  public SourceFrame(String name, SourcePosition pos) {
    super(pos.srcPos, pos.line, pos.linePos);
    this.name = name;
  }
}
/**
 * Cursor over the input text. Tracks the read position (offset, line, line offset) and
 * lets callers snapshot and reinstate that position.
 */
private static class Source {
  private final CharSequence src;
  private SourcePosition state;

  /**
   * @param src text to read from
   */
  public Source(CharSequence src) {
    this.src = src;
    this.state = new SourcePosition();
  }

  /**
   * @return true once the read offset has reached the end of the input
   */
  public boolean atEnd() {
    return state.srcPos >= src.length();
  }

  /**
   * @return a copy of the current position
   */
  public SourcePosition getState() {
    return state.dup();
  }

  /**
   * Checks the next unread char without consuming it.
   *
   * @param chars candidate characters
   * @return true if there is a next char and it occurs in {@code chars}
   */
  public boolean peekOneOf(String chars) {
    if (state.srcPos >= src.length()) {
      return false;
    }
    char upcoming = src.charAt(state.srcPos);
    return chars.indexOf(upcoming) >= 0;
  }

  /**
   * Installs the given position object as the current position (it is not copied).
   *
   * @param state position to adopt
   */
  public void setState(SourcePosition state) {
    this.state = state;
  }

  /**
   * Consumes one char, advancing the line bookkeeping: a newline starts a new line at
   * offset 0, anything else moves one step along the current line.
   *
   * @return the consumed char, or -1 at end of input
   */
  public int nextChar() {
    if (state.srcPos >= src.length()) {
      return -1;
    }
    int consumed = src.charAt(state.srcPos++);
    boolean newline = ((char) consumed) == '\n';
    if (newline) {
      state.line++;
      state.linePos = 0;
    } else {
      state.linePos++;
    }
    return consumed;
  }

  /**
   * @param pos start offset
   * @param len number of chars
   * @return the {@code len} chars starting at {@code pos}
   */
  public String substring(int pos, int len) {
    CharSequence slice = src.subSequence(pos, pos + len);
    return slice.toString();
  }

  /**
   * @param pos start offset
   * @return everything from {@code pos} to the end of the input
   */
  public String substring(int pos) {
    int remaining = src.length() - pos;
    return substring(pos, remaining);
  }
}
private static class SingleNode {
V value;
SingleNode down;
public SingleNode(V value, SingleNode down) {
this.value = value;
this.down = down;
}
@Override
public String toString() { return Objects.toString(value); }
}
/**
 * Stack of values. Supports normal stack ops like peek(), pop(), push(), etc.
 * <p>
 * The stack is a chain of nodes that are never modified once linked in: {@link #push}
 * allocates a new node and {@link #pop()} only moves the top pointer. That is what lets
 * {@code save()} / {@code restore()} snapshot the stack by remembering a single node.
 *
 * @param <V> Value type
 */
public static class Values<V> {
  private SingleNode<V> top = null;

  /**
   * Snapshot the values in a list, top element as the 0th element
   * @return list of values.
   */
  public List<V> allValues() {
    List<V> ret = new ArrayList<>();
    for (SingleNode<V> n = top; n != null; n = n.down) {
      ret.add(n.value);
    }
    return ret;
  }

  /**
   * All the values in reverse, i.e. bottom element as the 0th element.
   * @return list of values
   */
  public List<V> reverse() {
    List<V> all = allValues();
    Collections.reverse(all);
    return all;
  }

  /**
   * Look at the top value without removing it.
   * @return the top value; empty if the stack is empty or the top value is null
   */
  public Optional<V> peek() {
    SingleNode<V> ret = top;
    return (ret != null) ? Optional.ofNullable(ret.value) : Optional.empty();
  }

  /**
   * Push a value on top of the stack.
   * @param v value, may be null
   */
  public void push(V v) {
    top = new SingleNode<>(v, top);
  }

  /**
   * Exchange the top two values. Fails with a NullPointerException (from {@link #pop()})
   * when fewer than two values are present.
   */
  public void swap() {
    V p1 = pop();
    V p2 = pop();
    push(p1);
    push(p2);
  }

  /**
   * Remove and return the top value.
   * @return value
   * @throws NullPointerException if the stack is empty
   */
  public V pop() {
    V ret = top.value;
    top = top.down;
    return ret;
  }

  /**
   * Pop the value 'pos' down the stack. pos=0 is the top, pos=1 is one below the top, etc.
   * @param pos position to pop off
   * @return value
   */
  public V pop(int pos) {
    if (pos == 0) {
      return pop();
    }
    // Pop everything down to 'pos' and re-push what sat above it. Unlinking the node in
    // place would be shorter but would alter nodes still reachable from a save() snapshot.
    LinkedList<V> hold = new LinkedList<>();
    for (int i = 0; i <= pos; i++) {
      hold.push(pop());
    }
    V ret = hold.pop();
    while (!hold.isEmpty()) {
      push(hold.pop());
    }
    return ret;
  }

  /** @return the current top node, usable later with {@link #restore}. */
  SingleNode<V> save() {
    return top;
  }

  /** Makes {@code prior} (a node obtained from {@link #save()}) the top of the stack again. */
  void restore(SingleNode<V> prior) {
    top = prior;
  }

  @Override
  public String toString() {
    return "Values:" + allValues();
  }
}
/** @return number of frames currently on the frame stack. */
private int frameDepth() {
  return frame.size();
}
/**
 * Asks the source whether its next unread char is one of {@code chars}; nothing is consumed.
 */
private boolean peekOneOf(String chars) {
  return source.peekOneOf(chars);
}
/** Drops the newest frame, leaving the source position where it is. */
private void tossTopFrame() {
  frame.pop();
}
/** @return a snapshot marker of the value stack, for use with restoreValues(). */
private SingleNode<V> saveValues() {
  return values.save();
}
/** Resets the value stack to a marker previously obtained from saveValues(). */
private void restoreValues(SingleNode<V> point) {
  values.restore(point);
}
/** Pushes an unnamed frame (name is null) for the current source position. */
private void pushFrame() {
  pushFrame(null);
}
/** Pushes a frame holding a copy of the current source position, labelled with {@code name}. */
private void pushFrame(String name) {
  SourceFrame entry = new SourceFrame(name, source.getState());
  frame.push(entry);
}
/** Discards the newest frames until at most {@code size} remain. */
private void trimFramesTo(int size) {
  while (frameDepth() > size) {
    tossTopFrame();
  }
}
/** Pops the newest frame and rewinds the source to the position that frame recorded. */
private void resetToLastFrame() {
  source.setState(frame.pop());
}
/** Consumes one char from the source; returns it, or -1 at end of input. */
private int nextChar() {
  return source.nextChar();
}
/**
 * Undoes work done since a checkpoint: drops frames above {@code frameCount} and resets
 * the value stack to {@code oldTop}. The source position itself is not touched here.
 *
 * @param frameCount frame depth to return to
 * @param oldTop value stack marker taken at the checkpoint
 */
private void rollback(int frameCount, SingleNode<V> oldTop) {
  trimFramesTo(frameCount);
  restoreValues(oldTop);
}
/**
 * Names of the frames currently on the frame stack, oldest first.
 * Unnamed frames (pushed with a null name) are left out, so the list never contains null.
 *
 * @return frame names, oldest first
 */
public List<String> parseTrail() {
  List<String> ret = new ArrayList<>(frame.size());
  // The frame stack iterates newest-first; collect in that order, then flip.
  for (SourceFrame f : frame) {
    // Frames themselves are never null; it is the name that may be absent.
    if (f.name != null) {
      ret.add(f.name);
    }
  }
  Collections.reverse(ret);
  return ret;
}
/**
 * The last rule's matched literal.
 * @return last match literal, or empty if no rule has returned yet or the last one did not match
 */
public Optional<String> match() {
  if (lastReturn != null && lastReturn.matched()) {
    return Optional.of(source.substring(lastReturn.match.srcPos, lastReturn.matchLen));
  }
  return Optional.empty();
}
/**
 * Last rule return.
 * @return rule return, or null if no rule has returned yet
 */
public RuleReturn<V> getLastReturn() {
  return lastReturn;
}
/**
 * The matched, consuming rule return that started farthest into the input so far.
 * @return rule return, or null if there has been none
 */
public RuleReturn<V> getLastSuccessfulReturn() {
  return lastSuccessfulReturn;
}
/**
 * Finishes a rule: records its result, settles the top frame and updates the
 * "farthest success" bookkeeping.
 *
 * @param matched whether the rule matched
 * @param consumed whether the input it read stays consumed; if not, the source is rewound
 *                 to the top frame's position
 * @return the result object, also stored as the last return
 */
private RuleReturn<V> ruleReturn(boolean matched, boolean consumed) {
  // Build the result first: its constructor reads the top frame as the match start.
  RuleReturn<V> ret = new RuleReturn<>(this, matched, consumed);
  lastReturn = ret;
  if (consumed) {
    tossTopFrame();
  } else {
    resetToLastFrame();
  }
  if (matched && consumed) {
    // The source now sits exactly at the end of the match, so take line/linePos from it.
    // Deriving them from the match start is wrong when the match contains a newline.
    SourcePosition end = source.getState();
    if (end.srcPos > farthestSuccessfulPos.srcPos) {
      // Updated in place: farthestSuccessfulPos is a public field others may hold on to.
      farthestSuccessfulPos.srcPos = end.srcPos;
      farthestSuccessfulPos.line = end.line;
      farthestSuccessfulPos.linePos = end.linePos;
    }
    if ((lastSuccessfulReturn == null) || (ret.match.srcPos > lastSuccessfulReturn.match.srcPos)) {
      lastSuccessfulReturn = ret;
    }
  }
  return ret;
}
/**
 * Rule return: whether a rule matched, whether it consumed input, and where the match was.
 * <p>
 * The position accessors ({@link #matchLine()}, {@link #matchLineOffset()},
 * {@link #matchPos()}) throw a NullPointerException on a result that did not match;
 * check {@link #matched()} first.
 *
 * @param <V> value type of the owning parser
 */
public static class RuleReturn<V> {
  private final PegLegParser<V> parser;
  private final boolean consumed;
  // Start of the match; null when the rule did not match.
  private final SourcePosition match;
  private int matchLen = 0;

  private RuleReturn(PegLegParser<V> parser, boolean matched, boolean consumed) {
    this.parser = parser;
    this.consumed = consumed;
    if (matched) {
      // The newest frame marks where the rule started; the source is where it ended.
      SourcePosition prev = parser.frame.get(0);
      this.match = new SourcePosition(prev.srcPos, prev.line, prev.linePos);
      matchLen = parser.source.state.srcPos - prev.srcPos;
    } else {
      this.match = null;
    }
  }

  /**
   * The matched text.
   * @return matched literal, or empty if the rule did not match
   */
  public Optional<String> match() {
    if (matched()) {
      return Optional.of(parser.source.substring(match.srcPos, matchLen));
    }
    return Optional.empty();
  }

  /**
   * Line of match, first line is 1.
   * @return line
   */
  public int matchLine() {
    return match.line;
  }

  /**
   * Line offset of match, 1st char on line is 0.
   * @return offset
   */
  public int matchLineOffset() {
    return match.linePos;
  }

  /**
   * Char offset into input where match occurred.
   * @return char offset
   */
  public int matchPos() {
    return match.srcPos;
  }

  /**
   * Length of match.
   * @return match len
   */
  public int matchLen() {
    return matchLen;
  }

  /**
   * Did we match.
   * @return true if matched
   */
  public boolean matched() {
    return match != null;
  }

  /**
   * Did the rule consume chars.
   * @return true if consumed
   */
  public boolean consumed() {
    return consumed;
  }

  @Override
  public String toString() {
    if (matched()) {
      return String.format("RuleReturn match=%s(%s) @ %d for %d (line %d, nextPos=%d)", match != null, consumed, match.srcPos,
        matchLen, match.line, match.linePos);
    } else {
      return "RuleReturn match=false";
    }
  }
}
/**
 * Terminal based on some set of characters. Allows for ignoring of case.
 * Evaluation is handed to a {@link TerminalRule}, passing along the case setting.
 *
 * @param <V> value type
 */
protected static class CharTerminal<V> implements PegLegRule<V> {
  private final TerminalRule<V> delegate;
  boolean ignoreCase;

  /**
   * @param delegate rule that performs the actual matching
   */
  public CharTerminal(TerminalRule<V> delegate) {
    this.delegate = delegate;
  }

  /**
   * Ignore case.
   * @return rule
   */
  public CharTerminal<V> ignoreCase() {
    ignoreCase = true;
    return this;
  }

  @Override
  public RuleReturn<V> rule() {
    return delegate.rule(ignoreCase);
  }
}
/**
 * Creates a parser. The constructor itself does nothing; all initial state comes from
 * the field initializers.
 */
public PegLegParser() {
}
/**
 * Wraps an arbitrary object as either a rule or an executable action.
 * Accepted inputs: CharSequence and Character (turned into rules via str()/ch()),
 * PegLegRule (used as is), Exec (used as is) and Runnable (turned into an Exec via ex()).
 * Exactly one of {@link #asRule()} / {@link #asExec()} is non-null afterwards.
 */
private class Step {
  private PegLegRule<V> rule = null;
  private Exec exec = null;

  /**
   * @param thing object to wrap
   * @throws IllegalArgumentException if {@code thing} is null or of an unsupported type
   */
  @SuppressWarnings("unchecked") // instanceof cannot verify the rule's value type
  public Step(Object thing) {
    if (thing instanceof CharSequence) {
      this.rule = str((CharSequence) thing);
    } else if (thing instanceof Character) {
      this.rule = ch((Character) thing);
    } else if (thing instanceof PegLegParser.PegLegRule) {
      this.rule = (PegLegRule<V>) thing;
    } else if (thing instanceof Exec) {
      this.exec = (Exec) thing;
    } else if (thing instanceof Runnable) {
      this.exec = ex((Runnable) thing);
    } else {
      throw new IllegalArgumentException("Expected String/char/PegLegRule/Exec/Runnable; found: " + thing);
    }
  }

  /** @return the wrapped action, or null if this step is a rule. */
  public Exec asExec() {
    return this.exec;
  }

  /** @return true if this step wraps a rule rather than an action. */
  boolean isRule() {
    return rule != null;
  }

  /** @return the wrapped rule, or null if this step is an action. */
  PegLegRule<V> asRule() {
    return rule;
  }
}
private Map.Entry