
// org.parboiled.parser.BaseParser (Maven / Gradle / Ivy)
/*
* Copyright (C) 2009-2011 Mathias Doenitz
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.parboiled.parser;
import org.parboiled.Action;
import org.parboiled.Rule;
import org.parboiled.annotations.Cached;
import org.parboiled.annotations.DontExtend;
import org.parboiled.annotations.DontLabel;
import org.parboiled.annotations.SkipActionsInPredicates;
import org.parboiled.annotations.SuppressNode;
import org.parboiled.annotations.SuppressSubnodes;
import org.parboiled.common.Utils;
import org.parboiled.errors.GrammarException;
import org.parboiled.matchers.ActionMatcher;
import org.parboiled.matchers.AnyMatcher;
import org.parboiled.matchers.AnyOfMatcher;
import org.parboiled.matchers.CharIgnoreCaseMatcher;
import org.parboiled.matchers.CharMatcher;
import org.parboiled.matchers.CharRangeMatcher;
import org.parboiled.matchers.EmptyMatcher;
import org.parboiled.matchers.FirstOfMatcher;
import org.parboiled.matchers.FirstOfStringsMatcher;
import org.parboiled.matchers.NothingMatcher;
import org.parboiled.matchers.OneOrMoreMatcher;
import org.parboiled.matchers.OptionalMatcher;
import org.parboiled.matchers.SequenceMatcher;
import org.parboiled.matchers.StringMatcher;
import org.parboiled.matchers.TestMatcher;
import org.parboiled.matchers.TestNotMatcher;
import org.parboiled.matchers.ZeroOrMoreMatcher;
import org.parboiled.support.Characters;
import org.parboiled.support.Chars;
import org.parboiled.support.Checks;
import org.parboiled.matchers.ActionMatcher;
import org.parboiled.matchers.AnyMatcher;
import org.parboiled.matchers.AnyOfMatcher;
import org.parboiled.matchers.CharIgnoreCaseMatcher;
import org.parboiled.matchers.CharMatcher;
import org.parboiled.matchers.CharRangeMatcher;
import org.parboiled.matchers.EmptyMatcher;
import org.parboiled.matchers.FirstOfMatcher;
import org.parboiled.matchers.FirstOfStringsMatcher;
import org.parboiled.matchers.NothingMatcher;
import org.parboiled.matchers.OneOrMoreMatcher;
import org.parboiled.matchers.OptionalMatcher;
import org.parboiled.matchers.SequenceMatcher;
import org.parboiled.matchers.StringMatcher;
import org.parboiled.matchers.TestMatcher;
import org.parboiled.matchers.TestNotMatcher;
import org.parboiled.matchers.ZeroOrMoreMatcher;
import org.parboiled.support.Chars;
import org.parboiled.support.Checks;
import java.util.Arrays;
import static org.parboiled.common.Preconditions.checkArgNotNull;
import static org.parboiled.common.Preconditions.checkArgument;
/**
* Base class of all parboiled parsers. Defines the basic rule creation methods.
*
* @param the type of the parser values
*/
@SuppressWarnings( {"UnusedDeclaration"})
public abstract class BaseParser extends BaseActions {
/**
* Rule matching the {@link Chars#EOI} (end of input) character.
*/
public static final Rule EOI = new CharMatcher(Chars.EOI);
/**
* Rule matching the special {@link Chars#INDENT} character produced by the
* org.parboiled.buffers.IndentDedentInputBuffer.
*/
public static final Rule INDENT = new CharMatcher(Chars.INDENT);
/**
* Rule matching the special {@link Chars#DEDENT} character produced by the
* org.parboiled.buffers.IndentDedentInputBuffer.
*/
public static final Rule DEDENT = new CharMatcher(Chars.DEDENT);
/**
* Rule matching any single character except {@link Chars#EOI}.
*/
public static final Rule ANY = new AnyMatcher();
/**
* Rule that matches nothing (consumes no input) and always succeeds.
*/
public static final Rule EMPTY = new EmptyMatcher();
/**
* Rule that matches nothing and always fails.
*/
public static final Rule NOTHING = new NothingMatcher();
/**
* Creates a new instance of this parsers class using the no-arg constructor. If no no-arg constructor
* exists this method will fail with a java.lang.NoSuchMethodError.
* Using this method is faster than using {@link Parboiled#createParser(Class, Object...)} for creating
* new parser instances since this method does not use reflection.
*
* @param the parser class
* @return a new parser instance
*/
public
> P newInstance() {
throw new UnsupportedOperationException(
"Illegal parser instance, you have to use Parboiled.createParser(...) to create your parser instance!");
}
/**
 * Explicitly builds a rule matching exactly the given character.
 * <p>A character literal written directly in a rule description is normally converted through
 * {@link #fromCharLiteral(char)}. Use this wrapper when that conversion must be bypassed, e.g.
 * because it has been redefined.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param c the char to match
 * @return a new rule
 */
@Cached
@DontLabel
public Rule Ch(char c) {
    final Rule charRule = new CharMatcher(c);
    return charRule;
}
/**
 * Explicitly builds a rule matching the given character independently of its case.
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param c the char to match independently of its case
 * @return a new rule
 */
@Cached
@DontLabel
public Rule IgnoreCase(char c) {
    // When both case checks agree the char has no distinct case, so a plain char rule suffices.
    final boolean caseChecksAgree = Character.isLowerCase(c) == Character.isUpperCase(c);
    return caseChecksAgree ? Ch(c) : new CharIgnoreCaseMatcher(c);
}
/**
 * Builds a rule matching every character in the range from cLow to cHigh (both inclusive).
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same arguments will yield the same rule instance.</p>
 *
 * @param cLow  the start char of the range (inclusive)
 * @param cHigh the end char of the range (inclusive)
 * @return a new rule
 */
@Cached
@DontLabel
public Rule CharRange(char cLow, char cHigh) {
    if (cLow != cHigh) {
        return new CharRangeMatcher(cLow, cHigh);
    }
    // a range holding a single char is expressed as a plain char rule
    return Ch(cLow);
}
/**
 * Builds a rule that matches any one of the characters contained in the given string.
 * <p>Note: the work is delegated to {@link #AnyOf(char[])}.</p>
 *
 * @param characters the characters
 * @return a new rule
 */
@DontLabel
public Rule AnyOf(String characters) {
    checkArgNotNull(characters, "characters");
    final char[] chars = characters.toCharArray();
    return AnyOf(chars);
}
/**
 * Builds a rule that matches any one of the characters in the given char array.
 * <p>A single-element array is turned into a plain {@link #Ch(char)} rule; all other arrays are
 * wrapped into a {@link Characters} set and handed to {@link #AnyOf(Characters)}.</p>
 *
 * @param characters the characters, must not be null or empty
 * @return a new rule
 */
@DontLabel
public Rule AnyOf(char[] characters) {
    checkArgNotNull(characters, "characters");
    checkArgument(characters.length > 0);
    if (characters.length == 1) {
        return Ch(characters[0]);
    }
    return AnyOf(Characters.of(characters));
}
/**
 * Builds a rule that matches any one of the given characters.
 * <p>A non-subtractive set holding exactly one char becomes a {@link #Ch(char)} rule, a set equal
 * to {@link Characters#NONE} becomes {@link #NOTHING}, everything else an {@link AnyOfMatcher}.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param characters the characters
 * @return a new rule
 */
@Cached
@DontLabel
public Rule AnyOf(Characters characters) {
    checkArgNotNull(characters, "characters");
    final boolean singleChar = !characters.isSubtractive() && characters.getChars().length == 1;
    if (singleChar) {
        return Ch(characters.getChars()[0]);
    }
    return characters.equals(Characters.NONE) ? NOTHING : new AnyOfMatcher(characters);
}
/**
 * Builds a rule that matches every character except EOI and the ones contained in the given string.
 * <p>Note: the work is delegated to {@link #NoneOf(char[])}.</p>
 *
 * @param characters the characters to exclude
 * @return a new rule
 */
@DontLabel
public Rule NoneOf(String characters) {
    checkArgNotNull(characters, "characters");
    final char[] chars = characters.toCharArray();
    return NoneOf(chars);
}
/**
 * Builds a rule that matches every character except EOI and the ones in the given char array.
 * <p>If the array does not already list {@link Chars#EOI} it is appended to a copy of the array,
 * so the resulting rule never matches the end of the input.</p>
 *
 * @param characters the characters to exclude, must not be null or empty
 * @return a new rule
 */
@DontLabel
public Rule NoneOf(char[] characters) {
    checkArgNotNull(characters, "characters");
    checkArgument(characters.length > 0);

    // EOI always has to be part of the excluded set
    boolean eoiListed = false;
    for (int i = 0; i < characters.length && !eoiListed; i++) {
        eoiListed = characters[i] == Chars.EOI;
    }

    char[] excluded = characters;
    if (!eoiListed) {
        excluded = Arrays.copyOf(characters, characters.length + 1);
        excluded[characters.length] = Chars.EOI;
    }
    return AnyOf(Characters.allBut(excluded));
}
/**
 * Explicitly builds a rule matching the given string.
 * <p>A string literal written directly in a rule description is normally converted through
 * {@link #fromStringLiteral(String)}. Use this wrapper when that conversion must be bypassed,
 * e.g. because it has been redefined.</p>
 * <p>Note: the work is delegated to the {@link Cached} char-array overload.</p>
 *
 * @param string the String to match
 * @return a new rule
 */
@DontLabel
public Rule String(String string) {
    checkArgNotNull(string, "string");
    final char[] chars = string.toCharArray();
    return String(chars);
}
/**
 * Explicitly builds a rule matching the string formed by the given characters.
 * <p>A string literal written directly in a rule description is normally converted through
 * {@link #fromStringLiteral(String)}. Use this wrapper when that conversion must be bypassed,
 * e.g. because it has been redefined.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param characters the characters of the string to match
 * @return a new rule
 */
@Cached
@SuppressSubnodes
@DontLabel
public Rule String(char... characters) {
    final int length = characters.length;
    if (length == 1) {
        // one-char strings are optimized into a plain char rule
        return Ch(characters[0]);
    }
    final Rule[] charRules = new Rule[length];
    int pos = 0;
    for (char ch : characters) {
        charRules[pos++] = Ch(ch);
    }
    return new StringMatcher(charRules, characters);
}
/**
 * Explicitly builds a rule matching the given string in a case-independent fashion.
 * <p>Note: the work is delegated to the {@link Cached} char-array overload.</p>
 *
 * @param string the string to match
 * @return a new rule
 */
@DontLabel
public Rule IgnoreCase(String string) {
    checkArgNotNull(string, "string");
    final char[] chars = string.toCharArray();
    return IgnoreCase(chars);
}
/**
 * Explicitly builds a rule matching the string formed by the given characters in a
 * case-independent fashion.
 * <p>For more than one character the result is a sequence of per-character case-independent rules,
 * labelled with the double-quoted string.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param characters the characters of the string to match
 * @return a new rule
 */
@Cached
@SuppressSubnodes
@DontLabel
public Rule IgnoreCase(char... characters) {
    final int length = characters.length;
    if (length == 1) {
        // one-char strings are optimized into a single case-independent char rule
        return IgnoreCase(characters[0]);
    }
    final Rule[] charRules = new Rule[length];
    int pos = 0;
    for (char ch : characters) {
        charRules[pos++] = IgnoreCase(ch);
    }
    final SequenceMatcher sequence = (SequenceMatcher) Sequence(charRules);
    return sequence.label('"' + String.valueOf(characters) + '"');
}
/**
 * Builds a rule that tries the given subrules one after the other and succeeds as soon as one of
 * them matches. If all subrules fail this rule fails as well.
 * <p>Note: the work is delegated to the {@link Cached} array overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule FirstOf(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Object[] allRules = Utils.arrayOf(rule, rule2, moreRules);
    return FirstOf(allRules);
}
/**
 * Builds a rule that tries the given subrules one after the other and succeeds as soon as one of
 * them matches. If all subrules fail this rule fails as well.
 * <p>A single subrule is returned as is (after conversion). If every converted subrule is a
 * {@link StringMatcher} a {@link FirstOfStringsMatcher} is created, otherwise a plain
 * {@link FirstOfMatcher}.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rules the subrules
 * @return a new rule
 */
@Cached
@DontLabel
public Rule FirstOf(Object[] rules) {
    checkArgNotNull(rules, "rules");
    if (rules.length == 1) {
        return toRule(rules[0]);
    }

    final Rule[] subRules = toRules(rules);
    final char[][] strings = new char[rules.length][];
    int index = 0;
    for (Rule subRule : subRules) {
        if (!(subRule instanceof StringMatcher)) {
            // at least one alternative is not a plain string: use the general matcher
            return new FirstOfMatcher(subRules);
        }
        strings[index++] = ((StringMatcher) subRule).characters;
    }
    return new FirstOfStringsMatcher(subRules, strings);
}
/**
 * Builds a rule that repeatedly matches its subrule and succeeds if the subrule matches at least
 * once. If the subrule does not match at least once this rule fails.
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rule the subrule
 * @return a new rule
 */
@Cached
@DontLabel
public Rule OneOrMore(Object rule) {
    final Rule subRule = toRule(rule);
    return new OneOrMoreMatcher(subRule);
}
/**
 * Builds a rule that repeatedly matches the sequence of the given subrules and succeeds if the
 * sequence matches at least once. If the sequence does not match at least once this rule fails.
 * <p>Note: the subrules are combined via {@link #Sequence(Object, Object, Object...)} and the
 * result is passed to the {@link Cached} single-rule overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule OneOrMore(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Rule sequence = Sequence(rule, rule2, moreRules);
    return OneOrMore(sequence);
}
/**
 * Builds a rule that tries a match on its subrule and always succeeds, independently of whether
 * the subrule matched.
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rule the subrule
 * @return a new rule
 */
@Cached
@DontLabel
public Rule Optional(Object rule) {
    final Rule subRule = toRule(rule);
    return new OptionalMatcher(subRule);
}
/**
 * Builds a rule that tries a match on the sequence of the given subrules and always succeeds,
 * independently of whether the sequence matched.
 * <p>Note: the subrules are combined via {@link #Sequence(Object, Object, Object...)} and the
 * result is passed to the {@link Cached} single-rule overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule Optional(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Rule sequence = Sequence(rule, rule2, moreRules);
    return Optional(sequence);
}
/**
 * Builds a rule that only succeeds if all of its subrules succeed, one after the other.
 * <p>Note: the work is delegated to the {@link Cached} array overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule Sequence(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Object[] allRules = Utils.arrayOf(rule, rule2, moreRules);
    return Sequence(allRules);
}
/**
 * Builds a rule that only succeeds if all of its subrules succeed, one after the other.
 * <p>An array holding exactly one element yields that element's rule directly instead of a
 * {@link SequenceMatcher}.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same arguments will yield the same rule instance.</p>
 *
 * @param rules the sub rules
 * @return a new rule
 */
@Cached
@DontLabel
public Rule Sequence(Object[] rules) {
    checkArgNotNull(rules, "rules");
    if (rules.length == 1) {
        return toRule(rules[0]);
    }
    return new SequenceMatcher(toRules(rules));
}
/**
 * Builds a rule that acts as a syntactic predicate, i.e. tests the given sub rule against the
 * current input position without actually matching any characters. Succeeds if the sub rule
 * succeeds and fails if the sub rule fails. Since this rule does not actually consume any input
 * it will never create a parse tree node.
 * <p>It also carries a {@link SuppressNode} annotation, which means all sub nodes will also never
 * create a parse tree node. This can be important for actions contained in sub rules of this rule
 * that otherwise expect the presence of certain parse tree structures in their context.
 * Also see {@link SkipActionsInPredicates}.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rule the subrule
 * @return a new rule
 */
@Cached
@SuppressNode
@DontLabel
public Rule Test(Object rule) {
    return new TestMatcher(toRule(rule));
}
/**
 * Builds a rule that acts as a syntactic predicate, i.e. tests the sequence of the given sub
 * rules against the current input position without actually matching any characters. Succeeds if
 * the sub sequence succeeds and fails if the sub sequence fails. Since this rule does not actually
 * consume any input it will never create a parse tree node.
 * <p>It also carries a {@link SuppressNode} annotation, which means all sub nodes will also never
 * create a parse tree node. This can be important for actions contained in sub rules of this rule
 * that otherwise expect the presence of certain parse tree structures in their context.
 * Also see {@link SkipActionsInPredicates}.</p>
 * <p>Note: the subrules are combined via {@link #Sequence(Object, Object, Object...)} and the
 * result is passed to the {@link Cached} single-rule overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule Test(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Rule sequence = Sequence(rule, rule2, moreRules);
    return Test(sequence);
}
/**
 * Builds a rule that acts as an inverse syntactic predicate, i.e. tests the given sub rule against
 * the current input position without actually matching any characters. Succeeds if the sub rule
 * fails and fails if the sub rule succeeds. Since this rule does not actually consume any input
 * it will never create a parse tree node.
 * <p>It also carries a {@link SuppressNode} annotation, which means all sub nodes will also never
 * create a parse tree node. This can be important for actions contained in sub rules of this rule
 * that otherwise expect the presence of certain parse tree structures in their context.
 * Also see {@link SkipActionsInPredicates}.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rule the subrule
 * @return a new rule
 */
@Cached
@SuppressNode
@DontLabel
public Rule TestNot(Object rule) {
    return new TestNotMatcher(toRule(rule));
}
/**
 * Builds a rule that acts as an inverse syntactic predicate, i.e. tests the sequence of the given
 * sub rules against the current input position without actually matching any characters. Succeeds
 * if the sub sequence fails and fails if the sub sequence succeeds. Since this rule does not
 * actually consume any input it will never create a parse tree node.
 * <p>It also carries a {@link SuppressNode} annotation, which means all sub nodes will also never
 * create a parse tree node. This can be important for actions contained in sub rules of this rule
 * that otherwise expect the presence of certain parse tree structures in their context.
 * Also see {@link SkipActionsInPredicates}.</p>
 * <p>Note: the subrules are combined via {@link #Sequence(Object, Object, Object...)} and the
 * result is passed to the {@link Cached} single-rule overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule TestNot(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Rule sequence = Sequence(rule, rule2, moreRules);
    return TestNot(sequence);
}
/**
 * Builds a rule that repeatedly matches its subrule.
 * Always succeeds, even if the subrule does not match a single time.
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same argument will yield the same rule instance.</p>
 *
 * @param rule the subrule
 * @return a new rule
 */
@Cached
@DontLabel
public Rule ZeroOrMore(Object rule) {
    final Rule subRule = toRule(rule);
    return new ZeroOrMoreMatcher(subRule);
}
/**
 * Builds a rule that repeatedly matches the sequence of the given sub rules.
 * Always succeeds, even if the sub sequence does not match a single time.
 * <p>Note: the subrules are combined via {@link #Sequence(Object, Object, Object...)} and the
 * result is passed to the {@link Cached} single-rule overload.</p>
 *
 * @param rule      the first subrule
 * @param rule2     the second subrule
 * @param moreRules the other subrules
 * @return a new rule
 */
@DontLabel
public Rule ZeroOrMore(Object rule, Object rule2, Object... moreRules) {
    checkArgNotNull(moreRules, "moreRules");
    final Rule sequence = Sequence(rule, rule2, moreRules);
    return ZeroOrMore(sequence);
}
/**
 * Builds a rule that matches the given sub rule a fixed number of times in a row, without any
 * separator between the repetitions.
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same arguments will yield the same rule instance.</p>
 *
 * @param repetitions The number of repetitions to match. Must be &gt;= 0.
 * @param rule        the sub rule to match repeatedly.
 * @return a new rule
 */
@Cached
@DontLabel
public Rule NTimes(int repetitions, Object rule) {
    final Object noSeparator = null;
    return NTimes(repetitions, rule, noSeparator);
}
/**
 * Builds a rule that matches the given sub rule a fixed number of times, optionally separated by
 * a given separator rule.
 * <p>Zero repetitions yield {@link #EMPTY}, one repetition yields the converted sub rule itself;
 * otherwise a sequence of the repeated rule (interleaved with the separator, if any) is built.</p>
 * <p>Note: this method carries a {@link Cached} annotation, which means that multiple invocations
 * with the same arguments will yield the same rule instance.</p>
 *
 * @param repetitions The number of repetitions to match. Must be &gt;= 0.
 * @param rule        the sub rule to match repeatedly.
 * @param separator   the separator to match, if null the individual sub rules will be matched
 *                    without separator.
 * @return a new rule
 */
@Cached
@DontLabel
public Rule NTimes(int repetitions, Object rule, Object separator) {
    checkArgNotNull(rule, "rule");
    checkArgument(repetitions >= 0, "repetitions must be non-negative");
    if (repetitions == 0) {
        return EMPTY;
    }
    if (repetitions == 1) {
        return toRule(rule);
    }

    final Object[] parts;
    if (separator == null) {
        parts = new Object[repetitions];
        Arrays.fill(parts, rule);
    } else {
        // layout: rule, separator, rule, ..., separator, rule (always an odd number of slots)
        parts = new Object[repetitions * 2 - 1];
        parts[0] = rule;
        for (int i = 1; i < parts.length; i += 2) {
            parts[i] = separator;
            parts[i + 1] = rule;
        }
    }
    return Sequence(parts);
}
///************************* "MAGIC" METHODS ***************************///
/**
 * Explicitly marks the wrapped expression as an action expression.
 * parboiled transforms the wrapped expression into an {@link Action} instance during parser
 * construction. Invoking this method directly always throws.
 *
 * @param expression the expression to turn into an Action
 * @return the Action wrapping the given expression
 * @throws UnsupportedOperationException whenever this method body is actually executed
 */
public static Action ACTION(boolean expression) {
    final String message = "ACTION(...) calls can only be used in Rule creating parser methods";
    throw new UnsupportedOperationException(message);
}
///************************* HELPER METHODS ***************************///
/**
 * Used internally to convert the given character literal to a parser rule.
 * The default implementation simply delegates to {@link #Ch(char)}.
 * You can override this method, e.g. for specifying a Sequence that automatically matches all
 * trailing whitespace after the character.
 *
 * @param c the character
 * @return the rule
 */
@DontExtend
protected Rule fromCharLiteral(char c) {
    final Rule charRule = Ch(c);
    return charRule;
}
/**
 * Used internally to convert the given string literal to a parser rule.
 * The default implementation converts the string to a char array and delegates to
 * {@link #fromCharArray(char[])}.
 * You can override this method, e.g. for specifying a Sequence that automatically matches all
 * trailing whitespace after the string.
 *
 * @param string the string
 * @return the rule
 */
@DontExtend
protected Rule fromStringLiteral(String string) {
    checkArgNotNull(string, "string");
    final char[] chars = string.toCharArray();
    return fromCharArray(chars);
}
/**
 * Used internally to convert the given char array to a parser rule.
 * The default implementation delegates to the char-array overload of {@code String(...)}.
 * You can override this method, e.g. for specifying a Sequence that automatically matches all
 * trailing whitespace after the characters.
 *
 * @param array the char array
 * @return the rule
 */
@DontExtend
protected Rule fromCharArray(char[] array) {
    checkArgNotNull(array, "array");
    final Rule stringRule = String(array);
    return stringRule;
}
/**
 * Converts the given object array to an array of rules by applying {@link #toRule(Object)} to
 * each element, preserving the element order.
 *
 * @param objects the objects to convert
 * @return the rules corresponding to the given objects
 */
@DontExtend
public Rule[] toRules(Object... objects) {
    checkArgNotNull(objects, "objects");
    final Rule[] converted = new Rule[objects.length];
    int index = 0;
    for (Object object : objects) {
        converted[index++] = toRule(object);
    }
    return converted;
}
/**
 * Converts the given object to a rule.
 * <p>Rules are returned unchanged; characters, strings and char arrays go through
 * {@link #fromCharLiteral(char)}, {@link #fromStringLiteral(String)} and
 * {@link #fromCharArray(char[])} respectively; {@link Action} instances are wrapped into an
 * {@link ActionMatcher}. A Boolean value is reported through {@code Checks.ensure(...)} with a
 * hint to wrap the expression in ACTION(...).</p>
 * This method can be overridden to enable the use of custom objects directly in rule
 * specifications.
 *
 * @param obj the object to convert
 * @return the rule corresponding to the given object
 * @throws GrammarException if the object cannot be converted to a rule
 */
@DontExtend
public Rule toRule(Object obj) {
    if (obj instanceof Rule) {
        return (Rule) obj;
    } else if (obj instanceof Character) {
        return fromCharLiteral((Character) obj);
    } else if (obj instanceof String) {
        return fromStringLiteral((String) obj);
    } else if (obj instanceof char[]) {
        return fromCharArray((char[]) obj);
    } else if (obj instanceof Action) {
        return new ActionMatcher((Action) obj);
    }

    // nothing matched: give a dedicated hint for stray booleans before failing generically
    final boolean unwrappedBoolean = obj instanceof Boolean;
    Checks.ensure(!unwrappedBoolean, "Rule specification contains an unwrapped Boolean value, " +
            "if you were trying to specify a parser action wrap the expression with ACTION(...)");
    throw new GrammarException("'" + obj + "' cannot be automatically converted to a parser Rule");
}
}