// de.unkrig.commons.text.scanner.StatefulScanner (Maven / Gradle / Ivy)
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2012, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.scanner;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.unkrig.commons.nullanalysis.Nullable;
/**
* A scanner that produces {@link AbstractScanner.Token Token}s from a character stream.
*
* Before {@link #produce()} is called, the scanner must be configured by invoking its {@link #addRule(String, Enum)}
* methods.
* These define how character sequences are converted into {@link AbstractScanner.Token}s, and also how the scanner
* changes state. Initially the scanner is in the default state.
*
*
* The non-default states are defined by the S type parameter.
*
*
* For an example usage, see {@link JavaScanner}.
*
*
* @param Enumerates the scanner-specific token types
* @param Enumerates the scanner-specific non-default states
*/
public
class StatefulScanner, S extends Enum> extends AbstractScanner {
/**
* Special value for some method parameters.
*/
@Nullable public final EnumSet ANY_STATE = null; // SUPPRESS CHECKSTYLE MemberName
/**
* Special value for some method parameters; indicates that the current state should remain when the
* rule applies.
*
* @see #addRule(EnumSet, String, Enum, Enum)
*/
@Nullable public final S REMAIN = null; // SUPPRESS CHECKSTYLE MemberName|Align
private final List> stateStack = new ArrayList>();
public
StatefulScanner(Class states) {
this.defaultStateRules = new ArrayList();
this.nonDefaultStateRules = new HashMap>();
this.currentStateRules = this.defaultStateRules;
for (S state : states.getEnumConstants()) {
this.nonDefaultStateRules.put(state, new ArrayList());
}
}
/**
* Clones the configuration of the other scanner, but has a separate state.
*/
public
StatefulScanner(StatefulScanner that) {
this.defaultStateRules = that.defaultStateRules;
this.nonDefaultStateRules = that.nonDefaultStateRules;
// We start in the default state.
this.currentStateRules = this.defaultStateRules;
}
/**
* Adds a rule that applies iff the scanner is in the "default state". After the rule has matched, the scanner
* remains in the default state.
*
* @see Pattern
*/
public Rule
addRule(String regex, TT tokenType) {
Rule rule = new Rule(regex, tokenType, this.defaultStateRules);
this.defaultStateRules.add(rule);
return rule;
}
/**
* Adds a rule that applies iff the scanner is in the given non-default state. The scanner returns to
* the default state after the rule has matched.
*
* @param state {@code null} means "any state"
* @see Pattern
*/
public Rule
addRule(S state, String regex, TT tokenType) {
Rule rule = new Rule(regex, tokenType, this.defaultStateRules);
this.nonDefaultStateRules.get(state).add(rule);
return rule;
}
/**
* Adds a rule that applies iff states{@code ==} {@link #ANY_STATE}, or the scanner is in one of the
* given non-default states, or the states contain {@code null} and the scanner is in the
* default state. The scanner returns to the default state after the rule has matched.
*
* @see Pattern
*/
public Rule
addRule(@Nullable EnumSet states, String regex, TT tokenType) {
Rule rule = new Rule(regex, tokenType, this.defaultStateRules);
if (states == /*this.ANY_STATE*/ null) {
for (List rules : this.nonDefaultStateRules.values()) rules.add(rule);
this.defaultStateRules.add(rule);
} else {
for (S s : states) this.nonDefaultStateRules.get(s).add(rule);
}
return rule;
}
/**
* @return The current state of this scanner; {@code null} for the default state
*/
@Nullable public S
getCurrentState() {
if (this.currentStateRules == this.defaultStateRules) return null;
for (Entry> e : this.nonDefaultStateRules.entrySet()) {
S state = e.getKey();
List rules = e.getValue();
if (rules == this.currentStateRules) return state;
}
throw new AssertionError(this.currentStateRules);
}
/**
* @param newState The new state of this scanner; {@code null} for the default state
*/
public void
setCurrentState(@Nullable S newState) {
this.currentStateRules = newState == null ? this.defaultStateRules : this.nonDefaultStateRules.get(newState);
}
/**
* @return {@code null} iff the input string is exhausted
*/
@Override @Nullable public Token
produce() throws ScanException {
int length = this.cs.length();
if (this.offset == length) return null;
for (Rule rule : this.currentStateRules) {
Matcher matcher = rule.regex.matcher(this.cs);
matcher.region(this.offset, length);
if (matcher.lookingAt()) {
if (rule.popState) {
this.currentStateRules = this.stateStack.remove(this.stateStack.size() - 1);
} else {
if (rule.pushState) this.stateStack.add(this.currentStateRules);
if (rule.nextStateRules != null/*this.REMAIN*/) this.currentStateRules = rule.nextStateRules;
}
this.previousTokenOffset = this.offset;
this.offset = matcher.end();
int gc = matcher.groupCount();
String[] captured = new String[gc];
for (int i = 0; i < gc; i++) captured[i] = matcher.group(i + 1);
return new Token(rule.tokenType, matcher.group(), captured);
}
}
String message = (
"Unexpected character \""
+ this.cs.charAt(this.offset)
+ "\" at offset "
+ this.offset
+ " of input string \""
+ this.cs
+ "\""
);
if (this.currentStateRules == this.defaultStateRules) {
message += " in default state";
} else {
for (Entry> e : this.nonDefaultStateRules.entrySet()) {
S state = e.getKey();
List rules = e.getValue();
if (this.currentStateRules == rules) {
message += " in state " + state;
break;
}
}
}
if (this.currentStateRules.size() == 1) {
message += "; expected " + this.currentStateRules.get(0).tokenType;
} else
if (this.currentStateRules.size() > 1) {
message += "; expected one of " + this.currentStateRules.get(0).tokenType;
for (int i = 1; i < this.currentStateRules.size(); i++) {
message += ", " + this.currentStateRules.get(i).tokenType;
}
}
throw new ScanException(message);
}
/**
* @deprecated Use {@code ss.addRule(regex, tokenType).goTo(nextState)} instead.
*/
@Deprecated public Rule
addRule(String regex, TT tokenType, S nextState) {
return this.addRule(regex, tokenType).goTo(nextState);
}
/**
* @deprecated Use {@code addRule(state, regex, tokenType).goTo(nextState)}
*/
@Deprecated public Rule
addRule(S state, String regex, TT tokenType, @Nullable S nextState) {
return this.addRule(state, regex, tokenType).goTo(nextState);
}
/**
* @deprecated Use {@code ss.addRule(states, regex, tokenType).goTo(nextState)} instead
*/
@Deprecated public Rule
addRule(@Nullable EnumSet states, String regex, TT tokenType, @Nullable S nextState) {
return this.addRule(states, regex, tokenType).goTo(nextState);
}
// IMPLEMENTATION
/**
* The return type of the various {@code addRule(...)} methods, which is not really useful.
*/
public
class Rule {
private final TT tokenType;
private final Pattern regex;
@Nullable private List nextStateRules;
private boolean pushState, popState;
/**
* @param nextStateRules {@code null} means remain in current state
*/
Rule(String regex, TT tokenType, @Nullable List nextStateRules) {
this.regex = Pattern.compile(regex);
this.tokenType = tokenType;
this.nextStateRules = nextStateRules;
}
/**
* @param nextState {@code ss.REMAIN} means remain in current state
*/
public Rule
goTo(@Nullable S nextState) {
this.nextStateRules = StatefulScanner.this.nonDefaultStateRules.get(nextState);
return this;
}
/**
* Saves the current state and changes to the nextState.
*
* @see #pop()
*/
public Rule
push(S nextState) {
this.pushState = true;
this.nextStateRules = StatefulScanner.this.nonDefaultStateRules.get(nextState);
return this;
}
/**
* Restores a previously pushed state.
*
* @see #push(Enum)
*/
public Rule
pop() {
this.popState = true;
return this;
}
@Override public String
toString() { return ">>" + this.regex + "<< => " + this.tokenType; }
}
// CONFIGURATION
private final List defaultStateRules;
private final Map> nonDefaultStateRules;
// STATE
private List currentStateRules;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy