// de.unkrig.commons.text.scanner.StatelessScanner (Maven / Gradle / Ivy)
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2012, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.scanner;
import java.util.ArrayList;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import de.unkrig.commons.nullanalysis.Nullable;
/**
* A scanner that produces {@link AbstractScanner.Token}s. Before {@link #produce()} is called, the scanner must be
* configured by invoking its {@link #addRule(String, Enum)} methods. These define how character sequences are
* converted into {@link AbstractScanner.Token}s.
*
* For an example usage, see the source code of {@link de.unkrig.commons.text.expression.Scanner}
*
* @param Enumerates the scanner-specific token types
*/
public
class StatelessScanner> extends AbstractScanner {
/**
* Adds a rule that produces the given tokenType if the next characters of the input match the given
* regex.
*/
public void
addRule(String regex, TT tokenType) {
this.rules.add(new Rule(regex, tokenType));
}
/**
* @return {@code null} iff the input string is exhausted
*/
@Override @Nullable public Token
produce() throws ScanException {
int length = this.cs.length();
if (this.offset == length) return null;
for (Rule rule : this.rules) {
Matcher matcher = rule.regex.matcher(this.cs);
matcher.region(this.offset, length);
if (matcher.lookingAt()) {
this.previousTokenOffset = this.offset;
this.offset = matcher.end();
int gc = matcher.groupCount();
String[] captured = new String[gc];
for (int i = 0; i < gc; i++) captured[i] = matcher.group(i + 1);
return new Token(rule.tokenType, matcher.group(), captured);
}
}
throw new ScanException(
"Unexpected character \""
+ this.cs.charAt(this.offset)
+ "\" at offset "
+ this.offset
+ " of \""
+ this.cs
+ "\""
);
}
// IMPLEMENTATION
private static
class Rule> {
final TT tokenType;
final Pattern regex;
Rule(String regex, TT tokenType) {
this.regex = Pattern.compile(regex);
this.tokenType = tokenType;
}
}
// CONFIGURATION
private final List> rules = new ArrayList>();
}