// de.unkrig.commons.text.scanner.AbstractScanner Maven / Gradle / Ivy
/*
* de.unkrig.commons - A general-purpose Java class library
*
* Copyright (c) 2012, Arno Unkrig
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
* following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
* following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
* following disclaimer in the documentation and/or other materials provided with the distribution.
* 3. Neither the name of the copyright holder nor the names of its contributors may be used to endorse or promote
* products derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package de.unkrig.commons.text.scanner;
import java.util.Collection;
import java.util.regex.Pattern;
import de.unkrig.commons.lang.AssertionUtil;
import de.unkrig.commons.lang.protocol.Predicate;
import de.unkrig.commons.lang.protocol.ProducerUtil;
import de.unkrig.commons.lang.protocol.ProducerWhichThrows;
/**
* A scanner that produces {@link Token}s.
*
* @param Enumerates the scanner-specific token types
* @see #produce()
*/
public abstract
class AbstractScanner> implements StringScanner {
static { AssertionUtil.enableAssertionsForThisClass(); }
/**
* Representation of a scanned token.
*
* @param
*/
public static
class Token> {
/**
* The type of this token.
*/
public final TT type;
/**
* The text of this token, exactly as read from the document.
*/
public final String text;
/**
* The input subsequences captured by the rule's pattern match. The length equals the number of capturing
* groups, and the first capturing group is stored at position zero.
*
* Notice that multiple rules may generate the same token type, while the rules' patterns have different
* capturing groups.
*
*
* An array of length zero means that the matched pattern has no capturing groups.
*
*
* @see Pattern
*/
public final String[] captured;
private static final String[] DEFAULT_CAPTURED = new String[0];
public
Token(TT type, String text) {
assert type != null;
assert text != null;
this.type = type;
this.text = text;
this.captured = Token.DEFAULT_CAPTURED;
}
public
Token(TT type, String text, String[] captured) {
assert type != null;
assert text != null;
this.type = type;
this.text = text;
this.captured = captured;
}
@Override public String
toString() {
return this.text;
}
}
@Override public AbstractScanner
setInput(CharSequence cs) {
this.cs = cs;
this.offset = 0;
this.previousTokenOffset = -1;
return this;
}
@Override public int
getOffset() {
return this.offset;
}
@Override public int
getPreviousTokenOffset() {
return this.previousTokenOffset;
}
/**
* Creates and returns a producer which skips tokens of the suppressedTokenType.
*/
public ProducerWhichThrows, ScanException>
suppress(final TT suppressedTokenType) {
return ProducerUtil.filter(this, new Predicate>() {
@Override public boolean evaluate(Token token) { return token.type != suppressedTokenType; }
});
}
/**
* Creates and returns a producer which skips tokens of the suppressedTokenTypes.
*/
public ProducerWhichThrows, ScanException>
suppress(final Collection suppressedTokenTypes) {
return ProducerUtil.filter(this, new Predicate>() {
@Override public boolean evaluate(Token token) { return !suppressedTokenTypes.contains(token.type); }
});
}
@Override public String
toString() {
return "\"" + this.cs + "\" at offset " + this.previousTokenOffset;
}
// STATE
/**
* The string currently being scanned; typically one line from an input document.
*/
protected CharSequence cs = "";
/**
* The position within {@link #cs} of the next token to be scanned.
*/
protected int offset;
/**
* The position within {@link #cs} of the previously scanned token.
*/
protected int previousTokenOffset = -1;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy