All downloads are free. Search and download functionality uses the official Maven repository.

de.unkrig.commons.text.scanner.StatefulScanner Maven / Gradle / Ivy

Go to download

A versatile Java(TM) library that implements many useful container and utility classes.

There is a newer version: 1.1.12
Show newest version

/*
 * de.unkrig.commons - A general-purpose Java class library
 *
 * Copyright (c) 2012, Arno Unkrig
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are permitted provided that the
 * following conditions are met:
 *
 *    1. Redistributions of source code must retain the above copyright notice, this list of conditions and the
 *       following disclaimer.
 *    2. Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the
 *       following disclaimer in the documentation and/or other materials provided with the distribution.
 *    3. The name of the author may not be used to endorse or promote products derived from this software without
 *       specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
 * THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

package de.unkrig.commons.text.scanner;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import de.unkrig.commons.nullanalysis.Nullable;

/**
 * A scanner that produces {@link AbstractScanner.Token Token}s from a character stream.
 * 

* Before {@link #produce()} is called, the scanner must be configured by invoking its {@link #addRule(String, Enum)} * methods. * These define how character sequences are converted into {@link AbstractScanner.Token}s, and also how the scanner * changes state. Initially the scanner is in the default state. *

*

* The non-default states are defined by the {@code S} type parameter. *

*

* For an example usage, see {@link JavaScanner}. *

* * @param Enumerates the scanner-specific token types * @param Enumerates the scanner-specific non-default states */ public class StatefulScanner, S extends Enum> extends AbstractScanner { public StatefulScanner(Class states) { for (S state : states.getEnumConstants()) { this.nonDefaultStateRules.put(state, new ArrayList>()); } } /** * Adds a rule that applies iff the scanner is in the "default state". After the rule has matched, the scanner * remains in the default state. * * @see Pattern */ public void addRule(String regex, TT tokenType) { this.addRule(regex, tokenType, this.defaultStateRules); } /** * Adds a rule that applies iff the scanner is in the "default state". * * @param nextState The new current state after the rule has matched * @see Pattern */ public void addRule(String regex, TT tokenType, S nextState) { this.addRule(regex, tokenType, this.nonDefaultStateRules.get(nextState)); } /** * Adds a rule that applies iff the scanner is in the the given {@code state}. The scanner returns to the default * state after the rule has matched. * * @param state {@code null} means "any state" * @see Pattern */ public void addRule(@Nullable S state, String regex, TT tokenType) { this.addRule(state, regex, tokenType, this.defaultStateRules); } /** * Adds a rule that applies iff the scanner is in the the given {@code state}. 
* * @param state {@code null} means "any state" * @param nextState The new current state after the rule has matched * @see Pattern */ public void addRule(@Nullable S state, String regex, TT tokenType, S nextState) { this.addRule(state, regex, tokenType, this.nonDefaultStateRules.get(nextState)); } /** * @return {@code null} iff the input string is exhausted */ @Override @Nullable public Token produce() throws ScanException { int length = this.cs.length(); if (this.offset == length) return null; for (Rule rule : this.currentStateRules) { Matcher matcher = rule.regex.matcher(this.cs); matcher.region(this.offset, length); if (matcher.lookingAt()) { this.currentStateRules = rule.nextStateRules; this.previousTokenOffset = this.offset; this.offset = matcher.end(); return new Token(rule.tokenType, matcher.group()); } } throw new ScanException( "Unexpected character '" + this.cs.charAt(this.offset) + "' at offset " + this.offset + " of '" + this.cs + "'" ); } // IMPLEMENTATION private void addRule(String regex, TT tokenType, List> nextRules) { this.defaultStateRules.add(new Rule(regex, tokenType, nextRules)); } /** * @param state {@code null} means "any state" */ private void addRule(@Nullable S state, String regex, TT tokenType, List> nextRules) { Rule rule = new Rule(regex, tokenType, nextRules); if (state != null) { this.nonDefaultStateRules.get(state).add(rule); } else { for (List> rules : this.nonDefaultStateRules.values()) rules.add(rule); this.defaultStateRules.add(rule); } } private static class Rule, S extends Enum> { final TT tokenType; final Pattern regex; final List> nextStateRules; public Rule(String regex, TT tokenType, List> nextStateRules) { this.regex = Pattern.compile(regex); this.tokenType = tokenType; this.nextStateRules = nextStateRules; } } // CONFIGURATION private final List> defaultStateRules = new ArrayList>(); private final Map>> nonDefaultStateRules = new HashMap>>(); // STATE private List> currentStateRules = this.defaultStateRules; }