net.sf.mmm.util.scanner.CharStreamScanner Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mmm-util-core
Provides core utilities that fill gaps in the JDK.
The newest version!
/* $Id: CharStreamScanner.java 401 2008-01-13 21:02:06Z hohwille $
 * Copyright (c) The m-m-m Team, Licensed under the Apache License, Version 2.0
 * http://www.apache.org/licenses/LICENSE-2.0 */
package net.sf.mmm.util.scanner;

import net.sf.mmm.util.filter.CharFilter;

/**
 * This is the interface for a scanner that can be used to parse a stream or
 * sequence of characters.
 * <p>
 * The scanner maintains a {@link #getCurrentIndex() current index} that is
 * advanced whenever characters are consumed. Methods named {@code read*}
 * return what they consumed, methods named {@code skip*} only advance the
 * index, and {@code peek} variants leave the index untouched.
 * 
 * @author Joerg Hohwiller (hohwille at users.sourceforge.net)
 */
public interface CharStreamScanner {

  /**
   * This method gets the current position in the stream to scan. It will
   * initially be {@code 0}. In other words this method returns the number of
   * characters that have already been {@link #next() consumed}.
   * 
   * @return the current index position.
   */
  int getCurrentIndex();

  /**
   * This method determines if there is at least one more character available.
   * 
   * @return {@code true} if there is at least one character available,
   *         {@code false} if the end has been reached.
   */
  boolean hasNext();

  /**
   * This method reads the current character and increments the
   * {@link #getCurrentIndex() index}, stepping to the next character. You need
   * to {@link #hasNext() check} if a character is available before calling
   * this method.
   * 
   * @see #forceNext()
   * 
   * @return the current character.
   */
  char next();

  /**
   * Like {@link #next()} this method reads the
   * {@link #getCurrentIndex() current} character and increments the
   * {@link #getCurrentIndex() index}. If there is no character
   * {@link #hasNext() available} this method will do nothing and return
   * {@code 0} (the NULL character and NOT {@code '0'}).
   * 
   * @return the current character or {@code 0} if none is
   *         {@link #hasNext() available}.
   */
  char forceNext();

  /**
   * This method reads the current character without incrementing the
   * {@link #getCurrentIndex() index}. You need to {@link #hasNext() check} if
   * a character is available before calling this method.
   * 
   * @see #forcePeek()
   * 
   * @return the current character.
   */
  char peek();

  /**
   * This method reads the {@link #getCurrentIndex() current} character without
   * incrementing the {@link #getCurrentIndex() index}. If there is no
   * character {@link #hasNext() available} this method will return {@code 0}
   * (the NULL character and NOT {@code '0'}).
   * 
   * @return the current character or {@code 0} if none is
   *         {@link #hasNext() available}.
   */
  char forcePeek();

  /**
   * This method reads the {@link #next() next character} if it is a digit.
   * Otherwise the state remains unchanged.
   * 
   * @return the numeric value of the next Latin digit (e.g. {@code 0} if
   *         {@code '0'}) or {@code -1} if the {@link #peek() current character}
   *         is no Latin digit.
   */
  int readDigit();

  /**
   * This method reads the {@code long} starting at the
   * {@link #getCurrentIndex() current position} by reading as many Latin
   * digits as available, but at maximum the given {@code maxDigits}, and
   * returns its {@link Long#parseLong(String) parsed} value.
   * <p>
   * <b>ATTENTION:</b> This method does NOT treat signs ({@code +} or
   * {@code -}). To do so, scan them yourself before and negate the result as
   * needed.
   * 
   * @param maxDigits is the maximum number of digits that will be read. The
   *        value has to be positive (greater than zero).
   * @return the parsed number.
   * @throws NumberFormatException if the
   *         {@link #getCurrentIndex() current position} does NOT point to a
   *         number.
   */
  long readLong(int maxDigits) throws NumberFormatException;

  /**
   * This method reads the number of {@link #next() next characters} given by
   * {@code count} and returns them as a string. If there are fewer characters
   * {@link #hasNext() available}, the returned string will be shorter than
   * {@code count} and only contain the available characters.
   * 
   * @param count is the number of characters to read. You may use
   *        {@link Integer#MAX_VALUE} to read until the end of data if the
   *        data-size is suitable.
   * @return a string with the given number of characters or all available
   *         characters if fewer than {@code count}. Will be the empty string
   *         if no character is {@link #hasNext() available} at all.
   */
  String read(int count);

  /**
   * This method skips all {@link #next() next characters} as long as they are
   * equal to the according character of the {@code expected} string.
   * <p>
   * If a character differs, this method stops and the parser points to the
   * first character that differs from {@code expected}. Except for the latter
   * circumstance, this method behaves like the following code:
   * 
   * <pre>
   * {@link #read(int) read}(expected.length()).equals[IgnoreCase](expected)
   * </pre>
   * 
   * <b>ATTENTION:</b> Be aware that if already the first character differs,
   * this method will NOT change the state of the scanner. So take care NOT to
   * produce infinite loops.
   * 
   * @param expected is the expected string.
   * @param ignoreCase - if {@code true} the case of the characters is ignored
   *        when compared.
   * @return {@code true} if the {@code expected} string was successfully
   *         consumed from this scanner, {@code false} otherwise.
   */
  boolean expect(String expected, boolean ignoreCase);

  /**
   * This method checks that the {@link #next() current character} is equal to
   * the given {@code expected} character.
   * <p>
   * If the current character was as expected, the parser points to the next
   * character. Otherwise its position will remain unchanged.
   * 
   * @param expected is the expected character.
   * @return {@code true} if the current character is the same as
   *         {@code expected}, {@code false} otherwise.
   */
  boolean expect(char expected);

  /**
   * This method skips all {@link #next() next characters} until the given
   * {@code stop} character or the end is reached. If the {@code stop}
   * character was reached, this scanner will point to the next character after
   * {@code stop} when this method returns.
   * 
   * @param stop is the character to read until.
   * @return {@code true} if the first occurrence of the given {@code stop}
   *         character has been passed, {@code false} if there is no such
   *         character.
   */
  boolean skipUntil(char stop);

  /**
   * This method skips all {@link #next() next characters} until the given
   * {@code stop} character or the end of the string to parse is reached.
   * Unlike {@link #skipUntil(char)}, this method will read over the
   * {@code stop} character if it is escaped with the given {@code escape}
   * character.
   * 
   * @param stop is the character to read until.
   * @param escape is the character used to escape the stop character (e.g.
   *        {@code '\'}).
   * @return {@code true} if the first occurrence of the given {@code stop}
   *         character has been passed, {@code false} if there is no such
   *         character.
   */
  boolean skipUntil(char stop, char escape);

  /**
   * This method reads all {@link #next() next characters} until the given
   * {@code stop} character or the end is reached.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * after the (first) {@code stop} character or to the end if NO such
   * character exists.
   * 
   * @param stop is the character to read until.
   * @param acceptEof if {@code true} EOF will be treated as {@code stop},
   *        too.
   * @return the string with all read characters excluding the {@code stop}
   *         character or {@code null} if there was no {@code stop} character
   *         and {@code acceptEof} is {@code false}.
   */
  String readUntil(char stop, boolean acceptEof);

  /**
   * This method reads all {@link #next() next characters} until the given
   * {@code stop} character or the end of the string to parse is reached.
   * Unlike {@link #readUntil(char, boolean)}, this method will scan the input
   * using the given {@code syntax} which e.g. allows to
   * {@link CharScannerSyntax#getEscape() escape} the {@code stop} character.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * after the (first) {@code stop} character or to the end of the string if NO
   * such character exists.
   * 
   * @param stop is the character to read until.
   * @param acceptEof if {@code true} EOF will be treated as {@code stop},
   *        too.
   * @param syntax contains the characters specific for the syntax to read.
   * @return the string with all read characters excluding the {@code stop}
   *         character or {@code null} if there was no {@code stop} character
   *         and {@code acceptEof} is {@code false}.
   */
  String readUntil(char stop, boolean acceptEof, CharScannerSyntax syntax);

  /**
   * This method reads all {@link #next() next characters} that are
   * {@link CharFilter#accept(char) accepted} by the given {@code filter}.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * that was NOT {@link CharFilter#accept(char) accepted} by the given
   * {@code filter} or to the end if NO such character exists.
   * 
   * @see #skipWhile(CharFilter)
   * 
   * @param filter is used to {@link CharFilter#accept(char) decide} which
   *        characters should be accepted.
   * @return a string with all characters
   *         {@link CharFilter#accept(char) accepted} by the given
   *         {@code filter}.
   */
  String readWhile(CharFilter filter);

  /**
   * This method reads all {@link #next() next characters} that are
   * {@link CharFilter#accept(char) accepted} by the given {@code filter}.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * that was NOT {@link CharFilter#accept(char) accepted} by the given
   * {@code filter}. If the next {@code max} characters or the characters left
   * until the {@link #hasNext() end} of this scanner are
   * {@link CharFilter#accept(char) accepted}, only that amount of characters
   * is read.
   * 
   * @see #skipWhile(CharFilter, int)
   * 
   * @param filter is used to {@link CharFilter#accept(char) decide} which
   *        characters should be accepted.
   * @param max is the maximum number of characters that should be read.
   * @return a string with all characters
   *         {@link CharFilter#accept(char) accepted} by the given
   *         {@code filter}, limited to the length of {@code max} and the
   *         {@link #hasNext() end} of this scanner.
   */
  String readWhile(CharFilter filter, int max);

  /**
   * This method consumes all {@link #next() next characters} until the given
   * {@code substring} has been detected.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * after the first occurrence of {@code substring} or to the end of the
   * string if the given {@code substring} was NOT found.
   * 
   * @param substring is the substring to search and skip over starting at the
   *        {@link #getCurrentIndex() current index}.
   * @param ignoreCase - if {@code true} the case of the characters is ignored
   *        when compared with characters from {@code substring}.
   * @return {@code true} if the given {@code substring} occurred and has been
   *         passed and {@code false} if the end of the string has been reached
   *         without any occurrence of the given {@code substring}.
   */
  boolean skipOver(String substring, boolean ignoreCase);

  /**
   * This method consumes all {@link #next() next characters} until the given
   * {@code substring} has been detected.
   * <p>
   * If a {@link CharFilter#accept(char) stop character} is detected by the
   * given {@code stopFilter}, this method returns {@code false} pointing to
   * the character next to that stop character. Otherwise after this method,
   * the {@link #getCurrentIndex() current index} will point to the next
   * character after the first occurrence of {@code substring} or to the end of
   * the string if the given {@code substring} was NOT found.
   * 
   * @param substring is the substring to search and skip over starting at the
   *        {@link #getCurrentIndex() current index}.
   * @param ignoreCase - if {@code true} the case of the characters is ignored
   *        when compared with characters from {@code substring}.
   * @param stopFilter is the filter used to
   *        {@link CharFilter#accept(char) detect} stop characters. If such
   *        character was detected, the skip is stopped and the parser points
   *        to the character after the stop character. The {@code substring}
   *        should NOT contain a {@link CharFilter#accept(char) stop character}.
   * @return {@code true} if the given {@code substring} occurred and has been
   *         passed and {@code false} if a stop character has been detected or
   *         the end of the string has been reached without any occurrence of
   *         the given {@code substring} or stop character.
   */
  boolean skipOver(String substring, boolean ignoreCase, CharFilter stopFilter);

  /**
   * This method skips all {@link #next() next characters} that are identical
   * to the character given by {@code c}.
   * <p>
   * E.g. use {@link #skipWhile(char) skipWhile}{@code (' ')} to skip all
   * blanks from the {@link #getCurrentIndex() current index}. After the call
   * of this method, the {@link #getCurrentIndex() current index} will point to
   * the next character that is different to {@code c} or to the end if NO such
   * character exists.
   * 
   * @param c is the character to read over.
   * @return the number of characters that have been skipped.
   */
  int skipWhile(char c);

  /**
   * This method skips all {@link #next() next characters} that are
   * {@link CharFilter#accept(char) accepted} by the given {@code filter}.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * that was NOT {@link CharFilter#accept(char) accepted} by the given
   * {@code filter} or to the end if NO such character exists.
   * 
   * @see #skipWhile(char)
   * 
   * @param filter is used to {@link CharFilter#accept(char) decide} which
   *        characters should be accepted.
   * @return the number of characters {@link CharFilter#accept(char) accepted}
   *         by the given {@code filter} that have been skipped.
   */
  int skipWhile(CharFilter filter);

  /**
   * This method skips all {@link #next() next characters} that are
   * {@link CharFilter#accept(char) accepted} by the given {@code filter}.
   * <p>
   * After the call of this method, the
   * {@link #getCurrentIndex() current index} will point to the next character
   * that was NOT {@link CharFilter#accept(char) accepted} by the given
   * {@code filter}. If the next {@code max} characters or the characters left
   * until the {@link #hasNext() end} of this scanner are
   * {@link CharFilter#accept(char) accepted}, only that amount of characters
   * is skipped.
   * 
   * @see #skipWhile(CharFilter)
   * 
   * @param filter is used to {@link CharFilter#accept(char) decide} which
   *        characters should be accepted.
   * @param max is the maximum number of characters that should be skipped.
   * @return the number of skipped characters.
   */
  int skipWhile(CharFilter filter, int max);

}