
// net.sf.mmm.util.scanner.CharSequenceScanner Maven / Gradle / Ivy
// The newest version!
/* $Id: CharSequenceScanner.java 401 2008-01-13 21:02:06Z hohwille $
* Copyright (c) The m-m-m Team, Licensed under the Apache License, Version 2.0
* http://www.apache.org/licenses/LICENSE-2.0 */
package net.sf.mmm.util.scanner;
import java.util.Locale;
import net.sf.mmm.util.filter.CharFilter;
/**
 * This class represents a {@link String} or better a sequence of characters
 * ({@code char[]}) together with a {@link #getCurrentIndex() position} in that
 * sequence. It has various useful methods for scanning the sequence. This
 * scanner is designed to be fast on long sequences and therefore internally
 * {@link String#toCharArray() converts} {@link String}s to a char array
 * instead of frequently calling {@link String#charAt(int)}.<br>
 * All indices accepted or returned by the public methods are relative to the
 * start of the scanned range, not to the underlying array.<br>
 * <b>ATTENTION:</b><br>
 * This implementation is NOT and has no intention to be thread-safe.
 *
 * @author Joerg Hohwiller (hohwille at users.sourceforge.net)
 */
public class CharSequenceScanner implements CharStreamScanner {

  /** @see #getOriginalString() */
  private String str;

  /** The characters to scan (only the range startIndex..endIndex is used). */
  private final char[] chars;

  /** The absolute position in {@link #chars}. @see #getCurrentIndex() */
  private int pos;

  /** The start-index in {@link #chars}. */
  private final int startIndex;

  /** The exclusive end-index in {@link #chars}. */
  private final int endIndex;

  /**
   * The length of the char-sequence:
   * {@code {@link #endIndex} - {@link #startIndex}}.
   */
  private final int length;

  /**
   * The constructor.
   *
   * @param charSequence is the {@link #getOriginalString() string} to scan.
   */
  public CharSequenceScanner(CharSequence charSequence) {

    this(charSequence.toString());
  }

  /**
   * The constructor.
   *
   * @param string is the {@link #getOriginalString() string} to parse.
   */
  public CharSequenceScanner(String string) {

    this(string.toCharArray());
    // remember the original so getOriginalString() does not have to rebuild it
    this.str = string;
  }

  /**
   * The constructor.
   *
   * @param characters is an array containing the characters to scan.
   */
  public CharSequenceScanner(char[] characters) {

    this(characters, 0, characters.length);
  }

  /**
   * The constructor.
   *
   * @param characters is an array containing the characters to scan. The array
   *        is NOT copied.
   * @param offset is the index of the first char to scan in
   *        {@code characters} (typically {@code 0} to start at the beginning
   *        of the array).
   * @param length is the {@link #getLength() number of characters} to scan
   *        from {@code characters} starting at {@code offset} (typically
   *        {@code characters.length - offset}).
   * @throws IndexOutOfBoundsException if {@code offset} or {@code length} is
   *         negative or the specified range exceeds {@code characters}.
   */
  public CharSequenceScanner(char[] characters, int offset, int length) {

    if (offset < 0) {
      throw new IndexOutOfBoundsException(Integer.toString(offset));
    }
    if (length < 0) {
      throw new IndexOutOfBoundsException(Integer.toString(length));
    }
    // written as subtraction so that offset + length can not overflow
    if (offset > characters.length - length) {
      throw new IndexOutOfBoundsException(Integer.toString(offset + length));
    }
    this.chars = characters;
    this.length = length;
    this.startIndex = offset;
    this.endIndex = offset + length;
    this.pos = this.startIndex;
  }

  /**
   * @see java.lang.CharSequence#charAt(int)
   *
   * @param index is the index of the requested character (relative to the
   *        start of the scanned range).
   * @return the character at the given {@code index}.
   * @throws IndexOutOfBoundsException if {@code index} is negative or not less
   *         than {@link #getLength()}.
   */
  public char charAt(int index) {

    // check against the scanned range, not just the underlying array: with an
    // offset the array may contain characters that do not belong to this scanner
    if ((index < 0) || (index >= this.length)) {
      throw new IndexOutOfBoundsException(Integer.toString(index));
    }
    return this.chars[this.startIndex + index];
  }

  /**
   * @see java.lang.CharSequence#length()
   *
   * @return the total length of the
   *         {@link #getOriginalString() string to parse}.
   */
  public int getLength() {

    return this.length;
  }

  /**
   * @see String#substring(int, int)
   * @see #appendSubstring(StringBuffer, int, int)
   *
   * @param start the start index, inclusive.
   * @param end the end index, exclusive.
   * @return the specified substring.
   */
  public String substring(int start, int end) {

    return new String(this.chars, this.startIndex + start, end - start);
  }

  /**
   * This method gets the {@link #getOriginalString() original string} where the
   * {@link #substring(int, int) substring} specified by {@code start} and
   * {@code end} is replaced by {@code substitute}.
   *
   * @param substitute is the string used as replacement.
   * @param start is the inclusive start index of the substring to replace.
   * @param end is the exclusive end index of the substring to replace.
   * @return the {@link #getOriginalString() original string} with the specified
   *         substring replaced by {@code substitute}.
   */
  public String getReplaced(String substitute, int start, int end) {

    // start and end are relative indices, so the number of characters after
    // the replaced range is relative to the length (not to the absolute
    // endIndex, which would be too large by startIndex)
    int restLength = this.length - end;
    StringBuilder buffer = new StringBuilder(start + restLength + substitute.length());
    buffer.append(this.chars, this.startIndex, start);
    buffer.append(substitute);
    buffer.append(this.chars, this.startIndex + end, restLength);
    return buffer.toString();
  }

  /**
   * This method appends the {@link #substring(int, int) substring} specified by
   * {@code start} and {@code end} to the given {@code buffer}.<br>
   * This avoids the overhead of creating a new string and copying the char
   * array.
   *
   * @param buffer is the buffer where to append the substring to.
   * @param start the start index, inclusive.
   * @param end the end index, exclusive.
   */
  public void appendSubstring(StringBuffer buffer, int start, int end) {

    buffer.append(this.chars, this.startIndex + start, end - start);
  }

  /**
   * {@inheritDoc}
   */
  public int getCurrentIndex() {

    return this.pos - this.startIndex;
  }

  /**
   * This method sets the {@link #getCurrentIndex() current index}.
   *
   * @param index is the next index position to set. The value has to be greater
   *        or equal to {@code 0} and less or equal to {@link #getLength()}.
   * @throws IndexOutOfBoundsException if {@code index} is out of that range.
   */
  public void setCurrentIndex(int index) {

    // yes, index == this.length is allowed - that is the state when the end
    // is reached and setCurrentIndex(getCurrentIndex()) should NOT cause an
    // exception...
    if ((index < 0) || (index > this.length)) {
      throw new IndexOutOfBoundsException(Integer.toString(index));
    }
    this.pos = this.startIndex + index;
  }

  /**
   * {@inheritDoc}
   */
  public boolean hasNext() {

    return (this.pos < this.endIndex);
  }

  /**
   * {@inheritDoc}
   * <br>
   * This implementation performs no check against the end of the scanned range;
   * use {@link #hasNext()} before or call {@link #forceNext()} instead.
   */
  public char next() {

    return this.chars[this.pos++];
  }

  /**
   * {@inheritDoc}
   * <br>
   * This implementation returns {@code 0} without moving the position if the
   * end has been reached.
   */
  public char forceNext() {

    if (this.pos < this.endIndex) {
      return this.chars[this.pos++];
    } else {
      return 0;
    }
  }

  /**
   * {@inheritDoc}
   * <br>
   * This implementation performs no check against the end of the scanned range;
   * use {@link #hasNext()} before or call {@link #forcePeek()} instead.
   */
  public char peek() {

    return this.chars[this.pos];
  }

  /**
   * {@inheritDoc}
   * <br>
   * This implementation returns {@code 0} if the end has been reached.
   */
  public char forcePeek() {

    if (this.pos < this.endIndex) {
      return this.chars[this.pos];
    } else {
      return 0;
    }
  }

  /**
   * This method decrements the {@link #getCurrentIndex() index} by one. If the
   * {@link #getCurrentIndex() index} is {@code 0} this method will have no
   * effect.<br>
   * E.g. use this method if you read a character too much.
   */
  public void stepBack() {

    if (this.pos > this.startIndex) {
      this.pos--;
    }
  }

  /**
   * {@inheritDoc}
   */
  public boolean skipUntil(char stop) {

    while (this.pos < this.endIndex) {
      // the stop character itself is consumed as well
      if (this.chars[this.pos++] == stop) {
        return true;
      }
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public String readUntil(char stop, boolean acceptEof) {

    int start = this.pos;
    while (this.pos < this.endIndex) {
      if (this.chars[this.pos++] == stop) {
        // -1 because the stop character is consumed but not part of the result
        return new String(this.chars, start, this.pos - start - 1);
      }
    }
    // end reached without hitting the stop character
    if (acceptEof) {
      int len = this.pos - start;
      if (len > 0) {
        return new String(this.chars, start, len);
      } else {
        return "";
      }
    } else {
      return null;
    }
  }

  /**
   * {@inheritDoc}
   * <br>
   * Implementation notes: the characters are collected in chunks. {@code index}
   * marks the start of the pending chunk. Whenever a character with a special
   * meaning (stop, escape, quote, entity start) is hit, the pending chunk up to
   * but excluding that character is flushed to the result and {@code index} is
   * moved behind it, so the special character itself is omitted from the
   * result.
   */
  public String readUntil(char stop, boolean acceptEof, CharScannerSyntax syntax) {

    StringBuilder result = new StringBuilder();
    char escape = syntax.getEscape();
    char quoteStart = syntax.getQuoteStart();
    char altQuoteStart = syntax.getAltQuoteStart();
    char entityStart = syntax.getEntityStart();
    // true if the previous character activated an escape for the current one
    boolean escapeActive = false;
    boolean done = false;
    // quoteEnd != 0 means we are currently inside a quotation
    char quoteEnd = 0;
    char quoteEscape = 0;
    // entityEnd != 0 means we are currently inside an entity
    char entityEnd = 0;
    boolean quoteLazy = false;
    // start of the chunk that has not yet been appended to result
    int index = this.pos;
    // exclusive end of the rest to append if the end is reached
    int restIndex = this.endIndex;
    while (this.pos < this.endIndex) {
      char c = this.chars[this.pos++];
      // if true the pending chunk (excluding c) is flushed to result
      boolean append = false;
      boolean newEscapeActive = false;
      if (quoteEnd != 0) {
        // in quotation
        if (escapeActive) {
          // current character c was escaped
          // it will be taken as is on the next append
        } else if (c == quoteEscape) {
          // escape in quote --> lookahead
          if (this.pos < this.endIndex) {
            c = this.chars[this.pos];
            if (c == quoteEnd) {
              // quoteEnd was escaped
              append = true;
              newEscapeActive = true;
            } else if (quoteEscape == quoteEnd) {
              // quotation done
              quoteEnd = 0;
              append = true;
            }
          } else {
            // end reached without stop char
            if (quoteEscape == quoteEnd) {
              // omit quote on appending of rest
              restIndex--;
            }
            break;
          }
        } else if (c == quoteEnd) {
          // quotation done
          quoteEnd = 0;
          append = true;
        }
      } else if (entityEnd != 0) {
        if (c == entityEnd) {
          // entity end detected...
          entityEnd = 0;
          // the entity name is the pending chunk excluding the end character
          int len = this.pos - index - 1;
          String entity = new String(this.chars, index, len);
          result.append(syntax.resolveEntity(entity));
          index = this.pos;
        }
      } else if (escapeActive) {
        // current character c was escaped
        // it will be taken as is on the next append
      } else if (c == stop) {
        append = true;
        done = true;
      } else if (c == escape) {
        append = true;
        newEscapeActive = true;
      } else if (c == entityStart) {
        entityEnd = syntax.getEntityEnd();
        append = true;
      } else {
        if (c == quoteStart) {
          quoteEnd = syntax.getQuoteEnd();
          quoteEscape = syntax.getQuoteEscape();
          quoteLazy = syntax.isQuoteEscapeLazy();
        } else if (c == altQuoteStart) {
          quoteEnd = syntax.getAltQuoteEnd();
          quoteEscape = syntax.getAltQuoteEscape();
          quoteLazy = syntax.isAltQuoteEscapeLazy();
        }
        if (quoteEnd != 0) {
          // a quotation has just been opened by c
          append = true;
          if ((quoteEnd == quoteEscape) && (c == quoteEscape) && (quoteLazy)) {
            // lazy quotation mode active --> lookahead
            if (this.pos < this.endIndex) {
              if (this.chars[this.pos] == quoteEscape) {
                // lazy quotation detected
                quoteEnd = 0;
                newEscapeActive = true;
              }
            }
          }
        }
      }
      if (append) {
        // flush the pending chunk without the current (special) character
        int len = this.pos - index - 1;
        if (len > 0) {
          result.append(this.chars, index, len);
        }
        if (done) {
          return result.toString();
        }
        index = this.pos;
      }
      escapeActive = newEscapeActive;
    }
    // end reached without hitting an unescaped, unquoted stop character
    if (acceptEof) {
      int len = restIndex - index;
      if (len > 0) {
        // append rest
        result.append(this.chars, index, len);
      }
      return result.toString();
    } else {
      return null;
    }
  }

  /**
   * {@inheritDoc}
   * <br>
   * If less than {@code count} characters are left, only the remaining
   * characters are read.
   */
  public String read(int count) {

    int len = this.endIndex - this.pos;
    if (len > count) {
      len = count;
    }
    String result = new String(this.chars, this.pos, len);
    this.pos += len;
    return result;
  }

  /**
   * {@inheritDoc}
   * <br>
   * This implementation returns {@code -1} and leaves the position untouched
   * if the next character is no ASCII digit or the end has been reached.
   */
  public int readDigit() {

    int result = -1;
    if (this.pos < this.endIndex) {
      char c = this.chars[this.pos];
      if ((c >= '0') && (c <= '9')) {
        result = c - '0';
        this.pos++;
      }
    }
    return result;
  }

  /**
   * {@inheritDoc}
   *
   * @throws IllegalArgumentException if {@code maxDigits} is not positive.
   * @throws NumberFormatException if no digit could be read or the digits read
   *         do not fit into a {@code long}.
   */
  public long readLong(int maxDigits) throws NumberFormatException {

    if (maxDigits <= 0) {
      throw new IllegalArgumentException();
    }
    int index = this.pos;
    // compute the bound by comparing against the remaining characters so that
    // a huge maxDigits (e.g. Integer.MAX_VALUE) can not overflow
    int end = this.endIndex;
    if (maxDigits < end - index) {
      end = index + maxDigits;
    }
    while (this.pos < end) {
      char c = this.chars[this.pos];
      if ((c < '0') || (c > '9')) {
        break;
      }
      this.pos++;
    }
    int len = this.pos - index;
    String number = new String(this.chars, index, len);
    return Long.parseLong(number);
  }

  /**
   * {@inheritDoc}
   */
  public boolean skipOver(String substring, boolean ignoreCase) {

    return skipOver(substring, ignoreCase, null);
  }

  /**
   * {@inheritDoc}
   * <br>
   * If the substring is not found, the position is moved to the end. If the
   * {@code stopFilter} accepts a character, scanning stops right behind that
   * character.
   */
  public boolean skipOver(String substring, boolean ignoreCase, CharFilter stopFilter) {

    int subLength = substring.length();
    if (subLength == 0) {
      return true;
    }
    char[] subChars = substring.toCharArray();
    if (ignoreCase) {
      // lower-case per character exactly as it is done for the scanned input
      // below. String.toLowerCase is context-sensitive (e.g. final sigma) and
      // may even change the length, which would break the comparison.
      for (int i = 0; i < subLength; i++) {
        subChars[i] = Character.toLowerCase(subChars[i]);
      }
    }
    // we can only find the substring at a position
    // until where enough chars are left to go...
    int max = this.endIndex - subLength;
    char first = subChars[0];
    while (this.pos <= max) {
      char c = this.chars[this.pos++];
      if ((stopFilter != null) && stopFilter.accept(c)) {
        return false;
      }
      if (ignoreCase) {
        c = Character.toLowerCase(c);
      }
      if (c == first) {
        // found first character, compare the remaining ones without moving pos
        int myCharsIndex = this.pos;
        int subCharsIndex = 1;
        boolean found = true;
        while (subCharsIndex < subLength) {
          c = this.chars[myCharsIndex++];
          if (ignoreCase) {
            c = Character.toLowerCase(c);
          }
          if (c != subChars[subCharsIndex++]) {
            found = false;
            break;
          }
        }
        if (found) {
          this.pos = myCharsIndex;
          return true;
        }
      }
    }
    // substring not found (EOF)
    this.pos = this.endIndex;
    return false;
  }

  /**
   * {@inheritDoc}
   * <br>
   * In this implementation the characters that matched remain consumed if a
   * later character differs or the end is reached.
   */
  public boolean expect(String expected, boolean ignoreCase) {

    int len = expected.length();
    for (int i = 0; i < len; i++) {
      if (this.pos >= this.endIndex) {
        return false;
      }
      char c = this.chars[this.pos];
      char exp = expected.charAt(i);
      if (c != exp) {
        if (!ignoreCase) {
          return false;
        }
        if (Character.toLowerCase(c) != Character.toLowerCase(exp)) {
          return false;
        }
      }
      this.pos++;
    }
    return true;
  }

  /**
   * {@inheritDoc}
   * <br>
   * The position is only advanced if the next character equals
   * {@code expected}.
   */
  public boolean expect(char expected) {

    if (this.pos < this.endIndex) {
      if (this.chars[this.pos] == expected) {
        this.pos++;
        return true;
      }
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public boolean skipUntil(char stop, char escape) {

    boolean escapeActive = false;
    while (this.pos < this.endIndex) {
      char c = this.chars[this.pos++];
      if (c == escape) {
        // toggling makes a doubled escape character cancel itself out
        escapeActive = !escapeActive;
      } else {
        if ((c == stop) && (!escapeActive)) {
          return true;
        }
        escapeActive = false;
      }
    }
    return false;
  }

  /**
   * {@inheritDoc}
   */
  public int skipWhile(char c) {

    int currentPos = this.pos;
    while (this.pos < this.endIndex) {
      if (this.chars[this.pos] != c) {
        break;
      }
      this.pos++;
    }
    return this.pos - currentPos;
  }

  /**
   * {@inheritDoc}
   */
  public int skipWhile(CharFilter filter) {

    return skipWhile(filter, Integer.MAX_VALUE);
  }

  /**
   * {@inheritDoc}
   *
   * @throws IllegalArgumentException if {@code max} is negative.
   */
  public int skipWhile(CharFilter filter, int max) {

    if (max < 0) {
      throw new IllegalArgumentException("Max must NOT be negative: " + max);
    }
    int currentPos = this.pos;
    // compute the bound by comparing against the remaining characters so that
    // a huge max (e.g. Integer.MAX_VALUE) can not overflow
    int end = this.endIndex;
    if (max < end - currentPos) {
      end = currentPos + max;
    }
    while (this.pos < end) {
      char c = this.chars[this.pos];
      if (!filter.accept(c)) {
        break;
      }
      this.pos++;
    }
    return this.pos - currentPos;
  }

  /**
   * {@inheritDoc}
   */
  public String readWhile(CharFilter filter) {

    int currentPos = this.pos;
    int len = skipWhile(filter);
    if (len == 0) {
      return "";
    } else {
      return new String(this.chars, currentPos, len);
    }
  }

  /**
   * {@inheritDoc}
   *
   * @throws IllegalArgumentException if {@code max} is negative.
   */
  public String readWhile(CharFilter filter, int max) {

    int currentPos = this.pos;
    // honor the given limit (it must be passed on to skipWhile)
    int len = skipWhile(filter, max);
    if (len == 0) {
      return "";
    } else {
      return new String(this.chars, currentPos, len);
    }
  }

  /**
   * This method gets the original string to parse. If this scanner was created
   * from a char array, the string is created from the scanned range on the
   * first call and cached.
   *
   * @see CharSequenceScanner#CharSequenceScanner(String)
   *
   * @return the original string.
   */
  public String getOriginalString() {

    if (this.str == null) {
      // lazily build (and cache) the string if we were constructed from chars
      this.str = new String(this.chars, this.startIndex, this.length);
    }
    return this.str;
  }

  /**
   * This method gets the rest of the sequence that has not yet been consumed,
   * i.e. the characters from the {@link #getCurrentIndex() current index} to
   * the end. The position is not changed.
   *
   * @return the remaining characters or the empty string if the end has been
   *         reached.
   */
  @Override
  public String toString() {

    if (this.pos < this.endIndex) {
      return new String(this.chars, this.pos, this.endIndex - this.pos);
    } else {
      return "";
    }
  }

}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy