com.ibm.icu.impl.data.TokenIterator Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of icu4j Show documentation
Show all versions of icu4j Show documentation
International Component for Unicode for Java (ICU4J) is a mature, widely used Java library
providing Unicode and Globalization support
/*
**********************************************************************
* Copyright (c) 2004-2011, International Business Machines
* Corporation and others. All Rights Reserved.
**********************************************************************
* Author: Alan Liu
* Created: March 16 2004
* Since: ICU 3.0
**********************************************************************
*/
package com.ibm.icu.impl.data;
import java.io.IOException;
import com.ibm.icu.impl.PatternProps;
import com.ibm.icu.impl.Utility;
import com.ibm.icu.text.UTF16;
/**
* An iterator class that returns successive string tokens from some
* source. String tokens are, in general, separated by Pattern_White_Space
* in the source text. Furthermore, they may be delimited by
* either single or double quotes (opening and closing quotes must
* match). Escapes are processed using standard ICU unescaping.
*/
public class TokenIterator {

    /** Supplies the lines that are split into tokens. */
    private final ResourceReader reader;

    /** Line currently being scanned, or null when a new line must be fetched. */
    private String line;

    /** Scratch buffer that accumulates the characters of the current token. */
    private final StringBuffer buf;

    /** Set once the reader has reported end of input; next() then always returns null. */
    private boolean done;

    /** Zero-based offset in {@code line} at which the next scan will start. */
    private int pos;

    /** Zero-based offset in {@code line} at which the most recent scan started. */
    private int lastpos;

    /**
     * Construct an iterator over the tokens returned by the given
     * ResourceReader. Lines are fetched with
     * {@code readLineSkippingComments()}, so blank lines and comment lines
     * (first non-blank character is '#') are expected to be dropped by the
     * reader. Trailing comments on a line, beginning with the first
     * unquoted and unescaped '#', are recognized by this class.
     *
     * @param r the reader to pull lines from
     */
    public TokenIterator(ResourceReader r) {
        reader = r;
        line = null;
        done = false;
        buf = new StringBuffer();
        pos = lastpos = -1;
    }

    /**
     * Return the next token from this iterator, or null if the last
     * token has been returned.
     *
     * @return the next token (possibly the empty string for an empty
     *         quoted token), or null once the input is exhausted
     * @throws IOException if the underlying reader fails
     * @throws RuntimeException if the current line contains an invalid
     *         escape sequence or an unterminated quote
     */
    public String next() throws IOException {
        if (done) {
            return null;
        }
        for (;;) {
            if (line == null) {
                line = reader.readLineSkippingComments();
                if (line == null) {
                    // The reader has no more lines: remember that so later
                    // calls return null without touching the reader again.
                    done = true;
                    return null;
                }
                pos = 0;
            }
            buf.setLength(0);
            lastpos = pos;
            pos = nextToken(pos);
            if (pos < 0) {
                // Nothing (more) on this line; move on to the next one.
                line = null;
                continue;
            }
            return buf.toString();
        }
    }

    /**
     * Return the line number reported by the underlying reader, which is
     * the line of the last token returned by next(). Should only be called
     * after a call to next(); otherwise the return value is undefined.
     *
     * @return the reader's current line number
     */
    public int getLineNumber() {
        return reader.getLineNumber();
    }

    /**
     * Return a string description of the current position: the reader's
     * own position description followed by ':' and the one-based column
     * at which the scan for the most recent token started.
     *
     * @return a human-readable position string for diagnostics
     */
    public String describePosition() {
        return reader.describePosition() + ':' + (lastpos+1);
    }

    /**
     * Read the next token from 'this.line' and append it to
     * 'this.buf'. Tokens are separated by Pattern_White_Space. Tokens
     * may also be delimited by double or single quotes. The closing
     * quote must match the opening quote. If a '#' is encountered,
     * the rest of the line is ignored, unless it is backslash-escaped
     * or within quotes. Backslash escapes are honored everywhere in a
     * token, including at its very first character.
     *
     * @param position the offset into the string
     * @return offset to the next character to read from line, or if
     * the end of the line is reached without scanning a valid token,
     * -1
     * @throws RuntimeException on an invalid escape or an unterminated quote
     */
    private int nextToken(int position) {
        position = PatternProps.skipWhiteSpace(line, position);
        if (position == line.length()) {
            return -1;
        }
        int startpos = position;
        char c = line.charAt(position);
        char quote = 0;
        if (c == '"' || c == '\'') {
            // Opening quote: remember it and step past it.
            quote = c;
            ++position;
        } else if (c == '#') {
            // Comment starts where a token would: nothing left on this line.
            return -1;
        }
        // Any other first character is deliberately NOT consumed here, so
        // that the loop below treats it like every other character. In
        // particular a leading backslash is unescaped instead of being
        // copied literally into the token.
        int[] posref = null;
        while (position < line.length()) {
            c = line.charAt(position); // 16-bit ok
            if (c == '\\') {
                if (posref == null) {
                    posref = new int[1];
                }
                // unescapeAt expects the offset just past the backslash and
                // advances it past the escape sequence it consumed.
                posref[0] = position+1;
                int c32 = Utility.unescapeAt(line, posref);
                if (c32 < 0) {
                    throw new RuntimeException("Invalid escape at " +
                                               reader.describePosition() + ':' +
                                               position);
                }
                UTF16.append(buf, c32);
                position = posref[0];
            } else if ((quote != 0 && c == quote) ||
                       (quote == 0 && PatternProps.isWhiteSpace(c))) {
                // Token terminator (closing quote or white space): consume it.
                return ++position;
            } else if (quote == 0 && c == '#') {
                return position; // do NOT increment
            } else {
                buf.append(c);
                ++position;
            }
        }
        if (quote != 0) {
            throw new RuntimeException("Unterminated quote at " +
                                       reader.describePosition() + ':' +
                                       startpos);
        }
        return position;
    }
}