com.hp.hpl.sparta.xpath.SimpleStreamTokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pinyin4j Show documentation
Show all versions of pinyin4j Show documentation
Supports converting Chinese characters (both Simplified and Traditional) to the most popular Pinyin systems, including
Hanyu Pinyin, Tongyong Pinyin, Wade-Giles, MPS2, Yale and Gwoyeu Romatzyh. Supports multiple pronunciations and
customized output.
The newest version!
package com.hp.hpl.sparta.xpath;
import java.io.IOException;
import java.io.Reader;
/** Simplified replacement for java.util.StreamTokenizer which is not
available in J2ME.
Copyright (C) 2002 Hewlett-Packard Company.
This file is part of Sparta, an XML Parser, DOM, and XPath library.
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU
Lesser General Public License as published by the Free Software
Foundation; either version 2.1 of the License, or (at your option)
any later version. This library is distributed in the hope that it
will be useful, but WITHOUT ANY WARRANTY; without even the implied
warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
PURPOSE.
@version $Date: 2003/07/28 04:34:31 $ $Revision: 1.2 $
@author Eamonn O'Brien-Strain
*/
public class SimpleStreamTokenizer {

    /** Token type reported once the reader has no more characters. */
    public static final int TT_EOF = -1;

    /** Token type for a run of digit characters; the parsed value is left in {@link #nval}. */
    public static final int TT_NUMBER = -2;

    /** Token type for a run of word characters; the text is left in {@link #sval}. */
    public static final int TT_WORD = -3;

    /**
     * Type of the current token: one of the TT_ constants, the quote character
     * (single or double) for a quoted string, or the character itself for an
     * ordinary character.
     */
    public int ttype = Integer.MIN_VALUE;

    /** Value of the most recently completed {@link #TT_NUMBER} token. */
    public int nval = Integer.MIN_VALUE;

    /** Text of the most recently completed word or quoted-string token (surrounding quotes removed). */
    public String sval = "";

    /** Internal character class for characters that separate tokens outside of quotes. */
    static private final int WHITESPACE = -5;

    /** Accumulates the characters of the token that is currently being read. */
    private final StringBuffer buf_ = new StringBuffer();

    /** Type of the token whose first character has already been consumed (one-token lookahead). */
    private int nextType_;

    private final Reader reader_;

    /** Character class lookup for characters 0..255; larger characters are treated as word characters. */
    private final int[] charType_ = new int[256];

    private boolean pushedBack_ = false;

    /** The quote character of the quoted string being read, or 0 when not inside quotes. */
    private char inQuote_ = 0;

    /**
     * Renders the current token for debugging: numbers as digits, words and
     * double-quoted strings in double quotes, single-quoted strings and
     * ordinary characters in single quotes, end of input as "(EOF)".
     */
    public String toString() {
        switch (ttype) {
            case TT_NUMBER:
                return Integer.toString(nval);
            case TT_WORD:
            case '\"':
                return "\"" + sval + "\"";
            case '\'':
                return "\'" + sval + "\'";
            case TT_EOF:
                return "(EOF)";
            default:
                return "\'" + (char) ttype + "\'";
        }
    }

    /**
     * Creates a tokenizer over the given reader. ASCII letters and '-' are
     * word characters, ASCII digits are number characters, characters up to
     * and including U+0020 are whitespace, and every other character in the
     * table is an ordinary single-character token.
     *
     * <p>The constructor immediately calls {@link #nextToken()} once to read
     * ahead to the start of the first token; callers still call
     * {@code nextToken()} themselves to obtain the first token.
     *
     * @param reader source of characters
     * @throws IOException if reading ahead from the reader fails
     */
    public SimpleStreamTokenizer(Reader reader) throws IOException {
        reader_ = reader;
        for (char ch = 0; ch < charType_.length; ++ch) {
            if ('A' <= ch && ch <= 'Z' || 'a' <= ch && ch <= 'z' || ch == '-') {
                charType_[ch] = TT_WORD;
            } else if ('0' <= ch && ch <= '9') {
                charType_[ch] = TT_NUMBER;
            } else if ('\u0000' <= ch && ch <= '\u0020') {
                charType_[ch] = WHITESPACE;
            } else {
                charType_[ch] = ch;
            }
        }
        nextToken();
    }

    /**
     * Makes the given character an ordinary single-character token.
     * Precondition: ch &lt; 256 (the size of the classification table).
     */
    public void ordinaryChar(char ch) {
        charType_[ch] = ch;
    }

    /**
     * Makes every character in the inclusive range a word character.
     * Precondition: from &lt;= to and to &lt; 256 (the size of the classification table).
     */
    public void wordChars(char from, char to) {
        for (char ch = from; ch <= to; ++ch) {
            charType_[ch] = TT_WORD;
        }
    }

    /**
     * Advances to the next token and returns its type, which is also stored
     * in {@link #ttype}. For words and quoted strings the text is stored in
     * {@link #sval}; for numbers the value is stored in {@link #nval}. If
     * {@link #pushBack()} was called since the last token, the current token
     * type is returned again without reading anything.
     *
     * @return the type of the token now current
     * @throws IOException if the reader fails, or if the input ends inside a quoted string
     */
    public int nextToken() throws IOException {
        if (pushedBack_) {
            pushedBack_ = false;
            return ttype;
        }
        // The first character of this token was consumed by the previous call.
        ttype = nextType_;
        while (true) {
            int ch;
            int currentType;
            boolean transition = false;
            boolean whitespace;
            do {
                ch = reader_.read();
                if (ch == -1) {
                    if (inQuote_ != 0) {
                        throw new IOException("Unterminated quote");
                    }
                    currentType = TT_EOF;
                } else if (ch < charType_.length) {
                    currentType = charType_[ch];
                } else {
                    // Characters beyond the lookup table (e.g. CJK) used to index
                    // past the end of charType_; treat them as word characters,
                    // as java.io.StreamTokenizer does for characters above 255.
                    currentType = TT_WORD;
                }
                // Whitespace only separates tokens when we are not inside quotes.
                whitespace = inQuote_ == 0 && currentType == WHITESPACE;
                transition = transition || whitespace;
            } while (whitespace);

            if (currentType == '\'' || currentType == '\"') {
                if (inQuote_ == 0) {
                    inQuote_ = (char) currentType;
                } else if (inQuote_ == currentType) {
                    inQuote_ = 0;
                }
            }
            // Everything inside a quoted string takes the type of its quote character.
            if (inQuote_ != 0) {
                currentType = inQuote_;
            }

            // A token ends after whitespace, after any ordinary character or EOF
            // (each is a token on its own), or when the character class changes.
            transition =
                transition
                    || (ttype >= TT_EOF && ttype != '\'' && ttype != '\"')
                    || ttype != currentType;

            if (transition) {
                // Transition: we have a token to emit.
                switch (ttype) {
                    case TT_WORD:
                        sval = buf_.toString();
                        buf_.setLength(0);
                        break;
                    case '\'':
                    case '\"':
                        // Strip the opening and closing quote characters.
                        sval = buf_.toString().substring(1, buf_.length() - 1);
                        buf_.setLength(0);
                        break;
                    case TT_NUMBER:
                        nval = Integer.parseInt(buf_.toString());
                        buf_.setLength(0);
                        break;
                    default:
                        break;
                }
                if (currentType != WHITESPACE) {
                    nextType_ = currentType;
                }
            }

            // Start or extend the buffered text of the token this character belongs to.
            switch (currentType) {
                case TT_WORD:
                case TT_NUMBER:
                case '\'':
                case '\"':
                    buf_.append((char) ch);
                    break;
                default:
                    break;
            }
            if (transition) {
                return ttype;
            }
        }
    }

    /** Causes the next call to {@link #nextToken()} to return the current token again. */
    public void pushBack() {
        pushedBack_ = true;
    }
}