org.frameworkset.util.tokenizer.Tokenizer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bboss-util Show documentation
Show all versions of bboss-util Show documentation
bboss is a J2EE framework that includes AOP/IoC, MVC, persistence, taglib, RPC, events, bean-XML serialization and so on. http://www.bbossgroups.com
/*
* Licensed under the GPL License. You may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://probe.jstripe.com/d/license.shtml
*
* THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
*/
package org.frameworkset.util.tokenizer;
import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.List;
/**
 * Splits the characters of a {@link Reader} into plain tokens, symbols and blocks.
 *
 * <p>Symbols are registered through the {@code addSymbol} methods. A symbol that has a
 * tail text is a block: everything between its start text and its tail text
 * belongs to the block. Characters that do not start a registered symbol are collected
 * into plain tokens. Hidden symbols are consumed from the input but never returned.
 *
 * <p>Input is read through a fixed-size sliding character cache. The class keeps
 * mutable parsing state and performs no synchronization.
 */
public class Tokenizer {
    /** Token type: a run of characters that does not belong to any symbol. */
    public static final int TT_TOKEN = 0;
    /** Token type: a registered symbol without a tail text. */
    public static final int TT_SYMBOL = 1;
    /** Token type: a registered symbol with a tail text, including the enclosed content. */
    public static final int TT_BLOCK = 2;
    /** Token type used as the "no token available" marker for the internal token slots. */
    public static final int TT_ERROR = 3;

    private Reader reader;
    // Raw list on purpose: it is searched with a Character key via Collections.binarySearch,
    // which relies on TokenizerSymbol being comparable to Character.
    // NOTE(review): binarySearch needs a sorted list - presumably UniqueList keeps its
    // elements sorted; confirm against UniqueList.
    private final List symbols;
    /** Number of pending {@link #pushBack()} calls that have not been consumed yet. */
    private int pushCount = 0;
    /** The token handed out to callers; reused for every call. */
    private final TokenizerToken token;
    /** A symbol/block found directly after a plain token; delivered by the next call. */
    private final TokenizerToken upcomingToken;
    /** Index of the next unread character in {@link #cacheBuffer}. */
    private int cachePosition;
    /** Number of valid characters in {@link #cacheBuffer}. */
    private int cacheSize;
    private final char[] cacheBuffer;
    /** Position to rewind to while a look-ahead comparison is in progress, or -1. */
    private int cachePinPosition;
    /** Set once the reader has reported end of stream, so it is not read again. */
    private boolean endOfStream;

    /** Creates a tokenizer without a reader and with a 4096 character cache. */
    public Tokenizer() {
        this(null, 4096);
    }

    /**
     * Creates a tokenizer with a 4096 character cache.
     *
     * @param reader the character source, may be {@code null}
     */
    public Tokenizer(Reader reader) {
        this(reader, 4096);
    }

    /**
     * Creates a tokenizer.
     *
     * @param reader the character source, may be {@code null}
     * @param cacheBufferSize the size of the internal character cache
     */
    public Tokenizer(Reader reader, int cacheBufferSize) {
        symbols = new UniqueList();
        token = new TokenizerToken();
        upcomingToken = new TokenizerToken();
        cacheBuffer = new char[cacheBufferSize];
        setReader(reader);
    }

    /**
     * Makes sure that, if the stream still has them, at least {@code count} unread
     * characters are present in the cache ({@code count == 0} is treated like 1).
     *
     * <p>When the cache is full, already consumed characters are dropped from the front
     * (at most half of the cache, and never past the current position) before more input
     * is read. {@link Reader#read(char[], int, int)} may return fewer characters than
     * requested without being at the end of the stream, so reading is repeated until the
     * cache is full or the reader reports end of stream.
     */
    private void loadCache(int count) throws IOException {
        if (reader == null || endOfStream) {
            return;
        }
        int charToRead = count == 0 ? 0 : count - 1;
        if (cachePosition + charToRead < cacheSize) {
            // enough unread characters are already cached
            return;
        }
        if (cacheSize == cacheBuffer.length && cachePosition > 0) {
            // Cache is full: release consumed characters. Rounding the half up keeps
            // odd-sized caches from losing their last character.
            int discard = Math.min((cacheSize + 1) / 2, cachePosition);
            int keep = cacheSize - discard;
            System.arraycopy(cacheBuffer, discard, cacheBuffer, 0, keep);
            cacheSize = keep;
            cachePosition -= discard;
            if (cachePinPosition != -1) {
                cachePinPosition -= discard;
            }
        }
        while (cacheSize < cacheBuffer.length) {
            int charsRead = reader.read(cacheBuffer, cacheSize, cacheBuffer.length - cacheSize);
            if (charsRead < 0) {
                endOfStream = true;
                break;
            }
            if (charsRead == 0) {
                // no progress; try again on the next request instead of spinning
                break;
            }
            cacheSize += charsRead;
        }
    }

    /**
     * Returns the current token, reading the first one if none has been read since the
     * reader was set.
     *
     * @return the current token
     * @throws IOException if reading from the underlying reader fails
     */
    public Token getToken() throws IOException {
        if (token.type == Tokenizer.TT_ERROR) {
            return nextToken();
        }
        return token;
    }

    /**
     * Advances to the next token and returns it.
     *
     * <p>A pending {@link #pushBack()} returns the current token again. The returned
     * object is reused by subsequent calls.
     *
     * @return the next token
     * @throws IOException if reading from the underlying reader fails
     */
    public Token nextToken() throws IOException {
        if (pushCount > 0) {
            pushCount--;
            return token;
        }
        if (upcomingToken.type != Tokenizer.TT_ERROR) {
            // deliver the symbol that terminated the previously returned plain token
            token.assign(upcomingToken);
            upcomingToken.type = Tokenizer.TT_ERROR;
            return token;
        }
        token.init();
        char[] b = new char[1];
        while (hasMoreChars()) {
            read(b, 1);
            int symbolIndex = lookupSymbol(b[0]);
            if (symbolIndex == -1) {
                token.text.append(b);
                token.type = Tokenizer.TT_TOKEN;
                continue;
            }
            // A symbol that follows collected plain text is parked in upcomingToken so
            // the plain token can be returned first.
            TokenizerToken workToken =
                    token.type == Tokenizer.TT_TOKEN && token.text.length() > 0 ? upcomingToken : token;
            readSymbol((TokenizerSymbol) symbols.get(symbolIndex), workToken, b);
            if (token.text.length() > 0) {
                break;
            }
        }
        return token;
    }

    /**
     * Consumes the remainder of a symbol whose start text has just been read and, unless
     * the symbol is hidden, stores it in {@code workToken}.
     *
     * <p>A hidden symbol leaves {@code workToken} completely untouched; otherwise a hidden
     * block would mark the parked token as pending and surface as a bogus token.
     * The scan for the tail text looks only at the remaining input characters, so an
     * unterminated block ends at the end of the stream.
     */
    private void readSymbol(TokenizerSymbol symbol, TokenizerToken workToken, char[] b) throws IOException {
        boolean visible = !symbol.hidden;
        if (visible) {
            workToken.init();
            workToken.text.append(symbol.startText);
            workToken.type = Tokenizer.TT_SYMBOL;
            workToken.name = symbol.name;
        }
        if (symbol.tailText == null) {
            return;
        }
        // the symbol is a block: collect everything up to the tail text
        char[] tail = symbol.tailText.toCharArray();
        while (hasMoreChars() && !compare(tail, 0)) {
            read(b, 1);
            if (visible) {
                workToken.text.append(b);
                workToken.innerText.append(b);
            }
        }
        if (visible) {
            workToken.text.append(symbol.tailText);
            workToken.type = Tokenizer.TT_BLOCK;
        }
    }

    /** Makes the next call to {@link #nextToken()} return the current token again. */
    public void pushBack() {
        pushCount++;
    }

    /**
     * Replaces the character source and resets the cache and both token slots.
     * Registered symbols and pending push-backs are kept.
     *
     * @param reader the new character source, may be {@code null}
     */
    public void setReader(Reader reader) {
        this.reader = reader;
        cachePosition = 0;
        cachePinPosition = -1;
        cacheSize = 0;
        endOfStream = false;
        token.type = TT_ERROR;
        upcomingToken.type = TT_ERROR;
    }

    /**
     * Checks whether the upcoming input equals {@code chars} starting at {@code offs}.
     * On a match the compared characters are consumed; otherwise the read position is
     * left where it was. Running out of input counts as a mismatch.
     */
    private boolean compare(char[] chars, int offs) throws IOException {
        int length = chars.length - offs;
        char[] b = new char[length];
        cachePinPosition = cachePosition;
        boolean matched = read(b, length) == length;
        for (int i = 0; matched && i < length; i++) {
            matched = b[i] == chars[i + offs];
        }
        if (!matched) {
            cachePosition = cachePinPosition;
        }
        cachePinPosition = -1;
        return matched;
    }

    /**
     * Finds the registered symbol that starts with {@code b} and whose remaining start
     * text matches the upcoming input; the matched characters are consumed.
     *
     * @return the index of the symbol in the symbol list, or -1 if none matches
     */
    private int lookupSymbol(char b) throws IOException {
        Character c = Character.valueOf(b);
        int index = Collections.binarySearch(symbols, c);
        if (index < 0) {
            return -1;
        }
        // The hit can be anywhere inside a group of symbols that share the first
        // character, so step back to the beginning of the group first.
        while (index > 0 && ((TokenizerSymbol) symbols.get(index - 1)).compareTo(c) == 0) {
            index--;
        }
        while (index < symbols.size()) {
            TokenizerSymbol symbol = (TokenizerSymbol) symbols.get(index);
            if (symbol.compareTo(c) != 0) {
                break;
            }
            if (compare(symbol.startText.toCharArray(), 1)) {
                return index;
            }
            index++;
        }
        return -1;
    }

    /**
     * Copies up to {@code count} unread characters into {@code b} and consumes them.
     * Never reads past the valid part of the cache.
     *
     * @return the number of characters actually copied
     */
    private int read(char[] b, int count) throws IOException {
        loadCache(count);
        int available = Math.max(0, cacheSize - cachePosition);
        int length = Math.min(count, available);
        if (length > 0) {
            System.arraycopy(cacheBuffer, cachePosition, b, 0, length);
        }
        cachePosition += length;
        return length;
    }

    /** Tells whether unread input characters remain, loading the cache if necessary. */
    private boolean hasMoreChars() throws IOException {
        loadCache(0);
        return cachePosition < cacheSize;
    }

    /**
     * Tells whether {@link #nextToken()} has anything left to deliver: unread input, a
     * parked symbol, or a pushed-back token. Returns {@code false} for unread input when
     * no reader is set.
     *
     * @return {@code true} if another token can be requested
     * @throws IOException if reading from the underlying reader fails
     */
    public boolean hasMore() throws IOException {
        return hasMoreChars() || upcomingToken.type != Tokenizer.TT_ERROR || pushCount > 0;
    }

    /**
     * Registers a visible symbol without a tail text.
     *
     * @param text the symbol text
     */
    public void addSymbol(String text) {
        symbols.add(new TokenizerSymbol(null, text, null, false, false, true, false));
    }

    /**
     * Registers a symbol without a tail text.
     *
     * @param text the symbol text
     * @param hidden whether the symbol is consumed without being returned as a token
     */
    public void addSymbol(String text, boolean hidden) {
        symbols.add(new TokenizerSymbol(null, text, null, hidden, false, true, false));
    }

    /**
     * Registers a block symbol.
     *
     * @param startText the text that opens the block
     * @param endText the text that closes the block
     * @param hidden whether the block is consumed without being returned as a token
     */
    public void addSymbol(String startText, String endText, boolean hidden) {
        symbols.add(new TokenizerSymbol(null, startText, endText, hidden, false, true, false));
    }

    /**
     * Registers a preconfigured symbol.
     *
     * @param symbol the symbol to add
     */
    public void addSymbol(TokenizerSymbol symbol) {
        symbols.add(symbol);
    }

    /**
     * Returns the inner text of the next token.
     *
     * @param defaultValue the value to return when nothing is left to read
     * @return the inner text of the next token, or {@code defaultValue}
     * @throws IOException if reading from the underlying reader fails
     */
    public String getNextString(String defaultValue) throws IOException {
        return hasMore() ? nextToken().getInnerText() : defaultValue;
    }

    /**
     * Reads the next token and compares its inner text, ignoring case, with
     * {@code trueValue}.
     *
     * @param trueValue the text that stands for {@code true}
     * @param defaultValue the value to return when nothing is left to read
     * @return whether the next token equals {@code trueValue}, or {@code defaultValue}
     * @throws IOException if reading from the underlying reader fails
     */
    public boolean getNextBoolean(String trueValue, boolean defaultValue) throws IOException {
        return hasMore() ? trueValue.equalsIgnoreCase(nextToken().getInnerText()) : defaultValue;
    }

    /**
     * Reads the next token and parses its inner text as a {@code long}.
     *
     * @param defaultValue the value to return when nothing is left to read or the text is
     *        not a valid {@code long}
     * @return the parsed value, or {@code defaultValue}
     * @throws IOException if reading from the underlying reader fails
     */
    public long getNextLong(long defaultValue) throws IOException {
        String stval = getNextString(null);
        if (stval == null) {
            return defaultValue;
        }
        try {
            return Long.parseLong(stval);
        } catch (NumberFormatException e) {
            // a token that is not a number is treated like a missing value
            return defaultValue;
        }
    }
}
/**
 * Mutable {@link Token} implementation whose fields are package-visible so that the
 * tokenizer can fill them in directly.
 *
 * <p>{@code text} holds the complete token text and {@code innerText} the content
 * reported for tokens of type {@link Tokenizer#TT_BLOCK}. A new instance starts with
 * type {@link Tokenizer#TT_ERROR}. {@code line} and {@code col} are never written by
 * this class except when copied in {@link #assign(TokenizerToken)}.
 */
class TokenizerToken implements Token {
    final StringBuffer text = new StringBuffer();
    final StringBuffer innerText = new StringBuffer();
    String name = "";
    int type = Tokenizer.TT_ERROR;
    int line = 0;
    int col = 0;

    /** Creates an empty token of type {@link Tokenizer#TT_ERROR}. */
    public TokenizerToken() {
        // all state is set up by the field initializers above
    }

    /** Returns the complete token text. */
    public String getText() {
        return text.toString();
    }

    /** Returns the inner text for block tokens and the complete text for all other types. */
    public String getInnerText() {
        if (type == Tokenizer.TT_BLOCK) {
            return innerText.toString();
        }
        return getText();
    }

    /** Returns the token name. */
    public String getName() {
        return name;
    }

    /** Returns the token type, one of the {@code Tokenizer.TT_*} constants. */
    public int getType() {
        return type;
    }

    /** Returns the stored line value. */
    public int getLine() {
        return line;
    }

    /** Returns the stored column value. */
    public int getCol() {
        return col;
    }

    /** Returns the same string as {@link #getText()}. */
    @Override
    public String toString() {
        return getText();
    }

    /**
     * Copies every field of {@code token} into this instance; the text buffers are
     * copied by content, not shared.
     */
    public void assign(TokenizerToken token) {
        copyInto(this.text, token.text);
        copyInto(this.innerText, token.innerText);
        this.name = token.name;
        this.type = token.type;
        this.col = token.col;
        this.line = token.line;
    }

    /**
     * Clears both text buffers and the name. The type, line and column are deliberately
     * left as they are.
     */
    public void init() {
        text.setLength(0);
        innerText.setLength(0);
        name = "";
    }

    /** Replaces the content of {@code target} with the content of {@code source}. */
    private static void copyInto(StringBuffer target, StringBuffer source) {
        target.setLength(0);
        target.append(source);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy