All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.frameworkset.util.tokenizer.Tokenizer Maven / Gradle / Ivy

Go to download

bboss is a J2EE framework that includes AOP/IoC, MVC, persistence, taglib, RPC, event handling, bean-XML serialization and so on. http://www.bbossgroups.com

There is a newer version: 6.2.7
Show newest version
/*
 * Licensed under the GPL License. You may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://probe.jstripe.com/d/license.shtml
 *
 *  THIS PACKAGE IS PROVIDED "AS IS" AND WITHOUT ANY EXPRESS OR
 *  IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
 *  WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE.
 */

package org.frameworkset.util.tokenizer;

import java.io.IOException;
import java.io.Reader;
import java.util.Collections;
import java.util.List;

/**
 * Splits the characters of a {@link Reader} into plain-text tokens, symbols and blocks.
 *
 * <p>Symbols are registered through the {@code addSymbol} overloads. A symbol that has a
 * tail text is a block: everything between its start text and its tail text is collected as
 * the block's inner text. Hidden symbols are consumed from the input but never reported as
 * tokens; they only terminate the text token that precedes them.
 *
 * <p>The same {@link Token} instance is returned by every call and is overwritten by the next
 * one, so callers must copy whatever they need before advancing.
 */
public class Tokenizer {
    /** Token type: plain text that did not match any registered symbol. */
    public static final int TT_TOKEN = 0;
    /** Token type: a registered symbol without tail text. */
    public static final int TT_SYMBOL = 1;
    /** Token type: a registered symbol with tail text, i.e. a start/end delimited block. */
    public static final int TT_BLOCK = 2;
    /** Marker type meaning "no token has been produced / nothing is pending". */
    public static final int TT_ERROR = 3;

    private Reader reader;
    // Raw list because it holds TokenizerSymbol entries but is binary-searched with a Character key.
    // NOTE(review): binarySearch requires the list to be sorted; presumably UniqueList keeps it so.
    private final List symbols;
    // Number of pending pushBack() calls; while > 0 nextToken() re-delivers the current token.
    private int pushCount = 0;
    // Token handed out to callers.
    private final TokenizerToken token;
    // Symbol/block that was read while finishing a text token; delivered by the next nextToken().
    private final TokenizerToken upcomingToken;
    // Look-ahead cache; invariant: 0 <= cachePosition <= cacheSize <= cacheBuffer.length.
    private int cachePosition;
    private int cacheSize;
    private final char[] cacheBuffer;
    // Position to rewind to when a look-ahead comparison fails, or -1 when none is active.
    private int cachePinPosition;
    // Set once the reader has reported end of stream; reset by setReader().
    private boolean endOfStream;

    /** Creates a tokenizer without a reader and with a 4096-char cache. */
    public Tokenizer() {
        this(null, 4096);
    }

    /**
     * Creates a tokenizer with a 4096-char cache.
     *
     * @param reader the character source, may be {@code null} until {@link #setReader(Reader)} is called
     */
    public Tokenizer(Reader reader) {
        this(reader, 4096);
    }

    /**
     * Creates a tokenizer.
     *
     * @param reader the character source, may be {@code null} until {@link #setReader(Reader)} is called
     * @param cacheBufferSize size of the look-ahead cache in chars
     */
    public Tokenizer(Reader reader, int cacheBufferSize) {
        symbols = new UniqueList();
        token = new TokenizerToken();
        upcomingToken = new TokenizerToken();
        cacheBuffer = new char[cacheBufferSize];
        setReader(reader);
    }

    /**
     * Makes sure that, if the stream still has them, {@code count} chars (at least one position
     * when {@code count} is 0) are available in the cache starting at {@code cachePosition}.
     *
     * <p>A short read from the reader is not treated as end of stream: the cache is topped up
     * until the request can be satisfied, the buffer is full, or the reader returns -1.
     *
     * @param count number of chars the caller is about to consume
     * @throws IOException if the reader fails
     */
    private void loadCache(int count) throws IOException {
        int lookahead = count == 0 ? 0 : count - 1;
        if (reader == null || endOfStream || cachePosition + lookahead < cacheSize) {
            return;
        }

        if (cacheSize == cacheBuffer.length && cachePosition > 0) {
            // Buffer is full: drop the chars that have already been consumed. Nothing before
            // cachePosition is ever re-read, because compare() pins the current position
            // immediately before it reads.
            int consumed = cachePosition;
            System.arraycopy(cacheBuffer, consumed, cacheBuffer, 0, cacheSize - consumed);
            cacheSize -= consumed;
            cachePosition = 0;
            if (cachePinPosition != -1) {
                cachePinPosition -= consumed;
            }
        }

        while (cacheSize < cacheBuffer.length && cachePosition + lookahead >= cacheSize) {
            int charsRead = reader.read(cacheBuffer, cacheSize, cacheBuffer.length - cacheSize);
            if (charsRead < 0) {
                endOfStream = true;
                break;
            }
            if (charsRead == 0) {
                // Defensive: do not spin on a reader that reports no progress.
                break;
            }
            cacheSize += charsRead;
        }
    }

    /**
     * Returns the current token, reading the first one if none has been read yet.
     *
     * @return the current token
     * @throws IOException if the reader fails
     */
    public Token getToken() throws IOException {
        if (token.type == Tokenizer.TT_ERROR) {
            return nextToken();
        }
        return token;
    }

    /**
     * Advances to the next token and returns it.
     *
     * <p>Pending {@link #pushBack()} calls are honoured first, then a symbol that was already
     * read while finishing the previous text token; otherwise new input is consumed.
     *
     * @return the shared token instance holding the new token
     * @throws IOException if the reader fails
     */
    public Token nextToken() throws IOException {
        if (pushCount > 0) {
            pushCount--;
            return token;
        }
        if (upcomingToken.type != Tokenizer.TT_ERROR) {
            token.assign(upcomingToken);
            upcomingToken.type = Tokenizer.TT_ERROR;
            return token;
        }

        token.init();
        char[] b = new char[1];
        while (hasMoreChars()) {
            read(b, 1);

            int symbolIndex = lookupSymbol(b[0]);
            if (symbolIndex == -1) {
                // ordinary character: extend the current text token
                token.text.append(b);
                token.type = Tokenizer.TT_TOKEN;
                continue;
            }

            // A symbol was found. If a text token is already in progress the symbol is stored
            // in upcomingToken and delivered by the next call.
            TokenizerToken workToken =
                    token.type == Tokenizer.TT_TOKEN && token.text.length() > 0 ? upcomingToken : token;
            TokenizerSymbol symbol = (TokenizerSymbol) symbols.get(symbolIndex);
            boolean visible = !symbol.hidden;

            if (visible) {
                workToken.init();
                workToken.text.append(symbol.startText);
                workToken.type = Tokenizer.TT_SYMBOL;
                workToken.name = symbol.name;
            }

            if (symbol.tailText != null) {
                // The symbol is a block: consume input up to and including the tail text.
                // Only real input is tested here; using hasMore() would never terminate on an
                // unterminated block once upcomingToken has been marked as pending.
                char[] tail = symbol.tailText.toCharArray();
                while (hasMoreChars() && !compare(tail, 0)) {
                    read(b, 1);
                    if (visible) {
                        workToken.text.append(b);
                        workToken.innerText.append(b);
                    }
                }

                if (visible) {
                    workToken.text.append(symbol.tailText);
                    // A hidden block must leave workToken untouched, otherwise it would be
                    // delivered later as a phantom token.
                    workToken.type = Tokenizer.TT_BLOCK;
                }
            }

            if (token.text.length() > 0) {
                break;
            }
        }
        return token;
    }

    /** Makes the next {@link #nextToken()} call return the current token again. */
    public void pushBack() {
        pushCount++;
    }

    /**
     * Replaces the character source and discards all cached input and pending tokens.
     *
     * @param reader the new character source
     */
    public void setReader(Reader reader) {
        this.reader = reader;
        cachePosition = 0;
        cachePinPosition = -1;
        cacheSize = 0;
        endOfStream = false;
        token.type = TT_ERROR;
        upcomingToken.type = TT_ERROR;
    }

    /**
     * Tests whether the upcoming input equals {@code chars} from index {@code offs} onwards.
     * The matched chars are consumed on success; on failure the read position is restored.
     *
     * @param chars the text to match
     * @param offs index of the first char of {@code chars} to match
     * @return {@code true} if the input matched completely
     * @throws IOException if the reader fails
     */
    private boolean compare(char[] chars, int offs) throws IOException {
        int length = chars.length - offs;
        char[] b = new char[length];
        cachePinPosition = cachePosition;

        // a short read means the input ended before the text could match
        boolean matched = read(b, length) == length;
        for (int i = 0; matched && i < length; i++) {
            matched = b[i] == chars[i + offs];
        }

        if (!matched) {
            cachePosition = cachePinPosition;
        }
        cachePinPosition = -1;
        return matched;
    }

    /**
     * Finds the registered symbol that starts with {@code b} and whose remaining start text
     * matches the upcoming input. On success the rest of the start text is consumed.
     *
     * @param b the char that was just read
     * @return index of the symbol in the symbol list, or -1 if none matches
     * @throws IOException if the reader fails
     */
    private int lookupSymbol(char b) throws IOException {
        Character c = Character.valueOf(b);
        int index = Collections.binarySearch(symbols, c);
        if (index < 0) {
            return -1;
        }

        // the hit may be anywhere inside a group of symbols sharing the same first char,
        // so rewind to the start of the group before testing its members in order
        while (index > 0 && ((TokenizerSymbol) symbols.get(index - 1)).compareTo(c) == 0) {
            index--;
        }
        for (; index < symbols.size(); index++) {
            TokenizerSymbol symbol = (TokenizerSymbol) symbols.get(index);
            if (symbol.compareTo(c) != 0) {
                break;
            }
            if (compare(symbol.startText.toCharArray(), 1)) {
                return index;
            }
        }
        return -1;
    }

    /**
     * Copies up to {@code count} chars from the cache into {@code b} and advances the read
     * position by the number of chars copied. Never reads beyond the valid cache content.
     *
     * @param b destination, must hold at least {@code count} chars
     * @param count number of chars requested
     * @return number of chars actually copied, less than {@code count} only at end of input
     * @throws IOException if the reader fails
     */
    private int read(char[] b, int count) throws IOException {
        loadCache(count);
        int available = Math.max(0, cacheSize - cachePosition);
        int length = Math.min(count, available);
        if (length > 0) {
            System.arraycopy(cacheBuffer, cachePosition, b, 0, length);
            cachePosition += length;
        }
        return length;
    }

    /** Returns whether unread input chars remain, loading the cache if necessary. */
    private boolean hasMoreChars() throws IOException {
        loadCache(0);
        return cachePosition < cacheSize;
    }

    /**
     * Returns whether {@link #nextToken()} has anything left to deliver: unread input, a
     * pending symbol, or a pushed-back token.
     *
     * @return {@code true} if more can be read
     * @throws IOException if the reader fails
     */
    public boolean hasMore() throws IOException {
        return hasMoreChars() || upcomingToken.type != Tokenizer.TT_ERROR || pushCount > 0;
    }

    /**
     * Registers a visible symbol.
     *
     * @param text the symbol text
     */
    public void addSymbol(String text) {
        symbols.add(new TokenizerSymbol(null, text, null, false, false, true, false));
    }

    /**
     * Registers a symbol.
     *
     * @param text the symbol text
     * @param hidden whether the symbol is consumed without being reported as a token
     */
    public void addSymbol(String text, boolean hidden) {
        symbols.add(new TokenizerSymbol(null, text, null, hidden, false, true, false));
    }

    /**
     * Registers a block symbol delimited by a start and an end text.
     *
     * @param startText text that opens the block
     * @param endText text that closes the block
     * @param hidden whether the block is consumed without being reported as a token
     */
    public void addSymbol(String startText, String endText, boolean hidden) {
        symbols.add(new TokenizerSymbol(null, startText, endText, hidden, false, true, false));
    }

    /**
     * Registers a pre-built symbol.
     *
     * @param symbol the symbol to add
     */
    public void addSymbol(TokenizerSymbol symbol) {
        symbols.add(symbol);
    }

    /**
     * Returns the inner text of the next token.
     *
     * @param defaultValue value returned when nothing is left to read
     * @return the next token's inner text or {@code defaultValue}
     * @throws IOException if the reader fails
     */
    public String getNextString(String defaultValue) throws IOException {
        return hasMore() ? nextToken().getInnerText() : defaultValue;
    }

    /**
     * Reads the next token and compares its inner text, ignoring case, with {@code trueValue}.
     *
     * @param trueValue the text that stands for {@code true}
     * @param defaultValue value returned when nothing is left to read
     * @return whether the next token equals {@code trueValue}, or {@code defaultValue}
     * @throws IOException if the reader fails
     */
    public boolean getNextBoolean(String trueValue, boolean defaultValue) throws IOException {
        return hasMore() ? trueValue.equalsIgnoreCase(nextToken().getInnerText()) : defaultValue;
    }

    /**
     * Reads the next token and parses its inner text as a {@code long}.
     *
     * @param defaultValue value returned when nothing is left to read or the text is not a number
     * @return the parsed value or {@code defaultValue}
     * @throws IOException if the reader fails
     */
    public long getNextLong(long defaultValue) throws IOException {
        String stval = getNextString(null);

        if (stval == null) {
            return defaultValue;
        }

        try {
            return Long.parseLong(stval);
        } catch (NumberFormatException e) {
            // not a number: fall back to the caller-supplied default
            return defaultValue;
        }
    }
}

/**
 * Mutable {@link Token} used internally by {@link Tokenizer}.
 *
 * <p>The fields are package-private so the tokenizer can fill them in directly; the public
 * getters expose a read-only view to callers.
 */
class TokenizerToken implements Token {
    /** Complete text of the token. */
    final StringBuilder text = new StringBuilder();
    /** Text collected separately for blocks; reported by {@link #getInnerText()} for block tokens. */
    final StringBuilder innerText = new StringBuilder();
    String name = "";
    int type = Tokenizer.TT_ERROR;
    int line = 0;
    int col = 0;

    /** Creates an empty token of type {@link Tokenizer#TT_ERROR}. */
    public TokenizerToken() {
        // all state is set by the field initialisers above
    }

    /** Returns the complete token text. */
    public String getText() {
        return text.toString();
    }

    /** Returns the inner text for block tokens and the complete text for every other type. */
    public String getInnerText() {
        if (type == Tokenizer.TT_BLOCK) {
            return innerText.toString();
        }
        return getText();
    }

    /** Returns the token name. */
    public String getName() {
        return name;
    }

    /** Returns the token type, one of the {@code Tokenizer.TT_*} constants. */
    public int getType() {
        return type;
    }

    /** Returns the value of the {@code line} field. */
    public int getLine() {
        return line;
    }

    /** Returns the value of the {@code col} field. */
    public int getCol() {
        return col;
    }

    /** Same as {@link #getText()}. */
    @Override
    public String toString() {
        return getText();
    }

    /**
     * Copies every field of {@code token} into this instance.
     *
     * @param token the token to copy from
     */
    public void assign(TokenizerToken token) {
        overwrite(text, token.text);
        overwrite(innerText, token.innerText);
        name = token.name;
        type = token.type;
        col = token.col;
        line = token.line;
    }

    /** Clears both text buffers and the name; type, line and col are left as they are. */
    public void init() {
        name = "";
        text.setLength(0);
        innerText.setLength(0);
    }

    /** Replaces the content of {@code target} with the content of {@code source}. */
    private static void overwrite(StringBuilder target, CharSequence source) {
        target.setLength(0);
        target.append(source);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy