All downloads are free. The search and download functionality uses the official Maven repository.

software.amazon.awssdk.codegen.poet.rules2.Tokenizer Maven / Gradle / Ivy

/*
 * Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License").
 * You may not use this file except in compliance with the License.
 * A copy of the License is located at
 *
 *  http://aws.amazon.com/apache2.0
 *
 * or in the "license" file accompanying this file. This file is distributed
 * on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing
 * permissions and limitations under the License.
 */

package software.amazon.awssdk.codegen.poet.rules2;

import java.util.ArrayList;
import java.util.List;
import java.util.function.BiConsumer;
import java.util.function.Consumer;
import software.amazon.awssdk.utils.ToString;

/**
 * Tokenizer for string literals inside a rule set document.
 */
public class Tokenizer {
    private static final Token EOF = new Token(TokenKind.EOF, "");
    private final List tokens;
    private int index = 0;

    public Tokenizer(String source) {
        this.tokens = tokenize(source);
    }

    private static List tokenize(String source) {
        List tokens = new ArrayList<>();
        TokenizerState state = new TokenizerState(source);
        do {
            Token token = next(state);
            tokens.add(token);
            if (token.type == TokenKind.EOF) {
                break;
            }
        } while (true);
        return tokens;
    }

    private static Token next(TokenizerState state) {
        if (!state.hasNext()) {
            return EOF;
        }
        char ch = state.next();
        if (ch == '{') {
            if (state.peek() == '{') {
                state.next();
                return consumeString(state, '{');
            }
            return new Token(TokenKind.OPEN_CURLY, "{");
        }
        if (ch == '}') {
            if (state.peek() == '}') {
                state.next();
                return consumeString(state, '}');
            }
            return new Token(TokenKind.CLOSE_CURLY, "}");
        }
        if (ch == '[') {
            if (state.peek() == '[') {
                state.next();
                return consumeString(state, '[');
            }
            return new Token(TokenKind.OPEN_SQUARE, "[");
        }
        if (ch == ']') {
            if (state.peek() == ']') {
                state.next();
                return consumeString(state, ']');
            }
            return new Token(TokenKind.CLOSE_SQUARE, "]");
        }
        if (ch == '#') {
            return new Token(TokenKind.HASH, "#");
        }
        if (isDigit(ch)) {
            return consumeNumber(state, ch);
        }
        if (isIdentifierStart(ch)) {
            return consumeIdentifierOrString(state, ch);
        }
        return consumeString(state, ch);
    }

    private static Token consumeNumber(TokenizerState state, char start) {
        StringBuilder buf = new StringBuilder();
        buf.append(start);
        do {
            char ch = state.peek();
            if (!isDigit(ch)) {
                break;
            }
            buf.append(state.next());
        } while (true);
        return new Token(TokenKind.NUMBER, buf.toString());
    }

    private static Token consumeIdentifierOrString(TokenizerState state, char start) {
        StringBuilder buf = new StringBuilder();
        buf.append(start);
        char ch;
        do {
            ch = state.peek();
            if (!isIdentifierPart(ch)) {
                break;
            }
            buf.append(state.next());
        } while (true);
        if (isSpecialChar(ch)) {
            return new Token(TokenKind.IDENTIFIER, buf.toString());
        }
        return consumeString(state, buf);
    }

    private static Token consumeString(TokenizerState state, char start) {
        StringBuilder buf = new StringBuilder();
        buf.append(start);
        return consumeString(state, buf);
    }

    private static Token consumeString(TokenizerState state, StringBuilder buf) {
        do {
            char ch = state.peek();
            if (isSpecialChar(ch)) {
                break;
            }
            buf.append(state.next());
        } while (true);
        return new Token(TokenKind.STRING, buf.toString());
    }

    private static boolean isSpecialChar(char ch) {
        switch (ch) {
            case 0:
            case '{':
            case '}':
            case '[':
            case ']':
            case '#':
                return true;
            default:
                return false;
        }
    }

    private static boolean isIdentifierStart(char ch) {
        return (ch >= 'a' && ch <= 'z')
               || (ch >= 'A' && ch <= 'Z')
               || (ch == '_');
    }

    private static boolean isIdentifierPart(char ch) {
        return (ch >= 'a' && ch <= 'z')
               || (ch >= 'A' && ch <= 'Z')
               || (ch >= '0' && ch <= '9')
               || (ch == '_');
    }

    private static boolean isDigit(char ch) {
        return ch >= '0' && ch <= '9';
    }

    public Token peek() {
        if (index >= tokens.size()) {
            // This should never happen.
            throw new IllegalStateException("Peek called with out of bounds index");
        }
        return tokens.get(index);
    }

    public Token next() {
        if (atEof()) {
            return EOF;
        }
        Token res = tokens.get(index);
        index += 1;
        return res;
    }

    public boolean matches(TokenKind... kinds) {
        if (index + kinds.length >= tokens.size()) {
            return false;
        }
        for (int idx = 0; idx < kinds.length; idx++) {
            if (tokens.get(index + idx).type != kinds[idx]) {
                return false;
            }
        }
        return true;
    }

    // e.g., resourceId[123]
    public boolean isIndexedAccess() {
        return matches(TokenKind.IDENTIFIER, TokenKind.OPEN_SQUARE, TokenKind.NUMBER, TokenKind.CLOSE_SQUARE);
    }

    public void consumeIndexed(BiConsumer consumer) {
        if (!isIndexedAccess()) {
            throw new IllegalStateException("not at indexed");
        }
        consumer.accept(tokens.get(index).value, Integer.parseInt(tokens.get(index + 2).value));
        index += 4;
    }

    // e.g., [123]
    public boolean isDirectIndexedAccess() {
        return matches(TokenKind.OPEN_SQUARE, TokenKind.NUMBER, TokenKind.CLOSE_SQUARE);
    }

    public void consumeDirectIndexed(Consumer consumer) {
        if (!isDirectIndexedAccess()) {
            throw new IllegalStateException("not at direct indexed");
        }
        consumer.accept(Integer.parseInt(tokens.get(index + 1).value));
        index += 3;
    }

    // e.g., {url#scheme}
    public boolean isNamedAccess() {
        return matches(TokenKind.OPEN_CURLY, TokenKind.IDENTIFIER, TokenKind.HASH, TokenKind.IDENTIFIER, TokenKind.CLOSE_CURLY);
    }

    public void consumeNamedAccess(BiConsumer consumer) {
        if (!isNamedAccess()) {
            throw new IllegalStateException("not at named access");
        }
        consumer.accept(tokens.get(index + 1).value, tokens.get(index + 3).value);
        index += 5;
    }

    // e.g., {Region}
    public boolean isReference() {
        return matches(TokenKind.OPEN_CURLY, TokenKind.IDENTIFIER, TokenKind.CLOSE_CURLY);
    }

    public boolean isIdentifier() {
        return matches(TokenKind.IDENTIFIER);
    }

    public void consumeIdentifier(Consumer consumer) {
        if (!isIdentifier()) {
            throw new IllegalStateException("not at identifier");
        }
        consumer.accept(tokens.get(index).value);
        index += 1;
    }

    public void expectAtEof(String state) {
        if (!atEof()) {
            throw new IllegalArgumentException(
                String.format("unexpected extra tokens while parsing %s, starting at: %s", state, peek()));
        }
    }

    public void consumeReferenceAccess(Consumer consumer) {
        if (!isReference()) {
            throw new IllegalStateException("not at reference expression");
        }
        consumer.accept(tokens.get(index + 1).value);
        index += 3;
    }

    public boolean atEof() {
        return index >= tokens.size() - 1;
    }

    enum TokenKind {
        STRING,
        NUMBER,
        IDENTIFIER,
        HASH,
        OPEN_CURLY,
        CLOSE_CURLY,
        OPEN_SQUARE,
        CLOSE_SQUARE,
        EOF,
    }

    static class TokenizerState {
        private final String source;
        private int index = 0;

        TokenizerState(String source) {
            this.source = source;
        }

        public char peek() {
            if (index == source.length()) {
                return 0;
            }
            return source.charAt(index);
        }

        public boolean hasNext() {
            return index < source.length();
        }

        public char next() {
            if (index == source.length()) {
                return 0;
            }
            char res = source.charAt(index);
            index += 1;
            return res;
        }
    }

    static class Token {
        private final TokenKind type;
        private final String value;

        Token(TokenKind type, String value) {
            this.type = type;
            this.value = value;
        }

        public TokenKind type() {
            return type;
        }

        public String value() {
            return value;
        }

        @Override
        public String toString() {
            return ToString.builder("Token")
                           .add("type", type)
                           .add("value", value)
                           .build();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy