/*
 * Copyright (c) 2021, 2022, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * The Universal Permissive License (UPL), Version 1.0
 *
 * Subject to the condition set forth below, permission is hereby granted to any
 * person obtaining a copy of this software, associated documentation and/or
 * data (collectively the "Software"), free of charge and under any and all
 * copyright rights in the Software, and any and all patent rights owned or
 * freely licensable by each licensor hereunder covering either (i) the
 * unmodified Software as contributed to or provided by such licensor, or (ii)
 * the Larger Works (as defined below), to deal in both
 *
 * (a) the Software, and
 *
 * (b) any piece of software and/or hardware listed in the lrgrwrks.txt file if
 * one is included with the Software (each a "Larger Work" to which the Software
 * is contributed by such licensors),
 *
 * without restriction, including without limitation the rights to copy, create
 * derivative works of, display, perform, and distribute the Software and make,
 * use, sell, offer for sale, import, export, have made, and have sold the
 * Software and the Larger Work(s), and to sublicense the foregoing rights on
 * either these or other terms.
 *
 * This license is subject to the following condition:
 *
 * The above copyright notice and either this complete permission notice or at a
 * minimum a reference to the UPL must be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
package com.oracle.graal.python.pegparser;

import static com.oracle.graal.python.pegparser.tokenizer.Token.Kind.DEDENT;
import static com.oracle.graal.python.pegparser.tokenizer.Token.Kind.ERRORTOKEN;
import static com.oracle.graal.python.pegparser.tokenizer.Token.Kind.INDENT;

import java.lang.reflect.Array;
import java.math.BigInteger;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.EnumSet;
import java.util.List;
import java.util.function.Supplier;

import com.oracle.graal.python.pegparser.sst.ArgTy;
import com.oracle.graal.python.pegparser.sst.CmpOpTy;
import com.oracle.graal.python.pegparser.sst.ComprehensionTy;
import com.oracle.graal.python.pegparser.sst.ConstantValue;
import com.oracle.graal.python.pegparser.sst.ConstantValue.Kind;
import com.oracle.graal.python.pegparser.sst.ExprContextTy;
import com.oracle.graal.python.pegparser.sst.ExprTy;
import com.oracle.graal.python.pegparser.sst.KeywordTy;
import com.oracle.graal.python.pegparser.sst.ModTy;
import com.oracle.graal.python.pegparser.sst.PatternTy;
import com.oracle.graal.python.pegparser.sst.SSTNode;
import com.oracle.graal.python.pegparser.sst.StmtTy;
import com.oracle.graal.python.pegparser.sst.TypeIgnoreTy;
import com.oracle.graal.python.pegparser.tokenizer.SourceRange;
import com.oracle.graal.python.pegparser.tokenizer.Token;
import com.oracle.graal.python.pegparser.tokenizer.Tokenizer;
import com.oracle.graal.python.pegparser.tokenizer.Tokenizer.Flag;

/**
 * The generated parser extends this class. It provides access to the tokenizer. The methods
 * defined in this class are mostly equivalents of those defined in CPython's {@code pegen.c}. This
 * allows us to keep the actions and parser generator very similar to CPython for easier updating in
 * the future.
 */
public abstract class AbstractParser {
    static final ExprTy[] EMPTY_EXPR_ARRAY = new ExprTy[0];
    static final KeywordTy[] EMPTY_KEYWORD_ARRAY = new KeywordTy[0];
    static final ArgTy[] EMPTY_ARG_ARRAY = new ArgTy[0];

    /**
     * Corresponds to TARGET_TYPES in CPython
     */
    public enum TargetsType {
        STAR_TARGETS,
        DEL_TARGETS,
        FOR_TARGETS
    }

    public enum Flags {
        /**
         * Corresponds to PyPARSE_BARRY_AS_BDFL; checks whether {@code <>} should be used instead
         * of {@code !=}.
         */
        BARRY_AS_BDFL,
        /**
         * Corresponds to PyPARSE_TYPE_COMMENTS.
         */
        TYPE_COMMENTS,

        /**
         * Corresponds to fp_interactive and prompt != NULL in struct tok_state.
         */
        INTERACTIVE_TERMINAL,
        ASYNC_HACKS
    }

    private static final String BARRY_AS_BDFL = "with Barry as BDFL, use '<>' instead of '!='";

    private int currentPos; // position of the mark
    private final ArrayList<Token> tokens;
    private final Tokenizer tokenizer;
    private final ErrorCallback errorCb;
    protected final NodeFactory factory;
    private final PythonStringFactory<?> stringFactory;
    private final InputType startRule;

    private final EnumSet<Flags> flags;
    private final int featureVersion;

    protected int level = 0;
    protected boolean callInvalidRules = false;

    private boolean parsingStarted;

    /**
     * Indicates whether an error was found.
     */
    protected boolean errorIndicator = false;

    private ExprTy.Name cachedDummyName;

    protected final RuleResultCache<Object> cache = new RuleResultCache<>(this);
    protected final ArrayList<TypeIgnoreTy> comments = new ArrayList<>();

    private final Object[][][] reservedKeywords;
    private final String[] softKeywords;

    protected abstract Object[][][] getReservedKeywords();

    protected abstract String[] getSoftKeywords();

    protected abstract SSTNode runParser(InputType inputType);

    AbstractParser(String source, SourceRange sourceRange, PythonStringFactory<?> stringFactory, ErrorCallback errorCb, InputType startRule, EnumSet<Flags> flags, int featureVersion) {
        this.currentPos = 0;
        this.tokens = new ArrayList<>();
        this.tokenizer = Tokenizer.fromString(errorCb, source, getTokenizerFlags(startRule, flags), sourceRange);
        this.factory = new NodeFactory();
        this.errorCb = errorCb;
        this.stringFactory = stringFactory;
        this.reservedKeywords = getReservedKeywords();
        this.softKeywords = getSoftKeywords();
        this.startRule = startRule;
        this.flags = flags;
        this.featureVersion = featureVersion;
    }

    private static EnumSet<Tokenizer.Flag> getTokenizerFlags(InputType type, EnumSet<Flags> parserFlags) {
        EnumSet<Tokenizer.Flag> flags = EnumSet.noneOf(Tokenizer.Flag.class);
        if (type == InputType.FILE) {
            flags.add(Tokenizer.Flag.EXEC_INPUT);
        } else if (type == InputType.SINGLE && parserFlags.contains(Flags.INTERACTIVE_TERMINAL)) {
            flags.add(Tokenizer.Flag.INTERACTIVE);
        }
        if (parserFlags.contains(Flags.TYPE_COMMENTS)) {
            flags.add(Tokenizer.Flag.TYPE_COMMENT);
        }
        if (parserFlags.contains(Flags.ASYNC_HACKS)) {
            flags.add(Tokenizer.Flag.ASYNC_HACKS);
        }
        return flags;
    }

    public SSTNode parse() {
        SSTNode res = runParser(startRule);
        if (res == null) {
            resetParserState();
            runParser(startRule);
            if (errorIndicator) {
                // shouldn't we return at least the (possibly wrong) AST, based on an option?
                return null;
            }
            int fill = getFill();
            if (fill == 0) {
                raiseSyntaxError("error at start before reading any input");
            } else if (peekToken(fill - 1).type == Token.Kind.ERRORTOKEN && tokenizer.getDone() == Tokenizer.StatusCode.EOF) {
                if (tokenizer.getParensNestingLevel() > 0) {
                    raiseUnclosedParenthesesError();
                } else {
                    raiseSyntaxError("unexpected EOF while parsing");
                }
            } else {
                if (peekToken(fill - 1).type == INDENT) {
                    raiseIndentationError("unexpected indent");
                } else if (peekToken(fill - 1).type == DEDENT) {
                    raiseIndentationError("unexpected unindent");
                } else {
                    raiseSyntaxErrorKnownLocation(peekToken(fill - 1), "invalid syntax");
                }
            }
        }
        if (startRule == InputType.SINGLE && tokenizer.isBadSingleStatement()) {
            return raiseSyntaxError("multiple statements found while compiling a single statement");
        }

        return res;
    }
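
    /*
     * Usage sketch for parse() above (illustrative, not part of this class): a generated Parser
     * subclass is constructed and driven much like the nested f-string parser built in
     * concatenateStrings() below, where stringFactory, errorCallback and featureVersion are
     * assumed to be supplied by the embedder:
     *
     *   SSTNode result = new Parser(source, sourceRange, stringFactory, errorCallback,
     *                   InputType.FILE, EnumSet.noneOf(AbstractParser.Flags.class),
     *                   featureVersion).parse();
     *   if (result == null) {
     *       // a syntax error was already reported through the ErrorCallback
     *   }
     */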

    private void resetParserState() {
        errorIndicator = false;
        callInvalidRules = true;
        level = 0;
        cache.clear();
        currentPos = 0;
        tokenizer.reportIncompleteSourceIfInteractive = false;
    }

    /**
     * Get the current position in the tokenizer.
     *
     * @return the current position in the tokenizer
     */
    public int mark() {
        return currentPos;
    }

    /**
     * Reset the position in the tokenizer.
     *
     * @param position the position the tokenizer should set as the current one
     */
    public void reset(int position) {
        currentPos = position;
    }
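
    /*
     * mark() and reset() above are the backtracking primitives used by the generated rules: save
     * the position, try an alternative, and restore the position if it did not match. A minimal
     * sketch of the pattern (the same shape as lookahead() below):
     *
     *   int pos = mark();
     *   Token name = expect(Token.Kind.NAME);
     *   if (name == null) {
     *       reset(pos);   // backtrack and try another alternative
     *   }
     */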

    /**
     * Checks whether the token at the current position in the tokenizer is of the expected kind.
     * If it is, the current position in the tokenizer is advanced to the next token.
     *
     * @param tokenKind the token kind expected at the current position
     * @return the expected token, or null if the token at the current position is not the expected
     *         one
     */
    public Token expect(int tokenKind) {
        Token token = getAndInitializeToken();
        if (token.type == tokenKind) {
            currentPos++;
            return token;
        }
        return null;
    }

    /**
     * Checks whether the token at the current position in the tokenizer has the expected text. If
     * it does, the current position in the tokenizer is advanced to the next token.
     *
     * @param text the text the token at the current position must have
     * @return the expected token, or null if the token at the current position is not the expected
     *         one
     */
    public Token expect(String text) {
        Token token = getAndInitializeToken();
        if (text.equals(getText(token))) {
            currentPos++;
            return token;
        }
        return null;
    }

    /**
     * Check whether the next token to be read is of the expected kind. This does not advance the
     * tokenizer, in contrast to {@link #expect(int)}.
     */
    protected boolean lookahead(boolean match, int kind) {
        int pos = mark();
        Token token = expect(kind);
        reset(pos);
        return (token != null) == match;
    }

    /**
     * Check whether the next token to be read has the expected text. This does not advance the
     * tokenizer, in contrast to {@link #expect(String)}.
     */
    protected boolean lookahead(boolean match, String text) {
        int pos = mark();
        Token token = expect(text);
        reset(pos);
        return (token != null) == match;
    }

    /**
     * Shortcut to {@code Tokenizer.getTokenString(Token)} that tolerates a null token.
     */
    public String getText(Token token) {
        if (token == null) {
            return null;
        }
        return tokenizer.getTokenString(token);
    }

    /**
     * equivalent to _PyPegen_fill_token in that it modifies the token, and does not advance
     */
    public Token getAndInitializeToken() {
        if (currentPos < getFill()) {
            return peekToken(currentPos);
        }
        Token token = tokenizer.next();
        while (token.type == Token.Kind.TYPE_IGNORE) {
            String tag = getText(token);
            comments.add(factory.createTypeIgnore(token.sourceRange.startLine, tag, token.sourceRange));
            token = tokenizer.next();
        }

        if (startRule == InputType.SINGLE && token.type == Token.Kind.ENDMARKER && parsingStarted) {
            token.type = Token.Kind.NEWLINE;
            parsingStarted = false;
            if (tokenizer.getCurrentIndentIndex() > 0) {
                tokenizer.setPendingIndents(-tokenizer.getCurrentIndentIndex());
                tokenizer.setCurrentIndentIndex(0);
            }
        } else {
            parsingStarted = true;
        }
        tokens.add(token);
        return initializeToken(token);
    }

    /**
     * _PyPegen_get_last_nonwhitespace_token
     */
    public Token getLastNonWhitespaceToken() {
        Token t = null;
        for (int i = mark() - 1; i >= 0; i--) {
            t = peekToken(i);
            if (t.type != Token.Kind.ENDMARKER && (t.type < Token.Kind.NEWLINE || t.type > DEDENT)) {
                break;
            }
        }
        return t;
    }

    /**
     * _PyPegen_name_token
     */
    public ExprTy.Name name_token() {
        Token t = expect(Token.Kind.NAME);
        if (t != null) {
            return factory.createVariable(getText(t), t.sourceRange);
        } else {
            return null;
        }
    }

    /**
     * _PyPegen_seq_count_dots
     */
    public int countDots(Token[] tokenArray) {
        int cnt = 0;
        for (Token t : tokenArray) {
            if (t.type == Token.Kind.ELLIPSIS) {
                cnt += 3;
            } else {
                assert t.type == Token.Kind.DOT;
                cnt += 1;
            }
        }
        return cnt;
    }
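
    /*
     * Illustrative example for countDots() above: in "from ...pkg import x" the three leading
     * dots are tokenized as a single ELLIPSIS token, which contributes 3 to the count, while
     * "from .pkg import x" yields one DOT token and a count of 1.
     */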

    /**
     * _PyPegen_expect_soft_keyword
     */
    protected ExprTy.Name expect_SOFT_KEYWORD(String keyword) {
        Token t = getAndInitializeToken();
        if (t.type == Token.Kind.NAME && getText(t).equals(keyword)) {
            currentPos++;
            return factory.createVariable(getText(t), t.sourceRange);
        }
        return null;
    }

    /**
     * IMPORTANT! _PyPegen_string_token returns (through void*) a Token*. We are trying to be type
     * safe, so we create a container.
     */
    public Token string_token() {
        return expect(Token.Kind.STRING);
    }

    /**
     * _PyPegen_number_token
     */
    public ExprTy number_token() {
        Token t = expect(Token.Kind.NUMBER);
        if (t == null) {
            return null;
        }

        String number = getText(t);
        if (number.contains("_")) {
            if (featureVersion < 6) {
                raiseSyntaxError("Underscores in numeric literals are only supported in Python 3.6 and greater");
            }
            number = number.replace("_", "");
        }
        int base = 10;
        int start = 0;
        boolean isFloat = false;
        boolean isComplex = false;

        if (number.startsWith("0")) {
            if (number.startsWith("0x") || number.startsWith("0X")) {
                base = 16;
                start = 2;
            } else if (number.startsWith("0o") || number.startsWith("0O")) {
                base = 8;
                start = 2;
            } else if (number.startsWith("0b") || number.startsWith("0B")) {
                base = 2;
                start = 2;
            }
        }
        if (base == 10) {
            isComplex = number.endsWith("j") || number.endsWith("J");
            if (!isComplex) {
                isFloat = number.contains(".") || number.contains("e") || number.contains("E");
            }
        }

        if (isComplex) {
            double imag = Double.parseDouble(number.substring(0, number.length() - 1));
            return factory.createConstant(ConstantValue.ofComplex(0.0, imag), t.sourceRange);
        }
        if (isFloat) {
            return factory.createConstant(ConstantValue.ofDouble(Double.parseDouble(number)), t.sourceRange);
        }
        final long max = Long.MAX_VALUE;
        final long moltmax = max / base;
        int i = start;
        long result = 0;
        int lastD;
        boolean overunder = false;
        while (i < number.length()) {
            lastD = digitValue(number.charAt(i));

            long next = result;
            if (next > moltmax) {
                overunder = true;
            } else {
                next *= base;
                if (next > (max - lastD)) {
                    overunder = true;
                } else {
                    next += lastD;
                }
            }
            if (overunder) {
                // overflow
                BigInteger bigResult = BigInteger.valueOf(result);
                BigInteger bigBase = BigInteger.valueOf(base);
                while (i < number.length()) {
                    bigResult = bigResult.multiply(bigBase).add(BigInteger.valueOf(digitValue(number.charAt(i))));
                    i++;
                }
                return factory.createConstant(ConstantValue.ofBigInteger(bigResult), t.sourceRange);
            }
            result = next;
            i++;
        }
        return factory.createConstant(ConstantValue.ofLong(result), t.sourceRange);
    }
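
    /*
     * Worked example of the overflow handling in number_token() above (illustrative): for the
     * literal "0xFFFFFFFFFFFFFFFF" the long accumulator would exceed Long.MAX_VALUE, so the
     * remaining digits are folded into a BigInteger and the node carries
     * ConstantValue.ofBigInteger(new BigInteger("FFFFFFFFFFFFFFFF", 16)); a small literal such as
     * "0xFF" stays on the fast path and becomes ConstantValue.ofLong(255).
     */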

    private static int digitValue(char ch) {
        if (ch >= '0' && ch <= '9') {
            return ch - '0';
        } else if (ch >= 'a' && ch <= 'f') {
            return ch - 'a' + 10;
        } else {
            assert ch >= 'A' && ch <= 'F';
            return ch - 'A' + 10;
        }
    }

    /**
     * _PyPegen_expect_forced_token
     */
    public Token expect_forced_token(int kind, String expected) {
        Token t = getAndInitializeToken();
        if (t.type != kind) {
            raiseSyntaxErrorKnownLocation(t, "expected '%s'", expected);
            return null;
        }
        currentPos++;
        return t;
    }

    public ExprTy.Name name_from_token(Token t) {
        if (t == null) {
            return null;
        }
        String id = getText(t);
        return factory.createVariable(id, t.sourceRange);
    }

    /**
     * _PyPegen_soft_keyword_token
     */
    public ExprTy.Name soft_keyword_token() {
        Token t = expect(Token.Kind.NAME);
        if (t == null) {
            return null;
        }
        String txt = getText(t);
        for (String s : softKeywords) {
            if (s.equals(txt)) {
                return name_from_token(t);
            }
        }
        return null;
    }

    /**
     * _PyPegen_dummy_name
     */
    public ExprTy.Name dummyName(@SuppressWarnings("unused") Object... args) {
        if (cachedDummyName != null) {
            return cachedDummyName;
        }
        cachedDummyName = factory.createVariable("", SourceRange.ARTIFICIAL_RANGE);
        return cachedDummyName;
    }

    /**
     * _PyPegen_join_names_with_dot
     */
    public SSTNode joinNamesWithDot(ExprTy a, ExprTy b) {
        String id = ((ExprTy.Name) a).id + "." + ((ExprTy.Name) b).id;
        return factory.createVariable(id, a.getSourceRange().withEnd(b.getSourceRange()));
    }

    /**
     * _PyPegen_seq_insert_in_front
     */
    @SuppressWarnings("unchecked")
    public <T> T[] insertInFront(T element, T[] seq, Class<T> clazz) {
        T[] result;
        if (seq == null) {
            result = (T[]) Array.newInstance(clazz, 1);
        } else {
            result = Arrays.copyOf(seq, seq.length + 1);
            System.arraycopy(seq, 0, result, 1, seq.length);
        }
        result[0] = element;
        return result;
    }

    public ExprTy[] insertInFront(ExprTy element, ExprTy[] seq) {
        return insertInFront(element, seq, ExprTy.class);
    }

    public PatternTy[] insertInFront(PatternTy element, PatternTy[] seq) {
        return insertInFront(element, seq, PatternTy.class);
    }

    /**
     * _PyPegen_seq_append_to_end
     */
    @SuppressWarnings("unchecked")
    public <T> T[] appendToEnd(T[] seq, T element, Class<T> clazz) {
        T[] result;
        if (seq == null) {
            result = (T[]) Array.newInstance(clazz, 1);
            result[0] = element;
        } else {
            result = Arrays.copyOf(seq, seq.length + 1);
            result[seq.length] = element;
        }
        return result;
    }

    public ExprTy[] appendToEnd(ExprTy[] seq, ExprTy element) {
        return appendToEnd(seq, element, ExprTy.class);
    }

    /**
     * _PyPegen_concatenate_strings
     */
    public SSTNode concatenateStrings(Token[] tokenArray) {
        int n = tokenArray.length;
        String[] values = new String[n];
        SourceRange[] sourceRanges = new SourceRange[n];
        for (int i = 0; i < n; i++) {
            Token t = tokenArray[i];
            values[i] = getText(t);
            sourceRanges[i] = t.sourceRange;
        }
        FExprParser fexprParser = (code, sourceRange) -> (ExprTy) new Parser(code, sourceRange, stringFactory,
                        new ErrorCallback() {
                            @Override
                            public void reportIncompleteSource(int line) {
                                errorCb.reportIncompleteSource(line);
                            }

                            @Override
                            public void onError(ErrorType errorType, SourceRange srcRange, String message) {
                                errorCb.onError(errorType, srcRange, "f-string: " + message);
                            }

                            @Override
                            public void onWarning(WarningType warningType, SourceRange srcRange, String message) {
                                errorCb.onWarning(warningType, srcRange, message);
                            }
                        }, InputType.FSTRING, flags, featureVersion).parse();
        return factory.createString(values, sourceRanges, fexprParser, errorCb, stringFactory, featureVersion);
    }
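
    /*
     * Note on concatenateStrings() above: this is what folds adjacent literals in the source,
     * e.g. "a" "b" f"{x}", into a single string node. Plain parts are concatenated and f-string
     * parts are re-parsed through the FExprParser defined above, which prefixes nested error
     * messages with "f-string: ".
     */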

    /**
     * _PyPegen_check_barry_as_flufl
     */
    public boolean checkBarryAsFlufl(Token token) {
        if (flags.contains(Flags.BARRY_AS_BDFL) && !getText(token).equals("<>")) {
            errorCb.onError(token.sourceRange, BARRY_AS_BDFL);
            return true;
        }
        if (!flags.contains(Flags.BARRY_AS_BDFL) && !getText(token).equals("!=")) {
            // no explicit error message here, the parser will just fail to match the input
            // producing the generic 'invalid syntax' error
            return true;
        }
        return false;
    }
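
    /*
     * Background for checkBarryAsFlufl() above (PEP 401): when the BARRY_AS_BDFL flag is in
     * effect (the barry_as_FLUFL future import), inequality must be written "<>", so
     *
     *   1 <> 2    # accepted under the flag
     *   1 != 2    # rejected with the BARRY_AS_BDFL message above
     *
     * while without the flag only "!=" is valid and "<>" falls through to a generic syntax
     * error.
     */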

    /**
     * _PyPegen_check_legacy_stmt
     */
    public boolean checkLegacyStmt(ExprTy name) {
        if (!(name instanceof ExprTy.Name)) {
            return false;
        }
        String[] candidates = {"print", "exec"};
        for (String candidate : candidates) {
            if (candidate.equals(((ExprTy.Name) name).id)) {
                return true;
            }
        }
        return false;
    }

    /**
     * _PyPegen_get_expr_name
     */
    public String getExprName(ExprTy e) {
        if (e instanceof ExprTy.Attribute || e instanceof ExprTy.Subscript || e instanceof ExprTy.Starred || e instanceof ExprTy.Name || e instanceof ExprTy.Tuple || e instanceof ExprTy.List ||
                        e instanceof ExprTy.Lambda) {
            return e.getClass().getSimpleName().toLowerCase();
        }
        if (e instanceof ExprTy.Call) {
            return "function call";
        }
        if (e instanceof ExprTy.BoolOp || e instanceof ExprTy.BinOp || e instanceof ExprTy.UnaryOp) {
            return "expression";
        }
        if (e instanceof ExprTy.GeneratorExp) {
            return "generator expression";
        }
        if (e instanceof ExprTy.Yield || e instanceof ExprTy.YieldFrom) {
            return "yield expression";
        }
        if (e instanceof ExprTy.Await) {
            return "await expression";
        }
        if (e instanceof ExprTy.ListComp) {
            return "list comprehension";
        }
        if (e instanceof ExprTy.SetComp) {
            return "set comprehension";
        }
        if (e instanceof ExprTy.DictComp) {
            return "dict comprehension";
        }
        if (e instanceof ExprTy.Dict) {
            return "dict literal";
        }
        if (e instanceof ExprTy.Set) {
            return "set display";
        }
        if (e instanceof ExprTy.JoinedStr || e instanceof ExprTy.FormattedValue) {
            return "f-string expression";
        }
        if (e instanceof ExprTy.Constant) {
            ExprTy.Constant constant = (ExprTy.Constant) e;
            switch (constant.value.kind) {
                case NONE:
                    return "None";
                case BOOLEAN:
                    return constant.value.getBoolean() ? "True" : "False";
                case ELLIPSIS:
                    return "ellipsis";
            }
            return "literal";
        }
        if (e instanceof ExprTy.Compare) {
            return "comparision";
        }
        if (e instanceof ExprTy.IfExp) {
            return "conditional expression";
        }
        if (e instanceof ExprTy.NamedExpr) {
            return "named expression";
        }
        assert false : "unexpected expression " + e.getClass() + " in assignment";
        return null;
    }
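
    /*
     * Illustrative use of getExprName() above: the returned names feed messages such as the ones
     * produced by raiseSyntaxErrorInvalidTarget(), so an assignment like "f() = 1" is reported
     * as "cannot assign to function call".
     */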

    /**
     * equivalent to initialize_token
     */
    private Token initializeToken(Token token) {
        if (token.type == Token.Kind.NAME) {
            String txt = getText(token);
            int l = txt.length();
            Object[][] kwlist;
            if (l < reservedKeywords.length && (kwlist = reservedKeywords[l]) != null) {
                for (Object[] kwAssoc : kwlist) {
                    if (txt.equals(kwAssoc[0])) {
                        token.type = (int) kwAssoc[1];
                        break;
                    }
                }
            }
        }
        if (token.type == ERRORTOKEN) {
            tokenizerError(token);
        }
        return token;
    }
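
    /*
     * Note on the keyword table consulted in initializeToken() above: getReservedKeywords()
     * (supplied by the generated parser) is indexed by keyword length, and each entry pairs the
     * keyword text with the integer token type the grammar expects. A NAME token whose text
     * matches, e.g. a hypothetical {"if", <keyword token id>} entry under reservedKeywords[2],
     * has its type rewritten before the rules ever see it.
     */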

    /**
     * _PyPegen_new_type_comment
     */
    protected String newTypeComment(Object token) {
        return getText((Token) token);
    }

    /**
     * _PyPegen_join_sequences
     *
     */
    protected <T> T[] join(T[] a, T[] b) {
        if (a == null && b != null) {
            return b;
        }
        if (a != null && b == null) {
            return a;
        }

        if (a != null) {
            T[] result = Arrays.copyOf(a, a.length + b.length);
            System.arraycopy(b, 0, result, a.length, b.length);
            return result;
        }
        return null;
    }

    /**
     * _PyPegen_set_expr_context
     *
     * TODO: (tfel) We should try to avoid having to walk the parse tree so often. The git history
     * includes an attempt with a symbol and a scope stream synchronized to the token stream, but it
     * doesn't really work with the pegen generator.
     */
    protected ExprTy setExprContext(ExprTy node, ExprContextTy context) {
        return node.accept(new CopyWithContextVisitor(context));
    }

    // debug methods
    private void indent(StringBuffer sb) {
        for (int i = 0; i < level; i++) {
            sb.append("  ");
        }
    }

    void debugMessageln(String text, Object... args) {
        StringBuffer sb = new StringBuffer();
        indent(sb);
        sb.append(String.format(text, args));
        System.out.println(sb);
    }

    // Helper classes that are not really meaningful parts of the AST, just containers to move the
    // data where we need it.

    public static final class CmpopExprPair {
        final CmpOpTy op;
        final ExprTy expr;

        CmpopExprPair(CmpOpTy op, ExprTy expr) {
            this.op = op;
            this.expr = expr;
        }
    }

    public static final class KeyValuePair {
        final ExprTy key;
        final ExprTy value;

        KeyValuePair(ExprTy key, ExprTy value) {
            this.key = key;
            this.value = value;
        }

    }

    static ExprTy[] extractKeys(KeyValuePair[] l) {
        int len = l == null ? 0 : l.length;
        ExprTy[] keys = new ExprTy[len];
        for (int i = 0; i < len; i++) {
            keys[i] = l[i].key;
        }
        return keys;
    }

    static ExprTy[] extractValues(KeyValuePair[] l) {
        int len = l == null ? 0 : l.length;
        ExprTy[] values = new ExprTy[len];
        for (int i = 0; i < len; i++) {
            values[i] = l[i].value;
        }
        return values;
    }

    public static final class KeyPatternPair {
        final ExprTy key;
        final PatternTy pattern;

        KeyPatternPair(ExprTy key, PatternTy pattern) {
            this.key = key;
            this.pattern = pattern;
        }
    }

    static ExprTy[] extractKeys(KeyPatternPair[] l) {
        int len = l == null ? 0 : l.length;
        ExprTy[] keys = new ExprTy[len];
        for (int i = 0; i < len; i++) {
            keys[i] = l[i].key;
        }
        return keys;
    }

    static PatternTy[] extractPatterns(KeyPatternPair[] l) {
        int len = l == null ? 0 : l.length;
        PatternTy[] values = new PatternTy[len];
        for (int i = 0; i < len; i++) {
            values[i] = l[i].pattern;
        }
        return values;
    }

    public static final class NameDefaultPair {
        final ArgTy name;
        final ExprTy def;

        NameDefaultPair(ArgTy name, ExprTy def) {
            this.name = name;
            this.def = def;
        }
    }

    public static final class SlashWithDefault {
        final ArgTy[] plainNames;
        final NameDefaultPair[] namesWithDefaults;

        SlashWithDefault(ArgTy[] plainNames, NameDefaultPair[] namesWithDefaults) {
            this.plainNames = plainNames;
            this.namesWithDefaults = namesWithDefaults;
        }
    }

    public static final class StarEtc {
        final ArgTy varArg;
        final NameDefaultPair[] kwOnlyArgs;
        final ArgTy kwArg;

        StarEtc(ArgTy varArg, NameDefaultPair[] kwOnlyArgs, ArgTy kwArg) {
            this.varArg = varArg;
            this.kwOnlyArgs = kwOnlyArgs;
            this.kwArg = kwArg;
        }
    }

    public static final class KeywordOrStarred {
        final SSTNode element;
        final boolean isKeyword;

        KeywordOrStarred(SSTNode element, boolean isKeyword) {
            this.element = element;
            this.isKeyword = isKeyword;
        }
    }

    /**
     * _PyPegen_seq_extract_starred_exprs
     */
    static ExprTy[] extractStarredExpressions(KeywordOrStarred[] kwds) {
        List<ExprTy> list = new ArrayList<>();
        for (KeywordOrStarred n : kwds) {
            if (!n.isKeyword) {
                ExprTy element = (ExprTy) n.element;
                list.add(element);
            }
        }
        return list.toArray(new ExprTy[0]);
    }

    /**
     * _PyPegen_seq_delete_starred_exprs
     */
    static KeywordTy[] deleteStarredExpressions(KeywordOrStarred[] kwds) {
        List<KeywordTy> list = new ArrayList<>();
        for (KeywordOrStarred n : kwds) {
            if (n.isKeyword) {
                KeywordTy element = (KeywordTy) n.element;
                list.add(element);
            }
        }
        return list.toArray(new KeywordTy[0]);
    }

    /**
     * _PyPegen_map_names_to_ids
     */
    static String[] extractNames(ExprTy[] seq) {
        List<String> list = new ArrayList<>();
        for (ExprTy e : seq) {
            String id = ((ExprTy.Name) e).id;
            list.add(id);
        }
        return list.toArray(new String[0]);
    }

    /**
     * _PyPegen_collect_call_seqs
     */
    final ExprTy collectCallSequences(ExprTy[] a, KeywordOrStarred[] b, SourceRange sourceRange) {
        if (b == null) {
            return factory.createCall(dummyName(), a, EMPTY_KEYWORD_ARRAY, sourceRange);
        } else {
            ExprTy[] starred = extractStarredExpressions(b);
            ExprTy[] args;
            if (starred.length > 0) {
                args = Arrays.copyOf(a, a.length + starred.length);
                System.arraycopy(starred, 0, args, a.length, starred.length);
            } else {
                args = a;
            }
            return factory.createCall(dummyName(), args, deleteStarredExpressions(b), sourceRange);
        }
    }

    private ExprTy visitContainer(ExprTy[] elements, TargetsType type) {
        if (elements == null) {
            return null;
        }
        ExprTy child;
        for (ExprTy expr : elements) {
            child = getInvalidTarget(expr, type);
            if (child != null) {
                return child;
            }
        }
        return null;
    }

    private ExprTy getInvalidTarget(ExprTy expr, TargetsType type) {
        if (expr == null) {
            return null;
        }
        if (expr instanceof ExprTy.List) {
            return visitContainer(((ExprTy.List) expr).elements, type);
        }
        if (expr instanceof ExprTy.Tuple) {
            return visitContainer(((ExprTy.Tuple) expr).elements, type);
        }
        if (expr instanceof ExprTy.Starred) {
            if (type == TargetsType.DEL_TARGETS) {
                return expr;
            }
            return getInvalidTarget(((ExprTy.Starred) expr).value, type);
        }
        if (expr instanceof ExprTy.Compare) {
            if (type == TargetsType.FOR_TARGETS) {
                ExprTy.Compare compare = (ExprTy.Compare) expr;
                if (compare.ops[0] == CmpOpTy.In) {
                    return getInvalidTarget(compare.left, type);
                }
                return null;
            }
            return expr;
        }
        if (expr instanceof ExprTy.Name || expr instanceof ExprTy.Subscript || expr instanceof ExprTy.Attribute) {
            return null;
        }
        return expr;
    }
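
    /*
     * Illustrative example for getInvalidTarget() above: for "for f(x) in y: pass" the invalid
     * rules roughly hand over the comparison "f(x) in y"; the FOR_TARGETS walk descends into the
     * Compare node, returns the Call on its left-hand side, and the resulting message is
     * "cannot assign to function call".
     */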

    /**
     * _PyPegen_nonparen_genexp_in_call
     */
    SSTNode nonparenGenexpInCall(ExprTy args, ComprehensionTy[] comprehensions) {
        assert args instanceof ExprTy.Call;
        ExprTy.Call call = (ExprTy.Call) args;
        int len = call.args.length;
        if (len <= 1) {
            return null;
        }
        ComprehensionTy lastComprehension = comprehensions[comprehensions.length - 1];
        return raiseSyntaxErrorKnownRange(call.args[len - 1], getLastComprehensionItem(lastComprehension),
                        "Generator expression must be parenthesized");
    }

    /**
     * RAISE_SYNTAX_ERROR_INVALID_TARGET
     */
    SSTNode raiseSyntaxErrorInvalidTarget(TargetsType type, ExprTy expr) {
        ExprTy invalidTarget = getInvalidTarget(expr, type);
        if (invalidTarget != null) {
            String message = (type == TargetsType.STAR_TARGETS || type == TargetsType.FOR_TARGETS)
                            ? "cannot assign to %s"
                            : "cannot delete %s";
            return raiseSyntaxErrorKnownLocation(invalidTarget, message, getExprName(invalidTarget));
        }
        return raiseSyntaxError("invalid syntax");
    }

    /**
     * RAISE_SYNTAX_ERROR
     */
    SSTNode raiseSyntaxError(String msg, Object... arguments) {
        Token errorToken = peekToken();
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, errorToken.sourceRange, msg, arguments);
    }

    /**
     * RAISE_SYNTAX_ERROR_KNOWN_LOCATION; the first parameter is the token where the error begins
     */
    SSTNode raiseSyntaxErrorKnownLocation(Token errorToken, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, errorToken.sourceRange, msg, arguments);
    }

    /**
     * RAISE_ERROR_KNOWN_LOCATION
     */
    SSTNode raiseErrorKnownLocation(ErrorCallback.ErrorType typeError, SourceRange where, String msgIn, Object... argument) {
        String msg = msgIn;
        if (startRule == InputType.FSTRING) {
            msg = "f-string: " + msgIn;
        }
        errorIndicator = true;
        errorCb.onError(typeError, where, msg, argument);
        return null;
    }

    /**
     * RAISE_SYNTAX_ERROR_KNOWN_LOCATION; the first parameter is the node where the error begins
     */
    SSTNode raiseSyntaxErrorKnownLocation(SSTNode where, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, where.getSourceRange(), msg, arguments);
    }

    /**
     * RAISE_ERROR_KNOWN_LOCATION
     */
    SSTNode raiseErrorKnownLocation(ErrorCallback.ErrorType errorType, SSTNode where, String msg, Object... arguments) {
        return raiseErrorKnownLocation(errorType, where.getSourceRange(), msg, arguments);
    }

    /**
     * RAISE_ERROR_KNOWN_RANGE
     */
    SSTNode raiseSyntaxErrorKnownRange(Token startToken, Token endToken, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, startToken.sourceRange.withEnd(endToken.sourceRange), msg, arguments);
    }

    /**
     * RAISE_ERROR_KNOWN_RANGE
     */
    SSTNode raiseSyntaxErrorKnownRange(SSTNode startNode, SSTNode endNode, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, startNode.getSourceRange().withEnd(endNode.getSourceRange()), msg, arguments);
    }

    /**
     * RAISE_ERROR_KNOWN_RANGE
     */
    SSTNode raiseSyntaxErrorKnownRange(SSTNode startNode, Token endToken, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, startNode.getSourceRange().withEnd(endToken.sourceRange), msg, arguments);
    }

    /**
     * RAISE_SYNTAX_ERROR_STARTING_FROM
     */
    SSTNode raiseSyntaxErrorStartingFrom(Token where, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, tokenizer.extendRangeToCurrentPosition(where.sourceRange), msg, arguments);
    }

    /**
     * RAISE_SYNTAX_ERROR_STARTING_FROM
     */
    SSTNode raiseSyntaxErrorStartingFrom(SSTNode where, String msg, Object... arguments) {
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, tokenizer.extendRangeToCurrentPosition(where.getSourceRange()), msg, arguments);
    }

    /**
     * _PyPegen_arguments_parsing_error
     */
    SSTNode raiseArgumentsParsingError(ExprTy e) {
        for (KeywordTy keyword : ((ExprTy.Call) e).keywords) {
            if (keyword.arg == null) {
                return raiseSyntaxError("positional argument follows keyword argument unpacking");
            }
        }
        return raiseSyntaxError("positional argument follows keyword argument");
    }

    /**
     * RAISE_INDENTATION_ERROR
     */
    SSTNode raiseIndentationError(String msg, Object... arguments) {
        Token errorToken = peekToken();
        return raiseErrorKnownLocation(ErrorCallback.ErrorType.Indentation, errorToken.sourceRange, msg, arguments);
    }

    /**
     * raise_unclosed_parentheses_error
     */
    void raiseUnclosedParenthesesError() {
        int nestingLevel = tokenizer.getParensNestingLevel();
        assert nestingLevel > 0;
        int errorLineno = tokenizer.getParensLineNumberStack()[nestingLevel - 1];
        int errorCol = tokenizer.getParensColumnsStack()[nestingLevel - 1];
        // TODO unknown source offsets
        raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax,
                        new SourceRange(errorLineno, errorCol, errorLineno, -1),
                        "'%c' was never closed", tokenizer.getParensStack()[nestingLevel - 1]);
    }

    /**
     * tokenizer_error
     */
    void tokenizerError(Token token) {
        if (token.type == ERRORTOKEN && tokenizer.getDone() == Tokenizer.StatusCode.SYNTAX_ERROR) {
            raiseErrorKnownLocation(ErrorCallback.ErrorType.Syntax, token.getSourceRange(), (String) token.extraData);
        }
        ErrorCallback.ErrorType errorType = ErrorCallback.ErrorType.Syntax;
        String msg;
        int colOffset = -1;
        switch (tokenizer.getDone()) {
            case BAD_TOKEN:
                msg = "invalid token";
                break;
            case EOF:
                if (tokenizer.getParensNestingLevel() > 0) {
                    raiseUnclosedParenthesesError();
                } else {
                    raiseSyntaxError("unexpected EOF while parsing");
                }
                return;
            case DEDENT_INVALID:
                raiseIndentationError("unindent does not match any outer indentation level");
                return;
            case TABS_SPACES_INCONSISTENT:
                errorType = ErrorCallback.ErrorType.Tab;
                msg = "inconsistent use of tabs and spaces in indentation";
                break;
            case TOO_DEEP_INDENTATION:
                errorType = ErrorCallback.ErrorType.Indentation;
                msg = "too many levels of indentation";
                break;
            case LINE_CONTINUATION_ERROR:
                msg = "unexpected character after line continuation character";
                colOffset = tokenizer.getNextCharIndex() - tokenizer.getLineStartIndex();
                break;
            default:
                msg = "unknown parsing error";
                break;
        }
        // TODO unknown source offsets
        raiseErrorKnownLocation(errorType, new SourceRange(tokenizer.getCurrentLineNumber(),
                        colOffset >= 0 ? colOffset : 0, tokenizer.getCurrentLineNumber(), -1), msg);
    }

    // Equivalent of _PyPegen_interactive_exit
    SSTNode interactiveExit() {
        // This causes the corresponding rule to always fail. CPython also sets a variable to E_EOF
        // which is later checked in PyRun_InteractiveOneObjectEx to end the REPL. Our REPL handles
        // it differently - it won't call the parser with an empty string at all. We still need to
        // fail here in case someone calls compile('', 'single'), but we don't need the error code.
        return null;
    }

    <T> T lastItem(T[] seq) {
        return seq[seq.length - 1];
    }

    ExprTy getLastComprehensionItem(ComprehensionTy comprehension) {
        if (comprehension.ifs == null || comprehension.ifs.length == 0) {
            return comprehension.iter;
        }
        return lastItem(comprehension.ifs);
    }

    ExprTy ensureReal(ExprTy e) {
        if (!(e instanceof ExprTy.Constant) || ((ExprTy.Constant) e).value.kind == Kind.COMPLEX) {
            raiseSyntaxErrorKnownLocation(e, "real number required in complex literal");
        }
        return e;
    }

    ExprTy ensureImaginary(ExprTy e) {
        if (!(e instanceof ExprTy.Constant) || ((ExprTy.Constant) e).value.kind != Kind.COMPLEX) {
            raiseSyntaxErrorKnownLocation(e, "imaginary number required in complex literal");
        }
        return e;
    }

    ModTy makeModule(StmtTy[] statements, SourceRange sourceRange) {
        return factory.createModule(statements, comments.toArray(TypeIgnoreTy[]::new), sourceRange);
    }

    /**
     * CHECK. A simple check that the node is not null; sets the error indicator if it is.
     */
    <T> T check(T node) {
        if (node == null) {
            errorIndicator = true;
        }
        return node;
    }

    <T> T checkVersion(int version, String msg, T node) {
        checkVersion(version, msg);
        return node;
    }

    <T> T checkVersion(int version, String msg, Supplier<T> node) {
        checkVersion(version, msg);
        return node.get();
    }

    private void checkVersion(int version, String msg) {
        if (featureVersion < version) {
            raiseSyntaxError("%s only supported in Python 3.%d and greater", msg, version);
        }
    }

    private int getFill() {
        return tokens.size();
    }

    private Token peekToken() {
        if (currentPos == tokens.size()) {
            Token t = tokenizer.next();
            if (t.type != Token.Kind.TYPE_IGNORE) {
                tokens.add(t);
            }
            return t;
        }
        return tokens.get(currentPos);
    }

    protected final Token peekToken(int position) {
        assert position < tokens.size();
        return tokens.get(position);
    }
}