/**
* Copyright (c) 2016-2022 Deephaven Data Labs and Patent Pending
*/
package io.deephaven.lang.parse;
import io.deephaven.base.Lazy;
import io.deephaven.base.verify.Assert;
import io.deephaven.io.logger.Logger;
import io.deephaven.lang.generated.*;
import io.deephaven.proto.backplane.script.grpc.CompletionItem;
import io.deephaven.proto.backplane.script.grpc.DocumentRange;
import io.deephaven.proto.backplane.script.grpc.Position;
import io.deephaven.proto.backplane.script.grpc.TextEdit;
import io.deephaven.web.shared.fu.MappedIterable;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.regex.Pattern;
/**
* Represents a parsed document.
*
* For now, we will be re-parsing the entire string document every time, but in the future, we would like to be able to
* update only ranges of changed code.
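 *
 * A minimal usage sketch (names outside this class, such as the parser producing the
 * {@code ChunkerDocument}, are assumptions for illustration):
 *
 * <pre>{@code
 * ParsedDocument parsed = new ParsedDocument(chunkerDocument, sourceText);
 * Node at = parsed.findNode(cursorIndex); // deepest node containing the cursor offset
 * }</pre>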
*/
public class ParsedDocument {
private static class AnchorNode extends SimpleNode {
AnchorNode(int index) {
super(-1);
Token cheat = new Token(-1, "") {
@Override
public int compareTo(BaseToken o) {
if (o.getStartIndex() == index) {
return index - o.getEndIndex();
}
return index - o.getStartIndex();
}
};
cheat.startIndex = index;
cheat.endIndex = index;
jjtSetFirstToken(cheat);
jjtSetLastToken(cheat);
}
@Override
public boolean containsIndex(int i) {
return i == jjtGetFirstToken().startIndex;
}
}
    private static final Comparator<Node> CMP = (a, b) -> {
if (a == b) {
return 0;
}
final Token aStart = a.jjtGetFirstToken();
final Token bStart = b.jjtGetFirstToken();
int cmp;
if (b instanceof AnchorNode) {
// let the anchor node do the comparisons...
Assert.eqFalse(a instanceof AnchorNode, "You may only use one AnchorNode at a time");
cmp = -bStart.compareTo(aStart);
} else {
cmp = aStart.compareTo(bStart);
}
if (cmp != 0) {
return cmp;
}
final Token aEnd = a.jjtGetLastToken();
final Token bEnd = b.jjtGetLastToken();
if (b instanceof AnchorNode) {
cmp = -bEnd.compareTo(aEnd);
} else {
cmp = aEnd.compareTo(bEnd);
}
if (cmp != 0) {
return cmp;
}
// bah. whoever is the most derived should go _last_.
// We will be looking backwards from cursor, so we should want the end-most item
// to be the final node, to easily find it later...
if (a.isChildOf(b)) {
return 1;
}
assert b.isChildOf(a) : "Nodes occupying the same tokenspace were not in a parent-child relationship "
+ a + " does not contain " + b + " (or vice versa)";
return -1;
};
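    // Illustrative usage (the caller and the sorted list are assumptions, not part of
    // this class): the AnchorNode/CMP pair is shaped for sorted lookups such as
    //   int hit = Collections.binarySearch(sortedNodes, new AnchorNode(cursorIndex), CMP);
    // where the anchor's cheat token takes over the comparison, so "equal" means
    // "this node's token span contains cursorIndex".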
/**
 * TODO: require clients to send only \n; we don't want to deal with \r\n taking up two chars. IDS-1517-26
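 *
 * Until then, the pattern below tolerates both endings; e.g. (illustrative):
 *
 * <pre>{@code
 * NEW_LINE_PATTERN.split("a\r\nb\nc"); // -> ["a", "b", "c"]
 * }</pre>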
*/
private static final Pattern NEW_LINE_PATTERN = Pattern.compile("\\r?\\n");
private final ChunkerDocument doc;
    private final Lazy<Set<ChunkerStatement>> statements;
private final String src;
private String errorSource;
private ParseException error;
    private final Map<DocumentRange, Position.Builder> computedPositions;
    private final Map<String, List<ChunkerAssign>> assignments;
public ParsedDocument(ChunkerDocument doc, String document) {
this.doc = doc;
this.src = document;
computedPositions = new ConcurrentHashMap<>(4);
assignments = new ConcurrentHashMap<>(12);
statements = new Lazy<>(() -> {
            final LinkedHashSet<ChunkerStatement> stmts = new LinkedHashSet<>();
doc.childrenAccept(new ChunkerDefaultVisitor() {
@Override
public Object visitChunkerStatement(ChunkerStatement node, Object data) {
stmts.add(node);
// only want to add top level statements; we don't have a good solution for nested ranges yet.
return null;
}
}, null);
return stmts;
});
}
public Node findNode(int p) {
if (doc.jjtGetFirstToken() == doc.jjtGetLastToken() && doc.jjtGetFirstToken().kind == ChunkerConstants.EOF) {
return doc;
}
// our nodes are 0-indexed...
final Node best;
if (p >= doc.getEndIndex()) {
// cursor is at the end of the document. Just find the deepest tail node.
best = findLast(doc, Math.max(0, Math.min(p - 1, doc.getEndIndex())));
} else {
best = findDeepest(doc, Math.max(0, p));
}
return best;
}
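    // Illustrative example: in the single-statement document "x = 1", findNode(2)
    // descends via findDeepest to the node whose tokens contain index 2, while a
    // cursor at or beyond the end of the source falls through to findLast, which
    // walks the right-most spine of the tree.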
private Node findDeepest(Node best, int i) {
Assert.leq(best.getStartIndex(), "node.startIndex", i);
Assert.geq(best.getEndIndex(), "node.endIndex", i);
        // TODO: consider binary search here instead of linear;
        // though it is likely better to search linearly from the end,
        // since the user is most likely to be editing at the end. IDS-1517-27
for (int c = best.jjtGetNumChildren(); c-- > 0;) {
final Node child = best.jjtGetChild(c);
if (child.containsIndex(i) ||
(best instanceof ChunkerStatement && i == child.getEndIndex())) {
return findDeepest(child, i);
}
}
// none of our children (if any) contained the specified index; return us.
return best;
}
private Node findLast(Node best, int i) {
Assert.leq(best.getStartIndex(), "node.startIndex", i);
Assert.geq(best.getEndIndex(), "node.endIndex", i);
int c = best.jjtGetNumChildren() - 1;
if (c == -1) {
return best;
}
final Node child = best.jjtGetChild(c);
if (child.getEndIndex() >= i) {
if (child.jjtGetNumChildren() == 0) {
return child;
}
return findLast(child, i);
}
return best;
}
public ChunkerDocument getDoc() {
return doc;
}
/**
* When a parse fails, we do not throw away our last-good document.
*
* We do, however, record the failure information, which you should check via {@link #isError()}.
*
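 * A sketch of the intended failure flow (the {@code parse} call and the caller-side
 * bookkeeping are assumptions for illustration):
 *
 * <pre>{@code
 * try {
 *     lastGood = parse(newSource); // fresh ParsedDocument on success
 * } catch (ParseException e) {
 *     lastGood = lastGood.withError(newSource, e); // keep the old tree, remember the failure
 * }
 * }</pre>
 *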
* @param src The source with an error
 * @param e The parse exception. We may generalize this to any exception type later.
* @return this, for chaining. We may need to make copies later, but for now, we'll use it as-is.
*/
public ParsedDocument withError(String src, ParseException e) {
this.errorSource = src;
this.error = e;
return this;
}
public String getSource() {
return errorSource == null ? src : errorSource;
}
public String getLastGoodSource() {
return src;
}
public boolean isError() {
return errorSource != null || error != null;
}
public void resetFailure() {
this.errorSource = null;
this.error = null;
}
public void logErrors(Logger log) {
if (error != null) {
log.warn("Invalid document; use trace logging for more details", error);
log.trace(errorSource);
}
}
@Override
public String toString() {
return "ParsedDocument{" +
"doc=" + doc +
", errorSource='" + errorSource + '\'' +
", error=" + error +
'}';
}
public Position.Builder findEditRange(DocumentRange replaceRange) {
final Token end = doc.jjtGetLastToken();
        // Our document, and thus our tokens, are 1-indexed,
        // while the range we send back through the LSP protocol is 0-indexed;
        // so this less-than is really <= once the off-by-one is accounted for.
assert replaceRange.getEnd().getLine() < end.endLine;
assert replaceRange.getEnd().getCharacter() <= end.endColumn;
// Most definitely want to cache this very expensive operation.
return computedPositions.computeIfAbsent(replaceRange, r -> findFromNodes(replaceRange, doc));
}
private Position.Builder findFromNodes(DocumentRange replaceRange, Node startNode) {
return findFromNodes(replaceRange, startNode, null);
}
private Position.Builder findFromNodes(DocumentRange replaceRange, Node startNode, Node endNode) {
if (startNode.jjtGetNumChildren() == 0) {
// we are the winner node!
if (endNode != startNode) {
endNode = refineEndNode(replaceRange, endNode == null ? startNode : endNode);
}
assert startsBefore(startNode, replaceRange.getStart());
// Note, we're intentionally sending the first token of both the start and end node,
// as we need to search forward-only when examining tokens.
return findFromTokens(replaceRange, startNode.jjtGetFirstToken(), endNode.jjtGetFirstToken());
} else {
// we are going to look through children backwards,
// as the user is most likely to be editing the end of the document.
for (Node kid : MappedIterable.reversed(startNode.getChildren())) {
// look backwards until we find the starting range
// we eventually rely on tokens instead of nodes,
// which only have forward links; thus, we must anchor to start node.
if (endNode == null || endNode == startNode) {
if (startsBefore(kid, replaceRange.getEnd())) {
endNode = kid;
}
}
if (startsBefore(kid, replaceRange.getStart())) {
return findFromNodes(replaceRange, kid, endNode);
}
}
if (endNode != startNode) {
endNode = refineEndNode(replaceRange, endNode == null ? startNode : endNode);
}
return findFromTokens(replaceRange, startNode.jjtGetFirstToken(), endNode.jjtGetFirstToken());
}
}
private boolean startsBefore(Node kid, Position start) {
return LspTools.lessOrEqual(kid.jjtGetFirstToken().positionStart(), start);
}
private Node refineEndNode(DocumentRange replaceRange, Node endNode) {
if (endNode.jjtGetNumChildren() == 0) {
return endNode;
}
for (Node kid : MappedIterable.reversed(endNode.getChildren())) {
if (startsBefore(kid, replaceRange.getEnd())) {
return refineEndNode(replaceRange, kid);
}
}
return endNode;
}
@SuppressWarnings("Duplicates")
private Position.Builder findFromTokens(DocumentRange replaceRange, Token start, Token end) {
        // While it would be nice to actually iterate backwards here,
        // we only maintain forward links, and it would be a hassle / an O(n)
        // operation to set up backlinks.
final Position.Builder startPos = start.positionStart();
final Position.Builder endPos = end.positionStart();
        // both asserts are >= because start and end are each the earliest token containing our range that we could find.
assert LspTools.greaterOrEqual(replaceRange.getStart(), startPos);
assert LspTools.greaterOrEqual(replaceRange.getEnd(), endPos);
int startInd = findFromToken(replaceRange.getStart(), start, true);
int endInd = findFromToken(replaceRange.getEnd(), end, false);
return Position.newBuilder().setLine(startInd).setCharacter(endInd);
}
private int findFromToken(Position pos, Token tok, boolean start) {
final Token startTok = tok;
if (start && tok.positionStart().equals(pos)) {
return tok.tokenBegin;
}
if (!start && tok.positionEnd(true).equals(pos)) {
return tok.tokenBegin + Math.min(1, tok.image.length());
}
while (tok.next != null) {
if (tok.containsPosition(pos)) {
                int ind = tok.tokenBegin; // start ? tok.tokenBegin : tok.startIndex;
final Position.Builder candidate = tok.positionStart();
final String[] lines = NEW_LINE_PATTERN.split(tok.image);
                // multi-line tokens are rare, but not illegal.
for (int linePos = 0; linePos < lines.length; linePos++) {
String line = lines[linePos];
if (candidate.getLine() == pos.getLine()) {
// we're down to the same line.
ind += pos.getCharacter() - candidate.getCharacter();
return ind;
} else {
candidate.setLine(candidate.getLine() + 1);
candidate.setCharacter(0);
                        // TODO: make Monaco force \n only instead of \r\n, and blow up if the client
                        // gives us \r\n, so the +1 we do here for the line split is always valid. IDS-1517-26
ind += line.length() + 1;
}
}
return ind;
} else {
tok = tok.next;
}
}
throw new IllegalArgumentException("Token " + startTok + " does not contain position " + pos);
}
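    // Worked example (hypothetical values): token image "abc\ndef", tokenBegin 10,
    // target position on the token's second line at character 2. Pass one: lines differ,
    // so ind += "abc".length() + 1 -> 14; pass two: lines match, so ind += 2 -> 16,
    // the absolute index of 'f'.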
public void extendEnd(CompletionItem.Builder item, Position requested, Node node) {
Token tok = node.findToken(item.getTextEditBuilder().getRangeBuilder().getEndBuilder());
while (LspTools.lessThan(item.getTextEdit().getRange().getEnd(), requested)) {
tok = extendEnd(tok, requested, item);
if (tok == null) {
item.getTextEditBuilder().getRangeBuilder()
.setEnd(LspTools.plus(requested, 0, 1));
break;
}
}
}
private Token extendEnd(Token tok, Position requested, CompletionItem.Builder edit) {
if (tok.beginLine == tok.endLine) {
// most common case (almost everything)
final TextEdit.Builder textEdit = edit.getTextEditBuilder();
int moved = LspTools.extend(textEdit.getRangeBuilder().getEndBuilder(), tok.positionEnd());
String txt = tok.image;
textEdit.setText(textEdit.getText() +
txt.substring(txt.length() - moved));
edit.setTextEdit(textEdit);
return tok.next;
} else {
            // ick. multi-line tokens are the devil.
            // We should probably reduce this to only the newline token itself,
            // and instead run more productions over the contents of multi-line strings
            // (the other token kind that can span lines).
throw new UnsupportedOperationException("Multi-line edits not supported yet; cannot extendEnd over " + tok);
}
}
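    // Illustrative single-line case (values assumed): if the edit currently ends two
    // characters short of a token whose image is "hello", and LspTools.extend reports
    // the end moved by 2, then "lo" (the last two characters) is appended to the edit text.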
    public Map<String, List<ChunkerAssign>> getAssignments() {
return assignments;
}
}