All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.vmware.antlr4c3.CodeCompletionCore Maven / Gradle / Ivy

/*
 * Copyright © 2017 VMware, Inc. All Rights Reserved.
 *
 * SPDX-License-Identifier: MIT
 *
 * See LICENSE file for more info.
 */
/*
 * This file has modifications, see KJJ comments.
 */
package com.vmware.antlr4c3;

import java.util.*;
import java.util.logging.Level;
import java.util.logging.Logger;

import org.antlr.v4.runtime.Parser;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.TokenStream;
import org.antlr.v4.runtime.Vocabulary;
import org.antlr.v4.runtime.atn.ATN;
import org.antlr.v4.runtime.atn.ATNState;
import org.antlr.v4.runtime.atn.PredicateTransition;
import org.antlr.v4.runtime.atn.RuleStopState;
import org.antlr.v4.runtime.atn.RuleTransition;
import org.antlr.v4.runtime.atn.Transition;
import org.antlr.v4.runtime.misc.IntervalSet;

/**
 * Port of antlr-c3 javascript library to java
 * 

* The c3 engine is able to provide code completion candidates useful for * editors with ANTLR generated parsers, independent of the actual * language/grammar used for the generation. */ public class CodeCompletionCore { public final static Logger logger = Logger.getLogger(CodeCompletionCore.class.getName()); /** * JDO returning information about matching tokens and rules */ public static class CandidatesCollection { /** * Collection of Token ID candidates, each with a follow-on List of * subsequent tokens */ public Map> tokens = new HashMap<>(); /** * Collection of Rule candidates, each with the callstack of rules to * reach the candidate */ public Map> rules = new HashMap<>(); /** * Collection of matched Preferred Rules each with their start and end * offsets */ public Map> rulePositions = new HashMap<>(); @Override public String toString() { return "CandidatesCollection{" + "tokens=" + tokens + ", rules=" + rules + ", ruleStrings=" + rulePositions + '}'; } } public static class FollowSetWithPath { public IntervalSet intervals; public List path; public List following; } public static class FollowSetsHolder { public List sets; public IntervalSet combined; } public static class PipelineEntry { public PipelineEntry(ATNState state, Integer tokenIndex) { this.state = state; this.tokenIndex = tokenIndex; } ATNState state; Integer tokenIndex; } private boolean showResult = true; private boolean showDebugOutput = true; private boolean debugOutputWithTransitions = true; private boolean showRuleStack = true; private Set ignoredTokens = new HashSet<>(); private Set preferredRules = new HashSet<>(); private Parser parser; private ATN atn; private Vocabulary vocabulary; private String[] ruleNames; private List tokens; private int tokenStartIndex = 0; private int statesProcessed = 0; // A mapping of rule index to token stream position to end token positions. // A rule which has been visited before with the same input position will always produce the same output positions. private final Map>> shortcutMap = new HashMap<>(); private final CandidatesCollection candidates = new CandidatesCollection(); // The collected candidates (rules and tokens). private final static Map> followSetsByATN = new HashMap<>(); public CodeCompletionCore(Parser parser, Set preferredRules, Set ignoredTokens) { this.parser = parser; this.atn = parser.getATN(); this.vocabulary = parser.getVocabulary(); this.ruleNames = parser.getRuleNames(); if (preferredRules != null) { this.preferredRules = preferredRules; } if (ignoredTokens != null) { this.ignoredTokens = ignoredTokens; } } public Set getPreferredRules() { return Collections.unmodifiableSet(preferredRules); } public void setPreferredRules(Set preferredRules) { this.preferredRules = new HashSet<>(preferredRules); } /** * This is the main entry point. The caret token index specifies the token stream index for the token which currently * covers the caret (or any other position you want to get code completion candidates for). * Optionally you can pass in a parser rule context which limits the ATN walk to only that or called rules. This can significantly * speed up the retrieval process but might miss some candidates (if they are outside of the given context). */ public CandidatesCollection collectCandidates(int caretTokenIndex, ParserRuleContext context, int maxStates) { this.shortcutMap.clear(); this.candidates.rules.clear(); this.candidates.tokens.clear(); this.statesProcessed = 0; this.tokenStartIndex = context != null ? context.start.getTokenIndex() : 0; TokenStream tokenStream = this.parser.getInputStream(); int currentIndex = tokenStream.index(); tokenStream.seek(this.tokenStartIndex); this.tokens = new ArrayList<>(tokenStream.size()); int offset = 0; while (true) { /* KJJ: Optimised loop, the original use of CommonTokenStream.LT(...) was very slow for large inputs */ Token token = tokenStream.get(offset++); if (token.getChannel() == 0) { this.tokens.add(token); if (token.getTokenIndex() >= caretTokenIndex || token.getType() == Token.EOF) { break; } } /* KJJ: Original code, with offset starting at 1 Token token = tokenStream.LT(offset++); this.tokens.add(token); if (token.getTokenIndex() >= caretTokenIndex || token.getType() == Token.EOF) { break; }*/ } tokenStream.seek(currentIndex); LinkedList callStack = new LinkedList<>(); int startRule = context != null ? context.getRuleIndex() : 0; this.processRule(this.atn.ruleToStartState[startRule], 0, callStack, "\n", maxStates); tokenStream.seek(currentIndex); // now post-process the rule candidates and find the last occurrences // of each preferred rule and extract its start and end in the input stream for (int ruleId : preferredRules) { final Map> shortcut = shortcutMap.get(ruleId); if (shortcut == null || shortcut.isEmpty()) { continue; } // select the right-most occurrence final int startToken = Collections.max(shortcut.keySet()); final Set endSet = shortcut.get(startToken); final int endToken; if (endSet.isEmpty()) { endToken = tokens.size() - 1; } else { endToken = Collections.max(shortcut.get(startToken)); } final int startOffset = tokens.get(startToken).getStartIndex(); final int endOffset; if (tokens.get(endToken).getType() == Token.EOF) { // if last token is EOF, include trailing whitespace endOffset = tokens.get(endToken).getStartIndex(); } else { // if last token is not EOF, limit to matching tokens which excludes trailing whitespace endOffset = tokens.get(endToken - 1).getStopIndex() + 1; } final List ruleStartStop = Arrays.asList(startOffset, endOffset); candidates.rulePositions.put(ruleId, ruleStartStop); } if (this.showResult && logger.isLoggable(Level.FINE)) { StringBuilder logMessage = new StringBuilder(); logMessage.append("States processed: ").append(this.statesProcessed).append("\n"); logMessage.append("Collected rules:\n"); for (Map.Entry> entry : this.candidates.rules.entrySet()) { logMessage.append(" ").append(entry.getKey()).append(", path: "); for (Integer token : entry.getValue()) { logMessage.append(this.ruleNames[token]).append(" "); } logMessage.append("\n"); } logMessage.append("Collected Tokens:\n"); for (Map.Entry> entry : this.candidates.tokens.entrySet()) { logMessage.append(" ").append(this.vocabulary.getDisplayName(entry.getKey())); for (Integer following : entry.getValue()) { logMessage.append(" ").append(this.vocabulary.getDisplayName(following)); } logMessage.append("\n"); } logger.log(Level.FINE, logMessage.toString()); } return this.candidates; } /** * Check if the predicate associated with the given transition evaluates to true. */ private boolean checkPredicate(PredicateTransition transition) { return transition.getPredicate().eval(this.parser, ParserRuleContext.EMPTY); } /** * Walks the rule chain upwards to see if that matches any of the preferred rules. * If found, that rule is added to the collection candidates and true is returned. */ private boolean translateToRuleIndex(List ruleStack) { if (this.preferredRules.isEmpty()) return false; // Loop over the rule stack from highest to lowest rule level. This way we properly handle the higher rule // if it contains a lower one that is also a preferred rule. for (int i = 0; i < ruleStack.size(); ++i) { if (this.preferredRules.contains(ruleStack.get(i))) { // Add the rule to our candidates list along with the current rule path, // but only if there isn't already an entry like that. List path = new LinkedList<>(ruleStack.subList(0, i)); boolean addNew = true; for (Map.Entry> entry : this.candidates.rules.entrySet()) { if (!entry.getKey().equals(ruleStack.get(i)) || entry.getValue().size() != path.size()) { continue; } // Found an entry for this rule. Same path? If so don't add a new (duplicate) entry. if (path.equals(entry.getValue())) { addNew = false; break; } } if (addNew) { this.candidates.rules.put(ruleStack.get(i), path); if (showDebugOutput && logger.isLoggable(Level.FINE)) { logger.fine("=====> collected: " + this.ruleNames[i]); } } return true; } } return false; } /** * This method follows the given transition and collects all symbols within the same rule that directly follow it * without intermediate transitions to other rules and only if there is a single symbol for a transition. */ private List getFollowingTokens(Transition initialTransition) { LinkedList result = new LinkedList<>(); LinkedList seen = new LinkedList<>(); // unused but in orig LinkedList pipeline = new LinkedList<>(); pipeline.add(initialTransition.target); while (!pipeline.isEmpty()) { ATNState state = pipeline.removeLast(); for (Transition transition: state.getTransitions()) { if (transition.getSerializationType() == Transition.ATOM) { if (!transition.isEpsilon()) { List list = transition.label().toList(); if (list.size() == 1 && !this.ignoredTokens.contains(list.get(0))) { result.addLast(list.get(0)); pipeline.addLast(transition.target); } } else { pipeline.addLast(transition.target); } } } } return result; } /** * Entry point for the recursive follow set collection function. */ private LinkedList determineFollowSets(ATNState start, ATNState stop){ LinkedList result = new LinkedList<>(); Set seen = new HashSet<>(); LinkedList ruleStack = new LinkedList<>(); this.collectFollowSets(start, stop, result, seen, ruleStack); return result; } /** * Collects possible tokens which could be matched following the given ATN state. This is essentially the same * algorithm as used in the LL1Analyzer class, but here we consider predicates also and use no parser rule context. */ private void collectFollowSets(ATNState s, ATNState stopState, LinkedList followSets, Set seen, LinkedList ruleStack) { if (seen.contains(s)) return; seen.add(s); if (s.equals(stopState) || s.getStateType() == ATNState.RULE_STOP) { FollowSetWithPath set = new FollowSetWithPath(); set.intervals = IntervalSet.of(Token.EPSILON); set.path = new LinkedList(ruleStack); followSets.addLast(set); return; } for (Transition transition : s.getTransitions()) { if (transition.getSerializationType() == Transition.RULE) { RuleTransition ruleTransition = (RuleTransition) transition; if (ruleStack.indexOf(ruleTransition.target.ruleIndex) != -1) { continue; } ruleStack.addLast(ruleTransition.target.ruleIndex); this.collectFollowSets(transition.target, stopState, followSets, seen, ruleStack); ruleStack.removeLast(); } else if (transition.getSerializationType() == Transition.PREDICATE) { if (this.checkPredicate((PredicateTransition) transition)) { this.collectFollowSets(transition.target, stopState, followSets, seen, ruleStack); } } else if (transition.isEpsilon()) { this.collectFollowSets(transition.target, stopState, followSets, seen, ruleStack); } else if (transition.getSerializationType() == Transition.WILDCARD) { FollowSetWithPath set = new FollowSetWithPath(); set.intervals = IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, this.atn.maxTokenType); set.path = new LinkedList(ruleStack); followSets.addLast(set); } else { IntervalSet label = transition.label(); if (label != null && label.size() > 0) { if (transition.getSerializationType() == Transition.NOT_SET) { label = label.complement(IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, this.atn.maxTokenType)); } FollowSetWithPath set = new FollowSetWithPath(); set.intervals = label; set.path = new LinkedList(ruleStack); set.following = this.getFollowingTokens(transition); followSets.addLast(set); } } } } /** * Walks the ATN for a single rule only. It returns the token stream position for each path that could be matched in this rule. * The result can be empty in case we hit only non-epsilon transitions that didn't match the current input or if we * hit the caret position. */ private Set processRule(ATNState startState, int tokenIndex, LinkedList callStack, String indentation, int maxStates) { // Start with rule specific handling before going into the ATN walk. // Check first if we've taken this path with the same input before. Map> positionMap = this.shortcutMap.get(startState.ruleIndex); if (positionMap == null) { positionMap = new HashMap<>(); this.shortcutMap.put(startState.ruleIndex, positionMap); } else { if (positionMap.containsKey(tokenIndex)) { if (showDebugOutput) { logger.fine("=====> shortcut"); } return positionMap.get(tokenIndex); } } Set result = new HashSet<>(); // For rule start states we determine and cache the follow set, which gives us 3 advantages: // 1) We can quickly check if a symbol would be matched when we follow that rule. We can so check in advance // and can save us all the intermediate steps if there is no match. // 2) We'll have all symbols that are collectable already together when we are at the caret when entering a rule. // 3) We get this lookup for free with any 2nd or further visit of the same rule, which often happens // in non trivial grammars, especially with (recursive) expressions and of course when invoking code completion // multiple times. Map setsPerState = followSetsByATN.get(this.parser.getClass().getName()); if (setsPerState == null) { setsPerState = new HashMap<>(); followSetsByATN.put(this.parser.getClass().getName(), setsPerState); } FollowSetsHolder followSets = setsPerState.get(startState.stateNumber); if (followSets == null) { followSets = new FollowSetsHolder(); setsPerState.put(startState.stateNumber, followSets); RuleStopState stop = this.atn.ruleToStopState[startState.ruleIndex]; followSets.sets = this.determineFollowSets(startState, stop); // Sets are split by path to allow translating them to preferred rules. But for quick hit tests // it is also useful to have a set with all symbols combined. IntervalSet combined = new IntervalSet(); for (FollowSetWithPath set: followSets.sets) { combined.addAll(set.intervals); } followSets.combined = combined; } callStack.addLast(startState.ruleIndex); int currentSymbol = this.tokens.get(tokenIndex).getType(); if (tokenIndex >= this.tokens.size() - 1) { // At caret? if (this.preferredRules.contains(startState.ruleIndex)) { // No need to go deeper when collecting entries and we reach a rule that we want to collect anyway. this.translateToRuleIndex(callStack); } else { // Convert all follow sets to either single symbols or their associated preferred rule and add // the result to our candidates list. for (FollowSetWithPath set: followSets.sets) { LinkedList fullPath = new LinkedList<>(callStack); fullPath.addAll(set.path); if (!this.translateToRuleIndex(fullPath)) { for (int symbol : set.intervals.toList()) { if (!this.ignoredTokens.contains(symbol)) { if (showDebugOutput && logger.isLoggable(Level.FINE)) { logger.fine("=====> collected: " + this.vocabulary.getDisplayName(symbol)); } if (!this.candidates.tokens.containsKey(symbol)) this.candidates.tokens.put(symbol, set.following); // Following is empty if there is more than one entry in the set. else { // More than one following list for the same symbol. if (!this.candidates.tokens.get(symbol).equals(set.following)) { // XXX js uses != this.candidates.tokens.put(symbol, new LinkedList()); } } } else { logger.fine("====> collection: Ignoring token: " + symbol); } } } } } callStack.removeLast(); return result; } else { // Process the rule if we either could pass it without consuming anything (epsilon transition) // or if the current input symbol will be matched somewhere after this entry point. // Otherwise stop here. if (!followSets.combined.contains(Token.EPSILON) && !followSets.combined.contains(currentSymbol)) { callStack.removeLast(); return result; } } // The current state execution pipeline contains all yet-to-be-processed ATN states in this rule. // For each such state we store the token index + a list of rules that lead to it. LinkedList statePipeline = new LinkedList<>(); PipelineEntry currentEntry; // Bootstrap the pipeline. statePipeline.add(new PipelineEntry(startState, tokenIndex)); while (!statePipeline.isEmpty()) { currentEntry = statePipeline.removeLast(); ++this.statesProcessed; /* KJJ: Protection against run away processing */ if (this.statesProcessed > maxStates) { break; } currentSymbol = this.tokens.get(currentEntry.tokenIndex).getType(); boolean atCaret = currentEntry.tokenIndex >= this.tokens.size() - 1; if (logger.isLoggable(Level.FINE)) { printDescription(indentation, currentEntry.state, this.generateBaseDescription(currentEntry.state), currentEntry.tokenIndex); if (this.showRuleStack) { printRuleState(callStack); } } switch (currentEntry.state.getStateType()) { case ATNState.RULE_START: // Happens only for the first state in this rule, not subrules. indentation += " "; break; case ATNState.RULE_STOP: { // Record the token index we are at, to report it to the caller. result.add(currentEntry.tokenIndex); continue; } default: break; } Transition[] transitions = currentEntry.state.getTransitions(); for (Transition transition : transitions) { switch (transition.getSerializationType()) { case Transition.RULE: { Set endStatus = this.processRule(transition.target, currentEntry.tokenIndex, callStack, indentation, maxStates); for (Integer position : endStatus) { statePipeline.addLast(new PipelineEntry(((RuleTransition) transition).followState, position)); } break; } case Transition.PREDICATE: { if (this.checkPredicate((PredicateTransition)transition)) { statePipeline.addLast(new PipelineEntry(transition.target, currentEntry.tokenIndex)); } break; } case Transition.WILDCARD: { if (atCaret) { if (!this.translateToRuleIndex(callStack)) { for (Integer token : IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, this.atn.maxTokenType).toList()) { if (!this.ignoredTokens.contains(token)) { this.candidates.tokens.put(token, new LinkedList()); } } } } else { statePipeline.addLast(new PipelineEntry(transition.target, currentEntry.tokenIndex + 1)); } break; } default: { if (transition.isEpsilon()) { // Jump over simple states with a single outgoing epsilon transition. statePipeline.addLast(new PipelineEntry(transition.target, currentEntry.tokenIndex)); continue; } IntervalSet set = transition.label(); if (set != null && set.size() > 0) { if (transition.getSerializationType() == Transition.NOT_SET) { set = set.complement(IntervalSet.of(Token.MIN_USER_TOKEN_TYPE, this.atn.maxTokenType)); } if (atCaret) { if (!this.translateToRuleIndex(callStack)) { List list = set.toList(); boolean addFollowing = list.size() == 1; for (Integer symbol: list) { if (!this.ignoredTokens.contains(symbol)) { if (showDebugOutput && logger.isLoggable(Level.FINE)) { logger.fine("=====> collected: " + this.vocabulary.getDisplayName(symbol)); } if (addFollowing) { this.candidates.tokens.put(symbol, this.getFollowingTokens(transition)); } else { this.candidates.tokens.put(symbol, new LinkedList<>()); } } else { logger.fine("====> collected: Ignoring token: " + symbol); } } } } else { if (set.contains(currentSymbol)) { if (showDebugOutput && logger.isLoggable(Level.FINE)) { logger.fine("=====> consumed: " + this.vocabulary.getDisplayName(currentSymbol)); } statePipeline.addLast(new PipelineEntry(transition.target, currentEntry.tokenIndex + 1)); } } } } } } } callStack.removeLast(); // Cache the result, for later lookup to avoid duplicate walks. positionMap.put(tokenIndex, result); return result; } private String[] atnStateTypeMap = new String[] { "invalid", "basic", "rule start", "block start", "plus block start", "star block start", "token start", "rule stop", "block end", "star loop back", "star loop entry", "plus loop back", "loop end" }; private String generateBaseDescription(ATNState state) { String stateValue = (state.stateNumber == ATNState.INVALID_STATE_NUMBER) ? "Invalid" : Integer.toString(state.stateNumber); return "[" + stateValue + " " + this.atnStateTypeMap[state.getStateType()] + "] in " + this.ruleNames[state.ruleIndex]; } private void printDescription(String currentIndent, ATNState state, String baseDescription, int tokenIndex) { StringBuilder output = new StringBuilder(currentIndent); StringBuilder transitionDescription = new StringBuilder(); if (this.debugOutputWithTransitions && logger.isLoggable(Level.FINER)) { for (Transition transition: state.getTransitions()) { StringBuilder labels = new StringBuilder(); List symbols = (transition.label() != null) ? transition.label().toList() : new LinkedList<>(); if (symbols.size() > 2) { // Only print start and end symbols to avoid large lists in debug output. labels.append(this.vocabulary.getDisplayName(symbols.get(0)) + " .. " + this.vocabulary.getDisplayName(symbols.get(symbols.size() - 1))); } else { for (Integer symbol: symbols) { if (labels.length() > 0) { labels.append(", "); } labels.append(this.vocabulary.getDisplayName(symbol)); } } if (labels.length() == 0) { labels.append("ε"); } transitionDescription. append("\n"). append(currentIndent). append("\t("). append(labels). append(") ["). append(transition.target.stateNumber). append(" "). append(this.atnStateTypeMap[transition.target.getStateType()]). append("] in "). append(this.ruleNames[transition.target.ruleIndex]); } if (tokenIndex >= this.tokens.size() - 1) { output.append("<<").append(this.tokenStartIndex + tokenIndex).append(">> "); } else { output.append("<").append(this.tokenStartIndex + tokenIndex).append("> "); } logger.finer(output + "Current state: " + baseDescription + transitionDescription); } } private void printRuleState(LinkedList stack) { if (stack.isEmpty()) { logger.fine(""); return; } if (logger.isLoggable(Level.FINER)) { StringBuilder sb = new StringBuilder(); for (Integer rule : stack) { sb.append(" ").append(this.ruleNames[rule]).append("\n"); } logger.log(Level.FINER, sb.toString()); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy