// org.antlr.v4.runtime.atn.ATNSerializer Maven / Gradle / Ivy
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Locale;
import java.util.Map;
/** This class represents a target neutral serializer for ATNs. An ATN is converted to a list of integers
* that can be converted back to an ATN. We compute the list of integers and then generate an array
* into the target language for a particular lexer or parser. Java is a special case where we must
* generate strings instead of arrays, but that is handled outside of this class.
* See {@link ATNDeserializer#encodeIntsWith16BitWords(IntegerList)} and
* {@link org.antlr.v4.codegen.model.SerializedJavaATN}.
*/
public class ATNSerializer {
/** The ATN this serializer reads from. */
public ATN atn;

/** Accumulates the serialized integers; this is the list returned by {@link #serialize()}. */
private final IntegerList data = new IntegerList();

/** Note that we use a LinkedHashMap as a set to maintain insertion order while deduplicating
 *  entries with the same key. Only the keys are read back (see {@code sets.keySet()});
 *  the values are placeholders.
 */
private final Map<IntervalSet, Boolean> sets = new LinkedHashMap<>();

/** State numbers of decision states flagged {@code nonGreedy}, collected while states are written. */
private final IntegerList nonGreedyStates = new IntegerList();

/** State numbers of rule start states flagged {@code isLeftRecursiveRule}, collected while states are written. */
private final IntegerList precedenceStates = new IntegerList();
/**
 * Creates a serializer bound to {@code atn}.
 *
 * @param atn the ATN to serialize; when assertions are enabled its
 *            {@code grammarType} is checked to be non-null
 */
public ATNSerializer(ATN atn) {
    assert atn.grammarType != null;

    this.atn = atn;
}
/** Serialize state descriptors, edge descriptors, and decision→state map
 *  into list of ints. Sections are appended in the order the helpers below are called:
 *
 *    serialized version, grammar type (ordinal of the ATN type), max token type,
 *    num states,
 *       per state: state type, rule index, plus one extra state number for
 *       loop-end and block-start states (a missing state is written as a lone
 *       invalid-type marker),
 *    num non-greedy states, their state numbers,
 *    num precedence states, their state numbers,
 *    num rules, per-rule data (see addRuleStatesAndLexerTokenTypes),
 *    num modes, mode start states (in mode order),
 *    num sets,
 *       per set: interval count, contains-EOF flag (1 or 0), interval bounds,
 *    num edges, edge descriptors,
 *    num decisions, decision start states (in decision order),
 *    lexer grammars only: num lexer actions,
 *       per action: action type ordinal and two argument slots (0 when unused)
 *
 *  Convenient to pack into unsigned shorts to make as Java string.
 *
 *  Every call appends to the same internal list; nothing visible here clears it
 *  between calls.
 *
 *  @return the internal list holding the serialized form
 */
public IntegerList serialize() {
    addPreamble();
    // Writing the states also counts the edges and gathers the non-greedy and
    // precedence state numbers that the next two sections emit.
    int nedges = addEdges();
    addNonGreedyStates();
    addPrecedenceStates();
    addRuleStatesAndLexerTokenTypes();
    addModeStartStates();
    // Sets are written before the edges; the returned map gives each set its index.
    Map<IntervalSet, Integer> setIndices = addSets();
    addEdges(nedges, setIndices);
    addDecisionStartStates();
    addLexerActions();
    return data;
}
/** Writes the three header values: serialized version, grammar type, max token type. */
private void addPreamble() {
    data.add(ATNDeserializer.SERIALIZED_VERSION);

    // the grammar type is stored as the ordinal of the ATN type constant,
    // which avoids any dependence on ANTLRParser
    int grammarTypeOrdinal = atn.grammarType.ordinal();
    data.add(grammarTypeOrdinal);

    data.add(atn.maxTokenType);
}
/**
 * Writes the lexer action section; does nothing unless the grammar type is LEXER.
 * The section is the action count followed, per action, by the ordinal of its action
 * type and two argument slots. A slot the action type does not use is written as 0.
 *
 * @throws IllegalArgumentException if an action reports a type not handled here
 */
private void addLexerActions() {
    if (atn.grammarType != ATNType.LEXER) {
        return;
    }

    data.add(atn.lexerActions.length);
    for (LexerAction lexerAction : atn.lexerActions) {
        data.add(lexerAction.getActionType().ordinal());

        // Both slots default to 0; each case fills in only what its action carries.
        int firstArg = 0;
        int secondArg = 0;
        switch (lexerAction.getActionType()) {
            case CHANNEL:
                firstArg = ((LexerChannelAction)lexerAction).getChannel();
                break;

            case CUSTOM: {
                LexerCustomAction custom = (LexerCustomAction)lexerAction;
                firstArg = custom.getRuleIndex();
                secondArg = custom.getActionIndex();
                break;
            }

            case MODE:
                firstArg = ((LexerModeAction)lexerAction).getMode();
                break;

            case PUSH_MODE:
                firstArg = ((LexerPushModeAction)lexerAction).getMode();
                break;

            case TYPE:
                firstArg = ((LexerTypeAction)lexerAction).getType();
                break;

            case MORE:
            case POP_MODE:
            case SKIP:
                // no arguments
                break;

            default:
                throw new IllegalArgumentException(
                    String.format(Locale.getDefault(), "The specified lexer action type %s is not valid.", lexerAction.getActionType()));
        }

        data.add(firstArg);
        data.add(secondArg);
    }
}
/** Writes the decision count followed by the state number of each decision state, in decision order. */
private void addDecisionStartStates() {
    data.add(atn.decisionToState.size());
    for (DecisionState decision : atn.decisionToState) {
        data.add(decision.stateNumber);
    }
}
// Second pass over the states: writes the edge count (nedges, as computed by the
// no-argument addEdges()) and then walks the transitions of every state that is
// present and is not a rule stop state. setIndices is the map produced by addSets().
private void addEdges(int nedges, Map setIndices) {
data.add(nedges);
for (ATNState s : atn.states) {
if ( s==null ) {
// might be optimized away
continue;
}
// transitions leaving a rule stop state are not written by this pass
if (s.getStateType() == ATNState.RULE_STOP) {
continue;
}
// NOTE(review): the next line is two definitions fused together. Everything between
// "i" and "addSets()" is missing from this copy: the rest of the per-transition loop,
// the closing braces of addEdges(int, Map), and the modifiers/return type of addSets()
// (serialize() assigns its result to a Map). Restore from the upstream source before
// relying on this file.
for (int i=0; i addSets() {
// addSets(): write all collected sets to the output, then number them in the same
// iteration order so a set can be referred to by index.
serializeSets(data, sets.keySet());
Map setIndices = new HashMap<>();
int setIndex = 0;
for (IntervalSet s : sets.keySet()) {
setIndices.put(s, setIndex++);
}
return setIndices;
}
/** Writes the mode count followed by the state number of each mode's start state, in mode order. */
private void addModeStartStates() {
    data.add(atn.modeToStartState.size());
    // With no modes the loop body never runs, so only the count (0) is written.
    for (ATNState startState : atn.modeToStartState) {
        data.add(startState.stateNumber);
    }
}
private void addRuleStatesAndLexerTokenTypes() {
int nrules = atn.ruleToStartState.length;
data.add(nrules);
for (int r=0; r=0; // 0 implies fragment rule, other token types > 0
data.add(atn.ruleToTokenType[r]);
}
}
}
/** Writes the number of collected precedence states followed by their state numbers. */
private void addPrecedenceStates() {
    final int count = precedenceStates.size();
    data.add(count);
    for (int idx = 0; idx < count; idx++) {
        data.add(precedenceStates.get(idx));
    }
}
/** Writes the number of collected non-greedy states followed by their state numbers. */
private void addNonGreedyStates() {
    final int count = nonGreedyStates.size();
    data.add(count);
    for (int idx = 0; idx < count; idx++) {
        data.add(nonGreedyStates.get(idx));
    }
}
// First pass over the states: writes the state count and one descriptor per state,
// records non-greedy decision states and left-recursive rule start states for the
// sections written later, and accumulates the number of edges.
private int addEdges() {
int nedges = 0;
data.add(atn.states.size());
for (ATNState s : atn.states) {
if ( s==null ) { // might be optimized away
// a missing state is written as a lone invalid-type marker, with no rule index
data.add(ATNState.INVALID_TYPE);
continue;
}
int stateType = s.getStateType();
if (s instanceof DecisionState && ((DecisionState)s).nonGreedy) {
nonGreedyStates.add(s.stateNumber);
}
if (s instanceof RuleStartState && ((RuleStartState)s).isLeftRecursiveRule) {
precedenceStates.add(s.stateNumber);
}
data.add(stateType);
data.add(s.ruleIndex);
// optional extra argument: the loop-back state of a loop end, or the end state of a block start
if ( s.getStateType() == ATNState.LOOP_END ) {
data.add(((LoopEndState)s).loopBackState.stateNumber);
}
else if ( s instanceof BlockStartState ) {
data.add(((BlockStartState)s).endState.stateNumber);
}
if (s.getStateType() != ATNState.RULE_STOP) {
// the deserializer can trivially derive these edges, so there's no need to serialize them
nedges += s.getNumberOfTransitions();
}
// NOTE(review): the next line is two definitions fused together. Everything between
// "i" and "sets)" is missing from this copy: the per-transition loop, the end of
// addEdges() including its return of nedges, and the start of the serializeSets
// signature. Presumably the lost loop is where the "sets" field gets populated (no
// other visible code adds to it) - confirm against the upstream source.
for (int i=0; i sets) {
// serializeSets(data, sets), as called from addSets(): writes the set count, then per set
// its interval count, a contains-EOF flag and the bounds of each written interval.
int nSets = sets.size();
data.add(nSets);
for (IntervalSet set : sets) {
boolean containsEof = set.contains(Token.EOF);
// when the first interval ends at EOF it is not written below, so it is not counted
if (containsEof && set.getIntervals().get(0).b == Token.EOF) {
data.add(set.getIntervals().size() - 1);
}
else {
data.add(set.getIntervals().size());
}
data.add(containsEof ? 1 : 0);
for (Interval I : set.getIntervals()) {
if (I.a == Token.EOF) {
if (I.b == Token.EOF) {
// the interval is exactly EOF..EOF: already conveyed by the contains-EOF flag
continue;
}
else {
// the interval starts at EOF but extends past it: write 0 as its lower bound
data.add(0);
}
}
else {
data.add(I.a);
}
data.add(I.b);
}
}
}
/**
 * Convenience entry point: serializes {@code atn} with a fresh serializer.
 *
 * @param atn the ATN to serialize
 * @return the serialized form produced by {@link #serialize()}
 */
public static IntegerList getSerialized(ATN atn) {
    ATNSerializer serializer = new ATNSerializer(atn);
    return serializer.serialize();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy