org.antlr.v4.runtime.atn.ATNSerializer Maven / Gradle / Ivy
/*
* Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
* Use of this file is governed by the BSD 3-clause license that
* can be found in the LICENSE.txt file in the project root.
*/
package org.antlr.v4.runtime.atn;
import org.antlr.v4.runtime.Token;
import org.antlr.v4.runtime.misc.IntegerList;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.misc.IntervalSet;
import org.antlr.v4.runtime.misc.Utils;
import java.io.InvalidClassException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.UUID;
public class ATNSerializer {
/** The ATN this serializer works on; assigned by the constructors. */
public ATN atn;
/**
 * Optional display names indexed by token type, consulted by
 * {@link #getTokenName(int)}; stays {@code null} when no names were supplied.
 */
private List<String> tokenNames;
/**
* Strategy for writing a single interval bound into the serialized data.
* {@code serialize()} passes one implementation that adds the value as a
* single element and one that delegates to {@code serializeInt}.
*/
private interface CodePointSerializer {
/**
* Append {@code cp} to {@code data}.
*
* @param data list receiving the serialized form
* @param cp value to write
*/
void serializeCodePoint(IntegerList data, int cp);
}
/**
 * Create a serializer for {@code atn} with no token names.
 *
 * @param atn the ATN to work on; its {@code grammarType} is asserted to be non-null
 */
public ATNSerializer(ATN atn) {
    // Same effect as assigning atn directly: the token-name list stays null.
    this(atn, null);
}
/**
 * Create a serializer for {@code atn} that can render token types by name.
 *
 * @param atn the ATN to work on; its {@code grammarType} is asserted to be non-null
 * @param tokenNames display names indexed by token type, or {@code null} for none
 */
public ATNSerializer(ATN atn, List<String> tokenNames) {
    assert atn.grammarType != null;
    this.atn = atn;
    this.tokenNames = tokenNames;
}
/** Serialize state descriptors, edge descriptors, and decision→state map
* into list of ints:
*
* serialized version, serialized UUID (both written before the grammar type),
* grammar-type, (ANTLRParser.LEXER, ...)
* max token type,
* num states,
* state-0-type ruleIndex, state-1-type ruleIndex, ... state-i-type ruleIndex optional-arg ...
* (a null state is written as ATNState.INVALID_TYPE only; a ruleIndex of -1 is
* written as Character.MAX_VALUE; optional-arg is the loop-back state number of a
* LOOP_END state or the end state number of a block start state)
* num rules,
* rule-1-start-state rule-1-args, rule-2-start-state rule-2-args, ...
* (args are token type,actionIndex in lexer else 0,0)
* num modes,
* mode-0-start-state, mode-1-start-state, ... (parser has 0 modes)
* num unicode-bmp-sets
* bmp-set-0-interval-count intervals, bmp-set-1-interval-count intervals, ...
* num unicode-smp-sets
* smp-set-0-interval-count intervals, smp-set-1-interval-count intervals, ...
* (serializeSets also writes a contains-EOF flag after each interval count)
* num total edges,
* src, trg, edge-type, edge arg1, optional edge arg2 (present always), ...
* num decisions,
* decision-0-start-state, decision-1-start-state, ...
*
* NOTE(review): decode() prints three edge arguments and advances six slots per
* edge, so the edge line above looks out of date - confirm against the edge loop.
*
* Convenient to pack into unsigned shorts to make as Java string.
* Before returning, elements are rewritten as (value + 2) & 0xFFFF.
*
* @return the list holding the serialized ATN
* @throws UnsupportedOperationException if a serialized element is out of the
* Character.MIN_VALUE..Character.MAX_VALUE range
*/
public IntegerList serialize() {
IntegerList data = new IntegerList();
// header: format version followed by the format UUID
data.add(ATNDeserializer.SERIALIZED_VERSION);
serializeUUID(data, ATNDeserializer.SERIALIZED_UUID);
// convert grammar type to ATN const to avoid dependence on ANTLRParser
data.add(atn.grammarType.ordinal());
data.add(atn.maxTokenType);
// running total of transitions, written later as the edge count
int nedges = 0;
// Note that we use a LinkedHashMap as a set to
// maintain insertion order while deduplicating
// entries with the same key.
Map sets = new LinkedHashMap<>();
// dump states, count edges and collect sets while doing so
IntegerList nonGreedyStates = new IntegerList();
IntegerList precedenceStates = new IntegerList();
data.add(atn.states.size());
for (ATNState s : atn.states) {
if ( s==null ) { // might be optimized away
data.add(ATNState.INVALID_TYPE);
continue;
}
int stateType = s.getStateType();
// remember non-greedy decision states and left-recursive rule starts by state number
if (s instanceof DecisionState && ((DecisionState)s).nonGreedy) {
nonGreedyStates.add(s.stateNumber);
}
if (s instanceof RuleStartState && ((RuleStartState)s).isLeftRecursiveRule) {
precedenceStates.add(s.stateNumber);
}
data.add(stateType);
// a rule index of -1 is stored as Character.MAX_VALUE
if (s.ruleIndex == -1) {
data.add(Character.MAX_VALUE);
}
else {
data.add(s.ruleIndex);
}
// optional per-state argument: loop-back state for LOOP_END, end state for block starts
if ( s.getStateType() == ATNState.LOOP_END ) {
data.add(((LoopEndState)s).loopBackState.stateNumber);
}
else if ( s instanceof BlockStartState ) {
data.add(((BlockStartState)s).endState.stateNumber);
}
if (s.getStateType() != ATNState.RULE_STOP) {
// the deserializer can trivially derive these edges, so there's no need to serialize them
nedges += s.getNumberOfTransitions();
}
// NOTE(review): the next line looks truncated - the text between the loop header
// and the trailing "0 ) {" test is missing (presumably the rest of the state loop,
// the rule table and the mode count). Restore from upstream before editing.
for (int i=0; i0 ) {
for (ATNState modeStartState : atn.modeToStartState) {
data.add(modeStartState.stateNumber);
}
}
// split the collected sets by largest element: sets that fit in a char go to
// the BMP table, all others to the SMP table
List bmpSets = new ArrayList<>();
List smpSets = new ArrayList<>();
for (IntervalSet set : sets.keySet()) {
if (set.getMaxElement() <= Character.MAX_VALUE) {
bmpSets.add(set);
}
else {
smpSets.add(set);
}
}
// BMP interval bounds are written as one element each
serializeSets(
data,
bmpSets,
new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
data.add(cp);
}
});
// SMP interval bounds are written through serializeInt
serializeSets(
data,
smpSets,
new CodePointSerializer() {
@Override
public void serializeCodePoint(IntegerList data, int cp) {
serializeInt(data, cp);
}
});
// index of every set in serialization order: all BMP sets first, then the SMP sets
Map setIndices = new HashMap<>();
int setIndex = 0;
for (IntervalSet bmpSet : bmpSets) {
setIndices.put(bmpSet, setIndex++);
}
for (IntervalSet smpSet : smpSets) {
setIndices.put(smpSet, setIndex++);
}
data.add(nedges);
for (ATNState s : atn.states) {
if ( s==null ) {
// might be optimized away
continue;
}
if (s.getStateType() == ATNState.RULE_STOP) {
continue;
}
// NOTE(review): the next line looks truncated - the edge loop body and everything
// up to the final range check is missing (presumably edges, decisions and any
// trailing tables); the start index of the range-check loop is not visible either.
// Restore from upstream before editing.
for (int i=0; i Character.MAX_VALUE) {
throw new UnsupportedOperationException("Serialized ATN data element "+
data.get(i)+
" element "+i+" out of range "+
(int)Character.MIN_VALUE+
".."+
(int)Character.MAX_VALUE);
}
// shift by 2 and wrap to 16 bits; decode() subtracts 2 again
int value = (data.get(i) + 2) & 0xFFFF;
data.set(i, value);
}
return data;
}
/**
 * Append a table of interval sets to {@code data}.
 *
 * <p>Layout: the number of sets, then for each set the number of serialized
 * intervals, a contains-EOF flag (1 or 0), and the lower and upper bound of
 * every serialized interval written through {@code codePointSerializer}.
 * An interval that consists of EOF only is not written (the flag already
 * records it); an interval that starts at EOF but extends further is
 * written with a lower bound of 0.
 *
 * @param data list receiving the serialized form
 * @param sets the sets to write, in iteration order
 * @param codePointSerializer writes a single interval bound into {@code data}
 */
private static void serializeSets(
        IntegerList data,
        Collection<IntervalSet> sets,
        CodePointSerializer codePointSerializer)
{
    data.add(sets.size());
    for (IntervalSet set : sets) {
        boolean containsEof = set.contains(Token.EOF);
        List<Interval> intervals = set.getIntervals();

        // A leading interval that ends at EOF is exactly {EOF}; it is skipped
        // below, so it must not be counted either.
        boolean skipsEofOnlyInterval = containsEof && intervals.get(0).b == Token.EOF;
        data.add(skipsEofOnlyInterval ? intervals.size() - 1 : intervals.size());
        data.add(containsEof ? 1 : 0);

        for (Interval interval : intervals) {
            int lowerBound = interval.a;
            if (interval.a == Token.EOF) {
                if (interval.b == Token.EOF) {
                    continue;
                }
                // EOF itself is carried by the flag; start the range at 0 instead.
                lowerBound = 0;
            }
            codePointSerializer.serializeCodePoint(data, lowerBound);
            codePointSerializer.serializeCodePoint(data, interval.b);
        }
    }
}
/**
* Decode serialized ATN data into text appended to a StringBuilder.
* The input array is cloned first, so the caller's array is not modified.
*
* NOTE(review): the body below is truncated in two places and ends with the
* tail of what looks like a different helper; restore from upstream before
* changing any code here.
*
* @param data serialized ATN as chars (getDecoded passes Utils.toCharArray of
* the serialized list)
* @return per the declared type a String; the return statement of this method
* is not visible here
* @throws UnsupportedOperationException if the version or UUID in the data
* does not match the ATNDeserializer constants
*/
public String decode(char[] data) {
data = data.clone();
// don't adjust the first value since that's the version number
for (int i = 1; i < data.length; i++) {
data[i] = (char)(data[i] - 2);
}
StringBuilder buf = new StringBuilder();
// p is the read cursor into data
int p = 0;
int version = ATNDeserializer.toInt(data[p++]);
if (version != ATNDeserializer.SERIALIZED_VERSION) {
String reason = String.format("Could not deserialize ATN with version %d (expected %d).", version, ATNDeserializer.SERIALIZED_VERSION);
throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
}
UUID uuid = ATNDeserializer.toUUID(data, p);
// the UUID occupies 8 elements
p += 8;
if (!uuid.equals(ATNDeserializer.SERIALIZED_UUID)) {
String reason = String.format(Locale.getDefault(), "Could not deserialize ATN with UUID %s (expected %s).", uuid, ATNDeserializer.SERIALIZED_UUID);
throw new UnsupportedOperationException(new InvalidClassException(ATN.class.getName(), reason));
}
p++; // skip grammarType
int maxType = ATNDeserializer.toInt(data[p++]);
buf.append("max type ").append(maxType).append("\n");
int nstates = ATNDeserializer.toInt(data[p++]);
// NOTE(review): the next line looks truncated - the state loop and everything up
// to the edge printout is missing (presumably states, rules, modes, sets and the
// edge loop header).
for (int i=0; i").append(trg)
.append(" ").append(Transition.serializationNames.get(ttype))
.append(" ").append(arg1).append(",").append(arg2).append(",").append(arg3)
.append("\n");
// each edge occupies 6 elements
p += 6;
}
int ndecisions = ATNDeserializer.toInt(data[p++]);
// NOTE(review): the next line looks truncated - the decision loop, the end of
// decode() and the header of a following set-printing helper are missing.
for (int i=0; i0 ) {
buf.append(", ");
}
// read one interval as a pair of bounds and print it as "a..b"
int a = unicodeDeserializer.readUnicode(data, p);
p += unicodeDeserializer.size();
int b = unicodeDeserializer.readUnicode(data, p);
p += unicodeDeserializer.size();
buf.append(getTokenName(a)).append("..").append(getTokenName(b));
}
buf.append("\n");
}
// NOTE(review): returning the int cursor does not match decode()'s String return
// type, so this is presumably the end of the helper whose header was lost.
return p;
}
/**
 * Produce a display name for a token type or lexer character value.
 *
 * <ul>
 * <li>{@code -1} yields {@code "EOF"}.</li>
 * <li>For a lexer ATN and a value in the {@code char} range, the result is a
 *     single-quoted literal: an escape sequence for newline, carriage return,
 *     tab, backspace, form feed, backslash and single quote; the character
 *     itself when it is a non-control Basic Latin character; otherwise a
 *     backslash-u escape with four upper-case hex digits.</li>
 * <li>Otherwise, the entry of the token-name list at index {@code t} when
 *     the list is set and {@code t} is within its bounds.</li>
 * <li>Otherwise, the decimal form of {@code t}.</li>
 * </ul>
 *
 * @param t token type or character value
 * @return the display name as described above
 */
public String getTokenName(int t) {
    if (t == -1) {
        return "EOF";
    }

    if (atn.grammarType == ATNType.LEXER
            && t >= Character.MIN_VALUE && t <= Character.MAX_VALUE) {
        switch (t) {
            case '\n':
                return "'\\n'";
            case '\r':
                return "'\\r'";
            case '\t':
                return "'\\t'";
            case '\b':
                return "'\\b'";
            case '\f':
                return "'\\f'";
            case '\\':
                return "'\\\\'";
            case '\'':
                return "'\\''";
            default:
                if (Character.UnicodeBlock.of((char) t) == Character.UnicodeBlock.BASIC_LATIN
                        && !Character.isISOControl((char) t)) {
                    return '\'' + Character.toString((char) t) + '\'';
                }
                // Setting bit 16 guarantees a five-digit hex string, so dropping the
                // leading digit leaves the value zero-padded to four digits.
                // Locale.ROOT keeps the upper-casing independent of the default locale.
                String hex = Integer.toHexString(t | 0x10000)
                        .toUpperCase(Locale.ROOT)
                        .substring(1, 5);
                return "'\\u" + hex + "'";
        }
    }

    if (tokenNames != null && t >= 0 && t < tokenNames.size()) {
        return tokenNames.get(t);
    }
    return String.valueOf(t);
}
/**
 * Used by Java target to encode short/int array as chars in string.
 *
 * @param atn the ATN to serialize
 * @return a string built from the chars of {@link #getSerializedAsChars(ATN)}
 */
public static String getSerializedAsString(ATN atn) {
    char[] serializedChars = getSerializedAsChars(atn);
    return String.valueOf(serializedChars);
}
/**
 * Serialize {@code atn} with a fresh serializer that has no token names.
 *
 * @param atn the ATN to serialize
 * @return the result of {@link #serialize()} for {@code atn}
 */
public static IntegerList getSerialized(ATN atn) {
    ATNSerializer serializer = new ATNSerializer(atn);
    return serializer.serialize();
}
/**
 * Serialize {@code atn} and convert the resulting list to a char array.
 *
 * @param atn the ATN to serialize
 * @return {@code Utils.toCharArray} applied to the serialized list
 */
public static char[] getSerializedAsChars(ATN atn) {
    IntegerList serialized = getSerialized(atn);
    return Utils.toCharArray(serialized);
}
/**
 * Serialize {@code atn} and immediately decode the result into text.
 *
 * @param atn the ATN to serialize and decode
 * @param tokenNames display names indexed by token type, handed to the
 *        decoding serializer; may be {@code null}
 * @return the output of {@link #decode(char[])} for the serialized ATN
 */
public static String getDecoded(ATN atn, List<String> tokenNames) {
    // Serialize first, then build the decoding serializer, as before.
    char[] data = getSerializedAsChars(atn);
    return new ATNSerializer(atn, tokenNames).decode(data);
}
/**
 * Append a UUID to {@code data}: the least significant 64 bits first,
 * then the most significant 64 bits, each written via serializeLong.
 *
 * @param data list receiving the serialized form
 * @param uuid the UUID to write
 */
private void serializeUUID(IntegerList data, UUID uuid) {
    long lowBits = uuid.getLeastSignificantBits();
    serializeLong(data, lowBits);
    long highBits = uuid.getMostSignificantBits();
    serializeLong(data, highBits);
}
/**
 * Append a 64-bit value to {@code data} as two 32-bit halves, low half
 * first, each written via serializeInt.
 *
 * @param data list receiving the serialized form
 * @param value the value to write
 */
private void serializeLong(IntegerList data, long value) {
    int lowWord = (int) value;
    int highWord = (int) (value >> 32);
    serializeInt(data, lowWord);
    serializeInt(data, highWord);
}
/**
 * Append a 32-bit value to {@code data} as two 16-bit elements: the low
 * 16 bits first, then the high 16 bits.
 *
 * @param data list receiving the serialized form
 * @param value the value to write
 */
private void serializeInt(IntegerList data, int value) {
    char lowHalf = (char) value;
    char highHalf = (char) (value >> 16);
    data.add(lowHalf);
    data.add(highHalf);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy