Downloading a project consumes a lot of server resources. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and then download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.avro.io.parsing;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Set;
import org.apache.avro.Schema;
/**
* Symbol is the base of all symbols (terminals and non-terminals) of the
* grammar.
*/
public abstract class Symbol {
/**
 * The kind of a grammar symbol. Every {@link Symbol} carries exactly one of
 * these values in its {@code kind} field.
 */
public enum Kind {
/** terminal symbol, which has no production */
TERMINAL,
/** start symbol for some grammar */
ROOT,
/** non-terminal symbol which is a sequence of one or more other symbols */
SEQUENCE,
/** non-terminal to represent the contents of an array or map */
REPEATER,
/** non-terminal to represent a union */
ALTERNATIVE,
/** non-terminal action symbol which is automatically consumed */
IMPLICIT_ACTION,
/** non-terminal action symbol which is explicitly consumed */
EXPLICIT_ACTION
};
/** The kind of this symbol. */
public final Kind kind;
/**
 * The production for this symbol. If this symbol is a terminal this is
 * {@code null}. Otherwise this holds the sequence of the symbols that forms
 * the production for this symbol. The sequence is in the reverse order of
 * production. This is useful for easy copying onto the parsing stack.
 *
 * Please note that this field is final, so the production for a symbol should
 * be known before that symbol is constructed. This requirement cannot be met
 * for those symbols which are recursive (e.g. a record that holds a union, a
 * branch of which is the record itself). To resolve this problem, we
 * initialize the symbol with an array of nulls and fill in the symbols later.
 * Not clean, but it works. The other option is to not have this field final,
 * but keeping it final and thus keeping the symbol reference immutable gives
 * some comfort. See the various generators for how records are generated.
 */
public final Symbol[] production;
/**
 * Constructs a new symbol of the given kind that has no production, i.e. its
 * {@code production} field is {@code null}.
 *
 * @param kind the kind of the new symbol
 */
protected Symbol(Kind kind) {
  this(kind, null);
}

/**
 * Constructs a new symbol of the given kind with the given production.
 *
 * @param kind       the kind of the new symbol
 * @param production the production array, stored as-is (not copied); may be
 *                   {@code null}
 */
protected Symbol(Kind kind, Symbol[] production) {
  this.kind = kind;
  this.production = production;
}
/**
 * A convenience method to construct a root symbol.
 *
 * @param symbols the symbols passed to the {@code Root} constructor, which
 *                flattens them into the root's production
 * @return the newly created root symbol
 */
static Symbol root(Symbol... symbols) {
return new Root(symbols);
}
/**
 * A convenience method to construct a sequence.
 *
 * @param production The constituent symbols of the sequence. The array is
 *                   used directly as the sequence's production.
 * @return the newly created sequence symbol
 */
static Symbol seq(Symbol... production) {
return new Sequence(production);
}
/**
 * A convenience method to construct a repeater.
 *
 * @param endSymbol    The symbol stored in the repeater's {@code end} field.
 * @param symsToRepeat The symbols to repeat in the repeater.
 * @return the newly created repeater symbol
 */
static Symbol repeat(Symbol endSymbol, Symbol... symsToRepeat) {
return new Repeater(endSymbol, symsToRepeat);
}
/**
 * A convenience method to construct a union.
 *
 * @param symbols the symbols of the union's branches
 * @param labels  the labels of the branches; {@code Alternative} looks both
 *                arrays up by the same index
 * @return the newly created alternative symbol
 */
static Symbol alt(Symbol[] symbols, String[] labels) {
return new Alternative(symbols, labels);
}
/**
 * A convenience method to construct an ErrorAction.
 *
 * @param e the message stored in the action's {@code msg} field
 * @return the newly created error action
 */
static Symbol error(String e) {
return new ErrorAction(e);
}
/**
 * A convenience method to construct a ResolvingAction.
 *
 * @param w The writer symbol
 * @param r The reader symbol
 * @return the newly created resolving action
 */
static Symbol resolve(Symbol w, Symbol r) {
return new ResolvingAction(w, r);
}
/**
 * Records a position in a symbol array that still has to be patched: the
 * slots of {@code symbols} starting at {@code pos} are filled in later, once
 * the production they stand for is known.
 */
private static class Fixup {
  /** The array that receives the patch. */
  public final Symbol[] symbols;
  /** The index in {@link #symbols} at which the patch starts. */
  public final int pos;

  public Fixup(Symbol[] symbols, int pos) {
    this.pos = pos;
    this.symbols = symbols;
  }
}
public Symbol flatten(Map map, Map> map2) {
return this;
}
/**
 * Returns the number of slots this symbol occupies once flattened. The base
 * implementation always returns 1; {@code Sequence} overrides it.
 */
public int flattenedSize() {
return 1;
}
/**
* Flattens the given sub-array of symbols into an sub-array of symbols. Every
* Sequence in the input are replaced by its production recursively.
* Non-Sequence symbols, they internally have other symbols those
* internal symbols also get flattened. When flattening is done, the only place
* there might be Sequence symbols is in the productions of a Repeater,
* Alternative, or the symToParse and symToSkip in a UnionAdjustAction or
* SkipAction.
*
* Why is this done? We want our parsers to be fast. If we left the grammars
* unflattened, then the parser would be constantly copying the contents of
* nested Sequence productions onto the parsing stack. Instead, because of
* flattening, we have a long top-level production with no Sequences unless the
* Sequence is absolutely needed, e.g., in the case of a Repeater or an
* Alterantive.
*
* Well, this is not exactly true when recursion is involved. Where there is a
* recursive record, that record will be "inlined" once, but any internal (ie,
* recursive) references to that record will be a Sequence for the record. That
* Sequence will not further inline itself -- it will refer to itself as a
* Sequence. The same is true for any records nested in this outer recursive
* record. Recursion is rare, and we want things to be fast in the typical case,
* which is why we do the flattening optimization.
*
*
* The algorithm does a few tricks to handle recursive symbol definitions. In
* order to avoid infinite recursion with recursive symbols, we have a map of
* Symbol->Symbol. Before fully constructing a flattened symbol for a
* Sequence we insert an empty output symbol into the map and then
* start filling the production for the Sequence. If the same
* Sequence is encountered due to recursion, we simply return the
* (empty) output Sequence from the map. Then we actually fill out
* the production for the Sequence. As part of the flattening process
* we copy the production of Sequences into larger arrays. If the
* original Sequence has not not be fully constructed yet, we copy a
* bunch of nulls. Fix-up remembers all those null patches.
* The fix-ups gets finally filled when we know the symbols to occupy those
* patches.
*
* @param in The array of input symbols to flatten
* @param start The position where the input sub-array starts.
* @param out The output that receives the flattened list of symbols. The
* output array should have sufficient space to receive the
* expanded sub-array of symbols.
* @param skip The position where the output input sub-array starts.
* @param map A map of symbols which have already been expanded. Useful for
* handling recursive definitions and for caching.
* @param map2 A map to to store the list of fix-ups.
*/
static void flatten(Symbol[] in, int start, Symbol[] out, int skip, Map map,
Map> map2) {
for (int i = start, j = skip; i < in.length; i++) {
Symbol s = in[i].flatten(map, map2);
if (s instanceof Sequence) {
Symbol[] p = s.production;
List l = map2.get(s);
if (l == null) {
System.arraycopy(p, 0, out, j, p.length);
// Copy any fixups that will be applied to p to add missing symbols
for (List fixups : map2.values()) {
copyFixups(fixups, out, j, p);
}
} else {
l.add(new Fixup(out, j));
}
j += p.length;
} else {
out[j++] = s;
}
}
}
/**
 * Duplicates every pending fix-up that targets {@code toCopy} so that the same
 * patch is also applied to the copy of {@code toCopy} that now lives in
 * {@code out} starting at {@code outPos}.
 *
 * @param fixups the list of pending fix-ups; new entries are appended to it
 * @param out    the array into which {@code toCopy} was copied
 * @param outPos the index in {@code out} where the copy starts
 * @param toCopy the array that was copied
 */
private static void copyFixups(List<Fixup> fixups, Symbol[] out, int outPos, Symbol[] toCopy) {
  // The size is captured once: entries appended inside the loop target "out",
  // not "toCopy", and must not be visited by this loop.
  for (int i = 0, n = fixups.size(); i < n; i += 1) {
    Fixup fixup = fixups.get(i);
    if (fixup.symbols == toCopy) {
      fixups.add(new Fixup(out, fixup.pos + outPos));
    }
  }
}
/**
 * Computes how many slots are needed to hold the flattened form of the given
 * sub-array of symbols.
 *
 * @param symbols The array of input symbols.
 * @param start   The index where the sub-array starts.
 * @return The number of symbols that will be produced if one expands the given
 *         input.
 */
protected static int flattenedSize(Symbol[] symbols, int start) {
  int total = 0;
  for (int idx = start; idx < symbols.length; idx++) {
    final Symbol current = symbols[idx];
    // Only sequences expand; every other symbol occupies exactly one slot.
    total += (current instanceof Sequence) ? ((Sequence) current).flattenedSize() : 1;
  }
  return total;
}
/**
 * A symbol of kind {@link Kind#TERMINAL}. It has no production and carries
 * only a name used by {@link #toString()}.
 */
private static class Terminal extends Symbol {
/** The text returned by {@link #toString()}. */
private final String printName;
public Terminal(String printName) {
super(Kind.TERMINAL);
this.printName = printName;
}
/** Returns the print name given at construction. */
@Override
public String toString() {
return printName;
}
}
/**
 * A symbol of kind {@link Kind#IMPLICIT_ACTION}. Both constructors are
 * private, so instances and subclasses can only be created inside the
 * enclosing class.
 */
public static class ImplicitAction extends Symbol {
/**
 * Set to {@code true} if and only if this implicit action is a trailing
 * action. That is, it is an action that follows a real symbol. E.g.
 * {@link Symbol#DEFAULT_END_ACTION}.
 */
public final boolean isTrailing;
/** Creates a non-trailing implicit action. */
private ImplicitAction() {
this(false);
}
/**
 * Creates an implicit action.
 *
 * @param isTrailing whether this action is a trailing action
 */
private ImplicitAction(boolean isTrailing) {
super(Kind.IMPLICIT_ACTION);
this.isTrailing = isTrailing;
}
}
/**
 * The start symbol of a grammar. Its production holds the flattened form of
 * the symbols it was built from, preceded by the root itself in slot 0.
 */
protected static class Root extends Symbol {
  private Root(Symbol... symbols) {
    super(Kind.ROOT, makeProduction(symbols));
    // Slot 0 was left empty by makeProduction and is reserved for the root.
    production[0] = this;
  }

  /**
   * Flattens {@code symbols} into a new array whose first slot is left
   * {@code null} for the caller to fill.
   */
  private static Symbol[] makeProduction(Symbol[] symbols) {
    final int flatLength = flattenedSize(symbols, 0);
    final Symbol[] flattened = new Symbol[flatLength + 1];
    flatten(symbols, 0, flattened, 1, new HashMap<>(), new HashMap<>());
    return flattened;
  }
}
protected static class Sequence extends Symbol implements Iterable {
private Sequence(Symbol[] productions) {
super(Kind.SEQUENCE, productions);
}
public Symbol get(int index) {
return production[index];
}
public int size() {
return production.length;
}
@Override
public Iterator iterator() {
return new Iterator() {
private int pos = production.length;
@Override
public boolean hasNext() {
return 0 < pos;
}
@Override
public Symbol next() {
if (0 < pos) {
return production[--pos];
} else {
throw new NoSuchElementException();
}
}
@Override
public void remove() {
throw new UnsupportedOperationException();
}
};
}
@Override
public Sequence flatten(Map map, Map> map2) {
Sequence result = map.get(this);
if (result == null) {
result = new Sequence(new Symbol[flattenedSize()]);
map.put(this, result);
List l = new ArrayList<>();
map2.put(result, l);
flatten(production, 0, result.production, 0, map, map2);
for (Fixup f : l) {
System.arraycopy(result.production, 0, f.symbols, f.pos, result.production.length);
}
map2.remove(result);
}
return result;
}
@Override
public final int flattenedSize() {
return flattenedSize(production, 0);
}
}
public static class Repeater extends Symbol {
public final Symbol end;
private Repeater(Symbol end, Symbol... sequenceToRepeat) {
super(Kind.REPEATER, makeProduction(sequenceToRepeat));
this.end = end;
production[0] = this;
}
private static Symbol[] makeProduction(Symbol[] p) {
Symbol[] result = new Symbol[p.length + 1];
System.arraycopy(p, 0, result, 1, p.length);
return result;
}
@Override
public Repeater flatten(Map map, Map> map2) {
Repeater result = new Repeater(end, new Symbol[flattenedSize(production, 1)]);
flatten(production, 1, result.production, 1, map, map2);
return result;
}
}
/**
 * Reports whether an error symbol is reachable from the given symbol, which
 * indicates that a parser built from it may fail for some inputs.
 *
 * @param symbol the symbol at which the search starts
 * @return {@code true} if an error symbol was found
 */
public static boolean hasErrors(Symbol symbol) {
  // Fresh visited-set per top-level call; it guards against cycles.
  final Set<Symbol> visited = new HashSet<>();
  return hasErrors(symbol, visited);
}
/**
 * Recursive worker for {@link #hasErrors(Symbol)}.
 *
 * @param symbol  the symbol to examine
 * @param visited the symbols examined so far; a symbol that is already in the
 *                set is reported as error-free to break cycles
 * @return {@code true} if an {@code ErrorAction} is reachable from
 *         {@code symbol}
 */
private static boolean hasErrors(Symbol symbol, Set<Symbol> visited) {
  // avoid infinite recursion: add() returns false if the symbol was already seen
  if (!visited.add(symbol)) {
    return false;
  }
  switch (symbol.kind) {
  case ALTERNATIVE:
    return hasErrors(symbol, ((Alternative) symbol).symbols, visited);
  case EXPLICIT_ACTION:
    return false;
  case IMPLICIT_ACTION:
    if (symbol instanceof ErrorAction) {
      return true;
    }
    if (symbol instanceof UnionAdjustAction) {
      return hasErrors(((UnionAdjustAction) symbol).symToParse, visited);
    }
    // Symbols nested in other implicit actions are not examined.
    return false;
  case REPEATER:
    Repeater r = (Repeater) symbol;
    return hasErrors(r.end, visited) || hasErrors(symbol, r.production, visited);
  case ROOT:
  case SEQUENCE:
    return hasErrors(symbol, symbol.production, visited);
  case TERMINAL:
    return false;
  default:
    throw new RuntimeException("unknown symbol kind: " + symbol.kind);
  }
}
/**
 * Checks whether any of the given symbols contains an error.
 *
 * @param root    the symbol that owns {@code symbols}; occurrences of it in
 *                the array are skipped
 * @param symbols the symbols to examine; may be {@code null}, which yields
 *                {@code false}
 * @param visited the symbols examined so far
 * @return {@code true} as soon as one of the symbols reports an error
 */
private static boolean hasErrors(Symbol root, Symbol[] symbols, Set<Symbol> visited) {
  if (null != symbols) {
    for (Symbol s : symbols) {
      if (s == root) {
        continue;
      }
      if (hasErrors(s, visited)) {
        return true;
      }
    }
  }
  return false;
}
public static class Alternative extends Symbol {
public final Symbol[] symbols;
public final String[] labels;
private Alternative(Symbol[] symbols, String[] labels) {
super(Kind.ALTERNATIVE);
this.symbols = symbols;
this.labels = labels;
}
public Symbol getSymbol(int index) {
return symbols[index];
}
public String getLabel(int index) {
return labels[index];
}
public int size() {
return symbols.length;
}
public int findLabel(String label) {
if (label != null) {
for (int i = 0; i < labels.length; i++) {
if (label.equals(labels[i])) {
return i;
}
}
}
return -1;
}
@Override
public Alternative flatten(Map map, Map> map2) {
Symbol[] ss = new Symbol[symbols.length];
for (int i = 0; i < ss.length; i++) {
ss[i] = symbols[i].flatten(map, map2);
}
return new Alternative(ss, labels);
}
}
/**
 * A non-trailing implicit action that carries an error message.
 * {@code hasErrors} reports {@code true} when it encounters one.
 */
public static class ErrorAction extends ImplicitAction {
/** The error message given at construction. */
public final String msg;
private ErrorAction(String msg) {
this.msg = msg;
}
}
/**
 * Factory method for {@link IntCheckAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param size the value stored in the action's {@code size} field
 * @return the newly created action
 */
public static IntCheckAction intCheckAction(int size) {
return new IntCheckAction(size);
}
/**
 * An explicit action (kind {@link Kind#EXPLICIT_ACTION}) that carries an int.
 */
public static class IntCheckAction extends Symbol {
// NOTE(review): presumably the bound an int value is checked against by the
// parser -- confirm against the callers; this file only stores it.
public final int size;
/**
 * @param size the value stored in {@link #size}
 */
@Deprecated
public IntCheckAction(int size) {
super(Kind.EXPLICIT_ACTION);
this.size = size;
}
}
/**
 * Factory method for {@link EnumAdjustAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param rsymCount the count passed on as the action's {@code size}
 * @param adj       the adjustments array, stored as-is; may be {@code null}
 * @return the newly created action
 */
public static EnumAdjustAction enumAdjustAction(int rsymCount, Object[] adj) {
return new EnumAdjustAction(rsymCount, adj);
}
/**
 * An {@link IntCheckAction} that additionally carries an array of
 * adjustments and a precomputed flag telling whether those adjustments are a
 * no-op.
 */
public static class EnumAdjustAction extends IntCheckAction {
  /**
   * {@code true} if {@link #adjustments} is {@code null}, or if it has at most
   * {@code rsymCount} entries and every entry is an {@code Integer} equal to
   * its own index.
   */
  public final boolean noAdjustments;
  /** The adjustments array given at construction (not copied). */
  public final Object[] adjustments;

  @Deprecated
  public EnumAdjustAction(int rsymCount, Object[] adjustments) {
    super(rsymCount);
    this.adjustments = adjustments;
    this.noAdjustments = isIdentity(rsymCount, adjustments);
  }

  /** Tells whether the adjustments map every index onto itself. */
  private static boolean isIdentity(int rsymCount, Object[] adjustments) {
    if (adjustments == null) {
      return true;
    }
    if (adjustments.length > rsymCount) {
      return false;
    }
    final int limit = Math.min(rsymCount, adjustments.length);
    for (int i = 0; i < limit; i++) {
      final Object entry = adjustments[i];
      if (!(entry instanceof Integer) || ((Integer) entry).intValue() != i) {
        return false;
      }
    }
    return true;
  }
}
/**
 * Factory method for {@link WriterUnionAction}; each call creates a new
 * instance.
 */
public static WriterUnionAction writerUnionAction() {
return new WriterUnionAction();
}
/**
 * A non-trailing implicit action without any state of its own. Instances are
 * obtained through {@code writerUnionAction()}; the constructor is private.
 */
public static class WriterUnionAction extends ImplicitAction {
private WriterUnionAction() {
}
}
public static class ResolvingAction extends ImplicitAction {
public final Symbol writer;
public final Symbol reader;
private ResolvingAction(Symbol writer, Symbol reader) {
this.writer = writer;
this.reader = reader;
}
@Override
public ResolvingAction flatten(Map map, Map> map2) {
return new ResolvingAction(writer.flatten(map, map2), reader.flatten(map, map2));
}
}
/**
 * Factory method for {@link SkipAction}, whose public constructor is marked
 * {@code @Deprecated}.
 *
 * @param symToSkip the symbol stored in the action's {@code symToSkip} field
 * @return the newly created action
 */
public static SkipAction skipAction(Symbol symToSkip) {
return new SkipAction(symToSkip);
}
public static class SkipAction extends ImplicitAction {
public final Symbol symToSkip;
@Deprecated
public SkipAction(Symbol symToSkip) {
super(true);
this.symToSkip = symToSkip;
}
@Override
public SkipAction flatten(Map map, Map> map2) {
return new SkipAction(symToSkip.flatten(map, map2));
}
}
/**
 * Factory method for {@link FieldAdjustAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param rindex  the index stored in the action's {@code rindex} field
 * @param fname   the field name stored in the action's {@code fname} field
 * @param aliases the aliases stored in the action's {@code aliases} field
 * @return the newly created action
 */
public static FieldAdjustAction fieldAdjustAction(int rindex, String fname, Set<String> aliases) {
  return new FieldAdjustAction(rindex, fname, aliases);
}
/**
 * A non-trailing implicit action that carries an index, a field name and a
 * set of aliases. All three are stored as given.
 */
public static class FieldAdjustAction extends ImplicitAction {
  public final int rindex;
  public final String fname;
  public final Set<String> aliases;

  /**
   * @param rindex  the index stored in {@link #rindex}
   * @param fname   the name stored in {@link #fname}
   * @param aliases the set stored in {@link #aliases} (not copied)
   */
  @Deprecated
  public FieldAdjustAction(int rindex, String fname, Set<String> aliases) {
    this.rindex = rindex;
    this.fname = fname;
    this.aliases = aliases;
  }
}
/**
 * Factory method for {@link FieldOrderAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param fields the fields stored in the action's {@code fields} field
 * @return the newly created action
 */
public static FieldOrderAction fieldOrderAction(Schema.Field[] fields) {
return new FieldOrderAction(fields);
}
/**
 * A non-trailing implicit action that carries an array of fields together
 * with a precomputed flag telling whether the fields are already in position
 * order.
 */
public static final class FieldOrderAction extends ImplicitAction {
  /** {@code true} if every {@code fields[i].pos()} equals {@code i}. */
  public final boolean noReorder;
  /** The fields given at construction (not copied). */
  public final Schema.Field[] fields;

  @Deprecated
  public FieldOrderAction(Schema.Field[] fields) {
    this.fields = fields;
    boolean inPlace = true;
    int index = 0;
    // Stop at the first field whose position differs from its array index.
    while (inPlace && index < fields.length) {
      inPlace = (fields[index].pos() == index);
      index++;
    }
    this.noReorder = inPlace;
  }
}
/**
 * Factory method for {@link DefaultStartAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param contents the bytes stored in the action's {@code contents} field
 * @return the newly created action
 */
public static DefaultStartAction defaultStartAction(byte[] contents) {
return new DefaultStartAction(contents);
}
/**
 * A non-trailing implicit action that carries a byte array.
 */
public static class DefaultStartAction extends ImplicitAction {
// NOTE(review): presumably the encoded default value to be read -- confirm
// against the callers; this file only stores the array (without copying).
public final byte[] contents;
/**
 * @param contents the array stored in {@link #contents}
 */
@Deprecated
public DefaultStartAction(byte[] contents) {
this.contents = contents;
}
}
/**
 * Factory method for {@link UnionAdjustAction}, whose public constructor is
 * marked {@code @Deprecated}.
 *
 * @param rindex the index stored in the action's {@code rindex} field
 * @param sym    the symbol stored in the action's {@code symToParse} field
 * @return the newly created action
 */
public static UnionAdjustAction unionAdjustAction(int rindex, Symbol sym) {
return new UnionAdjustAction(rindex, sym);
}
public static class UnionAdjustAction extends ImplicitAction {
public final int rindex;
public final Symbol symToParse;
@Deprecated
public UnionAdjustAction(int rindex, Symbol symToParse) {
this.rindex = rindex;
this.symToParse = symToParse;
}
@Override
public UnionAdjustAction flatten(Map map, Map> map2) {
return new UnionAdjustAction(rindex, symToParse.flatten(map, map2));
}
}
/**
 * For JSON. Factory method for {@link EnumLabelsAction}, whose public
 * constructor is marked {@code @Deprecated}.
 *
 * @param symbols the labels stored in the action's {@code symbols} field
 * @return the newly created action
 */
public static EnumLabelsAction enumLabelsAction(List<String> symbols) {
  return new EnumLabelsAction(symbols);
}
/**
 * An {@link IntCheckAction} that carries a list of labels. Its {@code size}
 * is the number of labels at construction time.
 */
public static class EnumLabelsAction extends IntCheckAction {
  /** The labels given at construction (not copied). */
  public final List<String> symbols;

  /**
   * @param symbols the labels; must not be {@code null} because its size is
   *                read immediately
   */
  @Deprecated
  public EnumLabelsAction(List<String> symbols) {
    super(symbols.size());
    this.symbols = symbols;
  }

  /** Returns the label at index {@code n}. */
  public String getLabel(int n) {
    return symbols.get(n);
  }

  /**
   * Returns the index of the first label equal to {@code l}.
   *
   * @param l the label to look for; may be {@code null}
   * @return the index, or -1 if {@code l} is {@code null} or not present
   */
  public int findLabel(String l) {
    // A null argument never matches, even if the list holds a null element.
    return l == null ? -1 : symbols.indexOf(l);
  }
}
/**
 * The terminal symbols for the grammar. Each one is a {@code Terminal} whose
 * {@code toString()} returns the name given here.
 */
public static final Symbol NULL = new Symbol.Terminal("null");
public static final Symbol BOOLEAN = new Symbol.Terminal("boolean");
public static final Symbol INT = new Symbol.Terminal("int");
public static final Symbol LONG = new Symbol.Terminal("long");
public static final Symbol FLOAT = new Symbol.Terminal("float");
public static final Symbol DOUBLE = new Symbol.Terminal("double");
public static final Symbol STRING = new Symbol.Terminal("string");
public static final Symbol BYTES = new Symbol.Terminal("bytes");
public static final Symbol FIXED = new Symbol.Terminal("fixed");
public static final Symbol ENUM = new Symbol.Terminal("enum");
public static final Symbol UNION = new Symbol.Terminal("union");
public static final Symbol ARRAY_START = new Symbol.Terminal("array-start");
public static final Symbol ARRAY_END = new Symbol.Terminal("array-end");
public static final Symbol MAP_START = new Symbol.Terminal("map-start");
public static final Symbol MAP_END = new Symbol.Terminal("map-end");
public static final Symbol ITEM_END = new Symbol.Terminal("item-end");
/* a shared WriterUnionAction instance */
public static final Symbol WRITER_UNION_ACTION = writerUnionAction();
/* a pseudo terminal used by parsers */
public static final Symbol FIELD_ACTION = new Symbol.Terminal("field-action");
/* a non-trailing implicit action (isTrailing == false) */
public static final Symbol RECORD_START = new ImplicitAction(false);
/* the following four are trailing implicit actions (isTrailing == true) */
public static final Symbol RECORD_END = new ImplicitAction(true);
public static final Symbol UNION_END = new ImplicitAction(true);
public static final Symbol FIELD_END = new ImplicitAction(true);
public static final Symbol DEFAULT_END_ACTION = new ImplicitAction(true);
public static final Symbol MAP_KEY_MARKER = new Symbol.Terminal("map-key-marker");
}