// io.cdap.wrangler.parser.RecipeVisitor Maven / Gradle / Ivy
/*
* Copyright © 2017-2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.wrangler.parser;
import io.cdap.wrangler.api.LazyNumber;
import io.cdap.wrangler.api.RecipeSymbol;
import io.cdap.wrangler.api.SourceInfo;
import io.cdap.wrangler.api.Triplet;
import io.cdap.wrangler.api.parser.Bool;
import io.cdap.wrangler.api.parser.BoolList;
import io.cdap.wrangler.api.parser.ColumnName;
import io.cdap.wrangler.api.parser.ColumnNameList;
import io.cdap.wrangler.api.parser.DirectiveName;
import io.cdap.wrangler.api.parser.Expression;
import io.cdap.wrangler.api.parser.Identifier;
import io.cdap.wrangler.api.parser.Numeric;
import io.cdap.wrangler.api.parser.NumericList;
import io.cdap.wrangler.api.parser.Properties;
import io.cdap.wrangler.api.parser.Ranges;
import io.cdap.wrangler.api.parser.Text;
import io.cdap.wrangler.api.parser.TextList;
import io.cdap.wrangler.api.parser.Token;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
* This class RecipeVisitor
implements the visitor pattern
* used during traversal of the AST tree. The ParserTree#Walker
* invokes appropriate methods as call backs with information about the node.
*
* In order to understand what's being invoked, please look at the grammar file
* Directive.g4
.
*
* This class exposes a getTokenGroups
method for retrieving the
* RecipeSymbol
after visiting. The RecipeSymbol
represents
* all the TokenGroup
for all directives in a recipe. Each directive
* will create a TokenGroup
*
* As the ParseTree
is walking through the call graph, it generates
* one TokenGroup
for each directive in the recipe. Each TokenGroup
* contains parsed Tokens
for that directive along with more information like
* SourceInfo
. A collection of TokenGroup
consistutes a RecipeSymbol
* that is returned by this function.
*/
public final class RecipeVisitor extends DirectivesBaseVisitor {
private RecipeSymbol.Builder builder = new RecipeSymbol.Builder();
/**
* Returns a RecipeSymbol
for the recipe being parsed. This
* object has all the tokens that were successfully parsed along with source
* information for each directive in the recipe.
*
* @return An compiled object after parsing the recipe.
*/
public RecipeSymbol getCompiledUnit() {
return builder.build();
}
/**
* A Recipe is made up of Directives and Directives is made up of each individual
* Directive. This method is invoked on every visit to a new directive in the recipe.
*/
@Override
public RecipeSymbol.Builder visitDirective(DirectivesParser.DirectiveContext ctx) {
builder.createTokenGroup(getOriginalSource(ctx));
return super.visitDirective(ctx);
}
/**
* A Directive can include identifiers, this method extracts that token that is being
* identified as token of type Identifier
.
*/
@Override
public RecipeSymbol.Builder visitIdentifier(DirectivesParser.IdentifierContext ctx) {
builder.addToken(new Identifier(ctx.Identifier().getText()));
return super.visitIdentifier(ctx);
}
/**
* A Directive can include properties (which are a collection of key and value pairs),
* this method extracts that token that is being identified as token of type Properties
.
*/
@Override
public RecipeSymbol.Builder visitPropertyList(DirectivesParser.PropertyListContext ctx) {
Map props = new HashMap<>();
List properties = ctx.property();
for (DirectivesParser.PropertyContext property : properties) {
String identifier = property.Identifier().getText();
Token token;
if (property.number() != null) {
token = new Numeric(new LazyNumber(property.number().getText()));
} else if (property.bool() != null) {
token = new Bool(Boolean.valueOf(property.bool().getText()));
} else {
String text = property.text().getText();
token = new Text(text.substring(1, text.length() - 1));
}
props.put(identifier, token);
}
builder.addToken(new Properties(props));
return builder;
}
/**
* A Pragma is an instruction to the compiler to dynamically load the directives being specified
* from the DirectiveRegistry
. These do not affect the data flow.
*
* E.g. #pragma load-directives test1, test2, test3;
will collect the tokens
* test1, test2 and test3 as dynamically loadable directives.
*/
@Override
public RecipeSymbol.Builder visitPragmaLoadDirective(DirectivesParser.PragmaLoadDirectiveContext ctx) {
List identifiers = ctx.identifierList().Identifier();
for (TerminalNode identifier : identifiers) {
builder.addLoadableDirective(identifier.getText());
}
return builder;
}
/**
* A Pragma version is a informational directive to notify compiler about the grammar that is should
* be using to parse the directives below.
*/
@Override
public RecipeSymbol.Builder visitPragmaVersion(DirectivesParser.PragmaVersionContext ctx) {
builder.addVersion(ctx.Number().getText());
return builder;
}
/**
* A Directive can include number ranges like start:end=value[,start:end=value]*. This
* visitor method allows you to collect all the number ranges and create a token type
* Ranges
.
*/
@Override
public RecipeSymbol.Builder visitNumberRanges(DirectivesParser.NumberRangesContext ctx) {
List> output = new ArrayList<>();
List ranges = ctx.numberRange();
for (DirectivesParser.NumberRangeContext range : ranges) {
List numbers = range.Number();
String text = range.value().getText();
if (text.startsWith("'") && text.endsWith("'")) {
text = text.substring(1, text.length() - 1);
}
Triplet val =
new Triplet<>(new Numeric(new LazyNumber(numbers.get(0).getText())),
new Numeric(new LazyNumber(numbers.get(1).getText())),
text
);
output.add(val);
}
builder.addToken(new Ranges(output));
return builder;
}
/**
* This visitor method extracts the custom directive name specified. The custom
* directives are specified with a bang (!) at the start.
*/
@Override
public RecipeSymbol.Builder visitEcommand(DirectivesParser.EcommandContext ctx) {
builder.addToken(new DirectiveName(ctx.Identifier().getText()));
return builder;
}
/**
* A Directive can consist of column specifiers. These are columns that the directive
* would operate on. When a token of type column is visited, it would generate a token
* type of type ColumnName
.
*/
@Override
public RecipeSymbol.Builder visitColumn(DirectivesParser.ColumnContext ctx) {
builder.addToken(new ColumnName(ctx.Column().getText().substring(1)));
return builder;
}
/**
* A Directive can consist of text field. These type of fields are enclosed within
* a single-quote or a double-quote. This visitor method extracts the string value
* within the quotes and creates a token type Text
.
*/
@Override
public RecipeSymbol.Builder visitText(DirectivesParser.TextContext ctx) {
String value = ctx.String().getText();
builder.addToken(new Text(value.substring(1, value.length() - 1)));
return builder;
}
/**
* A Directive can consist of numeric field. This visitor method extracts the
* numeric value Numeric
.
*/
@Override
public RecipeSymbol.Builder visitNumber(DirectivesParser.NumberContext ctx) {
LazyNumber number = new LazyNumber(ctx.Number().getText());
builder.addToken(new Numeric(number));
return builder;
}
/**
* A Directive can consist of Bool field. The Bool field is represented as
* either true or false. This visitor method extract the bool value into a
* token type Bool
.
*/
@Override
public RecipeSymbol.Builder visitBool(DirectivesParser.BoolContext ctx) {
builder.addToken(new Bool(Boolean.valueOf(ctx.Bool().getText())));
return builder;
}
/**
* A Directive can include a expression or a condition to be evaluated. When
* such a token type is found, the visitor extracts the expression and generates
* a token type Expression
to be added to the TokenGroup
*/
@Override
public RecipeSymbol.Builder visitCondition(DirectivesParser.ConditionContext ctx) {
int childCount = ctx.getChildCount();
StringBuilder sb = new StringBuilder();
for (int i = 1; i < childCount - 1; ++i) {
ParseTree child = ctx.getChild(i);
sb.append(child.getText()).append(" ");
}
builder.addToken(new Expression(sb.toString()));
return builder;
}
/**
* A Directive has name and in the parsing context it's called a command.
* This visitor methods extracts the command and creates a toke type DirectiveName
*/
@Override
public RecipeSymbol.Builder visitCommand(DirectivesParser.CommandContext ctx) {
builder.addToken(new DirectiveName(ctx.Identifier().getText()));
return builder;
}
/**
* This visitor methods extracts the list of columns specified. It creates a token
* type ColumnNameList
to be added to TokenGroup
.
*/
@Override
public RecipeSymbol.Builder visitColList(DirectivesParser.ColListContext ctx) {
List columns = ctx.Column();
List names = new ArrayList<>();
for (TerminalNode column : columns) {
names.add(column.getText().substring(1));
}
builder.addToken(new ColumnNameList(names));
return builder;
}
/**
* This visitor methods extracts the list of numeric specified. It creates a token
* type NumericList
to be added to TokenGroup
.
*/
@Override
public RecipeSymbol.Builder visitNumberList(DirectivesParser.NumberListContext ctx) {
List numbers = ctx.Number();
List numerics = new ArrayList<>();
for (TerminalNode number : numbers) {
numerics.add(new LazyNumber(number.getText()));
}
builder.addToken(new NumericList(numerics));
return builder;
}
/**
* This visitor methods extracts the list of booleans specified. It creates a token
* type BoolList
to be added to TokenGroup
.
*/
@Override
public RecipeSymbol.Builder visitBoolList(DirectivesParser.BoolListContext ctx) {
List bools = ctx.Bool();
List booleans = new ArrayList<>();
for (TerminalNode bool : bools) {
booleans.add(Boolean.parseBoolean(bool.getText()));
}
builder.addToken(new BoolList(booleans));
return builder;
}
/**
* This visitor methods extracts the list of strings specified. It creates a token
* type StringList
to be added to TokenGroup
.
*/
@Override
public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext ctx) {
List strings = ctx.String();
List strs = new ArrayList<>();
for (TerminalNode string : strings) {
String text = string.getText();
strs.add(text.substring(1, text.length() - 1));
}
builder.addToken(new TextList(strs));
return builder;
}
private SourceInfo getOriginalSource(ParserRuleContext ctx) {
int a = ctx.getStart().getStartIndex();
int b = ctx.getStop().getStopIndex();
Interval interval = new Interval(a, b);
String text = ctx.start.getInputStream().getText(interval);
int lineno = ctx.getStart().getLine();
int column = ctx.getStart().getCharPositionInLine();
return new SourceInfo(lineno, column, text);
}
}