All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.cdap.wrangler.parser.RecipeVisitor Maven / Gradle / Ivy

There is a newer version: 4.10.1
Show newest version
/*
 * Copyright © 2017-2019 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.wrangler.parser;

import io.cdap.wrangler.api.LazyNumber;
import io.cdap.wrangler.api.RecipeSymbol;
import io.cdap.wrangler.api.SourceInfo;
import io.cdap.wrangler.api.Triplet;
import io.cdap.wrangler.api.parser.Bool;
import io.cdap.wrangler.api.parser.BoolList;
import io.cdap.wrangler.api.parser.ColumnName;
import io.cdap.wrangler.api.parser.ColumnNameList;
import io.cdap.wrangler.api.parser.DirectiveName;
import io.cdap.wrangler.api.parser.Expression;
import io.cdap.wrangler.api.parser.Identifier;
import io.cdap.wrangler.api.parser.Numeric;
import io.cdap.wrangler.api.parser.NumericList;
import io.cdap.wrangler.api.parser.Properties;
import io.cdap.wrangler.api.parser.Ranges;
import io.cdap.wrangler.api.parser.Text;
import io.cdap.wrangler.api.parser.TextList;
import io.cdap.wrangler.api.parser.Token;
import org.antlr.v4.runtime.ParserRuleContext;
import org.antlr.v4.runtime.misc.Interval;
import org.antlr.v4.runtime.tree.ParseTree;
import org.antlr.v4.runtime.tree.TerminalNode;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

/**
 * This class RecipeVisitor implements the visitor pattern
 * used during traversal of the AST tree. The ParserTree#Walker
 * invokes appropriate methods as call backs with information about the node.
 *
 * 

In order to understand what's being invoked, please look at the grammar file * Directive.g4

. * *

This class exposes a getTokenGroups method for retrieving the * RecipeSymbol after visiting. The RecipeSymbol represents * all the TokenGroup for all directives in a recipe. Each directive * will create a TokenGroup

* *

As the ParseTree is walking through the call graph, it generates * one TokenGroup for each directive in the recipe. Each TokenGroup * contains parsed Tokens for that directive along with more information like * SourceInfo. A collection of TokenGroup consistutes a RecipeSymbol * that is returned by this function.

*/ public final class RecipeVisitor extends DirectivesBaseVisitor { private RecipeSymbol.Builder builder = new RecipeSymbol.Builder(); /** * Returns a RecipeSymbol for the recipe being parsed. This * object has all the tokens that were successfully parsed along with source * information for each directive in the recipe. * * @return An compiled object after parsing the recipe. */ public RecipeSymbol getCompiledUnit() { return builder.build(); } /** * A Recipe is made up of Directives and Directives is made up of each individual * Directive. This method is invoked on every visit to a new directive in the recipe. */ @Override public RecipeSymbol.Builder visitDirective(DirectivesParser.DirectiveContext ctx) { builder.createTokenGroup(getOriginalSource(ctx)); return super.visitDirective(ctx); } /** * A Directive can include identifiers, this method extracts that token that is being * identified as token of type Identifier. */ @Override public RecipeSymbol.Builder visitIdentifier(DirectivesParser.IdentifierContext ctx) { builder.addToken(new Identifier(ctx.Identifier().getText())); return super.visitIdentifier(ctx); } /** * A Directive can include properties (which are a collection of key and value pairs), * this method extracts that token that is being identified as token of type Properties. */ @Override public RecipeSymbol.Builder visitPropertyList(DirectivesParser.PropertyListContext ctx) { Map props = new HashMap<>(); List properties = ctx.property(); for (DirectivesParser.PropertyContext property : properties) { String identifier = property.Identifier().getText(); Token token; if (property.number() != null) { token = new Numeric(new LazyNumber(property.number().getText())); } else if (property.bool() != null) { token = new Bool(Boolean.valueOf(property.bool().getText())); } else { String text = property.text().getText(); token = new Text(text.substring(1, text.length() - 1)); } props.put(identifier, token); } builder.addToken(new Properties(props)); return builder; } /** * A Pragma is an instruction to the compiler to dynamically load the directives being specified * from the DirectiveRegistry. These do not affect the data flow. * *

E.g. #pragma load-directives test1, test2, test3; will collect the tokens * test1, test2 and test3 as dynamically loadable directives.

*/ @Override public RecipeSymbol.Builder visitPragmaLoadDirective(DirectivesParser.PragmaLoadDirectiveContext ctx) { List identifiers = ctx.identifierList().Identifier(); for (TerminalNode identifier : identifiers) { builder.addLoadableDirective(identifier.getText()); } return builder; } /** * A Pragma version is a informational directive to notify compiler about the grammar that is should * be using to parse the directives below. */ @Override public RecipeSymbol.Builder visitPragmaVersion(DirectivesParser.PragmaVersionContext ctx) { builder.addVersion(ctx.Number().getText()); return builder; } /** * A Directive can include number ranges like start:end=value[,start:end=value]*. This * visitor method allows you to collect all the number ranges and create a token type * Ranges. */ @Override public RecipeSymbol.Builder visitNumberRanges(DirectivesParser.NumberRangesContext ctx) { List> output = new ArrayList<>(); List ranges = ctx.numberRange(); for (DirectivesParser.NumberRangeContext range : ranges) { List numbers = range.Number(); String text = range.value().getText(); if (text.startsWith("'") && text.endsWith("'")) { text = text.substring(1, text.length() - 1); } Triplet val = new Triplet<>(new Numeric(new LazyNumber(numbers.get(0).getText())), new Numeric(new LazyNumber(numbers.get(1).getText())), text ); output.add(val); } builder.addToken(new Ranges(output)); return builder; } /** * This visitor method extracts the custom directive name specified. The custom * directives are specified with a bang (!) at the start. */ @Override public RecipeSymbol.Builder visitEcommand(DirectivesParser.EcommandContext ctx) { builder.addToken(new DirectiveName(ctx.Identifier().getText())); return builder; } /** * A Directive can consist of column specifiers. These are columns that the directive * would operate on. When a token of type column is visited, it would generate a token * type of type ColumnName. */ @Override public RecipeSymbol.Builder visitColumn(DirectivesParser.ColumnContext ctx) { builder.addToken(new ColumnName(ctx.Column().getText().substring(1))); return builder; } /** * A Directive can consist of text field. These type of fields are enclosed within * a single-quote or a double-quote. This visitor method extracts the string value * within the quotes and creates a token type Text. */ @Override public RecipeSymbol.Builder visitText(DirectivesParser.TextContext ctx) { String value = ctx.String().getText(); builder.addToken(new Text(value.substring(1, value.length() - 1))); return builder; } /** * A Directive can consist of numeric field. This visitor method extracts the * numeric value Numeric. */ @Override public RecipeSymbol.Builder visitNumber(DirectivesParser.NumberContext ctx) { LazyNumber number = new LazyNumber(ctx.Number().getText()); builder.addToken(new Numeric(number)); return builder; } /** * A Directive can consist of Bool field. The Bool field is represented as * either true or false. This visitor method extract the bool value into a * token type Bool. */ @Override public RecipeSymbol.Builder visitBool(DirectivesParser.BoolContext ctx) { builder.addToken(new Bool(Boolean.valueOf(ctx.Bool().getText()))); return builder; } /** * A Directive can include a expression or a condition to be evaluated. When * such a token type is found, the visitor extracts the expression and generates * a token type Expression to be added to the TokenGroup */ @Override public RecipeSymbol.Builder visitCondition(DirectivesParser.ConditionContext ctx) { int childCount = ctx.getChildCount(); StringBuilder sb = new StringBuilder(); for (int i = 1; i < childCount - 1; ++i) { ParseTree child = ctx.getChild(i); sb.append(child.getText()).append(" "); } builder.addToken(new Expression(sb.toString())); return builder; } /** * A Directive has name and in the parsing context it's called a command. * This visitor methods extracts the command and creates a toke type DirectiveName */ @Override public RecipeSymbol.Builder visitCommand(DirectivesParser.CommandContext ctx) { builder.addToken(new DirectiveName(ctx.Identifier().getText())); return builder; } /** * This visitor methods extracts the list of columns specified. It creates a token * type ColumnNameList to be added to TokenGroup. */ @Override public RecipeSymbol.Builder visitColList(DirectivesParser.ColListContext ctx) { List columns = ctx.Column(); List names = new ArrayList<>(); for (TerminalNode column : columns) { names.add(column.getText().substring(1)); } builder.addToken(new ColumnNameList(names)); return builder; } /** * This visitor methods extracts the list of numeric specified. It creates a token * type NumericList to be added to TokenGroup. */ @Override public RecipeSymbol.Builder visitNumberList(DirectivesParser.NumberListContext ctx) { List numbers = ctx.Number(); List numerics = new ArrayList<>(); for (TerminalNode number : numbers) { numerics.add(new LazyNumber(number.getText())); } builder.addToken(new NumericList(numerics)); return builder; } /** * This visitor methods extracts the list of booleans specified. It creates a token * type BoolList to be added to TokenGroup. */ @Override public RecipeSymbol.Builder visitBoolList(DirectivesParser.BoolListContext ctx) { List bools = ctx.Bool(); List booleans = new ArrayList<>(); for (TerminalNode bool : bools) { booleans.add(Boolean.parseBoolean(bool.getText())); } builder.addToken(new BoolList(booleans)); return builder; } /** * This visitor methods extracts the list of strings specified. It creates a token * type StringList to be added to TokenGroup. */ @Override public RecipeSymbol.Builder visitStringList(DirectivesParser.StringListContext ctx) { List strings = ctx.String(); List strs = new ArrayList<>(); for (TerminalNode string : strings) { String text = string.getText(); strs.add(text.substring(1, text.length() - 1)); } builder.addToken(new TextList(strs)); return builder; } private SourceInfo getOriginalSource(ParserRuleContext ctx) { int a = ctx.getStart().getStartIndex(); int b = ctx.getStop().getStopIndex(); Interval interval = new Interval(a, b); String text = ctx.start.getInputStream().getText(interval); int lineno = ctx.getStart().getLine(); int column = ctx.getStart().getCharPositionInLine(); return new SourceInfo(lineno, column, text); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy