All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
io.cdap.wrangler.parser.SimpleTextParser Maven / Gradle / Ivy
/*
* Copyright © 2016-2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.wrangler.parser;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveContext;
import io.cdap.wrangler.api.DirectiveParseException;
import io.cdap.wrangler.api.Executor;
import io.cdap.wrangler.api.RecipeParser;
import io.cdap.wrangler.api.RecipePipeline;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;
/**
* Parses the DSL into specification containing stepRegistry for wrangling.
*
* Following are some of the commands and format that {@link SimpleTextParser}
* will handle.
*/
public class SimpleTextParser implements RecipeParser {
private static final Logger LOG = LoggerFactory.getLogger(SimpleTextParser.class);
// directives for wrangling.
private String[] directives;
// Usage Registry
private static final UsageRegistry usageRegistry = new UsageRegistry();
// Specifies the context for directive parsing.
private DirectiveContext context;
public SimpleTextParser(String[] directives) {
this.directives = directives;
this.context = new NoOpDirectiveContext();
}
public SimpleTextParser(String directives) {
this(directives.split("\n"));
}
public SimpleTextParser(List directives) {
this(directives.toArray(new String[directives.size()]));
}
/**
* Parses the DSL to generate a sequence of stepRegistry to be executed by {@link RecipePipeline}.
*
* The transformation parsing here needs a better solution. It has many limitations and having different way would
* allow us to provide much more advanced semantics for directives.
*
* @return List of stepRegistry to be executed.
* @throws ParseException
*/
@Override
public List parse() throws DirectiveParseException {
List directives = new ArrayList<>();
// Split directive by EOL
int lineno = 1;
// Iterate through each directive and create necessary stepRegistry.
for (String directive : this.directives) {
directive = directive.trim();
if (directive.isEmpty() || directive.startsWith("//") || directive.startsWith("#")) {
continue;
}
StringTokenizer tokenizer = new StringTokenizer(directive, " ");
String command = tokenizer.nextToken();
// Check if a directive has been aliased and if it's aliased then retrieve root command it's mapped
// to.
String root = command;
if (context.hasAlias(root)) {
root = context.getAlias(command);
}
// Checks if the directive has been excluded from being used.
if (!root.equals(command) && context.isExcluded(command)) {
throw new DirectiveParseException(
command, String.format("Aliased directive '%s' has been configured as restricted directive and "
+ "is hence unavailable. Please contact your administrator", command)
);
}
if (context.isExcluded(root)) {
throw new DirectiveParseException(
command, String.format("Executor '%s' has been configured as restricted directive and is hence " +
"unavailable. Please contact your administrator", command));
}
}
return directives;
}
// If there are more tokens, then it proceeds with parsing, else throws exception.
public static String getNextToken(StringTokenizer tokenizer, String directive,
String field, int lineno) throws DirectiveParseException {
return getNextToken(tokenizer, null, directive, field, lineno, false);
}
public static String getNextToken(StringTokenizer tokenizer, String delimiter,
String directive, String field, int lineno) throws DirectiveParseException {
return getNextToken(tokenizer, delimiter, directive, field, lineno, false);
}
public static String getNextToken(StringTokenizer tokenizer, String delimiter,
String directive, String field, int lineno, boolean optional)
throws DirectiveParseException {
String value = null;
if (tokenizer.hasMoreTokens()) {
if (delimiter == null) {
value = tokenizer.nextToken().trim();
} else {
value = tokenizer.nextToken(delimiter).trim();
}
} else {
if (!optional) {
String usage = usageRegistry.getUsage(directive);
throw new DirectiveParseException(
directive, String.format("Missing field '%s' at line number %d (usage: %s)", field, lineno, usage));
}
}
return value;
}
/**
* Initialises the directive with a {@link DirectiveContext}.
*
* @param context
*/
@Override
public void initialize(DirectiveContext context) {
if (context == null) {
this.context = new NoOpDirectiveContext();
} else {
this.context = context;
}
}
}