io.cdap.directives.transformation.ColumnExpression Maven / Gradle / Ivy
/*
* Copyright © 2017-2019 Cask Data, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package io.cdap.directives.transformation;
import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.wrangler.api.Arguments;
import io.cdap.wrangler.api.Directive;
import io.cdap.wrangler.api.DirectiveExecutionException;
import io.cdap.wrangler.api.DirectiveParseException;
import io.cdap.wrangler.api.ExecutorContext;
import io.cdap.wrangler.api.Row;
import io.cdap.wrangler.api.annotations.Categories;
import io.cdap.wrangler.api.lineage.Lineage;
import io.cdap.wrangler.api.lineage.Many;
import io.cdap.wrangler.api.lineage.Mutation;
import io.cdap.wrangler.api.parser.ColumnName;
import io.cdap.wrangler.api.parser.Expression;
import io.cdap.wrangler.api.parser.TokenType;
import io.cdap.wrangler.api.parser.UsageDefinition;
import io.cdap.wrangler.expression.EL;
import io.cdap.wrangler.expression.ELContext;
import io.cdap.wrangler.expression.ELException;
import io.cdap.wrangler.expression.ELResult;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * A directive that evaluates an expression and stores the result in a column.
 *
 * <p>Expressions are written in JEXL format
 * (http://commons.apache.org/proper/commons-jexl/reference/syntax.html). Exactly one expression is
 * evaluated for each {@link Row} that is passed in. The result of the expression either adds a new
 * column or replaces the value of the existing column.
 *
 * <p>The directive takes two arguments, in order: the target column and the expression, for example
 * an expression such as:
 * <pre>{@code if (age > 24 ) { 'adult' } else { 'teen' }}</pre>
 */
@Plugin(type = Directive.TYPE)
@Name(ColumnExpression.NAME)
@Categories(categories = { "transform"})
@Description("Sets a column by evaluating a JEXL expression.")
public class ColumnExpression implements Directive, Lineage {
  public static final String NAME = "set-column";
  // Column to which the result of the expression is applied.
  private String column;
  // The actual expression, kept as text for lineage reporting.
  private String expression;
  // Properties associated with pipeline.
  // NOTE(review): not read or written anywhere in this class; key/value types assumed - confirm.
  private final Map<String, Object> properties = new HashMap<>();
  // Expression engine; holds the compiled form of the expression after initialize().
  private final EL el = new EL(new EL.DefaultFunctions());

  /**
   * Declares the arguments accepted by this directive: a column name followed by an expression.
   *
   * @return the usage definition for {@code set-column}
   */
  @Override
  public UsageDefinition define() {
    UsageDefinition.Builder builder = UsageDefinition.builder(NAME);
    builder.define("column", TokenType.COLUMN_NAME);
    builder.define("expression", TokenType.EXPRESSION);
    return builder.build();
  }

  /**
   * Reads the target column and the expression from the parsed arguments and compiles the
   * expression so that it is ready to be executed against rows.
   *
   * @param args parsed directive arguments; must provide "column" and "expression"
   * @throws DirectiveParseException if the expression fails to compile
   */
  @Override
  public void initialize(Arguments args) throws DirectiveParseException {
    this.column = ((ColumnName) args.value("column")).value();
    this.expression = ((Expression) args.value("expression")).value();
    try {
      el.compile(expression);
    } catch (ELException e) {
      throw new DirectiveParseException(NAME, e.getMessage(), e);
    }
  }

  @Override
  public void destroy() {
    // no-op
  }

  /**
   * Evaluates the compiled expression once per row and writes the result into the target column,
   * adding the column when the row does not already contain it.
   *
   * @param rows rows to transform; each row is modified in place
   * @param context execution context, may be {@code null}; when present its transient variables are
   *                made available to the expression
   * @return the same list instance that was passed in
   * @throws DirectiveExecutionException if evaluating the expression fails for any row
   */
  @Override
  public List<Row> execute(List<Row> rows, ExecutorContext context) throws DirectiveExecutionException {
    for (Row row : rows) {
      // Move the fields from the row into the context. The row itself is exposed as 'this'.
      ELContext ctx = new ELContext(context);
      ctx.set("this", row);
      for (String name : el.variables()) {
        ctx.set(name, row.getValue(name));
      }

      // Transient variables are added after the row fields.
      if (context != null) {
        for (String variable : context.getTransientStore().getVariables()) {
          ctx.set(variable, context.getTransientStore().get(variable));
        }
      }

      // Execution of the script / expression based on the row data
      // mapped into context.
      try {
        ELResult result = el.execute(ctx);
        int idx = row.find(this.column);
        if (idx == -1) {
          // Target column is absent from this row: append it.
          row.add(this.column, result.getObject());
        } else {
          row.setValue(idx, result.getObject());
        }
      } catch (ELException e) {
        throw new DirectiveExecutionException(NAME, e.getMessage(), e);
      }
    }
    return rows;
  }

  /**
   * Describes the lineage of this directive: every variable used by the expression is related to
   * the target column, and every such variable other than the target column is also related to
   * itself.
   *
   * @return the mutation describing the column relations
   */
  @Override
  public Mutation lineage() {
    Mutation.Builder builder = Mutation.builder()
      .readable("Mapped result of expression '%s' to column '%s'", expression, column);
    builder.relation(Many.of(el.variables()), column);
    el.variables().forEach(col -> {
      // The target column's relation is already recorded above.
      if (!col.equals(column)) {
        builder.relation(col, col);
      }
    });
    return builder.build();
  }
}