/*
 * Source listing of com.exasol.adapter.document.UdfCallBuilder
 * (Maven / Gradle / Ivy artifact virtual-schema-common-document, newest version).
 *
 * Common module of Exasol Virtual Schema Adapters for Document Data Sources.
 */
package com.exasol.adapter.document;
import static com.exasol.sql.expression.ExpressionTerm.column;
import static com.exasol.sql.expression.ExpressionTerm.stringLiteral;
import static com.exasol.utils.StringSerializer.serializeToString;
import java.io.IOException;
import java.util.Comparator;
import java.util.List;
import java.util.logging.Logger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import com.exasol.adapter.document.documentfetcher.DocumentFetcher;
import com.exasol.adapter.document.mapping.ColumnMapping;
import com.exasol.adapter.document.mapping.SchemaMappingRequest;
import com.exasol.adapter.document.queryplan.*;
import com.exasol.adapter.document.queryplanning.RemoteTableQuery;
import com.exasol.adapter.document.querypredicate.*;
import com.exasol.adapter.metadata.DataType;
import com.exasol.datatype.type.*;
import com.exasol.datatype.type.Boolean;
import com.exasol.errorreporting.ExaError;
import com.exasol.sql.*;
import com.exasol.sql.dql.select.Select;
import com.exasol.sql.dql.select.rendering.SelectRenderer;
import com.exasol.sql.expression.BooleanExpression;
import com.exasol.sql.expression.function.exasol.CastExasolFunction;
import com.exasol.sql.expression.literal.BooleanLiteral;
import com.exasol.sql.expression.literal.NullLiteral;
import com.exasol.sql.rendering.ColumnsDefinitionRenderer;
import com.exasol.sql.rendering.StringRendererConfig;
/**
 * This class builds the push down SQL statement with a UDF call to {@link GenericUdfCallHandler}.
 * <p>
 * For a {@link FetchQueryPlan} the push down statement consists of cascaded statements: an outer {@code SELECT} that
 * applies the projection and the post selection, an inner {@code SELECT} that calls the UDF, and a value table that
 * provides one row per document fetcher.
 * </p>
 * <p>
 * Consider the following schematic example (the exact rendering is done by the SQL statement builder):
 * </p>
 *
 * <pre>
 * SELECT COL1 FROM (
 *
 *   SELECT UDF(DATA_LOADER, REQUEST, CONNECTION) EMITS (COL1, COL2)
 *   FROM VALUES ((v1, 0), (v2, 1), (v3, 2)) AS T(DATA_LOADER, FRAGMENT_ID)
 *   GROUP BY FRAGMENT_ID
 *
 * ) WHERE COL2 = X
 * </pre>
 * <p>
 * For an {@link EmptyQueryPlan} a statement is built that selects a single row of typed {@code NULL} casts and
 * filters it with a constant {@code FALSE} condition.
 * </p>
 */
public class UdfCallBuilder {
    private static final Logger LOG = Logger.getLogger(UdfCallBuilder.class.getName());
    /** Name of the value table column that carries the serialized document fetcher. */
    private static final String DATA_LOADER_COLUMN = "DATA_LOADER";
    /** Name of the value table column that carries the row counter used in the {@code GROUP BY} clause. */
    private static final String FRAGMENT_ID_COLUMN = "FRAGMENT_ID";
    private final String connectionName;
    private final String adapterSchema;
    private final String adapterName;

    /**
     * Create an instance of {@link UdfCallBuilder}.
     *
     * @param connectionName connectionName that is passed to the UDF
     * @param adapterSchema  schema of the adapter
     * @param adapterName    name of the adapter
     */
    public UdfCallBuilder(final String connectionName, final String adapterSchema, final String adapterName) {
        this.connectionName = connectionName;
        this.adapterSchema = adapterSchema;
        this.adapterName = adapterName;
    }

    /**
     * Build the push down SQL statement with a UDF call to {@link GenericUdfCallHandler}.
     * <p>
     * For a fetch query plan each document fetcher gets its own row in a value table; the serialized schema mapping
     * request and the connection name are passed as literal arguments of the UDF call and are therefore the same for
     * all rows. For an {@link EmptyQueryPlan} no UDF call is generated at all.
     * </p>
     *
     * @param queryPlan plan for the query
     * @param query     document query that is passed to the UDF
     * @return built SQL statement
     */
    public String getUdfCallSql(final QueryPlan queryPlan, final RemoteTableQuery query) {
        final List<ColumnMapping> selectList = query.getSelectList();
        if (queryPlan instanceof EmptyQueryPlan) {
            LOG.fine("Build pushdown query for empty query plan");
            return renderStatement(buildEmptyResultStatement(selectList));
        }
        // Every plan that is not an EmptyQueryPlan is treated as a FetchQueryPlan.
        final FetchQueryPlan fetchPlan = (FetchQueryPlan) queryPlan;
        LOG.fine("Build pushdown query for fetch query query plan");
        final Select udfCallStatement = buildUdfCallStatement(query, fetchPlan);
        final Select pushDownSelect = wrapStatementInStatementWithPostSelectionAndProjection(selectList,
                fetchPlan.getPostSelection(), udfCallStatement);
        return renderStatement(pushDownSelect);
    }

    /**
     * Build a {@code SELECT *} statement over a value table with a single row of typed {@code NULL} casts (one per
     * selected column) and a constant {@code FALSE} filter.
     *
     * @param selectList columns of the select list
     * @return built statement
     */
    private Select buildEmptyResultStatement(final List<ColumnMapping> selectList) {
        final Select select = StatementFactory.getInstance().select().all();
        final ValueTableRow.Builder valueTableRow = ValueTableRow.builder(select);
        valueTableRow.add(selectList.stream() //
                .map(this::convertColumnForNullCast) //
                .collect(Collectors.toList()));
        select.from().valueTable(new ValueTable(select).appendRow(valueTableRow.build()));
        select.where(BooleanLiteral.of(false));
        return select;
    }

    /**
     * Create a {@code CAST(NULL AS <type>)} expression using the converted Exasol data type of the given column.
     *
     * @param column column mapping that defines the target data type
     * @return cast expression
     */
    private CastExasolFunction convertColumnForNullCast(final ColumnMapping column) {
        final com.exasol.datatype.type.DataType convertedType = convertDataType(column.getExasolDataType());
        LOG.fine(() -> "Using NULL CAST to " + renderDataType(convertedType) + " (" + convertedType + ") for column "
                + column.getExasolColumnName() + "(" + column.getExasolDataType() + ")");
        return CastExasolFunction.of(NullLiteral.nullLiteral(), convertedType);
    }

    /**
     * Wrap the given {@code SELECT} statement in a new {@code SELECT} statement that adds the post selection as
     * {@code WHERE} clause and the projection as {@code SELECT} clause.
     *
     * @implNote The post selection can't be applied directly to statement containing the UDF calls as Exasol does not
     *           recognize the column names correctly in the same statement.
     *
     * @param selectList         columns to project in the outer statement
     * @param postSelection      predicate that is converted into the {@code WHERE} clause
     * @param doubleNestedSelect statement that is used as source ({@code FROM}) of the new statement
     * @return wrapping statement
     */
    private Select wrapStatementInStatementWithPostSelectionAndProjection(final List<ColumnMapping> selectList,
            final QueryPredicate postSelection, final Select doubleNestedSelect) {
        final Select statement = getSelectForColumns(selectList);
        statement.from().select(doubleNestedSelect);
        final BooleanExpression whereClause = new QueryPredicateToBooleanExpressionConverter().convert(postSelection);
        statement.where(whereClause);
        return statement;
    }

    /**
     * Create a {@code SELECT} statement that has one field per given column, named by the Exasol column name.
     *
     * @param selectList columns to select
     * @return statement with the fields added
     */
    private Select getSelectForColumns(final List<ColumnMapping> selectList) {
        final Select select = StatementFactory.getInstance().select();
        for (final ColumnMapping columnMapping : selectList) {
            select.field(columnMapping.getExasolColumnName());
        }
        return select;
    }

    /**
     * Build the {@code SELECT} statement that contains the call to the UDF and distributes them using a GROUP BY
     * statement.
     *
     * @param query     query that provides the source table and the select list
     * @param queryPlan plan that provides the document fetchers and the post selection
     * @return statement containing the UDF call
     */
    private Select buildUdfCallStatement(final RemoteTableQuery query, final FetchQueryPlan queryPlan) {
        final Select udfCallSelect = StatementFactory.getInstance().select();
        final List<ColumnMapping> requiredColumns = getRequiredColumns(query, queryPlan);
        final List<Column> emitsColumns = buildColumnDefinitions(requiredColumns, udfCallSelect);
        final SchemaMappingRequest schemaMappingRequest = new SchemaMappingRequest(
                query.getFromTable().getPathInRemoteTable(), requiredColumns);
        final String serializedSchemaMappingRequest = serializeSchemaMappingRequest(schemaMappingRequest);
        // The schema name is quoted explicitly here; the UDF name is the common prefix followed by the adapter name.
        udfCallSelect.udf("\"" + this.adapterSchema + "\"." + GenericUdfCallHandler.UDF_PREFIX + this.adapterName,
                new ColumnsDefinition(emitsColumns), column(DATA_LOADER_COLUMN),
                stringLiteral(serializedSchemaMappingRequest), stringLiteral(this.connectionName));
        final ValueTable valueTable = buildValueTable(queryPlan.getDocumentFetcher(), udfCallSelect);
        udfCallSelect.from().valueTableAs(valueTable, "T", DATA_LOADER_COLUMN, FRAGMENT_ID_COLUMN);
        udfCallSelect.groupBy(column(FRAGMENT_ID_COLUMN));
        return udfCallSelect;
    }

    /**
     * Collect the columns the UDF has to emit: the columns involved in the post selection plus the columns of the
     * select list, without duplicates and sorted by their Exasol column name.
     *
     * @param query     query that provides the select list
     * @param queryPlan plan that provides the post selection
     * @return distinct, sorted list of required columns
     */
    private List<ColumnMapping> getRequiredColumns(final RemoteTableQuery query, final FetchQueryPlan queryPlan) {
        final List<ColumnMapping> postSelectionsColumns = new InvolvedColumnCollector()
                .collectInvolvedColumns(queryPlan.getPostSelection());
        return Stream.concat(postSelectionsColumns.stream(), query.getSelectList().stream()) //
                .distinct() //
                .sorted(Comparator.comparing(ColumnMapping::getExasolColumnName)) //
                .collect(Collectors.toList());
    }

    /**
     * Create the column definitions for the {@code EMITS} clause of the UDF call.
     *
     * @param requiredColumns columns the UDF has to emit
     * @param udfCallSelect   statement the column definitions belong to
     * @return column definitions in the order of the given columns
     */
    private List<Column> buildColumnDefinitions(final List<ColumnMapping> requiredColumns,
            final Select udfCallSelect) {
        return requiredColumns.stream() //
                .map(column -> createColumn(udfCallSelect, column)) //
                .collect(Collectors.toList());
    }

    /**
     * Create a single column definition with the Exasol column name and the converted data type of the given column.
     *
     * @param udfCallSelect statement the column definition belongs to
     * @param column        column mapping to create the definition for
     * @return column definition
     */
    private Column createColumn(final Select udfCallSelect, final ColumnMapping column) {
        final String columnName = column.getExasolColumnName();
        final DataType type = column.getExasolDataType();
        final com.exasol.datatype.type.DataType convertedType = convertDataType(type);
        LOG.fine(() -> "Using type " + renderDataType(convertedType) + " (" + convertedType + ") for column "
                + columnName + " (" + type + ") / " + column);
        return new Column(udfCallSelect, columnName, convertedType);
    }

    /**
     * Build the value table with one row per document fetcher. Each row consists of the serialized document fetcher
     * and a running row counter that starts at 0.
     *
     * @param documentFetchers document fetchers to serialize
     * @param select           statement the value table belongs to
     * @return value table
     */
    private ValueTable buildValueTable(final List<DocumentFetcher> documentFetchers, final Select select) {
        final ValueTable valueTable = new ValueTable(select);
        int rowCounter = 0;
        for (final DocumentFetcher documentFetcher : documentFetchers) {
            final String serializeDocumentFetcher = serializeDocumentFetcher(documentFetcher);
            final ValueTableRow row = ValueTableRow.builder(select).add(serializeDocumentFetcher).add(rowCounter) //
                    .build();
            valueTable.appendRow(row);
            ++rowCounter;
        }
        return valueTable;
    }

    /**
     * Serialize the given document fetcher to a string.
     *
     * @param documentFetcher document fetcher to serialize
     * @return serialized document fetcher
     * @throws IllegalStateException if the serialization fails with an {@link IOException}
     */
    private String serializeDocumentFetcher(final DocumentFetcher documentFetcher) {
        try {
            return serializeToString(documentFetcher);
        } catch (final IOException exception) {
            throw new IllegalStateException(ExaError.messageBuilder("F-VSD-19")
                    .message("Internal error (Failed to serialize DocumentFetcher).").ticketMitigation().toString(),
                    exception);
        }
    }

    /**
     * Serialize the given schema mapping request to a string.
     *
     * @param schemaMappingRequest schema mapping request to serialize
     * @return serialized schema mapping request
     * @throws IllegalStateException if the serialization fails with an {@link IOException}
     */
    private String serializeSchemaMappingRequest(final SchemaMappingRequest schemaMappingRequest) {
        try {
            return serializeToString(schemaMappingRequest);
        } catch (final IOException exception) {
            throw new IllegalStateException(ExaError.messageBuilder("F-VSD-18")
                    .message("Internal error (Failed to serialize SchemaMappingRequest).").ticketMitigation()
                    .toString(), exception);
        }
    }

    /**
     * Convert a data type of the adapter metadata model into the corresponding data type of the SQL statement builder.
     *
     * @param adapterDataType data type from the adapter metadata
     * @return converted data type
     * @throws UnsupportedOperationException if there is no conversion implemented for the given type
     */
    private com.exasol.datatype.type.DataType convertDataType(final DataType adapterDataType) {
        switch (adapterDataType.getExaDataType()) {
        case DECIMAL:
            return new Decimal(adapterDataType.getPrecision(), adapterDataType.getScale());
        case DOUBLE:
            return new DoublePrecision();
        case VARCHAR:
            return new Varchar(adapterDataType.getSize());
        case CHAR:
            return new Char(adapterDataType.getSize());
        case DATE:
            return new Date();
        case TIMESTAMP:
            // We ignore "WITH LOCAL TIME ZONE" here since UDFs don't support it and we don't need to support it.
            return new Timestamp();
        case BOOLEAN:
            return new Boolean();
        default:
            throw new UnsupportedOperationException(ExaError.messageBuilder("F-VSD-69")
                    .message("Unimplemented conversion of type {{TYPE}}.")
                    .parameter("TYPE", adapterDataType.getExaDataType().toString()).ticketMitigation().toString());
        }
    }

    /**
     * Render the given statement to an SQL string with quoted identifiers.
     *
     * @param pushDownSelect statement to render
     * @return rendered SQL
     */
    private String renderStatement(final Select pushDownSelect) {
        final StringRendererConfig config = StringRendererConfig.builder().quoteIdentifiers(true).build();
        final SelectRenderer renderer = new SelectRenderer(config);
        pushDownSelect.accept(renderer);
        return renderer.render();
    }

    /**
     * Render the given data type with the default renderer configuration. Only used for log messages in this class.
     *
     * @param type data type to render
     * @return rendered data type
     */
    private String renderDataType(final com.exasol.datatype.type.DataType type) {
        final ColumnsDefinitionRenderer renderer = new ColumnsDefinitionRenderer(StringRendererConfig.createDefault());
        type.accept(renderer);
        return renderer.render();
    }
}