Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1.
You can buy this project and download/modify it as often as you want.
io.trino.plugin.pinot.query.DynamicTableBuilder Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.pinot.query;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.trino.plugin.pinot.PinotColumnHandle;
import io.trino.plugin.pinot.PinotException;
import io.trino.plugin.pinot.PinotMetadata;
import io.trino.plugin.pinot.PinotTypeConverter;
import io.trino.plugin.pinot.client.PinotClient;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.SchemaTableName;
import io.trino.spi.type.ArrayType;
import io.trino.spi.type.Type;
import org.apache.pinot.common.request.BrokerRequest;
import org.apache.pinot.common.request.PinotQuery;
import org.apache.pinot.common.request.context.ExpressionContext;
import org.apache.pinot.common.request.context.FunctionContext;
import org.apache.pinot.common.request.context.OrderByExpressionContext;
import org.apache.pinot.common.utils.DataSchema;
import org.apache.pinot.core.query.aggregation.function.AggregationFunction;
import org.apache.pinot.core.query.reduce.PostAggregationHandler;
import org.apache.pinot.core.query.request.context.QueryContext;
import org.apache.pinot.core.query.request.context.utils.QueryContextConverterUtils;
import org.apache.pinot.segment.spi.AggregationFunctionType;
import org.apache.pinot.sql.parsers.CalciteSqlCompiler;
import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.OptionalLong;
import java.util.Set;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.plugin.pinot.PinotErrorCode.PINOT_EXCEPTION;
import static io.trino.plugin.pinot.query.PinotExpressionRewriter.rewriteExpression;
import static io.trino.plugin.pinot.query.PinotPatterns.WILDCARD;
import static io.trino.plugin.pinot.query.PinotSqlFormatter.formatExpression;
import static io.trino.plugin.pinot.query.PinotSqlFormatter.formatFilter;
import static java.lang.String.format;
import static java.util.Locale.ENGLISH;
import static java.util.Objects.requireNonNull;
import static org.apache.pinot.segment.spi.AggregationFunctionType.COUNT;
import static org.apache.pinot.segment.spi.AggregationFunctionType.DISTINCTCOUNT;
import static org.apache.pinot.segment.spi.AggregationFunctionType.DISTINCTCOUNTHLL;
import static org.apache.pinot.segment.spi.AggregationFunctionType.getAggregationFunctionType;
public final class DynamicTableBuilder
{
public static final String OFFLINE_SUFFIX = "_OFFLINE";
public static final String REALTIME_SUFFIX = "_REALTIME";
private static final Set NON_NULL_ON_EMPTY_AGGREGATIONS = EnumSet.of(COUNT, DISTINCTCOUNT, DISTINCTCOUNTHLL);
private DynamicTableBuilder()
{
}
public static DynamicTable buildFromPql(PinotMetadata pinotMetadata, SchemaTableName schemaTableName, PinotClient pinotClient, PinotTypeConverter typeConverter)
{
requireNonNull(pinotMetadata, "pinotMetadata is null");
requireNonNull(schemaTableName, "schemaTableName is null");
requireNonNull(typeConverter, "typeConverter is null");
String query = schemaTableName.getTableName();
BrokerRequest request = CalciteSqlCompiler.compileToBrokerRequest(query);
PinotQuery pinotQuery = request.getPinotQuery();
QueryContext queryContext = QueryContextConverterUtils.getQueryContext(pinotQuery);
String tableName = request.getQuerySource().getTableName();
String trinoTableName = stripSuffix(tableName).toLowerCase(ENGLISH);
String pinotTableName = pinotClient.getPinotTableNameFromTrinoTableName(trinoTableName);
Optional suffix = getSuffix(tableName);
Map columnHandles = pinotMetadata.getPinotColumnHandles(trinoTableName);
List orderBy = ImmutableList.of();
PinotTypeResolver pinotTypeResolver = new PinotTypeResolver(pinotClient, typeConverter, pinotTableName);
List selectColumns = ImmutableList.of();
Map aggregateTypes = ImmutableMap.of();
if (queryContext.getAggregationFunctions() != null) {
checkState(queryContext.getAggregationFunctions().length > 0, "Aggregation Functions is empty");
aggregateTypes = getAggregateTypes(schemaTableName, queryContext, columnHandles, typeConverter);
}
if (queryContext.getSelectExpressions() != null) {
checkState(!queryContext.getSelectExpressions().isEmpty(), "Pinot selections is empty");
selectColumns = getPinotColumns(schemaTableName, queryContext.getSelectExpressions(), queryContext.getAliasList(), columnHandles, pinotTypeResolver, aggregateTypes);
}
if (queryContext.getOrderByExpressions() != null) {
ImmutableList.Builder orderByBuilder = ImmutableList.builder();
for (OrderByExpressionContext orderByExpressionContext : queryContext.getOrderByExpressions()) {
ExpressionContext expressionContext = orderByExpressionContext.getExpression();
PinotColumnHandle pinotColumnHandle = getPinotColumnHandle(schemaTableName, expressionContext, Optional.empty(), columnHandles, pinotTypeResolver, aggregateTypes);
orderByBuilder.add(new OrderByExpression(pinotColumnHandle.getExpression(), orderByExpressionContext.isAsc()));
}
orderBy = orderByBuilder.build();
}
List groupByColumns = ImmutableList.of();
if (queryContext.getGroupByExpressions() != null) {
groupByColumns = getPinotColumns(schemaTableName, queryContext.getGroupByExpressions(), ImmutableList.of(), columnHandles, pinotTypeResolver, aggregateTypes);
}
Optional havingExpression = Optional.empty();
if (queryContext.getHavingFilter() != null) {
String formatted = formatFilter(schemaTableName, queryContext.getHavingFilter(), columnHandles);
havingExpression = Optional.of(formatted);
}
Optional filter = Optional.empty();
if (pinotQuery.getFilterExpression() != null) {
String formatted = formatFilter(schemaTableName, queryContext.getFilter(), columnHandles);
filter = Optional.of(formatted);
}
return new DynamicTable(pinotTableName, suffix, selectColumns, filter, groupByColumns, ImmutableList.of(), havingExpression, orderBy, OptionalLong.of(queryContext.getLimit()), getOffset(queryContext), queryContext.getQueryOptions(), query);
}
private static List getPinotColumns(SchemaTableName schemaTableName, List expressions, List aliases, Map columnHandles, PinotTypeResolver pinotTypeResolver, Map aggregateTypes)
{
ImmutableList.Builder pinotColumnsBuilder = ImmutableList.builder();
for (int index = 0; index < expressions.size(); index++) {
ExpressionContext expressionContext = expressions.get(index);
Optional alias = getAlias(aliases, index);
// Only substitute * with columns for top level SELECT *.
// Since Pinot doesn't support subqueries yet, we can only have one occurrence of SELECT *
if (expressionContext.getType() == ExpressionContext.Type.IDENTIFIER && expressionContext.getIdentifier().equals(WILDCARD)) {
pinotColumnsBuilder.addAll(columnHandles.values().stream()
.map(PinotColumnHandle.class::cast)
.map(PinotMetadata::toNonAggregateColumnHandle)
.collect(toImmutableList()));
}
else {
pinotColumnsBuilder.add(getPinotColumnHandle(schemaTableName, expressionContext, alias, columnHandles, pinotTypeResolver, aggregateTypes));
}
}
return pinotColumnsBuilder.build();
}
private static PinotColumnHandle getPinotColumnHandle(SchemaTableName schemaTableName, ExpressionContext expressionContext, Optional alias, Map columnHandles, PinotTypeResolver pinotTypeResolver, Map aggregateTypes)
{
ExpressionContext rewritten = rewriteExpression(schemaTableName, expressionContext, columnHandles);
// If there is no alias, pinot autogenerates the column name:
String columnName = rewritten.toString();
String pinotExpression = formatExpression(schemaTableName, rewritten);
Type trinoType;
boolean isAggregate = hasAggregate(rewritten);
if (isAggregate) {
trinoType = requireNonNull(aggregateTypes.get(columnName).getTrinoType(), format("Unexpected aggregate expression: '%s'", rewritten));
// For aggregation queries, the column name is set by the schema returned from PostAggregationHandler, see getAggregateTypes
columnName = aggregateTypes.get(columnName).getPinotColumnName();
}
else {
trinoType = pinotTypeResolver.resolveExpressionType(rewritten, schemaTableName, columnHandles);
if (!aggregateTypes.isEmpty() && trinoType instanceof ArrayType) {
trinoType = ((ArrayType) trinoType).getElementType();
}
}
return new PinotColumnHandle(alias.orElse(columnName), trinoType, pinotExpression, alias.isPresent(), isAggregate, isReturnNullOnEmptyGroup(expressionContext), Optional.empty(), Optional.empty());
}
private static Optional getAlias(List aliases, int index)
{
// SELECT * is expanded to all columns with no aliases
if (index >= aliases.size()) {
return Optional.empty();
}
return Optional.ofNullable(aliases.get(index));
}
private static boolean isAggregate(ExpressionContext expressionContext)
{
return expressionContext.getType() == ExpressionContext.Type.FUNCTION && expressionContext.getFunction().getType() == FunctionContext.Type.AGGREGATION;
}
private static boolean hasAggregate(ExpressionContext expressionContext)
{
switch (expressionContext.getType()) {
case IDENTIFIER:
case LITERAL:
return false;
case FUNCTION:
if (isAggregate(expressionContext)) {
return true;
}
for (ExpressionContext argument : expressionContext.getFunction().getArguments()) {
if (hasAggregate(argument)) {
return true;
}
}
return false;
}
throw new PinotException(PINOT_EXCEPTION, Optional.empty(), format("Unsupported expression type '%s'", expressionContext.getType()));
}
private static Map getAggregateTypes(SchemaTableName schemaTableName, QueryContext queryContext, Map columnHandles, PinotTypeConverter typeConverter)
{
// A mapping from pinot expression to the returned pinot column name and trino type
// Note: the column name is set by the PostAggregationHandler
List aggregateColumnExpressions = queryContext.getSelectExpressions().stream()
.filter(DynamicTableBuilder::hasAggregate)
.collect(toImmutableList());
queryContext = new QueryContext.Builder()
.setAliasList(queryContext.getAliasList())
.setSelectExpressions(aggregateColumnExpressions)
.build();
DataSchema preAggregationSchema = getPreAggregationDataSchema(queryContext);
PostAggregationHandler postAggregationHandler = new PostAggregationHandler(queryContext, preAggregationSchema);
DataSchema postAggregationSchema = postAggregationHandler.getResultDataSchema();
ImmutableMap.Builder aggregationTypesBuilder = ImmutableMap.builder();
for (int index = 0; index < postAggregationSchema.size(); index++) {
aggregationTypesBuilder.put(
// ExpressionContext#toString performs quoting of literals
// Quoting of identifiers is not done to match the corresponding column name in the ResultTable returned from Pinot. Quoting will be done by `DynamicTablePqlExtractor`.
rewriteExpression(schemaTableName,
aggregateColumnExpressions.get(index),
columnHandles).toString(),
new PinotColumnNameAndTrinoType(
postAggregationSchema.getColumnName(index),
typeConverter.toTrinoType(postAggregationSchema.getColumnDataType(index))));
}
return aggregationTypesBuilder.buildOrThrow();
}
// Extracted from org.apache.pinot.core.query.reduce.AggregationDataTableReducer
private static DataSchema getPreAggregationDataSchema(QueryContext queryContext)
{
AggregationFunction, ?>[] aggregationFunctions = queryContext.getAggregationFunctions();
int numAggregationFunctions = aggregationFunctions.length;
String[] columnNames = new String[numAggregationFunctions];
DataSchema.ColumnDataType[] columnDataTypes = new DataSchema.ColumnDataType[numAggregationFunctions];
for (int i = 0; i < numAggregationFunctions; i++) {
AggregationFunction, ?> aggregationFunction = aggregationFunctions[i];
columnNames[i] = aggregationFunction.getResultColumnName();
columnDataTypes[i] = aggregationFunction.getFinalResultColumnType();
}
return new DataSchema(columnNames, columnDataTypes);
}
// To keep consistent behavior with pushed down aggregates, only return non null on an empty group
// if the top level function is in NON_NULL_ON_EMPTY_AGGREGATIONS.
// For all other cases, keep the same behavior as Pinot, since likely the same results are expected.
private static boolean isReturnNullOnEmptyGroup(ExpressionContext expressionContext)
{
if (isAggregate(expressionContext)) {
return !NON_NULL_ON_EMPTY_AGGREGATIONS.contains(getAggregationFunctionType(expressionContext.getFunction().getFunctionName()));
}
return true;
}
private static OptionalLong getOffset(QueryContext queryContext)
{
if (queryContext.getOffset() > 0) {
return OptionalLong.of(queryContext.getOffset());
}
return OptionalLong.empty();
}
private static String stripSuffix(String tableName)
{
requireNonNull(tableName, "tableName is null");
if (tableName.toUpperCase(ENGLISH).endsWith(OFFLINE_SUFFIX)) {
return tableName.substring(0, tableName.length() - OFFLINE_SUFFIX.length());
}
if (tableName.toUpperCase(ENGLISH).endsWith(REALTIME_SUFFIX)) {
return tableName.substring(0, tableName.length() - REALTIME_SUFFIX.length());
}
return tableName;
}
private static Optional getSuffix(String tableName)
{
requireNonNull(tableName, "tableName is null");
if (tableName.toUpperCase(ENGLISH).endsWith(OFFLINE_SUFFIX)) {
return Optional.of(OFFLINE_SUFFIX);
}
if (tableName.toUpperCase(ENGLISH).endsWith(REALTIME_SUFFIX)) {
return Optional.of(REALTIME_SUFFIX);
}
return Optional.empty();
}
private static class PinotColumnNameAndTrinoType
{
private final String pinotColumnName;
private final Type trinoType;
public PinotColumnNameAndTrinoType(String pinotColumnName, Type trinoType)
{
this.pinotColumnName = requireNonNull(pinotColumnName, "pinotColumnName is null");
this.trinoType = requireNonNull(trinoType, "trinoType is null");
}
public String getPinotColumnName()
{
return pinotColumnName;
}
public Type getTrinoType()
{
return trinoType;
}
}
}