// org.apache.druid.sql.calcite.rel.Grouping Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.sql.calcite.rel;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import it.unimi.dsi.fastutil.ints.IntList;
import org.apache.calcite.plan.RelOptUtil;
import org.apache.calcite.rel.core.Project;
import org.apache.calcite.util.ImmutableBitSet;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.sql.calcite.aggregation.Aggregation;
import org.apache.druid.sql.calcite.aggregation.DimensionExpression;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Objects;
import java.util.Set;
import java.util.stream.Collectors;
/**
 * Represents Druid's concept of a "group by": dimensions, aggregations, post-aggregations, and 'having' filters. This
 * is always something that can be handled by a groupBy query, and in some cases, it may be handleable by a timeseries
 * or topN query type as well.
 *
 * This corresponds to a Calcite Aggregate + optional Filter + optional Project.
 *
 * It does not include sorting, limiting, or post-sorting projections: for this, see the {@link Sorting} class.
 */
public class Grouping
{
  private final List<DimensionExpression> dimensions;
  private final Subtotals subtotals;
  private final List<Aggregation> aggregations;
  @Nullable
  private final DimFilter havingFilter;
  private final RowSignature outputRowSignature;

  // Denotes whether the original Grouping had more dimensions which were dropped while applying projection to optimize
  // the grouping. Used for returning result which is consistent with most SQL implementations, by correspondingly
  // setting/unsetting the SKIP_EMPTY_BUCKETS flag, if the GroupBy query can be reduced to a timeseries query.
  private final boolean groupingDimensionsDropped;

  /**
   * Convenience constructor that leaves {@code groupingDimensionsDropped} at its default of {@code false}.
   */
  private Grouping(
      final List<DimensionExpression> dimensions,
      final Subtotals subtotals,
      final List<Aggregation> aggregations,
      @Nullable final DimFilter havingFilter,
      final RowSignature outputRowSignature
  )
  {
    this(dimensions, subtotals, aggregations, havingFilter, outputRowSignature, false);
  }

  /**
   * Stores immutable copies of the dimension and aggregation lists and validates field names.
   *
   * @throws NullPointerException if {@code subtotals} or {@code outputRowSignature} is null
   * @throws ISE                  if two dimensions, aggregator factories or post-aggregators share an output name, or
   *                              if {@code outputRowSignature} names a column that none of them produces
   */
  private Grouping(
      final List<DimensionExpression> dimensions,
      final Subtotals subtotals,
      final List<Aggregation> aggregations,
      @Nullable final DimFilter havingFilter,
      final RowSignature outputRowSignature,
      final boolean groupingDimensionsDropped
  )
  {
    this.dimensions = ImmutableList.copyOf(dimensions);
    this.subtotals = Preconditions.checkNotNull(subtotals, "subtotals");
    this.aggregations = ImmutableList.copyOf(aggregations);
    this.havingFilter = havingFilter;
    this.outputRowSignature = Preconditions.checkNotNull(outputRowSignature, "outputRowSignature");
    this.groupingDimensionsDropped = groupingDimensionsDropped;

    // Verify no collisions between dimensions, aggregations, post-aggregations.
    final Set<String> seen = new HashSet<>();

    for (DimensionExpression dimensionExpression : dimensions) {
      if (!seen.add(dimensionExpression.getOutputName())) {
        throw new ISE("Duplicate field name: %s", dimensionExpression.getOutputName());
      }
    }

    for (Aggregation aggregation : aggregations) {
      for (AggregatorFactory aggregatorFactory : aggregation.getAggregatorFactories()) {
        if (!seen.add(aggregatorFactory.getName())) {
          throw new ISE("Duplicate field name: %s", aggregatorFactory.getName());
        }
      }

      // An aggregation may or may not carry a post-aggregator.
      final PostAggregator postAggregator = aggregation.getPostAggregator();
      if (postAggregator != null && !seen.add(postAggregator.getName())) {
        throw new ISE("Duplicate field name: %s", postAggregator.getName());
      }
    }

    // Verify that items in the output signature exist.
    for (final String field : outputRowSignature.getColumnNames()) {
      if (!seen.contains(field)) {
        throw new ISE("Missing field in rowOrder: %s", field);
      }
    }
  }

  // This method is private since groupingDimensionsDropped should only be deviated from default in
  // applyProject
  private static Grouping create(
      final List<DimensionExpression> dimensions,
      final Subtotals subtotals,
      final List<Aggregation> aggregations,
      @Nullable final DimFilter havingFilter,
      final RowSignature outputRowSignature,
      final boolean groupingDimensionsDropped
  )
  {
    return new Grouping(
        dimensions,
        subtotals,
        aggregations,
        havingFilter,
        outputRowSignature,
        groupingDimensionsDropped
    );
  }

  /**
   * Creates a Grouping whose {@link #hasGroupingDimensionsDropped()} flag is {@code false}.
   *
   * @throws ISE if field names collide or the output signature references an unknown field
   */
  public static Grouping create(
      final List<DimensionExpression> dimensions,
      final Subtotals subtotals,
      final List<Aggregation> aggregations,
      @Nullable final DimFilter havingFilter,
      final RowSignature outputRowSignature
  )
  {
    return new Grouping(dimensions, subtotals, aggregations, havingFilter, outputRowSignature);
  }

  /**
   * Returns the grouping dimensions as an immutable list.
   */
  public List<DimensionExpression> getDimensions()
  {
    return dimensions;
  }

  public Subtotals getSubtotals()
  {
    return subtotals;
  }

  /**
   * Returns the aggregations as an immutable list.
   */
  public List<Aggregation> getAggregations()
  {
    return aggregations;
  }

  @Nullable
  public DimFilter getHavingFilter()
  {
    return havingFilter;
  }

  /**
   * Returns a newly built list holding the {@link DimensionSpec} of each dimension, in dimension order.
   */
  public List<DimensionSpec> getDimensionSpecs()
  {
    return dimensions.stream().map(DimensionExpression::toDimensionSpec).collect(Collectors.toList());
  }

  /**
   * Returns a newly built list holding the aggregator factories of all aggregations, flattened in aggregation order.
   */
  public List<AggregatorFactory> getAggregatorFactories()
  {
    return aggregations.stream()
                       .flatMap(aggregation -> aggregation.getAggregatorFactories().stream())
                       .collect(Collectors.toList());
  }

  /**
   * Returns a newly built list holding the post-aggregator of every aggregation that has one.
   */
  public List<PostAggregator> getPostAggregators()
  {
    return aggregations.stream()
                       .map(Aggregation::getPostAggregator)
                       .filter(Objects::nonNull)
                       .collect(Collectors.toList());
  }

  public RowSignature getOutputRowSignature()
  {
    return outputRowSignature;
  }

  /**
   * Returns whether {@link #applyProject} dropped at least one dimension when producing this Grouping.
   */
  public boolean hasGroupingDimensionsDropped()
  {
    return groupingDimensionsDropped;
  }

  /**
   * Applies a post-grouping projection.
   *
   * The returned Grouping keeps the existing aggregations and having filter, appends the post-aggregators produced
   * by the projection, drops literal dimensions that the projection does not reference, and renumbers subtotals to
   * match the remaining dimensions. This instance is not modified.
   *
   * @see DruidQuery#computeGrouping which uses this
   */
  public Grouping applyProject(final PlannerContext plannerContext, final Project project)
  {
    final List<DimensionExpression> newDimensions = new ArrayList<>();
    final List<Aggregation> newAggregations = new ArrayList<>(aggregations);
    final Subtotals newSubtotals;

    final Projection postAggregationProjection = Projection.postAggregation(
        project,
        plannerContext,
        outputRowSignature,
        "p"
    );

    postAggregationProjection.getPostAggregators().forEach(
        postAggregator -> newAggregations.add(Aggregation.create(postAggregator))
    );

    // Remove literal dimensions that did not appear in the projection. This is useful for queries
    // like "SELECT COUNT(*) FROM tbl GROUP BY 'dummy'" which some tools can generate, and for which we don't
    // actually want to include a dimension 'dummy'.
    final ImmutableBitSet aggregateProjectBits = RelOptUtil.InputFinder.bits(project.getProjects(), null);

    // newDimIndexes[i] is the position of old dimension i in newDimensions, or -1 if it was dropped.
    final int[] newDimIndexes = new int[dimensions.size()];
    boolean droppedDimensions = false;

    for (int i = 0; i < dimensions.size(); i++) {
      final DimensionExpression dimension = dimensions.get(i);
      if (plannerContext.parseExpression(dimension.getDruidExpression().getExpression()).isLiteral()
          && !aggregateProjectBits.get(i)) {
        droppedDimensions = true;
        newDimIndexes[i] = -1;
      } else {
        newDimIndexes[i] = newDimensions.size();
        newDimensions.add(dimension);
      }
    }

    // Renumber subtotals, if needed, to account for removed dummy dimensions.
    if (newDimensions.size() != dimensions.size()) {
      final List<IntList> newSubtotalsList = new ArrayList<>();

      for (IntList subtotal : subtotals.getSubtotals()) {
        final IntList newSubtotal = new IntArrayList();
        for (int dimIndex : subtotal) {
          final int newDimIndex = newDimIndexes[dimIndex];
          if (newDimIndex >= 0) {
            newSubtotal.add(newDimIndex);
          }
        }

        newSubtotalsList.add(newSubtotal);
      }

      newSubtotals = new Subtotals(newSubtotalsList);
    } else {
      newSubtotals = subtotals;
    }

    // NOTE(review): only the flag computed by this projection is passed on; a groupingDimensionsDropped value already
    // set on this instance is not carried over. Confirm applyProject is applied at most once per Grouping.
    return Grouping.create(
        newDimensions,
        newSubtotals,
        newAggregations,
        havingFilter,
        postAggregationProjection.getOutputRowSignature(),
        droppedDimensions
    );
  }

  @Override
  public boolean equals(Object o)
  {
    if (this == o) {
      return true;
    }
    if (o == null || getClass() != o.getClass()) {
      return false;
    }
    Grouping grouping = (Grouping) o;
    return dimensions.equals(grouping.dimensions) &&
           subtotals.equals(grouping.subtotals) &&
           aggregations.equals(grouping.aggregations) &&
           Objects.equals(havingFilter, grouping.havingFilter) &&
           outputRowSignature.equals(grouping.outputRowSignature) &&
           groupingDimensionsDropped == grouping.groupingDimensionsDropped;
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(
        dimensions,
        subtotals,
        aggregations,
        havingFilter,
        outputRowSignature,
        groupingDimensionsDropped
    );
  }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy