org.apache.druid.query.Queries
A module that is everything required to understand Druid Segments
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import org.apache.druid.guice.annotations.PublicApi;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.query.aggregation.AggregatorFactory;
import org.apache.druid.query.aggregation.PostAggregator;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.planning.DataSourceAnalysis;
import org.apache.druid.query.spec.MultipleSpecificSegmentSpec;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
@PublicApi
public class Queries
{
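/**
* Returns a copy of "postAggs" in which each post-aggregator has been decorated via
* {@link PostAggregator#decorate}, using the given map of aggregator factories keyed by aggregator output name.
*/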
public static List<PostAggregator> decoratePostAggregators(
List<PostAggregator> postAggs,
Map<String, AggregatorFactory> aggFactories
)
{
List<PostAggregator> decorated = Lists.newArrayListWithExpectedSize(postAggs.size());
for (PostAggregator aggregator : postAggs) {
decorated.add(aggregator.decorate(aggFactories));
}
return decorated;
}
/**
* Like {@link #prepareAggregations(List, List, List)} but with otherOutputNames as an empty list. Deprecated
* because it makes it easy to forget to include dimensions, etc. in "otherOutputNames".
*
* @param aggFactories aggregator factories for this query
* @param postAggs post-aggregators for this query
*
* @return decorated post-aggregators
*
* @throws NullPointerException if aggFactories is null
* @throws IllegalArgumentException if there are any output name collisions or missing post-aggregator inputs
*/
@Deprecated
public static List<PostAggregator> prepareAggregations(
List<AggregatorFactory> aggFactories,
List<PostAggregator> postAggs
)
{
return prepareAggregations(Collections.emptyList(), aggFactories, postAggs);
}
/**
* Returns decorated post-aggregators, based on original un-decorated post-aggregators. In addition, this method
* verifies that there are no output name collisions, and that all of the post-aggregators' required input
* fields are present.
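*
* <p>Illustrative usage sketch; the aggregator and post-aggregator names here ("rows", "rowsAgain") are
* hypothetical, not part of this API:
* <pre>{@code
* List<PostAggregator> decorated = Queries.prepareAggregations(
*     ImmutableList.of("dim1"),                                       // dimension output names
*     ImmutableList.of(new CountAggregatorFactory("rows")),           // aggregators
*     ImmutableList.of(new FieldAccessPostAggregator("rowsAgain", "rows"))
* );
* }</pre>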
*
* @param otherOutputNames names of fields that will appear in the same output namespace as aggregators and
* post-aggregators, and are also assumed to be valid inputs to post-aggregators. For most
* built-in query types, this is either empty, or the list of dimension output names.
* @param aggFactories aggregator factories for this query
* @param postAggs post-aggregators for this query
*
* @return decorated post-aggregators
*
* @throws NullPointerException if otherOutputNames or aggFactories is null
* @throws IllegalArgumentException if there are any output name collisions or missing post-aggregator inputs
*/
public static List<PostAggregator> prepareAggregations(
List<String> otherOutputNames,
List<AggregatorFactory> aggFactories,
List<PostAggregator> postAggs
)
)
{
Preconditions.checkNotNull(otherOutputNames, "otherOutputNames cannot be null");
Preconditions.checkNotNull(aggFactories, "aggregations cannot be null");
final Set<String> combinedOutputNames = new HashSet<>(otherOutputNames);
final Map<String, AggregatorFactory> aggsFactoryMap = new HashMap<>();
for (AggregatorFactory aggFactory : aggFactories) {
Preconditions.checkArgument(
combinedOutputNames.add(aggFactory.getName()),
"[%s] already defined", aggFactory.getName()
);
aggsFactoryMap.put(aggFactory.getName(), aggFactory);
}
if (postAggs != null && !postAggs.isEmpty()) {
List<PostAggregator> decorated = Lists.newArrayListWithExpectedSize(postAggs.size());
for (final PostAggregator postAgg : postAggs) {
final Set<String> dependencies = postAgg.getDependentFields();
final Set<String> missing = Sets.difference(dependencies, combinedOutputNames);
Preconditions.checkArgument(
missing.isEmpty(),
"Missing fields [%s] for postAggregator [%s]", missing, postAgg.getName()
);
Preconditions.checkArgument(
combinedOutputNames.add(postAgg.getName()),
"[%s] already defined", postAgg.getName()
);
decorated.add(postAgg.decorate(aggsFactoryMap));
}
return decorated;
}
return postAggs;
}
/**
* Rewrite "query" to refer to some specific segment descriptors.
*
* The dataSource for "query" must be based on a single table for this operation to be valid. Otherwise, this
* function will throw an exception.
*
* Unlike the seemingly-similar {@code query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(descriptors))},
this method will walk down subqueries found within the query datasource, if any, and modify the lowest-level
* subquery. The effect is that
* {@code DataSourceAnalysis.forDataSource(query.getDataSource()).getBaseQuerySegmentSpec()} is guaranteed to return
* either {@code new MultipleSpecificSegmentSpec(descriptors)} or empty.
*
* Because {@link BaseQuery#getRunner} is implemented using {@link DataSourceAnalysis#getBaseQuerySegmentSpec}, this
* method will cause the runner to be a specific-segments runner.
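*
* <p>Illustrative usage sketch; the interval, version, and partition number are hypothetical:
* <pre>{@code
* Query<T> rewritten = Queries.withSpecificSegments(
*     query,
*     ImmutableList.of(new SegmentDescriptor(Intervals.of("2000/2001"), "v1", 0))
* );
* }</pre>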
*/
public static <T> Query<T> withSpecificSegments(final Query<T> query, final List<SegmentDescriptor> descriptors)
{
final Query<T> retVal;
if (query.getDataSource() instanceof QueryDataSource) {
final Query<?> subQuery = ((QueryDataSource) query.getDataSource()).getQuery();
retVal = query.withDataSource(new QueryDataSource(withSpecificSegments(subQuery, descriptors)));
} else {
retVal = query.withQuerySegmentSpec(new MultipleSpecificSegmentSpec(descriptors));
}
// Verify preconditions and invariants, just in case.
final DataSource retDataSource = retVal.getDataSource();
final DataSourceAnalysis analysis = retDataSource.getAnalysis();
// Sanity check: query must be based on a single table.
if (!analysis.getBaseTableDataSource().isPresent()) {
throw new ISE("Unable to apply specific segments to non-table-based dataSource[%s]", query.getDataSource());
}
if (analysis.getBaseQuerySegmentSpec().isPresent()
&& !analysis.getBaseQuerySegmentSpec().get().equals(new MultipleSpecificSegmentSpec(descriptors))) {
// If you see the error message below, it's a bug in either this function or in DataSourceAnalysis.
throw new ISE("Unable to apply specific segments to query with dataSource[%s]", query.getDataSource());
}
return retVal;
}
/**
* Rewrite "query" to refer to some specific base datasource, instead of the one it currently refers to.
*
* Unlike the seemingly-similar {@link Query#withDataSource}, this will walk down the datasource tree and replace
* only the base datasource (in the sense defined in {@link DataSourceAnalysis}).
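*
* <p>Illustrative sketch (the table name "wikipedia" is hypothetical): for a query over a join whose base is a
* table, {@code Queries.withBaseDataSource(query, new TableDataSource("wikipedia"))} swaps in the new base table
* while leaving the rest of the datasource tree, such as joins and subqueries, intact.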
*/
public static <T> Query<T> withBaseDataSource(final Query<T> query, final DataSource newBaseDataSource)
{
return query.withDataSource(query.getDataSource().withUpdatedDataSource(newBaseDataSource));
}
/**
* Helper for implementations of {@link Query#getRequiredColumns()}. Returns the list of columns that will be read
* out of a datasource by a query that uses the provided objects in the usual way.
*
* The returned set always contains {@code __time}, no matter what.
*
* If the virtual columns, filter, dimensions, aggregators, or additional columns refer to a virtual column, then the
* inputs of the virtual column will be returned instead of the name of the virtual column itself. Therefore, the
* returned list will never contain the names of any virtual columns.
*
* @param virtualColumns virtual columns whose inputs should be included.
* @param filter optional filter whose inputs should be included.
* @param dimensions dimension specs whose inputs should be included.
* @param aggregators aggregators whose inputs should be included.
* @param additionalColumns additional columns to include. Each of these will be added to the returned set, unless it
* refers to a virtual column, in which case the virtual column inputs will be added instead.
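*
* <p>Illustrative sketch (column names are hypothetical): if virtual column "v" is defined over physical column
* "x", and the query reads dimension "v" and aggregates on "y", the returned set is {@code [__time, x, y]};
* the name "v" itself does not appear.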
*/
public static Set<String> computeRequiredColumns(
final VirtualColumns virtualColumns,
@Nullable final DimFilter filter,
final List<DimensionSpec> dimensions,
final List<AggregatorFactory> aggregators,
final List<String> additionalColumns
)
{
final Set<String> requiredColumns = new HashSet<>();
// Everyone needs __time (it's used by intervals filters).
requiredColumns.add(ColumnHolder.TIME_COLUMN_NAME);
for (VirtualColumn virtualColumn : virtualColumns.getVirtualColumns()) {
for (String column : virtualColumn.requiredColumns()) {
if (!virtualColumns.exists(column)) {
requiredColumns.add(column);
}
}
}
if (filter != null) {
for (String column : filter.getRequiredColumns()) {
if (!virtualColumns.exists(column)) {
requiredColumns.add(column);
}
}
}
for (DimensionSpec dimensionSpec : dimensions) {
if (!virtualColumns.exists(dimensionSpec.getDimension())) {
requiredColumns.add(dimensionSpec.getDimension());
}
}
for (AggregatorFactory aggregator : aggregators) {
for (String column : aggregator.requiredFields()) {
if (!virtualColumns.exists(column)) {
requiredColumns.add(column);
}
}
}
for (String column : additionalColumns) {
if (!virtualColumns.exists(column)) {
requiredColumns.add(column);
}
}
return requiredColumns;
}
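/**
* Returns a copy of "query" with {@link QueryContexts#MAX_SCATTER_GATHER_BYTES_KEY} set to
* "maxScatterGatherBytesLimit", if the context does not already contain that key. If the key is already present,
* this method verifies that the user-provided value does not exceed the limit, and returns the query unchanged.
*/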
public static <T> Query<T> withMaxScatterGatherBytes(Query<T> query, long maxScatterGatherBytesLimit)
{
QueryContext context = query.context();
if (!context.containsKey(QueryContexts.MAX_SCATTER_GATHER_BYTES_KEY)) {
return query.withOverriddenContext(ImmutableMap.of(QueryContexts.MAX_SCATTER_GATHER_BYTES_KEY, maxScatterGatherBytesLimit));
}
context.verifyMaxScatterGatherBytes(maxScatterGatherBytesLimit);
return query;
}
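/**
* Returns a copy of "query" with {@link QueryContexts#TIMEOUT_KEY} overridden to "timeout".
*/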
public static <T> Query<T> withTimeout(Query<T> query, long timeout)
{
return query.withOverriddenContext(ImmutableMap.of(QueryContexts.TIMEOUT_KEY, timeout));
}
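/**
* Returns a copy of "query" with {@link QueryContexts#DEFAULT_TIMEOUT_KEY} overridden to "defaultTimeout".
*/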
public static <T> Query<T> withDefaultTimeout(Query<T> query, long defaultTimeout)
{
return query.withOverriddenContext(ImmutableMap.of(QueryContexts.DEFAULT_TIMEOUT_KEY, defaultTimeout));
}
}