// Source listing of org.apache.druid.query.scan.ScanQuery (published for Maven / Gradle / Ivy).
// Artifact: druid-processing -- a module containing everything required to understand Druid segments.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.query.scan;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Ordering;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.UOE;
import org.apache.druid.query.BaseQuery;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.Druids;
import org.apache.druid.query.Queries;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.operator.OffsetLimit;
import org.apache.druid.query.spec.QuerySegmentSpec;
import org.apache.druid.segment.VirtualColumns;
import org.apache.druid.segment.column.ColumnHolder;
import javax.annotation.Nullable;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
public class ScanQuery extends BaseQuery
{
/**
 * Layouts in which a scan query can return its results. Every constant has a short JSON name ("list",
 * "compactedList" or "valueVector") that is used both when writing and when reading JSON.
 */
public enum ResultFormat
{
  RESULT_FORMAT_LIST("list"),
  RESULT_FORMAT_COMPACTED_LIST("compactedList"),
  RESULT_FORMAT_VALUE_VECTOR("valueVector");

  /** Name emitted by {@link #toString()} and accepted by {@link #fromString(String)}. */
  private final String jsonName;

  ResultFormat(final String jsonName)
  {
    this.jsonName = jsonName;
  }

  /**
   * Returns the JSON name of this format. Because of {@link JsonValue}, this is also the serialized form.
   */
  @JsonValue
  @Override
  public String toString()
  {
    return jsonName;
  }

  /**
   * Parses a JSON name back into a format. The match is exact and case-sensitive.
   *
   * @param name one of "list", "compactedList" or "valueVector"
   *
   * @return the matching format
   *
   * @throws UOE                  if {@code name} is not one of the known names
   * @throws NullPointerException if {@code name} is null (a switch on a null string)
   */
  @JsonCreator
  public static ResultFormat fromString(String name)
  {
    switch (name) {
      case "list":
        return RESULT_FORMAT_LIST;
      case "compactedList":
        return RESULT_FORMAT_COMPACTED_LIST;
      case "valueVector":
        return RESULT_FORMAT_VALUE_VECTOR;
      default:
        throw new UOE("Scan query result format [%s] is not supported.", name);
    }
  }
}
/**
 * One entry of a scan query's "orderBy" list: a column name paired with a sort direction.
 * Both parts are mandatory and the direction is never {@link Order#NONE}.
 */
public static class OrderBy
{
  private final String columnName;
  private final Order order;

  /**
   * @param columnName name of the column to sort on
   * @param order      direction to sort in; {@link Order#NONE} is rejected
   *
   * @throws NullPointerException if either argument is null
   * @throws IAE                  if {@code order} is {@link Order#NONE}
   */
  @JsonCreator
  public OrderBy(
      @JsonProperty("columnName") final String columnName,
      @JsonProperty("order") final Order order
  )
  {
    this.columnName = Preconditions.checkNotNull(columnName, "columnName");
    this.order = Preconditions.checkNotNull(order, "order");

    // An entry without a direction carries no information, so refuse it outright.
    if (Order.NONE == this.order) {
      throw new IAE("Order required for column [%s]", this.columnName);
    }
  }

  /** Returns the name of the column to sort on. */
  @JsonProperty
  public String getColumnName()
  {
    return columnName;
  }

  /** Returns the sort direction; never {@link Order#NONE}. */
  @JsonProperty
  public Order getOrder()
  {
    return order;
  }

  /** Two entries are equal when they are of the same class and agree on column name and direction. */
  @Override
  public boolean equals(Object o)
  {
    if (o == this) {
      return true;
    }
    if (o == null || o.getClass() != getClass()) {
      return false;
    }
    final OrderBy other = (OrderBy) o;
    return order == other.order && Objects.equals(columnName, other.columnName);
  }

  @Override
  public int hashCode()
  {
    return Objects.hash(columnName, order);
  }

  /** Renders the entry as the column name followed by "ASC" or "DESC". */
  @Override
  public String toString()
  {
    final String direction;
    if (order == Order.ASCENDING) {
      direction = "ASC";
    } else {
      direction = "DESC";
    }
    return StringUtils.format("%s %s", columnName, direction);
  }
}
/**
 * Sort direction. {@link #NONE} stands for "no ordering"; it is treated the same as an absent "order" when the
 * query reconciles "order" and "orderBy".
 */
public enum Order
{
  ASCENDING,
  DESCENDING,
  NONE;

  /**
   * Returns the lower-cased constant name. Because of {@link JsonValue}, this is also the serialized form.
   */
  @JsonValue
  @Override
  public String toString()
  {
    return StringUtils.toLowerCase(name());
  }

  /**
   * Parses a direction by upper-casing {@code name} and looking up the constant of that name.
   *
   * @throws IllegalArgumentException if no constant has the upper-cased name
   */
  @JsonCreator
  public static Order fromString(String name)
  {
    final String constantName = StringUtils.toUpperCase(name);
    return Order.valueOf(constantName);
  }
}
/**
 * This context flag corresponds to whether the query is running on the "outermost" process (i.e. the process
 * the query is sent to).
 */
public static final String CTX_KEY_OUTERMOST = "scanOutermost";

/** Batch size substituted by the constructor when the "batchSize" property is 0. */
public static final int DEFAULT_BATCH_SIZE = 4096 * 5;

private final VirtualColumns virtualColumns;
private final ResultFormat resultFormat;
private final int batchSize;
private final long scanRowsOffset;
// Long.MAX_VALUE means "unlimited"; see isLimited().
private final long scanRowsLimit;
private final DimFilter dimFilter;
// Column names to return; null or empty means the columns are not known up front.
private final List<String> columns;
// Tri-state: null means "not specified"; see isLegacy().
private final Boolean legacy;
// Derived from orderBys: the direction when ordering is purely by __time, Order.NONE otherwise.
private final Order timeOrder;
// Never null; empty when no ordering was requested.
private final List<OrderBy> orderBys;
// The two values below are read from the query context in the constructor and may be null.
private final Integer maxRowsQueuedForOrdering;
private final Integer maxSegmentPartitionsOrderedInMemory;
@JsonCreator
public ScanQuery(
@JsonProperty("dataSource") DataSource dataSource,
@JsonProperty("intervals") QuerySegmentSpec querySegmentSpec,
@JsonProperty("virtualColumns") VirtualColumns virtualColumns,
@JsonProperty("resultFormat") ResultFormat resultFormat,
@JsonProperty("batchSize") int batchSize,
@JsonProperty("offset") long scanRowsOffset,
@JsonProperty("limit") long scanRowsLimit,
@JsonProperty("order") Order orderFromUser,
@JsonProperty("orderBy") List orderBysFromUser,
@JsonProperty("filter") DimFilter dimFilter,
@JsonProperty("columns") List columns,
@JsonProperty("legacy") Boolean legacy,
@JsonProperty("context") Map context
)
{
super(dataSource, querySegmentSpec, false, context);
this.virtualColumns = VirtualColumns.nullToEmpty(virtualColumns);
this.resultFormat = (resultFormat == null) ? ResultFormat.RESULT_FORMAT_LIST : resultFormat;
this.batchSize = (batchSize == 0) ? DEFAULT_BATCH_SIZE : batchSize;
Preconditions.checkArgument(
this.batchSize > 0,
"batchSize must be greater than 0"
);
this.scanRowsOffset = scanRowsOffset;
Preconditions.checkArgument(
this.scanRowsOffset >= 0,
"offset must be greater than or equal to 0"
);
this.scanRowsLimit = (scanRowsLimit == 0) ? Long.MAX_VALUE : scanRowsLimit;
Preconditions.checkArgument(
this.scanRowsLimit > 0,
"limit must be greater than 0"
);
this.dimFilter = dimFilter;
this.columns = columns;
this.legacy = legacy;
final Pair, Order> ordering = verifyAndReconcileOrdering(orderBysFromUser, orderFromUser);
this.orderBys = Preconditions.checkNotNull(ordering.lhs);
this.timeOrder = ordering.rhs;
if (this.columns != null && this.columns.size() > 0) {
// Validate orderBy. (Cannot validate when signature is empty, since that means "discover at runtime".)
for (final OrderBy orderByColumn : this.orderBys) {
if (!this.columns.contains(orderByColumn.getColumnName())) {
// Error message depends on how the user originally specified ordering.
if (orderBysFromUser != null) {
throw new IAE("Column [%s] from 'orderBy' must also appear in 'columns'.", orderByColumn.getColumnName());
} else {
throw new IllegalArgumentException("The __time column must be selected if the results are time-ordered.");
}
}
}
}
this.maxRowsQueuedForOrdering = validateAndGetMaxRowsQueuedForOrdering();
this.maxSegmentPartitionsOrderedInMemory = validateAndGetMaxSegmentPartitionsOrderedInMemory();
}
/**
 * Checks that the ordering of {@code query} is fully described by {@link #getTimeOrder()}. This is needed before
 * a query is actually executed, because {@link #getOrderBys()} is not yet understood by the query engines.
 *
 * @param query the query to check
 *
 * @throws IllegalStateException if the query has no time order but does have a non-empty orderBy list
 */
public static void verifyOrderByForNativeExecution(final ScanQuery query)
{
  if (query.getTimeOrder() != Order.NONE) {
    // Purely time-ordered: nothing else to check.
    return;
  }
  if (query.getOrderBys().isEmpty()) {
    // Not ordered at all.
    return;
  }
  throw new ISE("Cannot execute query with orderBy %s", query.getOrderBys());
}
/**
 * Reads {@link ScanQueryConfig#CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING} from the query context.
 *
 * @return the configured value, or null if the context lookup returns null
 *
 * @throws IllegalArgumentException if a value is present but not greater than 0
 */
private Integer validateAndGetMaxRowsQueuedForOrdering()
{
  final Integer configured = context().getInt(ScanQueryConfig.CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING);
  final boolean acceptable = configured == null || configured > 0;
  Preconditions.checkArgument(acceptable, "maxRowsQueuedForOrdering must be greater than 0");
  return configured;
}
/**
 * Reads {@link ScanQueryConfig#CTX_KEY_MAX_SEGMENT_PARTITIONS_FOR_ORDERING} from the query context.
 *
 * @return the configured value, or null if the context lookup returns null
 *
 * @throws IllegalArgumentException if a value is present but not greater than 0
 */
private Integer validateAndGetMaxSegmentPartitionsOrderedInMemory()
{
  final Integer maxSegmentPartitionsOrderedInMemory =
      context().getInt(ScanQueryConfig.CTX_KEY_MAX_SEGMENT_PARTITIONS_FOR_ORDERING);
  // The message must name this setting; it previously named maxRowsQueuedForOrdering.
  Preconditions.checkArgument(
      maxSegmentPartitionsOrderedInMemory == null || maxSegmentPartitionsOrderedInMemory > 0,
      "maxSegmentPartitionsOrderedInMemory must be greater than 0"
  );
  return maxSegmentPartitionsOrderedInMemory;
}
/**
 * Returns the virtual columns of this query, i.e. the constructor argument after it was passed through
 * {@link VirtualColumns#nullToEmpty}. Inclusion in JSON is decided by {@link VirtualColumns.JsonIncludeFilter}.
 */
@JsonProperty
@Override
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = VirtualColumns.JsonIncludeFilter.class)
public VirtualColumns getVirtualColumns()
{
  return this.virtualColumns;
}
/**
 * Returns the result layout. Never null: the constructor substitutes {@link ResultFormat#RESULT_FORMAT_LIST}
 * when none is given.
 */
@JsonProperty
public ResultFormat getResultFormat()
{
  return this.resultFormat;
}
/**
 * Returns the batch size; always greater than 0. The value is left out of JSON when
 * {@link BatchSizeJsonIncludeFilter} matches it, i.e. when it equals {@link #DEFAULT_BATCH_SIZE}.
 */
@JsonProperty
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = BatchSizeJsonIncludeFilter.class)
public int getBatchSize()
{
  return this.batchSize;
}
/**
 * Number of rows to skip before results are returned, like SQL "OFFSET". Zero means nothing is skipped;
 * the constructor rejects negative values. Serialized as "offset".
 */
@JsonProperty("offset")
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
public long getScanRowsOffset()
{
  return this.scanRowsOffset;
}
/**
 * Maximum number of rows to return, like SQL "LIMIT". The value is always positive; {@link Long#MAX_VALUE}
 * represents an effectively unlimited result set and is left out of JSON by
 * {@link ScanRowsLimitJsonIncludeFilter}. Serialized as "limit".
 */
@JsonProperty("limit")
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = ScanRowsLimitJsonIncludeFilter.class)
public long getScanRowsLimit()
{
  return this.scanRowsLimit;
}
/**
 * Returns a new {@link OffsetLimit} built from this query's offset and limit, in that order.
 */
public OffsetLimit getOffsetLimit()
{
  final long offset = scanRowsOffset;
  final long limit = scanRowsLimit;
  return new OffsetLimit(offset, limit);
}
/**
 * Tells whether this query has a row limit. Since {@link Long#MAX_VALUE} is the marker for "unlimited", this is
 * the same as {@code getScanRowsLimit() != Long.MAX_VALUE}.
 *
 * @see #getScanRowsLimit()
 */
public boolean isLimited()
{
  final boolean unlimited = scanRowsLimit == Long.MAX_VALUE;
  return !unlimited;
}
/**
 * Returns the time direction when this query is ordered purely by time, and {@link Order#NONE} otherwise.
 * A result of {@link Order#NONE} therefore does not imply that {@link #getOrderBys()} is empty. Serialized as
 * "order", and left out of JSON when {@link ScanTimeOrderJsonIncludeFilter} matches the value.
 */
@JsonProperty("order")
@JsonInclude(value = JsonInclude.Include.CUSTOM, valueFilter = ScanTimeOrderJsonIncludeFilter.class)
public Order getTimeOrder()
{
  return this.timeOrder;
}
/**
 * Returns the reconciled ordering of this query. Never null; empty when no ordering was requested. A time
 * ordering given through "order" shows up here as a single entry on the time column.
 */
public List<OrderBy> getOrderBys()
{
  return orderBys;
}
/**
 * Value written to JSON as "orderBy". It is only present when the ordering cannot be expressed through "order"
 * alone, which keeps redundant "orderBy" and "order" fields out of the JSON.
 *
 * @return the orderBy list if it has more than one entry or a single entry on a column other than the time
 *         column; null otherwise
 */
@Nullable
@JsonProperty("orderBy")
@JsonInclude(JsonInclude.Include.NON_EMPTY)
List<OrderBy> getOrderBysForJson()
{
  final List<OrderBy> orderByList = orderBys;

  if (orderByList.isEmpty()) {
    return null;
  }

  final boolean timeOnly =
      orderByList.size() == 1
      && ColumnHolder.TIME_COLUMN_NAME.equals(orderByList.get(0).getColumnName());

  // A lone time-column entry is already covered by the "order" property.
  return timeOnly ? null : orderByList;
}
/**
 * Returns the value read from the query context under
 * {@link ScanQueryConfig#CTX_KEY_MAX_ROWS_QUEUED_FOR_ORDERING}, or null if the context held none. Not serialized.
 */
@Nullable
@JsonIgnore
public Integer getMaxRowsQueuedForOrdering()
{
  return this.maxRowsQueuedForOrdering;
}
/**
 * Returns the value read from the query context under
 * {@link ScanQueryConfig#CTX_KEY_MAX_SEGMENT_PARTITIONS_FOR_ORDERING}, or null if the context held none.
 * Not serialized.
 */
@Nullable
@JsonIgnore
public Integer getMaxSegmentPartitionsOrderedInMemory()
{
  return this.maxSegmentPartitionsOrderedInMemory;
}
/**
 * Tells whether a filter was supplied, i.e. whether {@link #getFilter()} is non-null.
 */
@Override
public boolean hasFilters()
{
  return Objects.nonNull(dimFilter);
}
/**
 * Returns the filter of this query, or null if there is none. A null filter is left out of JSON.
 */
@Override
@Nullable
@JsonProperty
@JsonInclude(JsonInclude.Include.NON_NULL)
public DimFilter getFilter()
{
  return this.dimFilter;
}
/**
 * Returns the query type identifier for scan queries: the inherited {@code SCAN} constant.
 */
@Override
public String getType()
{
  return SCAN;
}
/**
 * Returns the names of the columns to return, exactly as given to the constructor. May be null or empty, in
 * which case the required columns are not known up front (see {@link #getRequiredColumns()}); such a value is
 * left out of JSON.
 */
@Nullable
@JsonProperty
@JsonInclude(JsonInclude.Include.NON_EMPTY)
public List<String> getColumns()
{
  return columns;
}
/**
 * Compatibility mode with the legacy scan-query extension.
 *
 * The three possible values have distinct meanings: true and false select "legacy" and "not legacy", while null
 * means that the default from {@link ScanQueryConfig#isLegacy()} applies. Use {@link #withNonNullLegacy} to
 * resolve a null value. A null value is left out of JSON.
 */
@Nullable
@JsonProperty
@JsonInclude(JsonInclude.Include.NON_NULL)
public Boolean isLegacy()
{
  return this.legacy;
}
/**
 * Returns the ordering of results for this query.
 *
 * Without a time order this is {@link Ordering#natural()}. With a time order, results are compared with
 * {@link ScanResultValueTimestampComparator} first, and ties are broken by the natural order of the results
 * (ascending) or its reverse (descending).
 *
 * @throws IllegalStateException if the query has a non-time orderBy, which cannot be executed yet
 */
@Override
public Ordering getResultOrdering()
{
  // No support yet for actually executing queries with non-time orderBy.
  verifyOrderByForNativeExecution(this);

  if (timeOrder == Order.NONE) {
    return Ordering.natural();
  }

  // Both branches are target-typed by thenComparing(), so no explicit type argument is needed.
  // Comparator.reverseOrder() is the comparator that naturalOrder().reversed() yields.
  return Ordering.from(
      new ScanResultValueTimestampComparator(this).thenComparing(
          timeOrder == Order.ASCENDING
          ? Comparator.naturalOrder()
          : Comparator.reverseOrder()
      )
  );
}
/**
 * Returns the columns this query needs, or null when that cannot be determined up front.
 *
 * @return null if "columns" is null or empty; otherwise the result of {@link Queries#computeRequiredColumns}
 *         for the virtual columns, the filter and "columns"
 */
@Nullable
@Override
public Set getRequiredColumns()
{
  final boolean columnsKnown = columns != null && !columns.isEmpty();
  if (!columnsKnown) {
    // We don't know what columns we require. We'll find out when the segment shows up.
    return null;
  }

  return Queries.computeRequiredColumns(
      virtualColumns,
      dimFilter,
      Collections.emptyList(),
      Collections.emptyList(),
      columns
  );
}
/**
 * Returns a copy of this query, built through {@link Druids.ScanQueryBuilder}, whose offset is
 * {@code newOffset}.
 */
public ScanQuery withOffset(final long newOffset)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .offset(newOffset)
      .build();
}
/**
 * Returns a copy of this query, built through {@link Druids.ScanQueryBuilder}, whose limit is
 * {@code newLimit}.
 */
public ScanQuery withLimit(final long newLimit)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .limit(newLimit)
      .build();
}
/**
 * Returns a copy of this query whose legacy flag is resolved: this query's own flag if it is non-null,
 * otherwise the value of {@link ScanQueryConfig#isLegacy()}.
 *
 * @param scanQueryConfig configuration consulted only when this query's flag is null
 */
public ScanQuery withNonNullLegacy(final ScanQueryConfig scanQueryConfig)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .legacy(legacy == null ? scanQueryConfig.isLegacy() : legacy)
      .build();
}
/**
 * Returns a copy of this query, built through {@link Druids.ScanQueryBuilder}, whose intervals are
 * {@code querySegmentSpec}.
 */
@Override
public ScanQuery withQuerySegmentSpec(QuerySegmentSpec querySegmentSpec)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .intervals(querySegmentSpec)
      .build();
}
/**
 * Returns a copy of this query, built through {@link Druids.ScanQueryBuilder}, whose data source is
 * {@code dataSource}.
 */
@Override
public ScanQuery withDataSource(DataSource dataSource)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .dataSource(dataSource)
      .build();
}
/**
 * Returns a copy of this query whose context is the result of {@code computeOverriddenContext} applied to the
 * current context and {@code contextOverrides}.
 */
@Override
public ScanQuery withOverriddenContext(Map contextOverrides)
{
  return Druids.ScanQueryBuilder
      .copy(this)
      .context(computeOverriddenContext(getContext(), contextOverrides))
      .build();
}
/**
 * Two scan queries are equal when they are of the same class, the superclass considers them equal, and they
 * agree on batch size, offset, limit, legacy flag, virtual columns, result format, filter, columns and orderBys.
 * The time order and the two ordering limits are not compared directly; within this class they are computed
 * from the orderBys and from the query context respectively.
 */
@Override
public boolean equals(Object o)
{
  if (o == this) {
    return true;
  }
  if (o == null || o.getClass() != getClass() || !super.equals(o)) {
    return false;
  }

  final ScanQuery that = (ScanQuery) o;
  return batchSize == that.batchSize
         && scanRowsOffset == that.scanRowsOffset
         && scanRowsLimit == that.scanRowsLimit
         && Objects.equals(legacy, that.legacy)
         && Objects.equals(virtualColumns, that.virtualColumns)
         && Objects.equals(resultFormat, that.resultFormat)
         && Objects.equals(dimFilter, that.dimFilter)
         && Objects.equals(columns, that.columns)
         && Objects.equals(orderBys, that.orderBys);
}
/**
 * Hash over the superclass hash and the same fields that {@link #equals(Object)} compares. The argument order
 * is part of the computed value and must not be changed.
 */
@Override
public int hashCode()
{
  return Objects.hash(
      super.hashCode(),
      virtualColumns, resultFormat,
      batchSize, scanRowsOffset, scanRowsLimit,
      dimFilter, columns, orderBys,
      legacy
  );
}
/**
 * Renders the query as "ScanQuery{...}". The "orderBy" part appears only when there is an ordering, and the
 * "legacy" part only when the legacy flag is non-null.
 */
@Override
public String toString()
{
  final StringBuilder text = new StringBuilder("ScanQuery{");
  text.append("dataSource='").append(getDataSource()).append('\'');
  text.append(", querySegmentSpec=").append(getQuerySegmentSpec());
  text.append(", virtualColumns=").append(getVirtualColumns());
  text.append(", resultFormat='").append(resultFormat).append('\'');
  text.append(", batchSize=").append(batchSize);
  text.append(", offset=").append(scanRowsOffset);
  text.append(", limit=").append(scanRowsLimit);
  text.append(", dimFilter=").append(dimFilter);
  text.append(", columns=").append(columns);
  if (!orderBys.isEmpty()) {
    text.append(", orderBy=").append(orderBys);
  }
  if (legacy != null) {
    text.append(", legacy=").append(legacy);
  }
  text.append(", context=").append(getContext());
  text.append('}');
  return text.toString();
}
/**
* Verify and reconcile the two ways of specifying ordering: "orderBy", which can refer to any column, and
* "order", which refers to the __time column.
*
* If only "order" is provided, it is returned as-is, along with an equivalent "orderBy".
*
* If only "orderBy" is provided, it is returned as-is. If it can be converted into an equivalent "order", then that
* equivalent "order" is also returned. Otherwise, "orderBy" is returned as-is and "order" is returned as NONE.
*
* If both "orderBy" and "order" are provided, this returns them as-is if they are compatible, or throws an
* exception if they are incompatible.
*
* @param orderByFromUser "orderBy" specified by the user (can refer to any column)
* @param orderFromUser "order" specified by the user (refers to time order)
*/
private static Pair, Order> verifyAndReconcileOrdering(
@Nullable final List orderByFromUser,
@Nullable final Order orderFromUser
)
{
final List orderByRetVal;
final Order orderRetVal;
// Compute the returned orderBy.
if (orderByFromUser != null) {
orderByRetVal = orderByFromUser;
} else if (orderFromUser == null || orderFromUser == Order.NONE) {
orderByRetVal = Collections.emptyList();
} else {
orderByRetVal = Collections.singletonList(new OrderBy(ColumnHolder.TIME_COLUMN_NAME, orderFromUser));
}
// Compute the returned order.
orderRetVal = computeTimeOrderFromOrderBys(orderByRetVal);
// Verify compatibility, if the user specified both kinds of ordering.
if (orderFromUser != null && orderFromUser != Order.NONE && orderRetVal != orderFromUser) {
throw new IAE("Cannot provide 'order' incompatible with 'orderBy'");
}
return Pair.of(orderByRetVal, orderRetVal);
}
/**
 * Compute time ordering based on a list of orderBys.
 *
 * Returns {@link Order#ASCENDING} or {@link Order#DESCENDING} if the ordering is time-based, i.e. the list has
 * exactly one entry and that entry is on the time column; returns {@link Order#NONE} otherwise. Importantly,
 * this means that the returned order is not necessarily compatible with the input orderBys.
 *
 * @param orderBys orderBy entries to inspect; must not be null
 *
 * @return the time order; never null
 */
private static Order computeTimeOrderFromOrderBys(final List<OrderBy> orderBys)
{
  if (orderBys.size() == 1) {
    final OrderBy orderByColumn = Iterables.getOnlyElement(orderBys);
    if (ColumnHolder.TIME_COLUMN_NAME.equals(orderByColumn.getColumnName())) {
      return orderByColumn.getOrder();
    }
  }
  return Order.NONE;
}
/**
 * {@link JsonInclude} filter for {@link #getTimeOrder()}: matches {@link Order#NONE}, so that value is the one
 * left out of JSON.
 *
 * This API works by "creative" use of equals. It requires warnings to be suppressed and also requires spotbugs
 * exclusions (see spotbugs-exclude.xml).
 */
@SuppressWarnings({"EqualsAndHashcode", "EqualsHashCode"})
static class ScanTimeOrderJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode]
{
  @Override
  public boolean equals(Object obj)
  {
    // Enum constants are singletons, so an identity check covers both the type test and the value test.
    return obj == Order.NONE;
  }
}
/**
 * {@link JsonInclude} filter for {@link #getScanRowsLimit()}: matches a {@link Long} equal to
 * {@link Long#MAX_VALUE}, so the "unlimited" marker is the value left out of JSON.
 *
 * This API works by "creative" use of equals. It requires warnings to be suppressed and also requires spotbugs
 * exclusions (see spotbugs-exclude.xml).
 */
@SuppressWarnings({"EqualsAndHashcode", "EqualsHashCode"})
static class ScanRowsLimitJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode]
{
  @Override
  public boolean equals(Object obj)
  {
    // Long.equals is true only for another Long holding the same value.
    return Long.valueOf(Long.MAX_VALUE).equals(obj);
  }
}
/**
 * {@link JsonInclude} filter for {@link #getBatchSize()}: matches an {@link Integer} equal to
 * {@link #DEFAULT_BATCH_SIZE}, so the default batch size is the value left out of JSON.
 *
 * This API works by "creative" use of equals. It requires warnings to be suppressed and also requires spotbugs
 * exclusions (see spotbugs-exclude.xml).
 */
@SuppressWarnings({"EqualsAndHashcode", "EqualsHashCode"})
static class BatchSizeJsonIncludeFilter // lgtm [java/inconsistent-equals-and-hashcode]
{
  @Override
  public boolean equals(Object obj)
  {
    // Integer.equals is true only for another Integer holding the same value.
    return Integer.valueOf(DEFAULT_BATCH_SIZE).equals(obj);
  }
}
}