org.apache.druid.segment.virtual.ExpressionSelectors Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid segments
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.virtual;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Supplier;
import com.google.common.collect.Iterables;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.java.util.common.NonnullPair;
import org.apache.druid.math.expr.Evals;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionProcessing;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.math.expr.InputBindings;
import org.apache.druid.query.dimension.DefaultDimensionSpec;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.ConstantExprEvalSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.NilColumnValueSelector;
import org.apache.druid.segment.RowIdSupplier;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.data.IndexedInts;
import javax.annotation.Nullable;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
public class ExpressionSelectors
{
/**
 * Private so that this class cannot be instantiated from outside; the members shown in this file are
 * reached through static calls only.
 */
private ExpressionSelectors()
{
// No instantiation.
}
/**
 * Makes a ColumnValueSelector whose getObject method returns an Object that is the value computed by
 * an {@link ExprEval}.
 *
 * The returned selector is a thin wrapper around the selector produced by
 * {@link ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)}: the primitive getters and
 * {@code isNull()} delegate straight to it, while {@code getObject()} unwraps the {@link ExprEval} through
 * {@link ExprEval#valueOrDefault()}.
 *
 * @param columnSelectorFactory factory used to build the underlying {@link ExprEval} selector
 * @param expression            expression to evaluate
 *
 * @return a selector exposing the evaluated expression value
 *
 * @see ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)
 */
public static ColumnValueSelector makeColumnValueSelector(
    ColumnSelectorFactory columnSelectorFactory,
    Expr expression
)
{
  // Typed as ColumnValueSelector<ExprEval> so that getObject() yields an ExprEval without a cast.
  final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression);

  return new ColumnValueSelector()
  {
    @Override
    public double getDouble()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getDouble();
    }

    @Override
    public float getFloat()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getFloat();
    }

    @Override
    public long getLong()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getLong();
    }

    @Override
    public boolean isNull()
    {
      return baseSelector.isNull();
    }

    @Nullable
    @Override
    public Object getObject()
    {
      // No null check on the ExprEval itself: baseSelector implementations never return null from getObject().
      final ExprEval eval = baseSelector.getObject();
      return eval.valueOrDefault();
    }

    @Override
    public Class classOfObject()
    {
      return Object.class;
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
    {
      inspector.visit("baseSelector", baseSelector);
    }
  };
}
/**
 * Makes a ColumnValueSelector whose getObject method returns the value computed by an {@link ExprEval}
 * after passing it through {@code coerceEvalToObjectOrList}.
 *
 * The primitive getters and {@code isNull()} delegate straight to the selector produced by
 * {@link ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)}.
 *
 * NOTE(review): the exact shape of the object returned by {@code getObject()} is decided by
 * {@code coerceEvalToObjectOrList}, which is defined elsewhere in this class; confirm there.
 *
 * @param columnSelectorFactory factory used to build the underlying {@link ExprEval} selector
 * @param expression            expression to evaluate
 *
 * @return a selector exposing the coerced expression value
 *
 * @see ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)
 */
public static ColumnValueSelector makeStringColumnValueSelector(
    ColumnSelectorFactory columnSelectorFactory,
    Expr expression
)
{
  // Typed as ColumnValueSelector<ExprEval> so that getObject() yields an ExprEval without a cast.
  final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression);

  return new ColumnValueSelector()
  {
    @Override
    public double getDouble()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getDouble();
    }

    @Override
    public float getFloat()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getFloat();
    }

    @Override
    public long getLong()
    {
      // No null-handling assertion here: baseSelector already performs it.
      return baseSelector.getLong();
    }

    @Override
    public boolean isNull()
    {
      return baseSelector.isNull();
    }

    @Nullable
    @Override
    public Object getObject()
    {
      // No null check on the ExprEval itself: baseSelector implementations never return null from getObject().
      final ExprEval eval = baseSelector.getObject();
      return coerceEvalToObjectOrList(eval);
    }

    @Override
    public Class classOfObject()
    {
      return Object.class;
    }

    @Override
    public void inspectRuntimeShape(RuntimeShapeInspector inspector)
    {
      inspector.visit("baseSelector", baseSelector);
    }
  };
}
/**
 * Makes a ColumnValueSelector whose getObject method returns an {@link ExprEval}.
 *
 * The expression is first planned with {@link ExpressionPlanner#plan}, and the selector implementation is
 * then chosen in this order:
 * <ol>
 *   <li>single scalar input of type LONG: {@link SingleLongInputCachingExpressionColumnValueSelector}</li>
 *   <li>single scalar input of type STRING: {@link SingleStringInputCachingExpressionColumnValueSelector}</li>
 *   <li>bindings equal to {@link InputBindings#nilBindings()}: the expression is evaluated once and wrapped in a
 *       {@link ConstantExprEvalSelector}</li>
 *   <li>any unknown or incomplete inputs: {@link RowBasedExpressionColumnValueSelector}</li>
 *   <li>otherwise: {@link ExpressionColumnValueSelector} over the plan's applied expression</li>
 * </ol>
 *
 * @param columnSelectorFactory factory supplying input column selectors, capabilities and the row id supplier
 * @param expression            expression to evaluate
 *
 * @return a selector whose {@code getObject()} returns the {@link ExprEval} for the current row
 *
 * @see ExpressionSelectors#makeColumnValueSelector(ColumnSelectorFactory, Expr)
 */
public static ColumnValueSelector<ExprEval> makeExprEvalSelector(
    ColumnSelectorFactory columnSelectorFactory,
    Expr expression
)
{
  final ExpressionPlan plan = ExpressionPlanner.plan(
      columnSelectorFactory,
      Expr.singleThreaded(expression, columnSelectorFactory)
  );
  final RowIdSupplier rowIdSupplier = columnSelectorFactory.getRowIdSupplier();

  if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) {
    final String column = plan.getSingleInputName();
    final ColumnType inputType = plan.getSingleInputType();

    if (inputType.is(ValueType.LONG)) {
      // __time doesn't need an LRU cache since it is sorted.
      final boolean useLruCache = !ColumnHolder.TIME_COLUMN_NAME.equals(column);
      return new SingleLongInputCachingExpressionColumnValueSelector(
          columnSelectorFactory.makeColumnValueSelector(column),
          plan.getExpression(),
          useLruCache,
          rowIdSupplier
      );
    } else if (inputType.is(ValueType.STRING)) {
      return new SingleStringInputCachingExpressionColumnValueSelector(
          columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(column, column, ColumnType.STRING)),
          plan.getExpression(),
          rowIdSupplier
      );
    }
    // Any other single-input type falls through to the generic selectors below.
  }

  final Expr.ObjectBinding bindings = createBindings(columnSelectorFactory, plan);

  // Optimization for constant expressions: evaluate once up front.
  if (bindings.equals(InputBindings.nilBindings())) {
    return new ConstantExprEvalSelector(plan.getExpression().eval(bindings));
  }

  // If any column input types are unknown, fall back to an expression selector that examines input bindings
  // on a per-row basis.
  if (plan.any(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS)) {
    return new RowBasedExpressionColumnValueSelector(plan, bindings, rowIdSupplier);
  }

  // Generic expression value selector for fully known input types.
  return new ExpressionColumnValueSelector(plan.getAppliedExpression(), bindings, rowIdSupplier);
}
/**
 * Makes a single or multi-value {@link DimensionSelector} wrapper around a {@link ColumnValueSelector} created by
 * {@link ExpressionSelectors#makeExprEvalSelector(ColumnSelectorFactory, Expr)} as appropriate.
 *
 * Selection order:
 * <ol>
 *   <li>single STRING input that is scalar or mappable: a deferred-evaluation selector over the input
 *       column's own dimension selector</li>
 *   <li>constant expression: a constant (or multi-value constant, for non-scalar output) selector</li>
 *   <li>{@link NilColumnValueSelector} base: a constant null selector</li>
 *   <li>otherwise: a multi-value wrapper when the plan has non-scalar output, needs to be applied, or has
 *       unknown/incomplete inputs; else a single-value wrapper</li>
 * </ol>
 *
 * @param columnSelectorFactory factory supplying the input column selectors
 * @param expression            expression to evaluate
 * @param extractionFn          optional extraction function handed to the constant and wrapper selectors
 *
 * @return a dimension selector over the string form of the expression result
 */
public static DimensionSelector makeDimensionSelector(
    final ColumnSelectorFactory columnSelectorFactory,
    final Expr expression,
    @Nullable final ExtractionFn extractionFn
)
{
  final ExpressionPlan plan = ExpressionPlanner.plan(
      columnSelectorFactory,
      Expr.singleThreaded(expression, columnSelectorFactory)
  );

  if (plan.any(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE)) {
    final String column = plan.getSingleInputName();
    if (plan.getSingleInputType().is(ValueType.STRING)) {
      // NOTE(review): extractionFn is not forwarded on this path; confirm callers apply it separately.
      return new SingleStringInputDeferredEvaluationExpressionDimensionSelector(
          columnSelectorFactory.makeDimensionSelector(DefaultDimensionSpec.of(column)),
          expression
      );
    }
  }

  // Typed as ColumnValueSelector<ExprEval> so that getObject() yields an ExprEval without a cast.
  final ColumnValueSelector<ExprEval> baseSelector = makeExprEvalSelector(columnSelectorFactory, expression);

  if (baseSelector instanceof ConstantExprEvalSelector) {
    // Optimization for dimension selectors on constants.
    if (plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT)) {
      final Object[] value = baseSelector.getObject().asArray();
      // A null array is passed through as a null list rather than an empty one.
      final List<String> stringList =
          value == null
          ? null
          : Arrays.stream(value).map(Evals::asString).collect(Collectors.toList());
      return DimensionSelector.multiConstant(stringList, extractionFn);
    }
    return DimensionSelector.constant(baseSelector.getObject().asString(), extractionFn);
  }

  if (baseSelector instanceof NilColumnValueSelector) {
    // Optimization for null dimension selector.
    // NOTE(review): extractionFn is not forwarded on this path either; confirm that is intended.
    return DimensionSelector.constant(null);
  }

  final boolean multiValue = plan.any(
      ExpressionPlan.Trait.NON_SCALAR_OUTPUT,
      ExpressionPlan.Trait.NEEDS_APPLIED,
      ExpressionPlan.Trait.UNKNOWN_INPUTS,
      ExpressionPlan.Trait.INCOMPLETE_INPUTS
  );
  if (multiValue) {
    return ExpressionMultiValueDimensionSelector.fromValueSelector(baseSelector, extractionFn);
  }
  return ExpressionSingleValueDimensionSelector.fromValueSelector(baseSelector, extractionFn);
}
/**
 * Decides whether an expression may be evaluated once per distinct value of a column (for example, once per
 * dictionary entry) instead of once per row.
 *
 * Only call this after establishing that the expression reads exactly one column and that the column has a
 * dictionary; the single-binding requirement is enforced with a state check.
 *
 * @param bindingAnalysis    result of calling {@link Expr#analyzeInputs()} on an expression
 * @param columnCapabilities {@link ColumnCapabilities} for the input binding, or null if unavailable
 *
 * @return true only when capabilities are present, the column's multi-value status is not unknown, and the
 *         expression neither takes array inputs nor produces an array output
 *
 * @throws IllegalStateException if the analysis does not have exactly one required binding
 */
public static boolean canMapOverDictionary(
    final Expr.BindingAnalysis bindingAnalysis,
    @Nullable final ColumnCapabilities columnCapabilities
)
{
  Preconditions.checkState(bindingAnalysis.getRequiredBindings().size() == 1, "requiredBindings.size == 1");

  // Without capabilities nothing can be said about the column.
  if (columnCapabilities == null) {
    return false;
  }

  // The column's multi-value status must be known one way or the other.
  if (columnCapabilities.hasMultipleValues().isUnknown()) {
    return false;
  }

  // Array inputs or an array output rule out mapping over unique values.
  final boolean involvesArrays = bindingAnalysis.hasInputArrays() || bindingAnalysis.isOutputArray();
  return !involvesArrays;
}
/**
* Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link ExpressionPlan} which
* provides the set of identifiers which need a binding (list of required columns), and context of whether or not they
* are used as array or scalar inputs
*/
public static Expr.ObjectBinding createBindings(
ColumnSelectorFactory columnSelectorFactory,
ExpressionPlan plan
)
{
final List columns = plan.getAnalysis().getRequiredBindingsList();
final Map> suppliers = new HashMap<>();
for (String columnName : columns) {
final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
final Supplier