// org.apache.drill.exec.expr.IsPredicate Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.drill.exec.expr;
import org.apache.drill.common.expression.LogicalExpression;
import org.apache.drill.common.expression.LogicalExpressionBase;
import org.apache.drill.common.expression.TypedFieldExpr;
import org.apache.drill.common.expression.visitors.ExprVisitor;
import org.apache.drill.exec.expr.fn.FunctionGenerationHelper;
import org.apache.drill.exec.expr.stat.RowsMatch;
import org.apache.drill.metastore.statistics.ColumnStatistics;
import org.apache.drill.metastore.statistics.ColumnStatisticsKind;
import org.apache.drill.metastore.statistics.Statistic;
import org.apache.drill.shaded.guava.com.google.common.base.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.function.BiFunction;
public class IsPredicate> extends LogicalExpressionBase implements FilterPredicate {
private static final Logger logger = LoggerFactory.getLogger(IsPredicate.class);
private final LogicalExpression expr;
private final BiFunction, StatisticsProvider, RowsMatch> predicate;
private IsPredicate(LogicalExpression expr,
BiFunction, StatisticsProvider, RowsMatch> predicate) {
super(expr.getPosition());
this.expr = expr;
this.predicate = predicate;
}
@Override
public Iterator iterator() {
final List args = new ArrayList<>();
args.add(expr);
return args.iterator();
}
@Override
public T accept(ExprVisitor visitor, V value) throws E {
return visitor.visitUnknown(this, value);
}
/**
* Apply the filter condition against the meta of the rowgroup.
*/
@Override
public RowsMatch matches(StatisticsProvider evaluator) {
@SuppressWarnings("unchecked")
ColumnStatistics exprStat = (ColumnStatistics) expr.accept(evaluator, null);
return isNullOrEmpty(exprStat) ? RowsMatch.SOME : predicate.apply(exprStat, evaluator);
}
/**
* @param stat statistics object
* @return true if the input stat object is null or has invalid statistics; false otherwise
*/
public static boolean isNullOrEmpty(ColumnStatistics> stat) {
return stat == null
|| !stat.contains(ColumnStatisticsKind.MIN_VALUE)
|| !stat.contains(ColumnStatisticsKind.MAX_VALUE)
|| !stat.contains(ColumnStatisticsKind.NULLS_COUNT)
|| ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) == Statistic.NO_COLUMN_STATS;
}
/**
* After the applying of the filter against the statistics of the rowgroup, if the result is RowsMatch.ALL,
* then we still must know if the rowgroup contains some null values, because it can change the filter result.
* If it contains some null values, then we change the RowsMatch.ALL into RowsMatch.SOME, which sya that maybe
* some values (the null ones) should be disgarded.
*/
private static RowsMatch checkNull(ColumnStatistics> exprStat) {
return hasNoNulls(exprStat) ? RowsMatch.ALL : RowsMatch.SOME;
}
/**
* Checks that column chunk's statistics does not have nulls
*
* @param stat column statistics
* @return true if the statistics does not have nulls and false otherwise
*/
static boolean hasNoNulls(ColumnStatistics> stat) {
return ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) == 0;
}
/**
* IS NULL predicate.
*/
private static > LogicalExpression createIsNullPredicate(LogicalExpression expr) {
return new IsPredicate(expr,
(exprStat, evaluator) -> {
// for arrays we are not able to define exact number of nulls
// [1,2,3] vs [1,2] -> in second case 3 is absent and thus it's null but statistics shows no nulls
if (expr instanceof TypedFieldExpr) {
TypedFieldExpr typedFieldExpr = (TypedFieldExpr) expr;
if (typedFieldExpr.getPath().isArray()) {
return RowsMatch.SOME;
}
}
if (hasNoNulls(exprStat)) {
return RowsMatch.NONE;
}
return isAllNulls(exprStat, evaluator.getRowCount()) ? RowsMatch.ALL : RowsMatch.SOME;
});
}
/**
* Checks that column chunk's statistics has only nulls.
*
* Besides comparing number of nulls, we need to check
* if min and max values are also nulls to cover use cases for arrays,
* since array can hold N number of elements and nulls statistics
* is collected for all elements, thus number of nulls may be greater
* or equal to the number of rows.
*
* Two rows: [null, {"id": 1}], [null, {"id": 2}]
*
* Statistics: rows => 2, nulls => 2, min => 1, max => 2
*
* @param stat column statistics
* @param rowCount number of rows of the specified statistics
* @param type of column values
* @return true if all rows are null, false otherwise
*/
static boolean isAllNulls(ColumnStatistics stat, long rowCount) {
Preconditions.checkArgument(rowCount >= 0, "negative rowCount %d is not valid", rowCount);
return ColumnStatisticsKind.NULLS_COUNT.getFrom(stat) >= rowCount
&& ColumnStatisticsKind.MIN_VALUE.getValueStatistic(stat) == null
&& ColumnStatisticsKind.MAX_VALUE.getValueStatistic(stat) == null;
}
static boolean hasNonNullValues(ColumnStatistics stat, long rowCount) {
return rowCount > ColumnStatisticsKind.NULLS_COUNT.getFrom(stat)
&& ColumnStatisticsKind.MIN_VALUE.getValueStatistic(stat) != null
&& ColumnStatisticsKind.MAX_VALUE.getValueStatistic(stat) != null;
}
/**
* IS NOT NULL predicate.
*/
private static > LogicalExpression createIsNotNullPredicate(LogicalExpression expr) {
return new IsPredicate(expr,
(exprStat, evaluator) -> isAllNulls(exprStat, evaluator.getRowCount()) ? RowsMatch.NONE : checkNull(exprStat)
);
}
/**
* IS TRUE predicate.
*/
private static LogicalExpression createIsTruePredicate(LogicalExpression expr) {
return new IsPredicate(expr, (exprStat, evaluator) -> {
if (isAllNulls(exprStat, evaluator.getRowCount())) {
return RowsMatch.NONE;
}
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (!ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat)) {
return RowsMatch.NONE;
}
return ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat) ? checkNull(exprStat) : RowsMatch.SOME;
});
}
/**
* IS FALSE predicate.
*/
private static LogicalExpression createIsFalsePredicate(LogicalExpression expr) {
return new IsPredicate(expr, (exprStat, evaluator) -> {
if (isAllNulls(exprStat, evaluator.getRowCount())) {
return RowsMatch.NONE;
}
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat)) {
return RowsMatch.NONE;
}
return ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat) ? RowsMatch.SOME : checkNull(exprStat);
});
}
/**
* IS NOT TRUE predicate.
*/
private static LogicalExpression createIsNotTruePredicate(LogicalExpression expr) {
return new IsPredicate(expr, (exprStat, evaluator) -> {
if (isAllNulls(exprStat, evaluator.getRowCount())) {
return RowsMatch.ALL;
}
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat)) {
return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
}
return ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat) ? RowsMatch.SOME : RowsMatch.ALL;
});
}
/**
* IS NOT FALSE predicate.
*/
private static LogicalExpression createIsNotFalsePredicate(LogicalExpression expr) {
return new IsPredicate(expr, (exprStat, evaluator) -> {
if (isAllNulls(exprStat, evaluator.getRowCount())) {
return RowsMatch.ALL;
}
if (!hasNonNullValues(exprStat, evaluator.getRowCount())) {
return RowsMatch.SOME;
}
if (!ColumnStatisticsKind.MAX_VALUE.getValueStatistic(exprStat)) {
return hasNoNulls(exprStat) ? RowsMatch.NONE : RowsMatch.SOME;
}
return ColumnStatisticsKind.MIN_VALUE.getValueStatistic(exprStat) ? RowsMatch.ALL : RowsMatch.SOME;
});
}
public static > LogicalExpression createIsPredicate(String function, LogicalExpression expr) {
switch (function) {
case FunctionGenerationHelper.IS_NULL:
return IsPredicate.createIsNullPredicate(expr);
case FunctionGenerationHelper.IS_NOT_NULL:
return IsPredicate.createIsNotNullPredicate(expr);
case FunctionGenerationHelper.IS_TRUE:
return createIsTruePredicate(expr);
case FunctionGenerationHelper.IS_NOT_TRUE:
return createIsNotTruePredicate(expr);
case FunctionGenerationHelper.IS_FALSE:
case FunctionGenerationHelper.NOT:
return createIsFalsePredicate(expr);
case FunctionGenerationHelper.IS_NOT_FALSE:
return createIsNotFalsePredicate(expr);
default:
logger.warn("Unhandled IS function. Function name: {}", function);
return null;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy