All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.druid.sql.calcite.filtration.CombineAndSimplifyBounds Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.sql.calcite.filtration;

import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Range;
import com.google.common.collect.RangeSet;
import it.unimi.dsi.fastutil.Pair;
import it.unimi.dsi.fastutil.ints.IntOpenHashSet;
import it.unimi.dsi.fastutil.objects.ObjectIntPair;
import org.apache.druid.query.filter.AndDimFilter;
import org.apache.druid.query.filter.BoundDimFilter;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.query.filter.FalseDimFilter;
import org.apache.druid.query.filter.NotDimFilter;
import org.apache.druid.query.filter.OrDimFilter;
import org.apache.druid.query.filter.RangeFilter;
import org.apache.druid.segment.column.ColumnType;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class CombineAndSimplifyBounds extends BottomUpTransform
{
  private static final CombineAndSimplifyBounds INSTANCE = new CombineAndSimplifyBounds();

  private CombineAndSimplifyBounds()
  {
  }

  public static CombineAndSimplifyBounds instance()
  {
    return INSTANCE;
  }

  @Override
  public DimFilter process(DimFilter filter)
  {
    if (filter instanceof FalseDimFilter) {
      // we might sometimes come into here with just a false from optimizing impossible conditions
      return filter;
    } else if (filter instanceof AndDimFilter) {
      final List children = getAndFilterChildren((AndDimFilter) filter);
      return doSimplifyAnd(children);
    } else if (filter instanceof OrDimFilter) {
      final List children = getOrFilterChildren((OrDimFilter) filter);
      return doSimplifyOr(children);
    } else if (filter instanceof NotDimFilter) {
      final DimFilter field = ((NotDimFilter) filter).getField();
      return negate(field);
    } else {
      return filter;
    }
  }

  private List getAndFilterChildren(final AndDimFilter filter)
  {
    final List children = new ArrayList<>();
    for (final DimFilter field : filter.getFields()) {
      if (field instanceof AndDimFilter) {
        children.addAll(getAndFilterChildren((AndDimFilter) field));
      } else {
        children.add(field);
      }
    }
    return children;
  }

  private List getOrFilterChildren(final OrDimFilter filter)
  {
    final List children = new ArrayList<>();
    for (final DimFilter field : filter.getFields()) {
      if (field instanceof OrDimFilter) {
        children.addAll(getOrFilterChildren((OrDimFilter) field));
      } else {
        children.add(field);
      }
    }
    return children;
  }

  private static DimFilter doSimplifyAnd(final List children)
  {
    return doSimplify(children, false);
  }

  private static DimFilter doSimplifyOr(final List children)
  {
    return doSimplify(children, true);
  }

  /**
   * Simplify {@link BoundDimFilter} and {@link RangeFilter} that are children of an OR or an AND.
   *
   * @param children    the filters
   * @param disjunction true for OR, false for AND
   *
   * @return simplified filters
   */
  private static DimFilter doSimplify(final List children, boolean disjunction)
  {
    // Copy the list of child filters. We'll modify the copy and eventually return it.
    // Filters we want to add and remove from "children".
    final List childrenToAdd = new ArrayList<>();
    final IntOpenHashSet childrenToRemove = new IntOpenHashSet();

    // Group Bound filters by dimension, extractionFn, and comparator and compute a RangeSet for each one.
    // Each filter is paired with its position in the "children" array.
    final Map>> bounds = new HashMap<>();
    // Group range filters by dimension, extractionFn, and matchValueType and compute a RangeSet for each one.
    // Each filter is paired with its position in the "children" array.
    final Map>> ranges = new HashMap<>();
    final Map leastRestrictiveNumericTypes = new HashMap<>();

    // all and/or filters have at least 1 child
    boolean allFalse = true;
    for (int childIndex = 0; childIndex < children.size(); childIndex++) {
      final DimFilter child = children.get(childIndex);
      if (child instanceof BoundDimFilter) {
        final BoundDimFilter bound = (BoundDimFilter) child;
        final BoundRefKey boundRefKey = BoundRefKey.from(bound);
        final List> filterList =
            bounds.computeIfAbsent(boundRefKey, k -> new ArrayList<>());
        filterList.add(ObjectIntPair.of(bound, childIndex));
        allFalse = false;
      } else if (child instanceof RangeFilter) {
        final RangeFilter range = (RangeFilter) child;
        final RangeRefKey rangeRefKey = RangeRefKey.from(range);
        if (rangeRefKey.getMatchValueType().isNumeric()) {
          leastRestrictiveNumericTypes.compute(
              range.getColumn(),
              (c, existingType) -> ColumnType.leastRestrictiveType(existingType, range.getMatchValueType())
          );
        }

        final List> filterList =
            ranges.computeIfAbsent(rangeRefKey, k -> new ArrayList<>());
        filterList.add(ObjectIntPair.of(range, childIndex));
        allFalse = false;
      } else {
        allFalse = allFalse && (child instanceof FalseDimFilter);
      }
    }

    // short circuit if can never be true
    if (allFalse) {
      return Filtration.matchNothing();
    }

    // Try to simplify "bound" filters within each group of "bounds".
    for (Map.Entry>> entry : bounds.entrySet()) {
      final BoundRefKey boundRefKey = entry.getKey();
      final List> filterList = entry.getValue();

      // Create a RangeSet for this group.
      final RangeSet rangeSet =
          disjunction
          ? RangeSets.unionRanges(Bounds.toRanges(Lists.transform(filterList, Pair::left)))
          : RangeSets.intersectRanges(Bounds.toRanges(Lists.transform(filterList, Pair::left)));

      if (rangeSet.asRanges().size() < filterList.size()) {
        // We found a simplification. Remove the old filters and add new ones.
        for (final ObjectIntPair boundAndChildIndex : filterList) {
          childrenToRemove.add(boundAndChildIndex.rightInt());
        }

        if (rangeSet.asRanges().isEmpty()) {
          // range set matches nothing, equivalent to FALSE
          childrenToAdd.add(Filtration.matchNothing());
        }

        for (final Range range : rangeSet.asRanges()) {
          if (!range.hasLowerBound() && !range.hasUpperBound()) {
            // range matches all, equivalent to TRUE
            childrenToAdd.add(Filtration.matchEverything());
          } else {
            childrenToAdd.add(Bounds.toFilter(boundRefKey, range));
          }
        }
      } else if (disjunction && Range.all().equals(rangeSet.span())) {
        // ranges in disjunction - spanning ALL
        // complementer must be a negated set of ranges
        for (final ObjectIntPair boundAndChildIndex : filterList) {
          childrenToRemove.add(boundAndChildIndex.rightInt());
        }
        Set> newRanges = rangeSet.complement().asRanges();
        List newFilters = new ArrayList<>();
        for (Range range : newRanges) {
          BoundDimFilter filter = Bounds.toFilter(boundRefKey, range);
          newFilters.add(filter);
        }
        childrenToAdd.add(new NotDimFilter(disjunction(newFilters)));
      }
    }

    // Consolidate groups of numeric ranges in "ranges", using the leastRestrictiveNumericTypes computed earlier.
    final Map>> consolidatedRanges =
        Maps.newHashMapWithExpectedSize(ranges.size());
    for (Map.Entry>> entry : ranges.entrySet()) {
      boolean refKeyChanged = false;
      RangeRefKey refKey = entry.getKey();
      if (entry.getKey().getMatchValueType().isNumeric()) {
        ColumnType numericTypeToUse = leastRestrictiveNumericTypes.get(refKey.getColumn());
        if (!numericTypeToUse.equals(refKey.getMatchValueType())) {
          refKeyChanged = true;
          refKey = new RangeRefKey(refKey.getColumn(), numericTypeToUse);
        }
      }
      final List> consolidatedFilterList =
          consolidatedRanges.computeIfAbsent(refKey, k -> new ArrayList<>());

      if (refKeyChanged) {
        for (ObjectIntPair filterAndChildIndex : entry.getValue()) {
          final RangeFilter rewrite =
              Ranges.toFilter(refKey, Ranges.toRange(filterAndChildIndex.left(), refKey.getMatchValueType()));
          consolidatedFilterList.add(ObjectIntPair.of(rewrite, filterAndChildIndex.rightInt()));
        }
      } else {
        consolidatedFilterList.addAll(entry.getValue());
      }
    }

    // Try to simplify "range" filters within each group of "consolidatedRanges" (derived from "ranges").
    for (Map.Entry>> entry : consolidatedRanges.entrySet()) {
      final RangeRefKey rangeRefKey = entry.getKey();
      final List> filterList = entry.getValue();

      // Create a RangeSet for this group.
      final RangeSet rangeSet =
          disjunction
          ? RangeSets.unionRanges(Ranges.toRanges(Lists.transform(filterList, Pair::left)))
          : RangeSets.intersectRanges(Ranges.toRanges(Lists.transform(filterList, Pair::left)));

      if (rangeSet.asRanges().size() < filterList.size()) {
        // We found a simplification. Remove the old filters and add new ones.
        for (final ObjectIntPair rangeAndChildIndex : filterList) {
          childrenToRemove.add(rangeAndChildIndex.rightInt());
        }

        if (rangeSet.asRanges().isEmpty()) {
          // range set matches nothing, equivalent to FALSE
          childrenToAdd.add(Filtration.matchNothing());
        }

        for (final Range range : rangeSet.asRanges()) {
          if (!range.hasLowerBound() && !range.hasUpperBound()) {
            // range matches all, equivalent to TRUE
            childrenToAdd.add(Filtration.matchEverything());
          } else {
            childrenToAdd.add(Ranges.toFilter(rangeRefKey, range));
          }
        }
      } else if (disjunction && Range.all().equals(rangeSet.span())) {
        // ranges in disjunction - spanning ALL
        // complementer must be a negated set of ranges
        for (final ObjectIntPair boundAndChildIndex : filterList) {
          childrenToRemove.add(boundAndChildIndex.rightInt());
        }
        Set> newRanges = rangeSet.complement().asRanges();
        List newFilters = new ArrayList<>();
        for (Range range : newRanges) {
          RangeFilter filter = Ranges.toFilter(rangeRefKey, range);
          newFilters.add(filter);
        }
        childrenToAdd.add(new NotDimFilter(disjunction(newFilters)));
      }
    }

    // Create newChildren.
    final List newChildren =
        new ArrayList<>(children.size() + childrenToAdd.size() - childrenToRemove.size());
    for (int i = 0; i < children.size(); i++) {
      if (!childrenToRemove.contains(i)) {
        newChildren.add(children.get(i));
      }
    }
    newChildren.addAll(childrenToAdd);

    // Finally: Go through newChildren, removing or potentially exiting early based on TRUE / FALSE marker filters.
    Preconditions.checkState(newChildren.size() > 0, "newChildren.size > 0");

    final Iterator iterator = newChildren.iterator();
    while (iterator.hasNext()) {
      final DimFilter newChild = iterator.next();

      if (Filtration.matchNothing().equals(newChild)) {
        // Child matches nothing, equivalent to FALSE
        // OR with FALSE => ignore
        // AND with FALSE => always false, short circuit
        if (disjunction) {
          iterator.remove();
        } else {
          return Filtration.matchNothing();
        }
      } else if (Filtration.matchEverything().equals(newChild)) {
        // Child matches everything, equivalent to TRUE
        // OR with TRUE => always true, short circuit
        // AND with TRUE => ignore
        if (disjunction) {
          return Filtration.matchEverything();
        } else {
          iterator.remove();
        }
      }
    }

    if (newChildren.isEmpty()) {
      // If "newChildren" is empty at this point, it must have consisted entirely of TRUE / FALSE marker filters.
      if (disjunction) {
        // Must have been all FALSE filters (the only kind we would have removed above).
        return Filtration.matchNothing();
      } else {
        // Must have been all TRUE filters (the only kind we would have removed above).
        return Filtration.matchEverything();
      }
    } else if (newChildren.size() == 1) {
      return newChildren.get(0);
    } else {
      return disjunction ? new OrDimFilter(newChildren) : new AndDimFilter(newChildren);
    }
  }

  private static DimFilter disjunction(List operands)
  {
    Preconditions.checkArgument(operands.size() > 0, "invalid number of operands");
    if (operands.size() == 1) {
      return operands.get(0);
    }
    return new OrDimFilter(operands);
  }

  private static DimFilter negate(final DimFilter filter)
  {
    if (Filtration.matchEverything().equals(filter)) {
      return Filtration.matchNothing();
    } else if (Filtration.matchNothing().equals(filter)) {
      return Filtration.matchEverything();
    } else if (filter instanceof NotDimFilter) {
      return ((NotDimFilter) filter).getField();
    } else if (filter instanceof BoundDimFilter) {
      final BoundDimFilter negated = Bounds.not((BoundDimFilter) filter);
      return negated != null ? negated : new NotDimFilter(filter);
    } else if (filter instanceof RangeFilter) {
      final RangeFilter negated = Ranges.not((RangeFilter) filter);
      return negated != null ? negated : new NotDimFilter(filter);
    } else {
      return new NotDimFilter(filter);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy