org.apache.druid.segment.join.lookup.LookupJoinMatcher Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
A module that is everything required to understand Druid Segments
There is a newer version: 30.0.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.segment.join.lookup;

import com.google.common.base.Preconditions;
import com.google.common.collect.Iterators;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.common.guava.SettableSupplier;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.math.expr.Expr;
import org.apache.druid.query.QueryUnsupportedException;
import org.apache.druid.query.lookup.LookupExtractor;
import org.apache.druid.segment.BaseDoubleColumnValueSelector;
import org.apache.druid.segment.BaseFloatColumnValueSelector;
import org.apache.druid.segment.BaseLongColumnValueSelector;
import org.apache.druid.segment.BaseObjectColumnValueSelector;
import org.apache.druid.segment.ColumnProcessorFactory;
import org.apache.druid.segment.ColumnProcessors;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.DimensionHandlerUtils;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.join.Equality;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.JoinMatcher;

import javax.annotation.Nullable;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Supplier;
import java.util.stream.Collectors;

public class LookupJoinMatcher implements JoinMatcher
{
  private static final ColumnProcessorFactory> LEFT_KEY_READER =
      new ColumnProcessorFactory>()
      {
        @Override
        public ColumnType defaultType()
        {
          return ColumnType.STRING;
        }

        @Override
        public Supplier makeDimensionProcessor(DimensionSelector selector, boolean multiValue)
        {
          return () -> {
            final IndexedInts row = selector.getRow();

            if (row.size() == 1) {
              return selector.lookupName(row.get(0));
            } else if (row.size() == 0) {
              return null;
            } else {
              // Multi-valued rows are not handled by the join system right now
              // TODO: Remove when https://github.com/apache/druid/issues/9924 is done
              throw new QueryUnsupportedException("Joining against a multi-value dimension is not supported.");
            }
          };
        }

        @Override
        public Supplier makeFloatProcessor(BaseFloatColumnValueSelector selector)
        {
          if (NullHandling.replaceWithDefault()) {
            return () -> DimensionHandlerUtils.convertObjectToString(selector.getFloat());
          } else {
            return () -> selector.isNull() ? null : DimensionHandlerUtils.convertObjectToString(selector.getFloat());
          }
        }

        @Override
        public Supplier makeDoubleProcessor(BaseDoubleColumnValueSelector selector)
        {
          if (NullHandling.replaceWithDefault()) {
            return () -> DimensionHandlerUtils.convertObjectToString(selector.getDouble());
          } else {
            return () -> selector.isNull() ? null : DimensionHandlerUtils.convertObjectToString(selector.getDouble());
          }
        }

        @Override
        public Supplier makeLongProcessor(BaseLongColumnValueSelector selector)
        {
          if (NullHandling.replaceWithDefault()) {
            return () -> DimensionHandlerUtils.convertObjectToString(selector.getLong());
          } else {
            return () -> selector.isNull() ? null : DimensionHandlerUtils.convertObjectToString(selector.getLong());
          }
        }

        @Override
        public Supplier makeArrayProcessor(
            BaseObjectColumnValueSelector selector,
            @Nullable ColumnCapabilities columnCapabilities
        )
        {
          throw new QueryUnsupportedException("Joining against a ARRAY columns is not supported.");
        }

        @Override
        public Supplier makeComplexProcessor(BaseObjectColumnValueSelector selector)
        {
          return () -> null;
        }
      };

  // currentIterator and currentEntry track iteration position through the currently-matched-rows.
  // 1) currentEntry is the entry that our column selector factory is looking at right now.
  // 2) currentIterator contains future matches that it _will_ be looking at after nextMatch() is called.
  @Nullable
  private Iterator> currentIterator = null;
  private final SettableSupplier> currentEntry = new SettableSupplier<>();

  private final LookupExtractor extractor;
  private final JoinConditionAnalysis condition;
  private final List> keySuppliers;
  private final ColumnSelectorFactory selectorFactory = new LookupColumnSelectorFactory(currentEntry::get);

  // matchedKeys and matchingRemainder are used to implement matchRemainder().
  private boolean matchingRemainder = false;
  private final Set matchedKeys;

  private LookupJoinMatcher(
      LookupExtractor extractor,
      ColumnSelectorFactory leftSelectorFactory,
      JoinConditionAnalysis condition,
      @Nullable List keyExprs,
      boolean remainderNeeded
  )
  {
    this.extractor = extractor;
    this.matchedKeys = remainderNeeded && !condition.isAlwaysTrue() && !condition.isAlwaysFalse()
                       ? new HashSet<>()
                       : null;
    this.condition = condition;

    if (keyExprs != null) {
      this.keySuppliers = keyExprs.stream()
                                  .map(
                                      expr ->
                                          ColumnProcessors.makeProcessor(
                                              expr,
                                              ColumnType.STRING,
                                              LEFT_KEY_READER,
                                              leftSelectorFactory
                                          )
                                  )
                                  .collect(Collectors.toList());
    } else {
      // This check is to guard against bugs; users should never see it.
      Preconditions.checkState(
          condition.isAlwaysFalse() || condition.isAlwaysTrue(),
          "Condition must be always true or always false when keySuppliers == null"
      );

      this.keySuppliers = null;
    }

    // Verify that extractor can be iterated when needed.
    if (condition.isAlwaysTrue() || remainderNeeded) {
      Preconditions.checkState(
          extractor.canIterate(),
          "Cannot iterate lookup, but iteration is required for this join"
      );
    }
  }

  public static LookupJoinMatcher create(
      LookupExtractor extractor,
      ColumnSelectorFactory leftSelectorFactory,
      JoinConditionAnalysis condition,
      boolean remainderNeeded
  )
  {
    final List keyExprs;

    if (condition.isAlwaysTrue()) {
      keyExprs = null;
    } else if (condition.isAlwaysFalse()) {
      keyExprs = null;
    } else if (!condition.getNonEquiConditions().isEmpty()) {
      throw new IAE("Cannot join lookup with non-equi condition: %s", condition);
    } else if (!condition.getRightEquiConditionKeys()
                         .stream()
                         .allMatch(LookupColumnSelectorFactory.KEY_COLUMN::equals)) {
      throw new IAE("Cannot join lookup with condition referring to non-key column: %s", condition);
    } else {
      keyExprs = condition.getEquiConditions().stream().map(Equality::getLeftExpr).collect(Collectors.toList());
    }

    return new LookupJoinMatcher(extractor, leftSelectorFactory, condition, keyExprs, remainderNeeded);
  }

  @Override
  public ColumnSelectorFactory getColumnSelectorFactory()
  {
    return selectorFactory;
  }

  @Override
  public void matchCondition()
  {
    currentIterator = null;
    matchingRemainder = false;

    if (condition.isAlwaysFalse()) {
      currentEntry.set(null);
    } else if (condition.isAlwaysTrue()) {
      currentIterator = extractor.iterable().iterator();
      nextMatch();
    } else {
      // Not always true, not always false, it's a normal condition.

      if (keySuppliers.isEmpty()) {
        currentEntry.set(null);
        return;
      }

      Iterator> keySupplierIterator = keySuppliers.iterator();
      String theKey = keySupplierIterator.next().get();

      if (theKey == null) {
        currentEntry.set(null);
        return;
      }

      // In order to match, all keySuppliers must return the same string, which must be a key in the lookup.
      while (keySupplierIterator.hasNext()) {
        if (!theKey.equals(keySupplierIterator.next().get())) {
          currentEntry.set(null);
          return;
        }
      }

      // All keySuppliers matched. Check if they are actually in the lookup.
      checkInLookup(theKey);
    }
  }

  private void checkInLookup(String theKey)
  {
    // All keySuppliers matched. Check if they are actually in the lookup.
    final String theValue = extractor.apply(theKey);

    if (theValue != null) {
      assert theKey != null;
      currentEntry.set(Pair.of(theKey, theValue));

      if (matchedKeys != null) {
        matchedKeys.add(theKey);
      }
    } else {
      currentEntry.set(null);
    }
  }

  @Override
  public void matchRemainder()
  {
    matchingRemainder = true;

    if (condition.isAlwaysFalse()) {
      currentIterator = extractor.iterable().iterator();
    } else if (condition.isAlwaysTrue()) {
      currentIterator = Collections.emptyIterator();
    } else {
      //noinspection ConstantConditions - entry can not be null because extractor.iterable() prevents this
      currentIterator = Iterators.filter(
          extractor.iterable().iterator(),
          entry -> !matchedKeys.contains(entry.getKey())
      );
    }

    nextMatch();
  }

  @Override
  public boolean hasMatch()
  {
    return currentEntry.get() != null;
  }

  @Override
  public boolean matchingRemainder()
  {
    return matchingRemainder;
  }

  @Override
  public void nextMatch()
  {
    if (currentIterator != null && currentIterator.hasNext()) {
      final Map.Entry entry = currentIterator.next();
      currentEntry.set(Pair.of(entry.getKey(), entry.getValue()));
    } else {
      currentIterator = null;
      currentEntry.set(null);
    }
  }

  @Override
  public void reset()
  {
    // Do not reset matchedKeys; we want to remember it across reset() calls so the 'remainder' is anything
    // that was unmatched across _all_ cursor walks.
    currentEntry.set(null);
    currentIterator = null;
    matchingRemainder = false;
  }
}