All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.druid.sql.calcite.rel.DruidJoinQueryRel Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.sql.calcite.rel;

import com.fasterxml.jackson.core.JsonProcessingException;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import org.apache.calcite.plan.RelOptCluster;
import org.apache.calcite.plan.RelOptCost;
import org.apache.calcite.plan.RelOptPlanner;
import org.apache.calcite.plan.RelOptRule;
import org.apache.calcite.plan.RelTraitSet;
import org.apache.calcite.plan.volcano.RelSubset;
import org.apache.calcite.rel.RelNode;
import org.apache.calcite.rel.RelWriter;
import org.apache.calcite.rel.core.Filter;
import org.apache.calcite.rel.core.Join;
import org.apache.calcite.rel.core.JoinRelType;
import org.apache.calcite.rel.metadata.RelMetadataQuery;
import org.apache.calcite.rel.type.RelDataType;
import org.apache.calcite.rex.RexNode;
import org.apache.calcite.sql.SqlKind;
import org.apache.druid.error.InvalidSqlInput;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.DataSource;
import org.apache.druid.query.JoinDataSource;
import org.apache.druid.query.QueryDataSource;
import org.apache.druid.query.TableDataSource;
import org.apache.druid.query.filter.DimFilter;
import org.apache.druid.segment.column.RowSignature;
import org.apache.druid.segment.join.JoinConditionAnalysis;
import org.apache.druid.segment.join.JoinType;
import org.apache.druid.sql.calcite.expression.DruidExpression;
import org.apache.druid.sql.calcite.expression.Expressions;
import org.apache.druid.sql.calcite.planner.Calcites;
import org.apache.druid.sql.calcite.planner.PlannerConfig;
import org.apache.druid.sql.calcite.planner.PlannerContext;
import org.apache.druid.sql.calcite.planner.querygen.SourceDescProducer.SourceDesc;
import org.apache.druid.sql.calcite.table.RowSignatures;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

/**
 * DruidRel that uses a {@link JoinDataSource}.
 *
 * Wraps a Calcite {@link Join} whose two inputs are expected to be {@link DruidRel} instances (or, during
 * planning, {@link RelSubset}s containing them), an optional {@link Filter} that applies to the left input,
 * and a {@link PartialDruidQuery} describing the operations stacked on top of the join.
 */
public class DruidJoinQueryRel extends DruidRel<DruidJoinQueryRel>
{
  /**
   * Placeholder datasource named "__join__" that reports itself as non-concrete. Used by
   * {@link #toDruidQueryForExplaining()} in place of a real {@link JoinDataSource}.
   */
  static final TableDataSource DUMMY_DATA_SOURCE = new TableDataSource("__join__")
  {
    @Override
    public boolean isConcrete()
    {
      return false;
    }
  };

  /**
   * Filter on the left input; null when there is none. Must be null whenever the left input has to be wrapped
   * in a subquery (see {@link #buildLeftSourceDesc()}).
   */
  @Nullable
  private final Filter leftFilter;
  private final PartialDruidQuery partialQuery;
  private final Join joinRel;
  private final PlannerConfig plannerConfig;

  // Not final: both inputs are reassigned by replaceInput.
  private RelNode left;
  private RelNode right;

  private DruidJoinQueryRel(
      RelOptCluster cluster,
      RelTraitSet traitSet,
      Join joinRel,
      @Nullable Filter leftFilter,
      PartialDruidQuery partialQuery,
      PlannerContext plannerContext
  )
  {
    super(cluster, traitSet, plannerContext);
    this.joinRel = joinRel;
    this.left = joinRel.getLeft();
    this.right = joinRel.getRight();
    this.leftFilter = leftFilter;
    this.partialQuery = partialQuery;
    this.plannerConfig = plannerContext.getPlannerConfig();
  }

  /**
   * Create an instance from a Join that is based on two {@link DruidRel} inputs.
   *
   * @param joinRel        the Calcite join; its cluster and trait set are reused for the new rel
   * @param leftFilter     filter on the left input, or null if there is none
   * @param plannerContext planner context for the query being planned
   */
  public static DruidJoinQueryRel create(
      final Join joinRel,
      @Nullable final Filter leftFilter,
      final PlannerContext plannerContext
  )
  {
    return new DruidJoinQueryRel(
        joinRel.getCluster(),
        joinRel.getTraitSet(),
        joinRel,
        leftFilter,
        PartialDruidQuery.create(joinRel),
        plannerContext
    );
  }

  @Override
  public PartialDruidQuery getPartialDruidQuery()
  {
    return partialQuery;
  }

  /**
   * Returns a copy of this rel that uses {@code newQueryBuilder} as its partial query. The trait set is
   * recomputed from the new partial query; the join and left filter are carried over unchanged.
   */
  @Override
  public DruidJoinQueryRel withPartialQuery(final PartialDruidQuery newQueryBuilder)
  {
    return new DruidJoinQueryRel(
        getCluster(),
        newQueryBuilder.getTraitSet(getConvention(), getPlannerContext()),
        joinRel,
        leftFilter,
        newQueryBuilder,
        getPlannerContext()
    );
  }

  /**
   * Builds the datasource and row signature for the left input. The left query is wrapped in a
   * {@link QueryDataSource} when {@link #computeLeftRequiresSubquery} says so; otherwise its datasource is
   * used directly.
   *
   * @throws ISE if the left input requires a subquery but a left filter is present
   */
  private SourceDesc buildLeftSourceDesc()
  {
    final DruidRel<?> leftDruidRel = (DruidRel<?>) left;
    final DruidQuery leftQuery = Preconditions.checkNotNull(leftDruidRel.toDruidQuery(false), "leftQuery");
    final RowSignature leftSignature = leftQuery.getOutputRowSignature();
    final DataSource leftDataSource;
    if (computeLeftRequiresSubquery(getPlannerContext(), leftDruidRel)) {
      leftDataSource = new QueryDataSource(leftQuery.getQuery());
      if (leftFilter != null) {
        throw new ISE("Filter on left table is supposed to be null if left child is a query source");
      }
    } else {
      leftDataSource = leftQuery.getDataSource();
    }
    return new SourceDesc(leftDataSource, leftSignature);
  }

  /**
   * Builds the datasource and row signature for the right input. The right query is wrapped in a
   * {@link QueryDataSource} when {@link #computeRightRequiresSubquery} says so; otherwise its datasource is
   * used directly.
   */
  private SourceDesc buildRightSourceDesc()
  {
    final DruidRel<?> rightDruidRel = (DruidRel<?>) right;
    final DruidQuery rightQuery = Preconditions.checkNotNull(rightDruidRel.toDruidQuery(false), "rightQuery");
    final RowSignature rightSignature = rightQuery.getOutputRowSignature();
    final DataSource rightDataSource;
    if (computeRightRequiresSubquery(getPlannerContext(), rightDruidRel)) {
      rightDataSource = new QueryDataSource(rightQuery.getQuery());
    } else {
      rightDataSource = rightQuery.getDataSource();
    }
    return new SourceDesc(rightDataSource, rightSignature);
  }

  /**
   * Combines the left and right source descriptions into one backed by a {@link JoinDataSource}.
   *
   * @param leftDesc       datasource and signature of the left side
   * @param rightDesc      datasource and signature of the right side
   * @param plannerContext planner context; its join-expression virtual column registry is set while the join
   *                       condition is translated and is always reset to null afterwards
   * @param joinRel        the Calcite join supplying the condition and join type
   * @param leftFilter     filter on the left side, or null if there is none
   *
   * @return the joined datasource, its row signature and the virtual column registry created for it
   *
   * @throws CannotBuildQueryException if the join condition or the left filter cannot be translated
   */
  public static SourceDesc buildJoinSourceDesc(
      final SourceDesc leftDesc,
      final SourceDesc rightDesc,
      PlannerContext plannerContext,
      Join joinRel,
      @Nullable Filter leftFilter
  )
  {
    final Pair<String, RowSignature> prefixSignaturePair = computeJoinRowSignature(
        leftDesc.rowSignature,
        rightDesc.rowSignature,
        findExistingJoinPrefixes(leftDesc.dataSource, rightDesc.dataSource)
    );

    final String prefix = prefixSignaturePair.lhs;
    final RowSignature signature = prefixSignaturePair.rhs;

    final VirtualColumnRegistry virtualColumnRegistry = VirtualColumnRegistry.create(
        signature,
        plannerContext.getExpressionParser(),
        plannerContext.getPlannerConfig().isForceExpressionVirtualColumns()
    );

    // Generate the condition for this join as a Druid expression.
    final DruidExpression condition;
    plannerContext.setJoinExpressionVirtualColumnRegistry(virtualColumnRegistry);
    try {
      condition = Expressions.toDruidExpression(
          plannerContext,
          signature,
          joinRel.getCondition()
      );
    }
    finally {
      // Unset the registry to avoid any VC registry leaks in case there are multiple Druid queries for the SQL.
      // Done in a finally block so the registry is also cleared when expression translation throws.
      // It should be fixed soon with changes in interface for SqlOperatorConversion and Expressions bridge class
      plannerContext.setJoinExpressionVirtualColumnRegistry(null);
    }

    // DruidJoinRule should not have created us if "condition" is null. Check defensively anyway, which also
    // quiets static code analysis.
    if (condition == null) {
      throw new CannotBuildQueryException(joinRel, joinRel.getCondition());
    }

    final JoinDataSource joinDataSource = JoinDataSource.create(
        leftDesc.dataSource,
        rightDesc.dataSource,
        prefix,
        JoinConditionAnalysis.forExpression(
            condition.getExpression(),
            plannerContext.parseExpression(condition.getExpression()),
            prefix
        ),
        toDruidJoinType(joinRel.getJoinType()),
        getDimFilter(plannerContext, leftDesc.rowSignature, leftFilter),
        plannerContext.getJoinableFactoryWrapper()
    );

    return new SourceDesc(joinDataSource, signature, virtualColumnRegistry);
  }


  /**
   * Builds the full query: translates both inputs, joins them into a {@link JoinDataSource}, and applies the
   * partial query on top of it.
   */
  @Override
  public DruidQuery toDruidQuery(final boolean finalizeAggregations)
  {
    final SourceDesc leftDesc = buildLeftSourceDesc();
    final SourceDesc rightDesc = buildRightSourceDesc();

    final SourceDesc sourceDesc = buildJoinSourceDesc(leftDesc, rightDesc, getPlannerContext(), joinRel, leftFilter);

    return partialQuery.build(
        sourceDesc.dataSource,
        sourceDesc.rowSignature,
        getPlannerContext(),
        getCluster().getRexBuilder(),
        finalizeAggregations,
        sourceDesc.virtualColumnRegistry
    );
  }

  /**
   * Builds a query for explain output without translating the inputs: {@link #DUMMY_DATA_SOURCE} stands in for
   * the join, and the signature is taken from the join's row type. Aggregations are not finalized.
   */
  @Override
  public DruidQuery toDruidQueryForExplaining()
  {
    return partialQuery.build(
        DUMMY_DATA_SOURCE,
        RowSignatures.fromRelDataType(
            joinRel.getRowType().getFieldNames(),
            joinRel.getRowType()
        ),
        getPlannerContext(),
        getCluster().getRexBuilder(),
        false
    );
  }

  /**
   * Returns a copy of this rel in {@link DruidConvention}, with every input of the underlying join converted
   * to that convention as well.
   */
  @Override
  public DruidJoinQueryRel asDruidConvention()
  {
    return new DruidJoinQueryRel(
        getCluster(),
        getTraitSet().replace(DruidConvention.instance()),
        joinRel.copy(
            joinRel.getTraitSet(),
            joinRel.getInputs()
                   .stream()
                   .map(input -> RelOptRule.convert(input, DruidConvention.instance()))
                   .collect(Collectors.toList())
        ),
        leftFilter,
        partialQuery,
        getPlannerContext()
    );
  }

  @Override
  public List<RelNode> getInputs()
  {
    return ImmutableList.of(left, right);
  }

  /**
   * Replaces the left (ordinal 0) or right (ordinal 1) input, both on the wrapped join and on this rel.
   *
   * @throws IndexOutOfBoundsException if {@code ordinalInParent} is neither 0 nor 1
   */
  @Override
  public void replaceInput(int ordinalInParent, RelNode p)
  {
    joinRel.replaceInput(ordinalInParent, p);

    if (ordinalInParent == 0) {
      this.left = p;
    } else if (ordinalInParent == 1) {
      this.right = p;
    } else {
      throw new IndexOutOfBoundsException(StringUtils.format("Invalid ordinalInParent[%s]", ordinalInParent));
    }
  }

  /**
   * Returns a copy of this rel with the given trait set, whose underlying join is copied onto {@code inputs}.
   * The left filter and partial query are carried over unchanged.
   */
  @Override
  public RelNode copy(final RelTraitSet traitSet, final List<RelNode> inputs)
  {
    return new DruidJoinQueryRel(
        getCluster(),
        traitSet,
        joinRel.copy(joinRel.getTraitSet(), inputs),
        leftFilter,
        getPartialDruidQuery(),
        getPlannerContext()
    );
  }

  /**
   * Returns the union of the datasource names reported by the left and right inputs, as a new mutable set.
   */
  @Override
  public Set<String> getDataSourceNames()
  {
    final Set<String> retVal = new HashSet<>();
    retVal.addAll(((DruidRel<?>) left).getDataSourceNames());
    retVal.addAll(((DruidRel<?>) right).getDataSourceNames());
    return retVal;
  }

  /**
   * Writes the wrapped join's explain terms followed by the JSON form of the explain query and its output
   * signature.
   */
  @Override
  public RelWriter explainTerms(RelWriter pw)
  {
    final String queryString;
    final DruidQuery druidQuery = toDruidQueryForExplaining();

    try {
      queryString = getPlannerContext().getJsonMapper().writeValueAsString(druidQuery.getQuery());
    }
    catch (JsonProcessingException e) {
      throw new RuntimeException(e);
    }

    return joinRel.explainTerms(pw)
                  .item("query", queryString)
                  .item("signature", druidQuery.getOutputRowSignature());
  }

  @Override
  protected RelDataType deriveRowType()
  {
    return partialQuery.getRowType();
  }

  /**
   * Estimates the cost of this join, starting from the partial query's own estimate:
   * <ul>
   * <li>if the join algorithm always requires subqueries, the cost is scaled by the outer-query multiplier;</li>
   * <li>otherwise a subquery penalty is added for each side that needs a subquery, and an inner join whose
   * left side needs none may be discounted like a filter when the planner config enables that;</li>
   * <li>a cross-join penalty is added when the condition is a literal that is not always false.</li>
   * </ul>
   */
  @Override
  public RelOptCost computeSelfCost(final RelOptPlanner planner, final RelMetadataQuery mq)
  {
    double joinCost = partialQuery.estimateCost();

    if (getPlannerContext().getJoinAlgorithm().requiresSubquery()) {
      joinCost *= CostEstimates.MULTIPLIER_OUTER_QUERY;
    } else {
      // Penalize subqueries if we don't have to do them.
      if (computeLeftRequiresSubquery(getPlannerContext(), getSomeDruidChild(left))) {
        joinCost += CostEstimates.COST_SUBQUERY;
      } else {
        if (joinRel.getJoinType() == JoinRelType.INNER && plannerConfig.isComputeInnerJoinCostAsFilter()) {
          joinCost *= CostEstimates.MULTIPLIER_FILTER; // treating inner join like a filter on left table
        }
      }

      if (computeRightRequiresSubquery(getPlannerContext(), getSomeDruidChild(right))) {
        joinCost += CostEstimates.COST_SUBQUERY;
      }
    }

    // Penalize cross joins.
    if (joinRel.getCondition().isA(SqlKind.LITERAL) && !joinRel.getCondition().isAlwaysFalse()) {
      joinCost += CostEstimates.COST_JOIN_CROSS;
    }

    return planner.getCostFactory().makeCost(joinCost, 0, 0);
  }

  /**
   * Maps a Calcite join type to the corresponding Druid {@link JoinType}. Only LEFT, RIGHT, FULL and INNER
   * are supported; any other type is rejected as invalid SQL input.
   */
  public static JoinType toDruidJoinType(JoinRelType calciteJoinType)
  {
    switch (calciteJoinType) {
      case LEFT:
        return JoinType.LEFT;
      case RIGHT:
        return JoinType.RIGHT;
      case FULL:
        return JoinType.FULL;
      case INNER:
        return JoinType.INNER;
      default:
        throw InvalidSqlInput.exception(
            "Cannot handle joinType [%s]",
            calciteJoinType
        );
    }
  }

  /**
   * Returns whether the left input of a join must be executed as a subquery.
   */
  public static boolean computeLeftRequiresSubquery(final PlannerContext plannerContext, final DruidRel<?> left)
  {
    if (plannerContext.getJoinAlgorithm().requiresSubquery()) {
      return true;
    }

    // Left requires a subquery unless it's a scan or mapping on top of any table or a join.
    return !DruidRels.isScanOrMapping(left, true);
  }

  /**
   * Returns whether the right input of a join must be executed as a subquery.
   */
  public static boolean computeRightRequiresSubquery(final PlannerContext plannerContext, final DruidRel<?> right)
  {
    if (plannerContext.getJoinAlgorithm().requiresSubquery()) {
      return true;
    }

    // Right requires a subquery unless it's a scan or mapping on top of a global datasource.
    // ideally this would involve JoinableFactory.isDirectlyJoinable to check that the global datasources
    // are in fact possibly joinable, but for now isGlobal is coupled to joinability
    return !(DruidRels.isScanOrMapping(right, false)
             && DruidRels.druidTableIfLeafRel(right).filter(table -> table.getDataSource().isGlobal()).isPresent());
  }

  /**
   * Collects the right prefix of every {@link JoinDataSource} found in the given datasources or anywhere
   * among their children.
   */
  static Set<String> findExistingJoinPrefixes(DataSource... dataSources)
  {
    // Worklist of datasources to visit. Children are appended while the list is walked by index, which visits
    // them in the same order as repeatedly removing the head, without shifting the list on every step.
    final List<DataSource> pending = new ArrayList<>(Arrays.asList(dataSources));
    final Set<String> prefixes = new HashSet<>();

    for (int i = 0; i < pending.size(); i++) {
      final DataSource current = pending.get(i);
      pending.addAll(current.getChildren());
      if (current instanceof JoinDataSource) {
        prefixes.add(((JoinDataSource) current).getRightPrefix());
      }
    }
    return prefixes;
  }

  /**
   * Returns a Pair of "rightPrefix" (for JoinDataSource) and the signature of rows that will result from
   * applying that prefix.
   *
   * @param leftSignature  signature of the left side; its columns are copied as-is
   * @param rightSignature signature of the right side; its columns are added with the chosen prefix
   * @param prefixes       join prefixes already in use, which the chosen prefix must not equal
   */
  static Pair<String, RowSignature> computeJoinRowSignature(
      final RowSignature leftSignature,
      final RowSignature rightSignature,
      final Set<String> prefixes
  )
  {
    final RowSignature.Builder signatureBuilder = RowSignature.builder();

    for (final String column : leftSignature.getColumnNames()) {
      signatureBuilder.add(column, leftSignature.getColumnType(column).orElse(null));
    }

    final StringBuilder base = new StringBuilder("j");
    // the prefixes collection contains all known join prefixes, which might be in use for nested queries but not
    // present in the top level row signatures
    // loop until we are sure we got a new prefix
    String maybePrefix;
    do {
      // Need to include the "0" since findUnusedPrefixForDigits only guarantees safety for digit-initiated suffixes
      maybePrefix = Calcites.findUnusedPrefixForDigits(base.toString(), leftSignature.getColumnNames()) + "0.";
      // Prepend an underscore so that the next attempt, if one is needed, starts from a different base.
      base.insert(0, "_");
    } while (prefixes.contains(maybePrefix));
    final String rightPrefix = maybePrefix;

    for (final String column : rightSignature.getColumnNames()) {
      signatureBuilder.add(rightPrefix + column, rightSignature.getColumnType(column).orElse(null));
    }

    return Pair.of(rightPrefix, signatureBuilder.build());
  }

  /**
   * Returns {@code child} itself if it is a {@link DruidRel}. Otherwise treats it as a {@link RelSubset} and
   * returns the first rel in that subset, which is null when the subset has no rels.
   */
  public static DruidRel<?> getSomeDruidChild(final RelNode child)
  {
    if (child instanceof DruidRel) {
      return (DruidRel<?>) child;
    } else {
      final RelSubset subset = (RelSubset) child;
      return (DruidRel<?>) Iterables.getFirst(subset.getRels(), null);
    }
  }

  /**
   * Translates the condition of {@code filter} into a {@link DimFilter} against {@code rowSignature}.
   *
   * @return the translated filter, or null if {@code filter} is null
   *
   * @throws CannotBuildQueryException if the condition cannot be translated
   */
  @Nullable
  private static DimFilter getDimFilter(
      final PlannerContext plannerContext,
      final RowSignature rowSignature,
      @Nullable final Filter filter
  )
  {
    if (filter == null) {
      return null;
    }
    final RexNode condition = filter.getCondition();
    final DimFilter dimFilter = Expressions.toFilter(
        plannerContext,
        rowSignature,
        null,
        condition
    );
    if (dimFilter == null) {
      throw new CannotBuildQueryException(filter, condition);
    } else {
      return dimFilter;
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy