/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.optimizer.rules.am;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.asterix.aql.util.FunctionUtils;
import org.apache.asterix.common.annotations.SkipSecondaryIndexSearchExpressionAnnotation;
import org.apache.asterix.common.config.DatasetConfig.DatasetType;
import org.apache.asterix.common.config.DatasetConfig.IndexType;
import org.apache.asterix.metadata.entities.Dataset;
import org.apache.asterix.metadata.entities.Index;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.optimizer.rules.util.EquivalenceClassUtils;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.IndexedNLJoinExpressionAnnotation;
import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions;
import org.apache.hyracks.algebricks.core.algebra.functions.AlgebricksBuiltinFunctions.ComparisonKind;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.core.algebra.functions.IFunctionInfo;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractBinaryJoinOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractDataSourceOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.ExternalDataLookupOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.SelectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.UnnestMapOperator;
/**
* Class for helping rewrite rules to choose and apply BTree indexes.
*/
public class BTreeAccessMethod implements IAccessMethod {
// Describes whether a search predicate is an open/closed interval.
private enum LimitType {
LOW_INCLUSIVE,
LOW_EXCLUSIVE,
HIGH_INCLUSIVE,
HIGH_EXCLUSIVE,
EQUAL
}
// TODO: There is some redundancy here, since these are listed in AlgebricksBuiltinFunctions as well.
private static List<FunctionIdentifier> funcIdents = new ArrayList<FunctionIdentifier>();
static {
funcIdents.add(AlgebricksBuiltinFunctions.EQ);
funcIdents.add(AlgebricksBuiltinFunctions.LE);
funcIdents.add(AlgebricksBuiltinFunctions.GE);
funcIdents.add(AlgebricksBuiltinFunctions.LT);
funcIdents.add(AlgebricksBuiltinFunctions.GT);
}
public static BTreeAccessMethod INSTANCE = new BTreeAccessMethod();
@Override
public List<FunctionIdentifier> getOptimizableFunctions() {
return funcIdents;
}
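// An argument pair is optimizable for a BTree if it consists of one constant and one variable
// (selection predicate) or of two variables (join predicate).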
@Override
public boolean analyzeFuncExprArgs(AbstractFunctionCallExpression funcExpr,
List<AbstractLogicalOperator> assignsAndUnnests, AccessMethodAnalysisContext analysisCtx) {
boolean matches = AccessMethodUtils.analyzeFuncExprArgsForOneConstAndVar(funcExpr, analysisCtx);
if (!matches) {
matches = AccessMethodUtils.analyzeFuncExprArgsForTwoVars(funcExpr, analysisCtx);
}
return matches;
}
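// A BTree search does not require predicates on all of the index's key fields;
// matching a prefix of the secondary keys is sufficient.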
@Override
public boolean matchAllIndexExprs() {
return false;
}
@Override
public boolean matchPrefixIndexExprs() {
return true;
}
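// Replaces the SELECT's input with the index-search plan produced by createSecondaryToPrimaryPlan.
// If part of the original condition is not covered by the index search, the SELECT is kept on top of
// the new plan with the remaining condition; otherwise the SELECT is removed entirely.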
@Override
public boolean applySelectPlanTransformation(Mutable<ILogicalOperator> selectRef,
OptimizableOperatorSubTree subTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx,
IOptimizationContext context) throws AlgebricksException {
SelectOperator select = (SelectOperator) selectRef.getValue();
Mutable<ILogicalExpression> conditionRef = select.getCondition();
ILogicalOperator primaryIndexUnnestOp = createSecondaryToPrimaryPlan(selectRef, conditionRef, subTree, null,
chosenIndex, analysisCtx, false, false, false, context);
if (primaryIndexUnnestOp == null) {
return false;
}
Mutable<ILogicalOperator> opRef = (subTree.assignsAndUnnestsRefs.isEmpty()) ? null
: subTree.assignsAndUnnestsRefs.get(0);
ILogicalOperator op = null;
if (opRef != null) {
op = opRef.getValue();
}
// Generate new select using the new condition.
if (conditionRef.getValue() != null) {
select.getInputs().clear();
if (op != null) {
subTree.dataSourceRef.setValue(primaryIndexUnnestOp);
select.getInputs().add(new MutableObject<ILogicalOperator>(op));
} else {
select.getInputs().add(new MutableObject<ILogicalOperator>(primaryIndexUnnestOp));
}
} else {
((AbstractLogicalOperator) primaryIndexUnnestOp).setExecutionMode(ExecutionMode.PARTITIONED);
if (op != null) {
subTree.dataSourceRef.setValue(primaryIndexUnnestOp);
selectRef.setValue(op);
} else {
selectRef.setValue(primaryIndexUnnestOp);
}
}
return true;
}
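// Turns the join into an index-based lookup: the side that owns the chosen index is probed with keys
// from the other side. Any condition left over after the index search is applied by a new SELECT that
// replaces the original join operator.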
@Override
public boolean applyJoinPlanTransformation(Mutable<ILogicalOperator> joinRef,
OptimizableOperatorSubTree leftSubTree, OptimizableOperatorSubTree rightSubTree, Index chosenIndex,
AccessMethodAnalysisContext analysisCtx, IOptimizationContext context, boolean isLeftOuterJoin,
boolean hasGroupBy) throws AlgebricksException {
AbstractBinaryJoinOperator joinOp = (AbstractBinaryJoinOperator) joinRef.getValue();
Mutable<ILogicalExpression> conditionRef = joinOp.getCondition();
// Determine if the index is applicable on the left or right side (if both, we arbitrarily prefer the left side).
Dataset dataset = analysisCtx.indexDatasetMap.get(chosenIndex);
// Determine probe and index subtrees based on chosen index.
OptimizableOperatorSubTree indexSubTree = null;
OptimizableOperatorSubTree probeSubTree = null;
if (!isLeftOuterJoin && leftSubTree.hasDataSourceScan()
&& dataset.getDatasetName().equals(leftSubTree.dataset.getDatasetName())) {
indexSubTree = leftSubTree;
probeSubTree = rightSubTree;
} else if (rightSubTree.hasDataSourceScan()
&& dataset.getDatasetName().equals(rightSubTree.dataset.getDatasetName())) {
indexSubTree = rightSubTree;
probeSubTree = leftSubTree;
}
if (indexSubTree == null) {
// This may happen in the left outer join case.
return false;
}
LogicalVariable newNullPlaceHolderVar = null;
if (isLeftOuterJoin) {
// Get a new null placeholder variable, which is the first primary key field variable
// from the indexSubTree's datasource scan.
newNullPlaceHolderVar = indexSubTree.getDataSourceVariables().get(0);
}
ILogicalOperator primaryIndexUnnestOp = createSecondaryToPrimaryPlan(joinRef, conditionRef, indexSubTree,
probeSubTree, chosenIndex, analysisCtx, true, isLeftOuterJoin, true, context);
if (primaryIndexUnnestOp == null) {
return false;
}
if (isLeftOuterJoin && hasGroupBy) {
// Reset the null placeholder variable in the group-by operator.
AccessMethodUtils.resetLOJNullPlaceholderVariableInGroupByOp(analysisCtx, newNullPlaceHolderVar, context);
}
// If there are conditions left, add a new select operator on top.
indexSubTree.dataSourceRef.setValue(primaryIndexUnnestOp);
if (conditionRef.getValue() != null) {
SelectOperator topSelect = new SelectOperator(conditionRef, isLeftOuterJoin, newNullPlaceHolderVar);
topSelect.getInputs().add(indexSubTree.rootRef);
topSelect.setExecutionMode(ExecutionMode.LOCAL);
context.computeAndSetTypeEnvironmentForOperator(topSelect);
// Replace the original join with the new subtree rooted at the select op.
joinRef.setValue(topSelect);
} else {
joinRef.setValue(indexSubTree.rootRef.getValue());
}
return true;
}
private ILogicalOperator createSecondaryToPrimaryPlan(Mutable<ILogicalOperator> topOpRef,
Mutable<ILogicalExpression> conditionRef, OptimizableOperatorSubTree indexSubTree,
OptimizableOperatorSubTree probeSubTree, Index chosenIndex, AccessMethodAnalysisContext analysisCtx,
boolean retainInput, boolean retainNull, boolean requiresBroadcast, IOptimizationContext context)
throws AlgebricksException {
Dataset dataset = indexSubTree.dataset;
ARecordType recordType = indexSubTree.recordType;
// We have already ensured that indexSubTree has a datasource scan.
AbstractDataSourceOperator dataSourceOp = (AbstractDataSourceOperator) indexSubTree.dataSourceRef.getValue();
List<Pair<Integer, Integer>> exprAndVarList = analysisCtx.indexExprsAndVars.get(chosenIndex);
List<IOptimizableFuncExpr> matchedFuncExprs = analysisCtx.matchedFuncExprs;
int numSecondaryKeys = analysisCtx.indexNumMatchedKeys.get(chosenIndex);
// List of function expressions that will be replaced by the secondary-index search.
// These func exprs will be removed from the select condition at the very end of this method.
Set<ILogicalExpression> replacedFuncExprs = new HashSet<ILogicalExpression>();
// Info on high and low keys for the BTree search predicate.
ILogicalExpression[] lowKeyExprs = new ILogicalExpression[numSecondaryKeys];
ILogicalExpression[] highKeyExprs = new ILogicalExpression[numSecondaryKeys];
LimitType[] lowKeyLimits = new LimitType[numSecondaryKeys];
LimitType[] highKeyLimits = new LimitType[numSecondaryKeys];
boolean[] lowKeyInclusive = new boolean[numSecondaryKeys];
boolean[] highKeyInclusive = new boolean[numSecondaryKeys];
// TODO: For now we don't do any sophisticated analysis of the func exprs to come up with "the best" range predicate.
// If we can't figure out how to integrate a certain funcExpr into the current predicate, we just bail by setting this flag.
boolean couldntFigureOut = false;
boolean doneWithExprs = false;
boolean isEqCondition = false;
// TODO: For now don't consider prefix searches.
BitSet setLowKeys = new BitSet(numSecondaryKeys);
BitSet setHighKeys = new BitSet(numSecondaryKeys);
// Go through the func exprs listed as optimizable by the chosen index,
// and formulate a range predicate on the secondary-index keys.
// Checks whether a type cast happened from a real (FLOAT, DOUBLE) value to an INT value,
// since we have rounding issues when dealing with the LT(<) or GT(>) operators.
boolean realTypeConvertedToIntegerType = false;
for (Pair<Integer, Integer> exprIndex : exprAndVarList) {
// Position of the field of matchedFuncExprs.get(exprIndex) in the chosen index's indexed exprs.
IOptimizableFuncExpr optFuncExpr = matchedFuncExprs.get(exprIndex.first);
int keyPos = indexOf(optFuncExpr.getFieldName(0), chosenIndex.getKeyFieldNames());
if (keyPos < 0) {
if (optFuncExpr.getNumLogicalVars() > 1) {
// If we are optimizing a join, the matching field may be the second field name.
keyPos = indexOf(optFuncExpr.getFieldName(1), chosenIndex.getKeyFieldNames());
}
}
if (keyPos < 0) {
throw new AlgebricksException(
"Could not match optimizable function expression to any index field name.");
}
Pair<ILogicalExpression, Boolean> returnedSearchKeyExpr = AccessMethodUtils.createSearchKeyExpr(
optFuncExpr, indexSubTree, probeSubTree);
ILogicalExpression searchKeyExpr = returnedSearchKeyExpr.first;
realTypeConvertedToIntegerType = returnedSearchKeyExpr.second;
LimitType limit = getLimitType(optFuncExpr, probeSubTree);
// If a DOUBLE or FLOAT constant is converted to an INT type value,
// we need to handle a corner case where an INT value lies between the two real values.
// For example, consider the following query:
//
// for $emp in dataset empDataset
// where $emp.age > double("2.3") and $emp.age < double("3.3")
// return $emp.id;
//
// It should return a result if there is a tuple whose age satisfies the condition (i.e., age is 3).
// However, after truncating the fractional parts, no candidates are found,
// since there is no INT that is greater than 2 and less than 3.
//
// Therefore, we convert LT(<) to LE(<=) and GT(>) to GE(>=) to find candidates.
// This does not change the result of an actual comparison since this conversion is only applied
// for finding candidates from an index.
//
if (realTypeConvertedToIntegerType) {
if (limit == LimitType.HIGH_EXCLUSIVE) {
limit = LimitType.HIGH_INCLUSIVE;
} else if (limit == LimitType.LOW_EXCLUSIVE) {
limit = LimitType.LOW_INCLUSIVE;
}
}
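// Fold this comparison into the per-key search bounds: EQUAL fixes both bounds, the HIGH_* cases set
// the upper bound, and the LOW_* cases set the lower bound. If a bound was already set to a different
// value that cannot be merged, couldntFigureOut is raised and the rewrite is abandoned after the loop.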
switch (limit) {
case EQUAL: {
if (lowKeyLimits[keyPos] == null && highKeyLimits[keyPos] == null) {
lowKeyLimits[keyPos] = highKeyLimits[keyPos] = limit;
lowKeyInclusive[keyPos] = highKeyInclusive[keyPos] = true;
lowKeyExprs[keyPos] = highKeyExprs[keyPos] = searchKeyExpr;
setLowKeys.set(keyPos);
setHighKeys.set(keyPos);
isEqCondition = true;
} else {
// Both bounds have already been set to the identical value. When optimizing a join we may encounter
// the same optimizable expression twice (once from analyzing each side of the join).
if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
&& lowKeyExprs[keyPos].equals(searchKeyExpr) && highKeyLimits[keyPos] == limit
&& highKeyInclusive[keyPos] == true && highKeyExprs[keyPos].equals(searchKeyExpr)) {
isEqCondition = true;
break;
}
couldntFigureOut = true;
}
// TODO: For now don't consider prefix searches.
// If high and low keys are set, we exit for now.
if (setLowKeys.cardinality() == numSecondaryKeys && setHighKeys.cardinality() == numSecondaryKeys) {
doneWithExprs = true;
}
break;
}
case HIGH_EXCLUSIVE: {
if (highKeyLimits[keyPos] == null || (highKeyLimits[keyPos] != null && highKeyInclusive[keyPos])) {
highKeyLimits[keyPos] = limit;
highKeyExprs[keyPos] = searchKeyExpr;
highKeyInclusive[keyPos] = false;
} else {
// This bound has already been set to the identical value. When optimizing a join we may encounter
// the same optimizable expression twice (once from analyzing each side of the join).
if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == false
&& highKeyExprs[keyPos].equals(searchKeyExpr)) {
break;
}
couldntFigureOut = true;
doneWithExprs = true;
}
break;
}
case HIGH_INCLUSIVE: {
if (highKeyLimits[keyPos] == null) {
highKeyLimits[keyPos] = limit;
highKeyExprs[keyPos] = searchKeyExpr;
highKeyInclusive[keyPos] = true;
} else {
// This bound has already been set to the identical value. When optimizing a join we may encounter
// the same optimizable expression twice (once from analyzing each side of the join).
if (highKeyLimits[keyPos] == limit && highKeyInclusive[keyPos] == true
&& highKeyExprs[keyPos].equals(searchKeyExpr)) {
break;
}
couldntFigureOut = true;
doneWithExprs = true;
}
break;
}
case LOW_EXCLUSIVE: {
if (lowKeyLimits[keyPos] == null || (lowKeyLimits[keyPos] != null && lowKeyInclusive[keyPos])) {
lowKeyLimits[keyPos] = limit;
lowKeyExprs[keyPos] = searchKeyExpr;
lowKeyInclusive[keyPos] = false;
} else {
// This bound has already been set to the identical value. When optimizing a join we may encounter
// the same optimizable expression twice (once from analyzing each side of the join).
if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == false
&& lowKeyExprs[keyPos].equals(searchKeyExpr)) {
break;
}
couldntFigureOut = true;
doneWithExprs = true;
}
break;
}
case LOW_INCLUSIVE: {
if (lowKeyLimits[keyPos] == null) {
lowKeyLimits[keyPos] = limit;
lowKeyExprs[keyPos] = searchKeyExpr;
lowKeyInclusive[keyPos] = true;
} else {
// This bound has already been set to the identical value. When optimizing a join we may encounter
// the same optimizable expression twice (once from analyzing each side of the join).
if (lowKeyLimits[keyPos] == limit && lowKeyInclusive[keyPos] == true
&& lowKeyExprs[keyPos].equals(searchKeyExpr)) {
break;
}
couldntFigureOut = true;
doneWithExprs = true;
}
break;
}
default: {
throw new IllegalStateException();
}
}
if (!couldntFigureOut) {
// Remember to remove this funcExpr later.
replacedFuncExprs.add(matchedFuncExprs.get(exprIndex.first).getFuncExpr());
}
if (doneWithExprs) {
break;
}
}
if (couldntFigureOut) {
return null;
}
// If the select condition contains mixed open/closed intervals on multiple keys, then we make all
// intervals closed to obtain a superset of answers and leave the original selection in place.
boolean primaryIndexPostProccessingIsNeeded = false;
for (int i = 1; i < numSecondaryKeys; ++i) {
if (lowKeyInclusive[i] != lowKeyInclusive[0]) {
Arrays.fill(lowKeyInclusive, true);
primaryIndexPostProccessingIsNeeded = true;
break;
}
}
for (int i = 1; i < numSecondaryKeys; ++i) {
if (highKeyInclusive[i] != highKeyInclusive[0]) {
Arrays.fill(highKeyInclusive, true);
primaryIndexPostProccessingIsNeeded = true;
break;
}
}
// Determine the cases in which a prefix search can be applied.
for (int i = 1; i < lowKeyExprs.length; i++) {
if (lowKeyLimits[0] == null && lowKeyLimits[i] != null || lowKeyLimits[0] != null
&& lowKeyLimits[i] == null || highKeyLimits[0] == null && highKeyLimits[i] != null
|| highKeyLimits[0] != null && highKeyLimits[i] == null) {
numSecondaryKeys--;
primaryIndexPostProccessingIsNeeded = true;
}
}
if (lowKeyLimits[0] == null) {
lowKeyInclusive[0] = true;
}
if (highKeyLimits[0] == null) {
highKeyInclusive[0] = true;
}
// Here we generate vars and funcs for assigning the secondary-index keys to be fed into the secondary-index search.
// List of variables for the assign.
ArrayList<LogicalVariable> keyVarList = new ArrayList<LogicalVariable>();
// List of variables and expressions for the assign.
ArrayList<LogicalVariable> assignKeyVarList = new ArrayList<LogicalVariable>();
ArrayList<Mutable<ILogicalExpression>> assignKeyExprList = new ArrayList<Mutable<ILogicalExpression>>();
int numLowKeys = createKeyVarsAndExprs(numSecondaryKeys, lowKeyLimits, lowKeyExprs, assignKeyVarList,
assignKeyExprList, keyVarList, context);
int numHighKeys = createKeyVarsAndExprs(numSecondaryKeys, highKeyLimits, highKeyExprs, assignKeyVarList,
assignKeyExprList, keyVarList, context);
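// Package the search parameters (target index, bound inclusiveness, equality flag, and the low/high
// key variables) that are attached to the index-search function and consumed when the physical BTree
// search is generated.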
BTreeJobGenParams jobGenParams = new BTreeJobGenParams(chosenIndex.getIndexName(), IndexType.BTREE,
dataset.getDataverseName(), dataset.getDatasetName(), retainInput, retainNull, requiresBroadcast);
jobGenParams.setLowKeyInclusive(lowKeyInclusive[0]);
jobGenParams.setHighKeyInclusive(highKeyInclusive[0]);
jobGenParams.setIsEqCondition(isEqCondition);
jobGenParams.setLowKeyVarList(keyVarList, 0, numLowKeys);
jobGenParams.setHighKeyVarList(keyVarList, numLowKeys, numHighKeys);
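// Choose the input for the secondary-index search: if any search keys are constants, they are produced
// by a new assign over the datasource's original input; otherwise all keys are variables coming from
// the probe subtree.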
ILogicalOperator inputOp = null;
if (!assignKeyVarList.isEmpty()) {
// Assign operator that sets the constant secondary-index search-key fields if necessary.
AssignOperator assignConstantSearchKeys = new AssignOperator(assignKeyVarList, assignKeyExprList);
// Input to this assign is the EmptyTupleSource (which the dataSourceScan also must have had as input).
assignConstantSearchKeys.getInputs().add(dataSourceOp.getInputs().get(0));
assignConstantSearchKeys.setExecutionMode(dataSourceOp.getExecutionMode());
inputOp = assignConstantSearchKeys;
} else {
// All index search keys are variables.
inputOp = probeSubTree.root;
}
UnnestMapOperator secondaryIndexUnnestOp = AccessMethodUtils.createSecondaryIndexUnnestMap(dataset, recordType,
chosenIndex, inputOp, jobGenParams, context, false, retainInput);
// Generate the rest of the upstream plan which feeds the search results into the primary index.
UnnestMapOperator primaryIndexUnnestOp = null;
boolean isPrimaryIndex = chosenIndex.isPrimaryIndex();
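// What sits on top of the secondary-index search depends on the dataset and index type: external
// datasets get an external-data lookup, a secondary index on an internal dataset feeds a primary-index
// unnest-map, and a primary index is searched directly without an extra lookup.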
if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
// External dataset
ExternalDataLookupOperator externalDataAccessOp = AccessMethodUtils.createExternalDataLookupUnnestMap(
dataSourceOp, dataset, recordType, secondaryIndexUnnestOp, context, chosenIndex, retainInput,
retainNull);
indexSubTree.dataSourceRef.setValue(externalDataAccessOp);
return externalDataAccessOp;
} else if (!isPrimaryIndex) {
primaryIndexUnnestOp = AccessMethodUtils.createPrimaryIndexUnnestMap(dataSourceOp, dataset, recordType,
secondaryIndexUnnestOp, context, true, retainInput, retainNull, false);
// Replace the datasource scan with the new plan rooted at
// primaryIndexUnnestMap.
indexSubTree.dataSourceRef.setValue(primaryIndexUnnestOp);
} else {
List