org.apache.asterix.optimizer.rules.IntroduceSecondaryIndexInsertDeleteRule Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.asterix.optimizer.rules;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Set;
import java.util.Stack;
import org.apache.asterix.aql.util.FunctionUtils;
import org.apache.asterix.common.config.DatasetConfig.DatasetType;
import org.apache.asterix.common.config.DatasetConfig.IndexType;
import org.apache.asterix.common.exceptions.AsterixException;
import org.apache.asterix.metadata.declared.AqlDataSource;
import org.apache.asterix.metadata.declared.AqlIndex;
import org.apache.asterix.metadata.declared.AqlMetadataProvider;
import org.apache.asterix.metadata.declared.DatasetDataSource;
import org.apache.asterix.metadata.entities.Dataset;
import org.apache.asterix.metadata.entities.Index;
import org.apache.asterix.metadata.entities.InternalDatasetDetails;
import org.apache.asterix.om.base.AInt32;
import org.apache.asterix.om.base.AOrderedList;
import org.apache.asterix.om.base.AString;
import org.apache.asterix.om.constants.AsterixConstantValue;
import org.apache.asterix.om.functions.AsterixBuiltinFunctions;
import org.apache.asterix.om.typecomputer.base.TypeComputerUtilities;
import org.apache.asterix.om.types.AOrderedListType;
import org.apache.asterix.om.types.ARecordType;
import org.apache.asterix.om.types.ATypeTag;
import org.apache.asterix.om.types.AUnionType;
import org.apache.asterix.om.types.BuiltinType;
import org.apache.asterix.om.types.IAType;
import org.apache.asterix.om.types.hierachy.ATypeHierarchy;
import org.apache.asterix.om.util.NonTaggedFormatUtil;
import org.apache.commons.lang3.ArrayUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.mutable.Mutable;
import org.apache.commons.lang3.mutable.MutableObject;
import org.apache.hyracks.algebricks.common.exceptions.AlgebricksException;
import org.apache.hyracks.algebricks.common.utils.Pair;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalExpression;
import org.apache.hyracks.algebricks.core.algebra.base.ILogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.base.IOptimizationContext;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalExpressionTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalOperatorTag;
import org.apache.hyracks.algebricks.core.algebra.base.LogicalVariable;
import org.apache.hyracks.algebricks.core.algebra.expressions.AbstractFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.ConstantExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.IVariableTypeEnvironment;
import org.apache.hyracks.algebricks.core.algebra.expressions.ScalarFunctionCallExpression;
import org.apache.hyracks.algebricks.core.algebra.expressions.VariableReferenceExpression;
import org.apache.hyracks.algebricks.core.algebra.functions.FunctionIdentifier;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AbstractLogicalOperator.ExecutionMode;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.AssignOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.IndexInsertDeleteOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.InsertDeleteOperator.Kind;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.ProjectOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.ReplicateOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.TokenizeOperator;
import org.apache.hyracks.algebricks.core.algebra.operators.logical.visitors.VariableUtilities;
import org.apache.hyracks.algebricks.core.rewriter.base.IAlgebraicRewriteRule;
public class IntroduceSecondaryIndexInsertDeleteRule implements IAlgebraicRewriteRule {
/**
 * Pre-order hook of this rule. This implementation performs no work: it
 * reads neither argument and unconditionally reports that nothing changed.
 *
 * @param opRef
 *            reference to the operator being considered (unused here)
 * @param context
 *            the optimization context (unused here)
 * @return always {@code false}
 * @throws AlgebricksException
 *             declared by the signature; never thrown by this implementation
 */
@Override
public boolean rewritePre(Mutable opRef, IOptimizationContext context) throws AlgebricksException {
    // Nothing is rewritten from this hook.
    return false;
}
@Override
public boolean rewritePost(Mutable opRef, IOptimizationContext context)
throws AlgebricksException {
AbstractLogicalOperator op0 = (AbstractLogicalOperator) opRef.getValue();
if (op0.getOperatorTag() != LogicalOperatorTag.SINK) {
return false;
}
AbstractLogicalOperator op1 = (AbstractLogicalOperator) op0.getInputs().get(0).getValue();
if (op1.getOperatorTag() != LogicalOperatorTag.INSERT_DELETE) {
return false;
}
FunctionIdentifier fid = null;
/** find the record variable */
InsertDeleteOperator insertDeleteOp = (InsertDeleteOperator) op1;
ILogicalExpression recordExpr = insertDeleteOp.getPayloadExpression().getValue();
LogicalVariable recordVar = null;
List usedRecordVars = new ArrayList<>();
/** assume the payload is always a single variable expression */
recordExpr.getUsedVariables(usedRecordVars);
if (usedRecordVars.size() == 1)
recordVar = usedRecordVars.get(0);
/**
* op2 is the assign operator that extracts primary keys from the record
* variable
*/
AbstractLogicalOperator op2 = (AbstractLogicalOperator) op1.getInputs().get(0).getValue();
if (recordVar == null) {
/**
* For the case where the primary key-assignment expressions are constant
* expressions, find the assign op that creates the record to be
* inserted/deleted.
*/
while (fid != AsterixBuiltinFunctions.OPEN_RECORD_CONSTRUCTOR) {
if (op2.getInputs().size() == 0) {
return false;
}
op2 = (AbstractLogicalOperator) op2.getInputs().get(0).getValue();
if (op2.getOperatorTag() != LogicalOperatorTag.ASSIGN) {
continue;
}
AssignOperator assignOp = (AssignOperator) op2;
ILogicalExpression assignExpr = assignOp.getExpressions().get(0).getValue();
if (assignExpr.getExpressionTag() == LogicalExpressionTag.FUNCTION_CALL) {
ScalarFunctionCallExpression funcExpr = (ScalarFunctionCallExpression) assignOp.getExpressions()
.get(0).getValue();
fid = funcExpr.getFunctionIdentifier();
}
}
AssignOperator assignOp2 = (AssignOperator) op2;
recordVar = assignOp2.getVariables().get(0);
}
AqlDataSource datasetSource = (AqlDataSource) insertDeleteOp.getDataSource();
AqlMetadataProvider mp = (AqlMetadataProvider) context.getMetadataProvider();
String dataverseName = datasetSource.getId().getDataverseName();
String datasetName = datasetSource.getId().getDatasourceName();
Dataset dataset = mp.findDataset(dataverseName, datasetName);
if (dataset == null) {
throw new AlgebricksException("Unknown dataset " + datasetName + " in dataverse " + dataverseName);
}
if (dataset.getDatasetType() == DatasetType.EXTERNAL) {
return false;
}
// Create operators for secondary index insert/delete.
String itemTypeName = dataset.getItemTypeName();
IAType itemType = mp.findType(dataset.getDataverseName(), itemTypeName);
if (itemType.getTypeTag() != ATypeTag.RECORD) {
throw new AlgebricksException("Only record types can be indexed.");
}
ARecordType recType = (ARecordType) itemType;
List indexes = mp.getDatasetIndexes(dataset.getDataverseName(), dataset.getDatasetName());
ILogicalOperator currentTop = op1;
boolean hasSecondaryIndex = false;
// Put an n-gram or a keyword index in the later stage of index-update,
// since TokenizeOperator needs to be involved.
Collections.sort(indexes, new Comparator() {
@Override
public int compare(Index o1, Index o2) {
return o1.getIndexType().ordinal() - o2.getIndexType().ordinal();
}
});
// Check whether multiple keyword or n-gram indexes exist
int secondaryIndexTotalCnt = 0;
for (Index index : indexes) {
if (index.isSecondaryIndex())
secondaryIndexTotalCnt++;
}
// Initialize inputs to the SINK operator
if (secondaryIndexTotalCnt > 0) {
op0.getInputs().clear();
}
// Prepare filtering field information
List additionalFilteringField = ((InternalDatasetDetails) dataset.getDatasetDetails()).getFilterField();
List additionalFilteringVars = null;
List> additionalFilteringAssignExpressions = null;
List> additionalFilteringExpressions = null;
AssignOperator additionalFilteringAssign = null;
if (additionalFilteringField != null) {
additionalFilteringVars = new ArrayList();
additionalFilteringAssignExpressions = new ArrayList>();
additionalFilteringExpressions = new ArrayList>();
prepareVarAndExpression(additionalFilteringField, recType.getFieldNames(), recordVar,
additionalFilteringAssignExpressions, additionalFilteringVars, context);
additionalFilteringAssign = new AssignOperator(additionalFilteringVars,
additionalFilteringAssignExpressions);
for (LogicalVariable var : additionalFilteringVars) {
additionalFilteringExpressions
.add(new MutableObject(new VariableReferenceExpression(var)));
}
}
LogicalVariable enforcedRecordVar = recordVar;
if (insertDeleteOp.getOperation() == Kind.INSERT) {
try {
DatasetDataSource ds = (DatasetDataSource) (insertDeleteOp.getDataSource());
ARecordType insertRecType = (ARecordType) ds.getSchemaTypes()[ds.getSchemaTypes().length - 1];
LogicalVariable castVar = context.newVar();
ARecordType enforcedType = createEnforcedType(insertRecType, indexes);
if (!enforcedType.equals(insertRecType)) {
//introduce casting to enforced type
AbstractFunctionCallExpression castFunc = new ScalarFunctionCallExpression(
FunctionUtils.getFunctionInfo(AsterixBuiltinFunctions.CAST_RECORD));
castFunc.getArguments().add(
new MutableObject(insertDeleteOp.getPayloadExpression().getValue()));
TypeComputerUtilities.setRequiredAndInputTypes(castFunc, enforcedType, insertRecType);
AssignOperator castedRecordAssignOperator = new AssignOperator(castVar,
new MutableObject(castFunc));
castedRecordAssignOperator.getInputs().add(new MutableObject(currentTop));
currentTop = castedRecordAssignOperator;
enforcedRecordVar = castVar;
recType = enforcedType;
context.computeAndSetTypeEnvironmentForOperator(castedRecordAssignOperator);
}
} catch (AsterixException e) {
throw new AlgebricksException(e);
}
}
Set projectVars = new HashSet();
VariableUtilities.getUsedVariables(op1, projectVars);
if (enforcedRecordVar != null)
projectVars.add(enforcedRecordVar);
ProjectOperator project = new ProjectOperator(new ArrayList(projectVars));
project.getInputs().add(new MutableObject(currentTop));
context.computeAndSetTypeEnvironmentForOperator(project);
currentTop = project;
// Replicate Operator is applied only when doing the bulk-load.
AbstractLogicalOperator replicateOp = null;
if (secondaryIndexTotalCnt > 1 && insertDeleteOp.isBulkload()) {
// Split the logical plan into "each secondary index update branch"
// to replicate each pair.
replicateOp = new ReplicateOperator(secondaryIndexTotalCnt);
replicateOp.getInputs().add(new MutableObject(currentTop));
replicateOp.setExecutionMode(ExecutionMode.PARTITIONED);
context.computeAndSetTypeEnvironmentForOperator(replicateOp);
currentTop = replicateOp;
}
// Iterate over each secondary index and apply the index update operations.
for (Index index : indexes) {
if (!index.isSecondaryIndex()) {
continue;
}
hasSecondaryIndex = true;
List> secondaryKeyFields = index.getKeyFieldNames();
List secondaryKeyTypes = index.getKeyFieldTypes();
List secondaryKeyVars = new ArrayList();
List> expressions = new ArrayList>();
List> secondaryExpressions = new ArrayList>();
for (List secondaryKey : secondaryKeyFields) {
prepareVarAndExpression(secondaryKey, recType.getFieldNames(), enforcedRecordVar, expressions,
secondaryKeyVars, context);
}
AssignOperator assign = new AssignOperator(secondaryKeyVars, expressions);
ILogicalOperator filterOrAssignOp = null;
if (additionalFilteringAssign != null) {
filterOrAssignOp = additionalFilteringAssign;
assign.getInputs().add(new MutableObject(additionalFilteringAssign));
} else {
filterOrAssignOp = assign;
}
// Only apply replicate operator when doing bulk-load
if (secondaryIndexTotalCnt > 1 && insertDeleteOp.isBulkload())
filterOrAssignOp.getInputs().add(new MutableObject(replicateOp));
else
filterOrAssignOp.getInputs().add(new MutableObject(currentTop));
if (additionalFilteringAssign != null) {
context.computeAndSetTypeEnvironmentForOperator(additionalFilteringAssign);
}
context.computeAndSetTypeEnvironmentForOperator(assign);
currentTop = assign;
// BTree, Keyword, or n-gram index case
if (index.getIndexType() == IndexType.BTREE || index.getIndexType() == IndexType.SINGLE_PARTITION_WORD_INVIX
|| index.getIndexType() == IndexType.SINGLE_PARTITION_NGRAM_INVIX
|| index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX
|| index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX) {
for (LogicalVariable secondaryKeyVar : secondaryKeyVars) {
secondaryExpressions.add(
new MutableObject(new VariableReferenceExpression(secondaryKeyVar)));
}
Mutable filterExpression = createFilterExpression(secondaryKeyVars,
context.getOutputTypeEnvironment(currentTop), false);
AqlIndex dataSourceIndex = new AqlIndex(index, dataverseName, datasetName, mp);
// Introduce the TokenizeOperator only when doing bulk-load,
// and index type is keyword or n-gram.
if (index.getIndexType() != IndexType.BTREE && insertDeleteOp.isBulkload()) {
// Check whether the index is length-partitioned or not.
// If partitioned, [input variables to TokenizeOperator,
// token, number of token] pairs will be generated and
// fed into the IndexInsertDeleteOperator.
// If not, [input variables, token] pairs will be generated
// and fed into the IndexInsertDeleteOperator.
// Input variables are passed since TokenizeOperator is not a
// filtering operator.
boolean isPartitioned = false;
if (index.getIndexType() == IndexType.LENGTH_PARTITIONED_WORD_INVIX
|| index.getIndexType() == IndexType.LENGTH_PARTITIONED_NGRAM_INVIX)
isPartitioned = true;
// Create a new logical variable - token
List tokenizeKeyVars = new ArrayList();
List> tokenizeKeyExprs = new ArrayList>();
LogicalVariable tokenVar = context.newVar();
tokenizeKeyVars.add(tokenVar);
tokenizeKeyExprs
.add(new MutableObject(new VariableReferenceExpression(tokenVar)));
// Check the field type of the secondary key.
IAType secondaryKeyType = null;
Pair keyPairType = Index.getNonNullableKeyFieldType(secondaryKeyFields.get(0),
recType);
secondaryKeyType = keyPairType.first;
List
© 2015 - 2025 Weber Informatics LLC | Privacy Policy