/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.physical;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.Log;
import com.facebook.presto.hive.$internal.org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.GroupByOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.lib.RuleRegExp;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.TableScanDesc;
/**
*
* MetadataOnlyOptimizer determines to which TableScanOperators the "metadata
* only" optimization can be applied. Such an operator must use only partition
* columns (this is easy to check because we run after column pruning), and
* every place where the data from the operator is used must go through a
* GroupByOperator with distinct or distinct-like aggregations. An aggregation
* is distinct-like if adding DISTINCT would not change its result, for example
* min or max.
*
* We cannot apply the optimization without a group by, because the results
* depend on the number of rows in each partition; for example, count(hr)
* counts all rows in the matching partitions.
*
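* An illustrative sketch (the example table src_part, partitioned on ds and
* hr, is assumed here and is not part of the original comment):
*
* <pre>
*   SELECT DISTINCT ds FROM src_part;   -- eligible: distinct over a partition column
*   SELECT max(hr) FROM src_part;       -- eligible: max is distinct-like
*   SELECT count(hr) FROM src_part;     -- not eligible: depends on per-partition row counts
* </pre>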
*/
public class MetadataOnlyOptimizer implements PhysicalPlanResolver {
static final Log LOG = LogFactory.getLog(MetadataOnlyOptimizer.class.getName());
static class WalkerCtx implements NodeProcessorCtx {
/* operators for which there is chance the optimization can be applied */
private final HashSet<TableScanOperator> possible = new HashSet<TableScanOperator>();
/* operators for which the optimization will be successful */
private final HashSet<TableScanOperator> success = new HashSet<TableScanOperator>();
/**
* Marks the operator as one to which the optimization may apply.
*
* @param op
* the operator
*/
public void setMayBeMetadataOnly(TableScanOperator op) {
possible.add(op);
}
/** Convert all possible operators to success */
public void convertMetadataOnly() {
success.addAll(possible);
possible.clear();
}
/**
* Disqualifies all operators: clears both the candidate and the success sets.
*/
public void convertNotMetadataOnly() {
possible.clear();
success.clear();
}
/**
* Returns HashSet of collected operators for which the optimization may be
* applicable.
*/
public HashSet<TableScanOperator> getMayBeMetadataOnlyTableScans() {
return possible;
}
/**
* Returns HashSet of collected operators for which the optimization is
* applicable.
*/
public HashSet<TableScanOperator> getMetadataOnlyTableScans() {
return success;
}
}
private static class TableScanProcessor implements NodeProcessor {
public TableScanProcessor() {
}
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
TableScanOperator tsOp = (TableScanOperator) nd;
WalkerCtx walkerCtx = (WalkerCtx) procCtx;
List<Integer> colIDs = tsOp.getNeededColumnIDs();
TableScanDesc desc = tsOp.getConf();
boolean noColNeeded = (colIDs == null) || (colIDs.isEmpty());
boolean noVCneeded = (desc == null) || (desc.getVirtualCols() == null)
|| (desc.getVirtualCols().isEmpty());
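// A scan that needs no table columns and no virtual columns can only feed
// partition-column values downstream, which makes it a candidate for the
// metadata-only rewrite.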
if (noColNeeded && noVCneeded) {
walkerCtx.setMayBeMetadataOnly(tsOp);
}
return nd;
}
}
private static class FileSinkProcessor implements NodeProcessor {
@Override
public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx,
Object... nodeOutputs) throws SemanticException {
WalkerCtx walkerCtx = (WalkerCtx) procCtx;
// There can be at most one element eligible to be converted to
// metadata only
if (walkerCtx.getMayBeMetadataOnlyTableScans().isEmpty()) {
return nd;
}
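// The walker's stack holds the operator path from the root down to this
// FileSinkOperator; every GroupByOperator on that path must be distinct-like
// for the candidate scans to be answerable from metadata alone.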
for (Node op : stack) {
if (op instanceof GroupByOperator) {
GroupByOperator gby = (GroupByOperator) op;
if (!gby.getConf().isDistinctLike()) {
// GroupBy is not distinct-like, so the optimization cannot be applied
walkerCtx.convertNotMetadataOnly();
return nd;
}
}
}
walkerCtx.convertMetadataOnly();
return nd;
}
}
@Override
public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
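// R1 fires on every TableScanOperator ("TS%"); R2 fires when the walk reaches
// a FileSinkOperator whose path from the root passes through a GroupByOperator.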
opRules.put(new RuleRegExp("R1", TableScanOperator.getOperatorName() + "%"),
new TableScanProcessor());
opRules.put(new RuleRegExp("R2",
GroupByOperator.getOperatorName() + "%.*" + FileSinkOperator.getOperatorName() + "%"),
new FileSinkProcessor());
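// The dispatcher applies the rules above to each task's operator tree; in
// stock Hive, NullScanTaskDispatcher then rewrites the scans collected in the
// WalkerCtx success set to read a metadata-only (one-null-row) input instead
// of the table data (a hedged description; the dispatcher's source is not
// shown in this file).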
Dispatcher disp = new NullScanTaskDispatcher(pctx, opRules);
GraphWalker ogw = new DefaultGraphWalker(disp);
ArrayList<Node> topNodes = new ArrayList<Node>();
topNodes.addAll(pctx.getRootTasks());
ogw.startWalking(topNodes, null);
return pctx;
}
}
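// A minimal usage sketch (hypothetical driver code; assumes a PhysicalContext
// already built by the Hive compiler, and is not part of this file):
//
//   PhysicalContext pctx = ...;                 // physical plan context for the query
//   new MetadataOnlyOptimizer().resolve(pctx);  // marks and rewrites qualifying scans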