/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.physical;
import java.io.IOException;
import org.apache.hadoop.hive.common.StringInternUtils;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.io.ZeroRowsInputFormat;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Stack;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.exec.Task;
import org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat;
import org.apache.hadoop.hive.ql.io.NullScanFileSystem;
import org.apache.hadoop.hive.ql.io.OneNullRowInputFormat;
import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
import org.apache.hadoop.hive.ql.lib.Dispatcher;
import org.apache.hadoop.hive.ql.lib.GraphWalker;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.PreOrderOnceWalker;
import org.apache.hadoop.hive.ql.lib.Rule;
import org.apache.hadoop.hive.ql.optimizer.physical.MetadataOnlyOptimizer.WalkerCtx;
import org.apache.hadoop.hive.ql.parse.ParseContext;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.PartitionDesc;
import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.NullStructSerDe;
/**
* Iterates over all tasks one by one and removes all input paths from a task if the conditions
* defined in the rules match. Matching table scans are rewritten to read from a fake null-scan
* path with a row-less input format and serde, so no real input data is read.
*/
public class NullScanTaskDispatcher implements Dispatcher {
static final Logger LOG = LoggerFactory.getLogger(NullScanTaskDispatcher.class.getName());
private final PhysicalContext physicalContext;
private final Map<Rule, NodeProcessor> rules;
public NullScanTaskDispatcher(PhysicalContext context, Map<Rule, NodeProcessor> rules) {
super();
physicalContext = context;
this.rules = rules;
}
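/**
* Returns the alias whose top operator in the given MapWork is the supplied table scan,
* or null if the table scan does not belong to this work.
*/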
private String getAliasForTableScanOperator(MapWork work,
TableScanOperator tso) {
for (Map.Entry<String, Operator<? extends OperatorDesc>> entry :
work.getAliasToWork().entrySet()) {
if (entry.getValue() == tso) {
return entry.getKey();
}
}
return null;
}
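/**
* Rewrites the partition so that it is scanned as metadata only: the input format is switched
* to ZeroRowsInputFormat (empty path) or OneNullRowInputFormat (non-empty path) and the serde
* to NullStructSerDe, so no real data is read from the path.
*/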
private PartitionDesc changePartitionToMetadataOnly(PartitionDesc desc, Path path) {
if (desc == null) {
return null;
}
boolean isEmpty = false;
try {
isEmpty = Utilities.isEmptyPath(physicalContext.getConf(), path);
} catch (IOException e) {
LOG.error("Cannot determine if the table is empty", e);
}
desc.setInputFileFormatClass(
isEmpty ? ZeroRowsInputFormat.class : OneNullRowInputFormat.class);
desc.setOutputFileFormatClass(HiveIgnoreKeyTextOutputFormat.class);
desc.getProperties().setProperty(serdeConstants.SERIALIZATION_LIB,
NullStructSerDe.class.getName());
return desc;
}
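/**
* For the given input path, moves the aliases that were marked metadata-only onto a fake
* null-scan path backed by the metadata-only partition descriptor. The original path is
* dropped from the work once no aliases reference it any more.
*/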
private void processAlias(MapWork work, Path path, ArrayList<String> aliasesAffected,
ArrayList<String> aliases) {
// the aliases that are allowed to map to a null scan.
ArrayList<String> allowed = new ArrayList<String>();
for (String alias : aliasesAffected) {
if (aliases.contains(alias)) {
allowed.add(alias);
}
}
if (allowed.size() > 0) {
PartitionDesc partDesc = work.getPathToPartitionInfo().get(path).clone();
PartitionDesc newPartition = changePartitionToMetadataOnly(partDesc, path);
// Prefix partition with something to avoid it being a hidden file.
Path fakePath = new Path(NullScanFileSystem.getBase() + newPartition.getTableName()
+ "/part" + encode(newPartition.getPartSpec()));
StringInternUtils.internUriStringsInPath(fakePath);
work.addPathToPartitionInfo(fakePath, newPartition);
work.addPathToAlias(fakePath, new ArrayList<>(allowed));
aliasesAffected.removeAll(allowed);
if (aliasesAffected.isEmpty()) {
work.removePathToAlias(path);
work.removePathToPartitionInfo(path);
}
}
}
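/**
* Marks the given table scans as metadata-only (skipping non-native tables) and rewrites every
* input path of the work whose aliases are covered by those table scans.
*/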
private void processAlias(MapWork work, HashSet<TableScanOperator> tableScans) {
ArrayList<String> aliases = new ArrayList<String>();
for (TableScanOperator tso : tableScans) {
// use LinkedHashMap<String, Operator<? extends OperatorDesc>>
// getAliasToWork()
// should not apply this for non-native table
if (tso.getConf().getTableMetadata().getStorageHandler() != null) {
continue;
}
String alias = getAliasForTableScanOperator(work, tso);
aliases.add(alias);
tso.getConf().setIsMetadataOnly(true);
}
// group path alias according to work
LinkedHashMap<Path, ArrayList<String>> candidates = new LinkedHashMap<>();
for (Path path : work.getPaths()) {
ArrayList<String> aliasesAffected = work.getPathToAliases().get(path);
if (aliasesAffected != null && aliasesAffected.size() > 0) {
candidates.put(path, aliasesAffected);
}
}
for (Entry<Path, ArrayList<String>> entry : candidates.entrySet()) {
processAlias(work, entry.getKey(), entry.getValue(), aliases);
}
}
// considered using URLEncoder, but it seemed too much
private String encode(Map<String, String> partSpec) {
return partSpec.toString().replaceAll("[{}:/#\\?, ]+", "_");
}
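/**
* Dispatched once per task: walks the map work of the task with the configured rules and, for
* every table scan the rules mark as metadata-only, rewrites its input paths to null scans.
*/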
@Override
public Object dispatch(Node nd, Stack<Node> stack, Object... nodeOutputs)
throws SemanticException {
Task<? extends Serializable> task = (Task<? extends Serializable>) nd;
// create the context for walking operators
ParseContext parseContext = physicalContext.getParseContext();
WalkerCtx walkerCtx = new WalkerCtx();
List<MapWork> mapWorks = new ArrayList<MapWork>(task.getMapWork());
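// sort map works by name so they are walked in a deterministic order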
Collections.sort(mapWorks, new Comparator<MapWork>() {
@Override
public int compare(MapWork o1, MapWork o2) {
return o1.getName().compareTo(o2.getName());
}
});
for (MapWork mapWork : mapWorks) {
LOG.debug("Looking at: "+mapWork.getName());
Collection> topOperators
= mapWork.getAliasToWork().values();
if (topOperators.size() == 0) {
LOG.debug("No top operators");
return null;
}
LOG.debug("Looking for table scans where optimization is applicable");
// The dispatcher fires the processor corresponding to the closest
// matching rule and passes the context along
Dispatcher disp = new DefaultRuleDispatcher(null, rules, walkerCtx);
GraphWalker ogw = new PreOrderOnceWalker(disp);
// Create a list of topOp nodes
ArrayList<Node> topNodes = new ArrayList<Node>();
// Get the top Nodes for this task
for (Operator<? extends OperatorDesc> workOperator : topOperators) {
if (parseContext.getTopOps().values().contains(workOperator)) {
topNodes.add(workOperator);
}
}
Operator<? extends OperatorDesc> reducer = task.getReducer(mapWork);
if (reducer != null) {
topNodes.add(reducer);
}
ogw.startWalking(topNodes, null);
LOG.debug(String.format("Found %d null table scans",
walkerCtx.getMetadataOnlyTableScans().size()));
if (walkerCtx.getMetadataOnlyTableScans().size() > 0) {
processAlias(mapWork, walkerCtx.getMetadataOnlyTableScans());
}
}
return null;
}
}