Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.optimizer.spark;
import java.util.List;
import java.util.Map;
import java.util.Stack;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.SMBMapJoinOperator;
import org.apache.hadoop.hive.ql.exec.TableScanOperator;
import org.apache.hadoop.hive.ql.lib.Node;
import org.apache.hadoop.hive.ql.lib.NodeProcessor;
import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
import org.apache.hadoop.hive.ql.optimizer.GenMapRedUtils;
import org.apache.hadoop.hive.ql.parse.SemanticException;
import org.apache.hadoop.hive.ql.parse.spark.GenSparkProcContext;
import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.MapWork;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
/**
* Operator factory for Spark SMBJoin processing.
*/
public final class SparkSortMergeJoinFactory {
private SparkSortMergeJoinFactory() {
// prevent instantiation
}
/**
* Annotate MapWork, input is a SMBJoinOp that is part of a MapWork, and its root TS operator.
*
* 1. Initializes the MapWork's aliasToWork, pointing to big-table's TS.
* 2. Adds the bucketing information to the MapWork.
* 3. Adds localwork to the MapWork, with localWork's aliasToWork pointing to small-table's TS.
* @param context proc walker context
* @param mapWork mapwork to annotate
* @param smbMapJoinOp SMB Map Join operator to get data
* @param ts Table Scan operator to get data
* @param local Whether ts is from a 'local' source (small-table that will be loaded by SMBJoin 'local' task)
*/
public static void annotateMapWork(GenSparkProcContext context, MapWork mapWork,
SMBMapJoinOperator smbMapJoinOp, TableScanOperator ts, boolean local)
throws SemanticException {
initSMBJoinPlan(context, mapWork, ts, local);
setupBucketMapJoinInfo(mapWork, smbMapJoinOp);
}
private static void setupBucketMapJoinInfo(MapWork plan, SMBMapJoinOperator currMapJoinOp) {
if (currMapJoinOp != null) {
Map>> aliasBucketFileNameMapping =
currMapJoinOp.getConf().getAliasBucketFileNameMapping();
if (aliasBucketFileNameMapping != null) {
MapredLocalWork localPlan = plan.getMapRedLocalWork();
if (localPlan == null) {
localPlan = currMapJoinOp.getConf().getLocalWork();
} else {
// local plan is not null, we want to merge it into SMBMapJoinOperator's local work
MapredLocalWork smbLocalWork = currMapJoinOp.getConf().getLocalWork();
if (smbLocalWork != null) {
localPlan.getAliasToFetchWork().putAll(smbLocalWork.getAliasToFetchWork());
localPlan.getAliasToWork().putAll(smbLocalWork.getAliasToWork());
}
}
if (localPlan == null) {
return;
}
plan.setMapRedLocalWork(null);
currMapJoinOp.getConf().setLocalWork(localPlan);
BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext();
localPlan.setBucketMapjoinContext(bucketMJCxt);
bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping);
bucketMJCxt.setBucketFileNameMapping(
currMapJoinOp.getConf().getBigTableBucketNumMapping());
localPlan.setInputFileChangeSensitive(true);
bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias());
bucketMJCxt
.setBucketMatcherClass(org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class);
bucketMJCxt.setBigTablePartSpecToFileMapping(
currMapJoinOp.getConf().getBigTablePartSpecToFileMapping());
plan.setUseBucketizedHiveInputFormat(true);
}
}
}
private static void initSMBJoinPlan(GenSparkProcContext opProcCtx,
MapWork mapWork, TableScanOperator currentRootOperator, boolean local)
throws SemanticException {
String currAliasId = findAliasId(opProcCtx, currentRootOperator);
GenMapRedUtils.setMapWork(mapWork, opProcCtx.parseContext,
opProcCtx.inputs, null, currentRootOperator, currAliasId, opProcCtx.conf, local);
}
private static String findAliasId(GenSparkProcContext opProcCtx, TableScanOperator ts) {
for (String alias : opProcCtx.topOps.keySet()) {
if (opProcCtx.topOps.get(alias) == ts) {
return alias;
}
}
return null;
}
}