/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import static org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer.BUCKET_NUMBER_COL_NAME;
import static org.apache.hadoop.hive.ql.plan.ReduceSinkDesc.ReducerTraits.UNIFORM;
import java.io.IOException;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Random;
import java.util.function.BiFunction;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.CompilationOpContext;
import org.apache.hadoop.hive.ql.io.AcidUtils;
import org.apache.hadoop.hive.ql.io.HiveKey;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.ExprNodeConstantDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
import org.apache.hadoop.hive.ql.plan.ExprNodeDescUtils;
import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
import org.apache.hadoop.hive.ql.plan.TableDesc;
import org.apache.hadoop.hive.ql.plan.api.OperatorType;
import org.apache.hadoop.hive.serde2.ByteStream;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.Serializer;
import org.apache.hadoop.hive.serde2.binarysortable.BinarySortableSerDe;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.StandardUnionObjectInspector.StandardUnion;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.UnionObjectInspector;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hive.common.util.Murmur3;
/**
* Reduce Sink Operator sends output to the reduce stage.
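* For each input row it evaluates the key, value, and partition (and, when
* bucketing applies, bucket) expressions, serializes the key and value, and
* emits the pair to the shuffle through the {@link OutputCollector}.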
**/
public class ReduceSinkOperator extends TerminalOperator<ReduceSinkDesc>
    implements Serializable, TopNHash.BinaryCollector {
private static final long serialVersionUID = 1L;
private transient ObjectInspector[] partitionObjectInspectors;
private transient ObjectInspector[] bucketObjectInspectors;
private transient int buckColIdxInKey;
/**
* {@link org.apache.hadoop.hive.ql.optimizer.SortedDynPartitionOptimizer}
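* When that optimizer rewrites the plan, this appears to record the position
* of the synthetic bucket-number column inside the key; it stays -1 otherwise.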
*/
private transient int buckColIdxInKeyForSdpo = -1;
private boolean firstRow;
private transient int tag;
private boolean skipTag = false;
private transient int[] valueIndex; // maps value columns: non-negative entries read from the keys, negative entries from the values
protected transient OutputCollector out;
/**
* The evaluators for the key columns. Key columns decide the sort order on
* the reducer side. Key columns are passed to the reducer in the "key".
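* For example, in {@code SELECT ... SORT BY c1, c2} the evaluators for
* c1 and c2 become key columns.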
*/
protected transient ExprNodeEvaluator[] keyEval;
/**
* The evaluators for the value columns. Value columns are passed to reducer
* in the "value".
*/
protected transient ExprNodeEvaluator[] valueEval;
/**
* The evaluators for the partition columns (CLUSTER BY or DISTRIBUTE BY in
* Hive language). Partition columns decide the reducer that the current row
* goes to. Partition columns are not passed to reducer.
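* For example, with {@code DISTRIBUTE BY c1} a row typically lands on
* reducer (hash(c1) &amp; Integer.MAX_VALUE) % numReducers.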
*/
protected transient ExprNodeEvaluator[] partitionEval;
/**
* Evaluators for bucketing columns. This is used to compute bucket number.
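* For a table bucketed into N buckets, the bucket number is the bucket hash
* of these columns modulo N (see ObjectInspectorUtils.getBucketNumber).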
*/
protected transient ExprNodeEvaluator[] bucketEval = null;
// TODO: we use MetadataTypedColumnsetSerDe for now, till DynamicSerDe is ready
protected transient Serializer keySerializer;
protected transient boolean keyIsText;
protected transient Serializer valueSerializer;
protected transient byte[] tagByte = new byte[1];
protected transient int numDistributionKeys;
protected transient int numDistinctExprs;
protected transient String[] inputAliases; // input aliases of this RS for join (used for PPD)
protected transient boolean useUniformHash = false;
// picks topN K:V pairs from input.
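// For ORDER BY ... LIMIT n, only the current top-n keys need to survive the
// shuffle, so rows that cannot make the top n are dropped on the map side.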
protected transient TopNHash reducerHash;
protected transient HiveKey keyWritable = new HiveKey();
protected transient ObjectInspector keyObjectInspector;
protected transient ObjectInspector valueObjectInspector;
protected transient Object[] cachedValues;
protected transient List<List<Integer>> distinctColIndices;
protected transient Random random;
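// Hash used for distribution/bucketing; depending on the bucketing version
// this is expected to be the Murmur3-based ObjectInspectorUtils.getBucketHashCode
// or the legacy ObjectInspectorUtils.getBucketHashCodeOld (hence the Murmur3
// import above).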
protected transient BiFunction<Object[], ObjectInspector[], Integer> hashFunc;