org.apache.hadoop.hive.ql.plan.ReduceSinkDesc Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hive-exec Show documentation
Show all versions of hive-exec Show documentation
Hive is a data warehouse infrastructure built on top of Hadoop; see
http://wiki.apache.org/hadoop/Hive
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.util.ArrayList;
import java.util.List;
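/**
* ReduceSinkDesc: plan descriptor for the "Reduce Output Operator". It holds
* the key, value and partition columns, the serialization descriptors for the
* key and the value, the tag, and the number of reducers.
*
*/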
@Explain(displayName = "Reduce Output Operator")
public class ReduceSinkDesc extends AbstractOperatorDesc {
private static final long serialVersionUID = 1L;
/**
* Key columns are passed to reducer in the "key".
*/
private java.util.ArrayList keyCols;
private java.util.ArrayList outputKeyColumnNames;
private List> distinctColumnIndices;
/**
* Value columns are passed to reducer in the "value".
*/
private java.util.ArrayList valueCols;
private java.util.ArrayList outputValueColumnNames;
/**
* Describe how to serialize the key.
*/
private TableDesc keySerializeInfo;
/**
* Describe how to serialize the value.
*/
private TableDesc valueSerializeInfo;
/**
* The tag for this reducesink descriptor.
*/
private int tag;
/**
* Number of distribution keys.
*/
private int numDistributionKeys;
/**
* Whether optimized for skew data
*/
private boolean optimizeSkew;
/**
* The partition columns (CLUSTER BY or DISTRIBUTE BY in Hive language).
* Partition columns decide the reducer that the current row goes to.
* Partition columns are not passed to reducer.
*/
private java.util.ArrayList partitionCols;
private int numReducers;
public ReduceSinkDesc() {
}
public ReduceSinkDesc(java.util.ArrayList keyCols,
int numDistributionKeys,
java.util.ArrayList valueCols,
java.util.ArrayList outputKeyColumnNames,
List> distinctColumnIndices,
java.util.ArrayList outputValueColumnNames, int tag,
java.util.ArrayList partitionCols, int numReducers,
final TableDesc keySerializeInfo, final TableDesc valueSerializeInfo,
boolean optimizeSkew) {
this.keyCols = keyCols;
this.numDistributionKeys = numDistributionKeys;
this.valueCols = valueCols;
this.outputKeyColumnNames = outputKeyColumnNames;
this.outputValueColumnNames = outputValueColumnNames;
this.tag = tag;
this.numReducers = numReducers;
this.partitionCols = partitionCols;
this.keySerializeInfo = keySerializeInfo;
this.valueSerializeInfo = valueSerializeInfo;
this.distinctColumnIndices = distinctColumnIndices;
this.optimizeSkew = optimizeSkew;
}
@Override
public Object clone() {
ReduceSinkDesc desc = new ReduceSinkDesc();
desc.setKeyCols((ArrayList) getKeyCols().clone());
desc.setValueCols((ArrayList) getValueCols().clone());
desc.setOutputKeyColumnNames((ArrayList) getOutputKeyColumnNames().clone());
List> distinctColumnIndicesClone = new ArrayList>();
for (List distinctColumnIndex : getDistinctColumnIndices()) {
List tmp = new ArrayList();
tmp.addAll(distinctColumnIndex);
distinctColumnIndicesClone.add(tmp);
}
desc.setDistinctColumnIndices(distinctColumnIndicesClone);
desc.setOutputValueColumnNames((ArrayList) getOutputValueColumnNames().clone());
desc.setNumDistributionKeys(getNumDistributionKeys());
desc.setTag(getTag());
desc.setNumReducers(getNumReducers());
desc.setPartitionCols((ArrayList) getPartitionCols().clone());
desc.setKeySerializeInfo((TableDesc) getKeySerializeInfo().clone());
desc.setValueSerializeInfo((TableDesc) getValueSerializeInfo().clone());
return desc;
}
public java.util.ArrayList getOutputKeyColumnNames() {
return outputKeyColumnNames;
}
public void setOutputKeyColumnNames(
java.util.ArrayList outputKeyColumnNames) {
this.outputKeyColumnNames = outputKeyColumnNames;
}
public java.util.ArrayList getOutputValueColumnNames() {
return outputValueColumnNames;
}
public void setOutputValueColumnNames(
java.util.ArrayList outputValueColumnNames) {
this.outputValueColumnNames = outputValueColumnNames;
}
@Explain(displayName = "key expressions")
public java.util.ArrayList getKeyCols() {
return keyCols;
}
public void setKeyCols(final java.util.ArrayList keyCols) {
this.keyCols = keyCols;
}
public int getNumDistributionKeys() {
return this.numDistributionKeys;
}
public void setNumDistributionKeys(int numKeys) {
this.numDistributionKeys = numKeys;
}
@Explain(displayName = "value expressions")
public java.util.ArrayList getValueCols() {
return valueCols;
}
public void setValueCols(final java.util.ArrayList valueCols) {
this.valueCols = valueCols;
}
@Explain(displayName = "Map-reduce partition columns")
public java.util.ArrayList getPartitionCols() {
return partitionCols;
}
public void setPartitionCols(
final java.util.ArrayList partitionCols) {
this.partitionCols = partitionCols;
}
@Explain(displayName = "tag")
public int getTag() {
return tag;
}
public void setTag(int tag) {
this.tag = tag;
}
/**
* Returns the number of reducers for the map-reduce job. -1 means to decide
* the number of reducers at runtime. This enables Hive to estimate the number
* of reducers based on the map-reduce input data size, which is only
* available right before we start the map-reduce job.
*/
public int getNumReducers() {
return numReducers;
}
public void setNumReducers(int numReducers) {
this.numReducers = numReducers;
}
public TableDesc getKeySerializeInfo() {
return keySerializeInfo;
}
public void setKeySerializeInfo(TableDesc keySerializeInfo) {
this.keySerializeInfo = keySerializeInfo;
}
public TableDesc getValueSerializeInfo() {
return valueSerializeInfo;
}
public void setValueSerializeInfo(TableDesc valueSerializeInfo) {
this.valueSerializeInfo = valueSerializeInfo;
}
/**
* Returns the sort order of the key columns.
*
* @return null, which means ascending order for all key columns, or a String
* of the same length as key columns, that consists of only "+"
* (ascending order) and "-" (descending order).
*/
@Explain(displayName = "sort order")
public String getOrder() {
return keySerializeInfo.getProperties().getProperty(
org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_SORT_ORDER);
}
public void setOrder(String orderStr) {
keySerializeInfo.getProperties().setProperty(
org.apache.hadoop.hive.serde.serdeConstants.SERIALIZATION_SORT_ORDER,
orderStr);
}
public List> getDistinctColumnIndices() {
return distinctColumnIndices;
}
public void setDistinctColumnIndices(
List> distinctColumnIndices) {
this.distinctColumnIndices = distinctColumnIndices;
}
public boolean isOptimizeSkew() {
return optimizeSkew;
}
public void setOptimizeSkew(boolean optimizeSkew) {
this.optimizeSkew = optimizeSkew;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy