// org.apache.hadoop.hive.ql.plan.ListBucketingCtx Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.ql.exec.ColumnInfo;
import org.apache.hadoop.hive.ql.exec.FileSinkOperator;
import org.apache.hadoop.hive.ql.exec.RowSchema;
import org.apache.hadoop.hive.ql.optimizer.listbucketingpruner.ListBucketingPrunerUtils;
/**
* Context for list bucketing.
* It's created in SemanticAnalyzer.genFileSinkPlan().
* It's used in FileSinkOperator.processOp(), merging files, alter table ...concatenate etc.
*/
public class ListBucketingCtx implements Serializable {
/**
* default serialization ID.
*/
private static final long serialVersionUID = 1L;
private List skewedColNames;
private List> skewedColValues;
private Map, String> lbLocationMap;
private List rowSkewedIndex;
private boolean isStoredAsSubDirectories;
private String defaultKey;
private String defaultDirName;
private List skewedValuesDirNames;
public ListBucketingCtx() {
rowSkewedIndex = new ArrayList();
skewedValuesDirNames = new ArrayList();
}
/**
* @return the skewedColNames
*/
public List getSkewedColNames() {
return skewedColNames;
}
/**
* @param skewedColNames the skewedColNames to set
*/
public void setSkewedColNames(List skewedColNames) {
this.skewedColNames = skewedColNames;
}
/**
* @return the skewedColValues
*/
public List> getSkewedColValues() {
return skewedColValues;
}
/**
* @param skewedColValues the skewedColValues to set
*/
public void setSkewedColValues(List> skewedColValues) {
this.skewedColValues = skewedColValues;
}
/**
* @return the lbLocationMap
*/
public Map, String> getLbLocationMap() {
return lbLocationMap;
}
/**
* @param lbLocationMap the lbLocationMap to set
*/
public void setLbLocationMap(Map, String> lbLocationMap) {
this.lbLocationMap = lbLocationMap;
}
/**
* Match column in skewed column list and record position.
* The position will be used in {@link FileSinkOperator} generateListBucketingDirName().
* Note that skewed column name matches skewed value in order.
*
* @param rowSch
*/
public void processRowSkewedIndex(RowSchema rowSch) {
if ((this.skewedColNames != null) && (this.skewedColNames.size() > 0) && (rowSch != null)
&& (rowSch.getSignature() != null) && (rowSch.getSignature().size() > 0)) {
List cols = rowSch.getSignature();
int hitNo = 0;
for (int i = 0; i < cols.size(); i++) {
int index = this.skewedColNames.indexOf(cols.get(i).getInternalName());
if (index > -1) {
hitNo++;
SkewedColumnPositionPair pair = new SkewedColumnPositionPair(i, index);
rowSkewedIndex.add(pair);
}
}
assert (hitNo == this.skewedColNames.size()) : "RowSchema doesn't have all skewed columns."
+ "Skewed column: " + this.skewedColNames.toString() + ". Rowschema has columns: " + cols;
}
}
/**
* Calculate skewed value subdirectory directory which is used in
* FileSinkOperator.java createKeyForStatsPublisher()
* For example, create table test skewed by (key, value) on (('484','val_484')
* stored as DIRECTORIES;
* after the method, skewedValuesDirNames will contain 2 elements:
* key=484/value=val_484
* HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME/HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME
*/
public void calculateSkewedValueSubDirList() {
if (isSkewedStoredAsDir()) {
for (List value : this.skewedColValues) {
skewedValuesDirNames.add(FileUtils.makeListBucketingDirName(this.skewedColNames, value));
}
// creat default dir
skewedValuesDirNames.add(FileUtils.makeDefaultListBucketingDirName(
this.skewedColNames,
ListBucketingPrunerUtils.HIVE_LIST_BUCKETING_DEFAULT_DIR_NAME));
}
}
/**
* @return the isStoredAsSubDirectories
*/
public boolean isStoredAsSubDirectories() {
return isStoredAsSubDirectories;
}
/**
* @param isStoredAsSubDirectories the isStoredAsSubDirectories to set
*/
public void setStoredAsSubDirectories(boolean isStoredAsSubDirectories) {
this.isStoredAsSubDirectories = isStoredAsSubDirectories;
}
/**
* @return the defaultKey
*/
public String getDefaultKey() {
return defaultKey;
}
/**
* @param defaultKey the defaultKey to set
*/
public void setDefaultKey(String defaultKey) {
this.defaultKey = defaultKey;
}
/**
* @return the defaultDirName
*/
public String getDefaultDirName() {
return defaultDirName;
}
/**
* @param defaultDirName the defaultDirName to set
*/
public void setDefaultDirName(String defaultDirName) {
this.defaultDirName = defaultDirName;
}
/**
* check if list bucketing is enabled.
*
* @param ctx
* @return
*/
public boolean isSkewedStoredAsDir() {
return (this.getSkewedColNames() != null)
&& (this.getSkewedColNames().size() > 0)
&& (this.getSkewedColValues() != null)
&& (this.getSkewedColValues().size() > 0)
&& (this.isStoredAsSubDirectories());
}
/**
* Calculate list bucketing level.
*
* 0: not list bucketing
* int: no. of skewed columns
*
* @param ctx
* @return
*/
public int calculateListBucketingLevel() {
int lbLevel = isSkewedStoredAsDir() ? this.getSkewedColNames().size() : 0;
return lbLevel;
}
/**
* @return the skewedValuesDirNames
*/
public List getSkewedValuesDirNames() {
return skewedValuesDirNames;
}
/**
* @param skewedValuesDirNames the skewedValuesDirNames to set
*/
public void setSkewedValuesDirNames(List skewedValuesDirNames) {
this.skewedValuesDirNames = skewedValuesDirNames;
}
/**
* @return the rowSkewedIndex
*/
public List getRowSkewedIndex() {
return rowSkewedIndex;
}
/**
* @param rowSkewedIndex the rowSkewedIndex to set
*/
public void setRowSkewedIndex(List rowSkewedIndex) {
this.rowSkewedIndex = rowSkewedIndex;
}
}