// org.apache.hadoop.hive.ql.plan.MapJoinDesc (Maven / Gradle / Ivy)
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.plan;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
/**
* Map Join operator Descriptor implementation.
*
*/
@Explain(displayName = "Map Join Operator")
public class MapJoinDesc extends JoinDesc implements Serializable {
private static final long serialVersionUID = 1L;
private Map> keys;
private TableDesc keyTblDesc;
private List valueTblDescs;
private List valueFilteredTblDescs;
private int posBigTable;
private Map valueIndices;
private Map> retainList;
private transient String bigTableAlias;
// for tez. used to remember which position maps to which logical input
// TODO: should these rather be arrays?
private Map parentToInput = new HashMap();
private Map parentKeyCounts = new HashMap();
// for tez. used to remember which type of a Bucket Map Join this is.
private boolean customBucketMapJoin;
// table alias (small) --> input file name (big) --> target file names (small)
private Map>> aliasBucketFileNameMapping;
private Map bigTableBucketNumMapping;
private Map> bigTablePartSpecToFileMapping;
//map join dump file name
private String dumpFilePrefix;
// flag for bucket map join. One usage is to set BucketizedHiveInputFormat
private boolean isBucketMapJoin;
// Hash table memory usage allowed; used in case of non-staged mapjoin.
private float hashtableMemoryUsage;
protected boolean genJoinKeys = true;
public MapJoinDesc() {
bigTableBucketNumMapping = new LinkedHashMap();
}
public MapJoinDesc(MapJoinDesc clone) {
super(clone);
this.keys = clone.keys;
this.keyTblDesc = clone.keyTblDesc;
this.valueTblDescs = clone.valueTblDescs;
this.posBigTable = clone.posBigTable;
this.valueIndices = clone.valueIndices;
this.retainList = clone.retainList;
this.bigTableAlias = clone.bigTableAlias;
this.aliasBucketFileNameMapping = clone.aliasBucketFileNameMapping;
this.bigTableBucketNumMapping = clone.bigTableBucketNumMapping;
this.bigTablePartSpecToFileMapping = clone.bigTablePartSpecToFileMapping;
this.dumpFilePrefix = clone.dumpFilePrefix;
this.parentToInput = clone.parentToInput;
this.parentKeyCounts = clone.parentKeyCounts;
this.customBucketMapJoin = clone.customBucketMapJoin;
}
public MapJoinDesc(final Map> keys,
final TableDesc keyTblDesc, final Map> values,
final List valueTblDescs,final List valueFilteredTblDescs, List outputColumnNames,
final int posBigTable, final JoinCondDesc[] conds,
final Map> filters, boolean noOuterJoin, String dumpFilePrefix) {
super(values, outputColumnNames, noOuterJoin, conds, filters);
this.keys = keys;
this.keyTblDesc = keyTblDesc;
this.valueTblDescs = valueTblDescs;
this.valueFilteredTblDescs = valueFilteredTblDescs;
this.posBigTable = posBigTable;
this.bigTableBucketNumMapping = new LinkedHashMap();
this.dumpFilePrefix = dumpFilePrefix;
initRetainExprList();
}
private void initRetainExprList() {
retainList = new HashMap>();
Set>> set = super.getExprs().entrySet();
Iterator>> setIter = set.iterator();
while (setIter.hasNext()) {
Entry> current = setIter.next();
List list = new ArrayList();
for (int i = 0; i < current.getValue().size(); i++) {
list.add(i);
}
retainList.put(current.getKey(), list);
}
}
@Explain(displayName = "input vertices")
public Map getParentToInput() {
return parentToInput;
}
public void setParentToInput(Map parentToInput) {
this.parentToInput = parentToInput;
}
public Map getParentKeyCounts() {
return parentKeyCounts;
}
@Explain(displayName = "Estimated key counts", normalExplain = false)
public String getKeyCountsExplainDesc() {
StringBuilder result = null;
for (Map.Entry entry : parentKeyCounts.entrySet()) {
if (result == null) {
result = new StringBuilder();
} else {
result.append(", ");
}
result.append(parentToInput.get(entry.getKey())).append(" => ").append(entry.getValue());
}
return result == null ? null : result.toString();
}
public void setParentKeyCount(Map parentKeyCounts) {
this.parentKeyCounts = parentKeyCounts;
}
public Map getValueIndices() {
return valueIndices;
}
public void setValueIndices(Map valueIndices) {
this.valueIndices = valueIndices;
}
public int[] getValueIndex(byte alias) {
return valueIndices == null ? null : valueIndices.get(alias);
}
public Map> getRetainList() {
return retainList;
}
public void setRetainList(Map> retainList) {
this.retainList = retainList;
}
/**
* @return the dumpFilePrefix
*/
public String getDumpFilePrefix() {
return dumpFilePrefix;
}
/**
* @param dumpFilePrefix
* the dumpFilePrefix to set
*/
public void setDumpFilePrefix(String dumpFilePrefix) {
this.dumpFilePrefix = dumpFilePrefix;
}
/**
* @return the keys in string form
*/
@Explain(displayName = "keys")
public Map getKeysString() {
Map keyMap = new LinkedHashMap();
for (Map.Entry> k: getKeys().entrySet()) {
keyMap.put(k.getKey(), PlanUtils.getExprListString(k.getValue()));
}
return keyMap;
}
/**
* @return the keys
*/
public Map> getKeys() {
return keys;
}
/**
* @param keys
* the keys to set
*/
public void setKeys(Map> keys) {
this.keys = keys;
}
/**
* @return the position of the big table not in memory
*/
@Explain(displayName = "Position of Big Table", normalExplain = false)
public int getPosBigTable() {
return posBigTable;
}
/**
* @param posBigTable
* the position of the big table not in memory
*/
public void setPosBigTable(int posBigTable) {
this.posBigTable = posBigTable;
}
/**
* @return the keyTblDesc
*/
public TableDesc getKeyTblDesc() {
return keyTblDesc;
}
/**
* @param keyTblDesc
* the keyTblDesc to set
*/
public void setKeyTblDesc(TableDesc keyTblDesc) {
this.keyTblDesc = keyTblDesc;
}
public List getValueFilteredTblDescs() {
return valueFilteredTblDescs;
}
public void setValueFilteredTblDescs(List valueFilteredTblDescs) {
this.valueFilteredTblDescs = valueFilteredTblDescs;
}
/**
* @return the valueTblDescs
*/
public List getValueTblDescs() {
return valueTblDescs;
}
/**
* @param valueTblDescs
* the valueTblDescs to set
*/
public void setValueTblDescs(List valueTblDescs) {
this.valueTblDescs = valueTblDescs;
}
/**
* @return bigTableAlias
*/
public String getBigTableAlias() {
return bigTableAlias;
}
/**
* @param bigTableAlias
*/
public void setBigTableAlias(String bigTableAlias) {
this.bigTableAlias = bigTableAlias;
}
public Map>> getAliasBucketFileNameMapping() {
return aliasBucketFileNameMapping;
}
public void setAliasBucketFileNameMapping(
Map>> aliasBucketFileNameMapping) {
this.aliasBucketFileNameMapping = aliasBucketFileNameMapping;
}
public Map getBigTableBucketNumMapping() {
return bigTableBucketNumMapping;
}
public void setBigTableBucketNumMapping(Map bigTableBucketNumMapping) {
this.bigTableBucketNumMapping = bigTableBucketNumMapping;
}
public Map> getBigTablePartSpecToFileMapping() {
return bigTablePartSpecToFileMapping;
}
public void setBigTablePartSpecToFileMapping(Map> partToFileMapping) {
this.bigTablePartSpecToFileMapping = partToFileMapping;
}
@Explain(displayName = "BucketMapJoin", normalExplain = false, displayOnlyOnTrue = true)
public boolean isBucketMapJoin() {
return isBucketMapJoin;
}
public void setBucketMapJoin(boolean isBucketMapJoin) {
this.isBucketMapJoin = isBucketMapJoin;
}
public void setHashTableMemoryUsage(float hashtableMemoryUsage) {
this.hashtableMemoryUsage = hashtableMemoryUsage;
}
public float getHashTableMemoryUsage() {
return hashtableMemoryUsage;
}
public void setCustomBucketMapJoin(boolean customBucketMapJoin) {
this.customBucketMapJoin = customBucketMapJoin;
}
public boolean getCustomBucketMapJoin() {
return this.customBucketMapJoin;
}
public boolean isMapSideJoin() {
return true;
}
public void setGenJoinKeys(boolean genJoinKeys) {
this.genJoinKeys = genJoinKeys;
}
public boolean getGenJoinKeys() {
return genJoinKeys;
}
}