/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hive.ql.plan;

import java.util.Arrays;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import io.prestosql.hive.$internal.org.slf4j.Logger;
import io.prestosql.hive.$internal.org.slf4j.LoggerFactory;
import org.apache.hadoop.hive.common.type.DataTypePhysicalVariation;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.HashTableDummyOperator;
import org.apache.hadoop.hive.ql.exec.Operator;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedSupport.Support;
import org.apache.hadoop.hive.ql.parse.RuntimeValuesInfo;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
import org.apache.hadoop.hive.ql.optimizer.physical.VectorizerReason;
import org.apache.hadoop.hive.ql.plan.Explain.Level;
import org.apache.hadoop.hive.ql.plan.Explain.Vectorization;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;

/**
 * BaseWork. Base class for any "work" that's being done on the cluster. Items like stats
 * gathering that are commonly used regardless of the type of work live here.
 */
@SuppressWarnings({"serial"})
public abstract class BaseWork extends AbstractOperatorDesc {
  protected static final Logger LOG = LoggerFactory.getLogger(BaseWork.class);

  // dummyOps is a reference to all the HashTableDummy operators in the
  // plan. These have to be separately initialized when we setup a task.
  // Their function is mainly as root ops to give the mapjoin the correct
  // schema info.
  List<HashTableDummyOperator> dummyOps;
  int tag = 0;
  private final List<String> sortColNames = new ArrayList<String>();

  private MapredLocalWork mrLocalWork;

  public BaseWork() {}

  public BaseWork(String name) {
    setName(name);
  }
  private boolean gatheringStats;

  private String name;

  /*
   * Vectorization.
   */

  // This will be true if a node was examined by the Vectorizer class.
  protected boolean vectorizationExamined;

  protected boolean vectorizationEnabled;

  protected VectorizedRowBatchCtx vectorizedRowBatchCtx;

  protected boolean useVectorizedInputFileFormat;

  protected Set<Support> inputFormatSupportSet;
  protected Set<Support> supportSetInUse;
  protected List<String> supportRemovedReasons;

  private transient VectorizerReason notVectorizedReason;

  private boolean groupByVectorOutput;
  private boolean allNative;
  private boolean usesVectorUDFAdaptor;

  protected long vectorizedVertexNum;
  protected int vectorizedTestingReducerBatchSize;

  private boolean isTestForcedVectorizationEnable;
  private boolean isTestVectorizationSuppressExplainExecutionMode;

  protected boolean llapMode = false;
  protected boolean uberMode = false;

  private int reservedMemoryMB = -1;  // default to -1 means we leave it up to Tez to decide

  // Used for value registry
  private Map<String, RuntimeValuesInfo> inputSourceToRuntimeValuesInfo =
      new HashMap<String, RuntimeValuesInfo>();
  public void setGatheringStats(boolean gatherStats) {
    this.gatheringStats = gatherStats;
  }

  public boolean isGatheringStats() {
    return this.gatheringStats;
  }

  public String getName() {
    return name;
  }

  public void setName(String name) {
    this.name = name;
  }

  public List<HashTableDummyOperator> getDummyOps() {
    return dummyOps;
  }

  public void setDummyOps(List<HashTableDummyOperator> dummyOps) {
    if (this.dummyOps != null && !this.dummyOps.isEmpty()
        && (dummyOps == null || dummyOps.isEmpty())) {
      LOG.info("Removing dummy operators from " + name + " " + this.getClass().getSimpleName());
    }
    this.dummyOps = dummyOps;
  }

  public void addDummyOp(HashTableDummyOperator dummyOp) {
    if (dummyOps == null) {
      dummyOps = new LinkedList<HashTableDummyOperator>();
    }
    dummyOps.add(dummyOp);
  }
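
  /**
   * Replaces each root operator of this work with its image in {@code replacementMap}
   * (old root mapped to new root).
   */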
  public abstract void replaceRoots(Map<Operator<?>, Operator<?>> replacementMap);

  public abstract Set<Operator<? extends OperatorDesc>> getAllRootOperators();
  public abstract Operator<? extends OperatorDesc> getAnyRootOperator();
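
  /**
   * Returns every operator reachable from the root operators, following child links
   * depth-first. A sketch of a hypothetical caller (not part of this file):
   * <pre>{@code
   * for (Operator<?> op : work.getAllOperators()) {
   *   LOG.debug("operator in plan: " + op.getOperatorId());
   * }
   * }</pre>
   */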
  public Set<Operator<?>> getAllOperators() {
    Set<Operator<?>> returnSet = new LinkedHashSet<Operator<?>>();
    Set<Operator<? extends OperatorDesc>> opSet = getAllRootOperators();
    Stack<Operator<? extends OperatorDesc>> opStack = new Stack<Operator<? extends OperatorDesc>>();

    // add all children
    opStack.addAll(opSet);

    while (!opStack.empty()) {
      Operator<? extends OperatorDesc> op = opStack.pop();
      returnSet.add(op);
      if (op.getChildOperators() != null) {
        opStack.addAll(op.getChildOperators());
      }
    }

    return returnSet;
  }

  /**
   * Returns a set containing all leaf operators from the operator tree in this work.
   * @return a set containing all leaf operators in this operator tree.
   */
  public Set<Operator<? extends OperatorDesc>> getAllLeafOperators() {
    Set<Operator<? extends OperatorDesc>> returnSet = new LinkedHashSet<Operator<? extends OperatorDesc>>();
    Set<Operator<? extends OperatorDesc>> opSet = getAllRootOperators();
    Stack<Operator<? extends OperatorDesc>> opStack = new Stack<Operator<? extends OperatorDesc>>();

    // add all children
    opStack.addAll(opSet);

    while (!opStack.empty()) {
      Operator<? extends OperatorDesc> op = opStack.pop();
      if (op.getNumChild() == 0) {
        returnSet.add(op);
      }
      if (op.getChildOperators() != null) {
        opStack.addAll(op.getChildOperators());
      }
    }

    return returnSet;
  }
  public void setVectorizedVertexNum(long vectorizedVertexNum) {
    this.vectorizedVertexNum = vectorizedVertexNum;
  }

  public long getVectorizedVertexNum() {
    return vectorizedVertexNum;
  }

  public void setVectorizedTestingReducerBatchSize(int vectorizedTestingReducerBatchSize) {
    this.vectorizedTestingReducerBatchSize = vectorizedTestingReducerBatchSize;
  }

  public int getVectorizedTestingReducerBatchSize() {
    return vectorizedTestingReducerBatchSize;
  }

  // ---------------------------------------------------------------------------------------------

  public void setVectorizationExamined(boolean vectorizationExamined) {
    this.vectorizationExamined = vectorizationExamined;
  }

  public boolean getVectorizationExamined() {
    return vectorizationExamined;
  }

  public void setVectorizationEnabled(boolean vectorizationEnabled) {
    this.vectorizationEnabled = vectorizationEnabled;
  }

  public boolean getVectorizationEnabled() {
    return vectorizationEnabled;
  }

  /*
   * The vectorization context for creating the VectorizedRowBatch for the node.
   */
  public VectorizedRowBatchCtx getVectorizedRowBatchCtx() {
    return vectorizedRowBatchCtx;
  }

  public void setVectorizedRowBatchCtx(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
    this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
  }

  public void setNotVectorizedReason(VectorizerReason notVectorizedReason) {
    this.notVectorizedReason = notVectorizedReason;
  }

  public VectorizerReason getNotVectorizedReason() {
    return notVectorizedReason;
  }

  public void setUsesVectorUDFAdaptor(boolean usesVectorUDFAdaptor) {
    this.usesVectorUDFAdaptor = usesVectorUDFAdaptor;
  }

  public boolean getUsesVectorUDFAdaptor() {
    return usesVectorUDFAdaptor;
  }

  public void setAllNative(boolean allNative) {
    this.allNative = allNative;
  }

  public boolean getAllNative() {
    return allNative;
  }

  public void setIsTestForcedVectorizationEnable(boolean isTestForcedVectorizationEnable) {
    this.isTestForcedVectorizationEnable = isTestForcedVectorizationEnable;
  }

  public boolean getIsTestForcedVectorizationEnable() {
    return isTestForcedVectorizationEnable;
  }

  public void setIsTestVectorizationSuppressExplainExecutionMode(
      boolean isTestVectorizationSuppressExplainExecutionMode) {
    this.isTestVectorizationSuppressExplainExecutionMode =
        isTestVectorizationSuppressExplainExecutionMode;
  }

  public boolean getIsTestVectorizationSuppressExplainExecutionMode() {
    return isTestVectorizationSuppressExplainExecutionMode;
  }
  public static class BaseExplainVectorization {

    private final BaseWork baseWork;

    public BaseExplainVectorization(BaseWork baseWork) {
      this.baseWork = baseWork;
    }
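
    // Each entry below is rendered as "<projectionColumn>:<columnName>:<typeInfo>", with an
    // optional "/<dataTypePhysicalVariation>" suffix when a column uses a non-default physical
    // representation (e.g. DECIMAL_64). Concrete values are illustrative, not from this file.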
    public static List<String> getColumnAndTypes(
        int[] projectionColumns,
        String[] columnNames, TypeInfo[] typeInfos,
        DataTypePhysicalVariation[] dataTypePhysicalVariations) {
      final int size = columnNames.length;
      List<String> result = new ArrayList<String>(size);
      for (int i = 0; i < size; i++) {
        String displayString = projectionColumns[i] + ":" + columnNames[i] + ":" + typeInfos[i];
        if (dataTypePhysicalVariations != null &&
            dataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) {
          displayString += "/" + dataTypePhysicalVariations[i].toString();
        }
        result.add(displayString);
      }
      return result;
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "enabled",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public boolean enabled() {
      return baseWork.getVectorizationEnabled();
    }
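
    // The @Explain methods below return null when they do not apply (e.g. vectorization is
    // disabled), which causes the corresponding row to be omitted from the EXPLAIN output.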
    @Explain(vectorization = Vectorization.SUMMARY, displayName = "vectorized",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean vectorized() {
      if (!baseWork.getVectorizationEnabled()) {
        return null;
      }
      return baseWork.getVectorMode();
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "notVectorizedReason",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public String notVectorizedReason() {
      if (!baseWork.getVectorizationEnabled() || baseWork.getVectorMode()) {
        return null;
      }
      VectorizerReason notVectorizedReason = baseWork.getNotVectorizedReason();
      if (notVectorizedReason == null) {
        return "Unknown";
      }
      return notVectorizedReason.toString();
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "allNative",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean nativeVectorized() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return baseWork.getAllNative();
    }
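
    // Each entry is rendered as "<columnName>:<typeInfo>", again with an optional
    // "/<dataTypePhysicalVariation>" suffix.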
    public static List<String> getColumns(VectorizedRowBatchCtx vectorizedRowBatchCtx,
        int startIndex, int count) {
      String[] rowColumnNames = vectorizedRowBatchCtx.getRowColumnNames();
      TypeInfo[] rowColumnTypeInfos = vectorizedRowBatchCtx.getRowColumnTypeInfos();
      DataTypePhysicalVariation[] dataTypePhysicalVariations =
          vectorizedRowBatchCtx.getRowdataTypePhysicalVariations();
      List<String> result = new ArrayList<String>(count);
      final int end = startIndex + count;
      for (int i = startIndex; i < end; i++) {
        String displayString = rowColumnNames[i] + ":" + rowColumnTypeInfos[i];
        if (dataTypePhysicalVariations != null &&
            dataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) {
          displayString += "/" + dataTypePhysicalVariations[i].toString();
        }
        result.add(displayString);
      }
      return result;
    }
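
    // Scratch column types are collapsed into a single bracketed string via List.toString(),
    // e.g. "[bigint, decimal(10,2)/DECIMAL_64]" (values illustrative).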
    public static String getScratchColumns(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
      String[] scratchColumnTypeNames = vectorizedRowBatchCtx.getScratchColumnTypeNames();
      DataTypePhysicalVariation[] scratchDataTypePhysicalVariations =
          vectorizedRowBatchCtx.getScratchDataTypePhysicalVariations();
      final int size = scratchColumnTypeNames.length;
      List<String> result = new ArrayList<String>(size);
      for (int i = 0; i < size; i++) {
        String displayString = scratchColumnTypeNames[i];
        if (scratchDataTypePhysicalVariations != null &&
            scratchDataTypePhysicalVariations[i] != DataTypePhysicalVariation.NONE) {
          displayString += "/" + scratchDataTypePhysicalVariations[i].toString();
        }
        result.add(displayString);
      }
      return result.toString();
    }

    @Explain(vectorization = Vectorization.SUMMARY, displayName = "usesVectorUDFAdaptor",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public Boolean usesVectorUDFAdaptor() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return baseWork.getUsesVectorUDFAdaptor();
    }
    public static class RowBatchContextExplainVectorization {

      private final VectorizedRowBatchCtx vectorizedRowBatchCtx;

      public RowBatchContextExplainVectorization(VectorizedRowBatchCtx vectorizedRowBatchCtx) {
        this.vectorizedRowBatchCtx = vectorizedRowBatchCtx;
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public List<String> getDataColumns() {
        return getColumns(
            vectorizedRowBatchCtx,
            0,
            vectorizedRowBatchCtx.getDataColumnCount());
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public List<String> getPartitionColumns() {
        return getColumns(
            vectorizedRowBatchCtx,
            vectorizedRowBatchCtx.getDataColumnCount(),
            vectorizedRowBatchCtx.getPartitionColumnCount());
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "includeColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public String getDataColumnNums() {
        int[] dataColumnNums = vectorizedRowBatchCtx.getDataColumnNums();
        if (dataColumnNums == null) {
          return null;
        }
        return Arrays.toString(dataColumnNums);
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "dataColumnCount",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public int getDataColumnCount() {
        return vectorizedRowBatchCtx.getDataColumnCount();
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "partitionColumnCount",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public int getPartitionColumnCount() {
        return vectorizedRowBatchCtx.getPartitionColumnCount();
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "scratchColumnTypeNames",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public String getScratchColumnTypeNames() {
        return getScratchColumns(vectorizedRowBatchCtx);
      }

      @Explain(vectorization = Vectorization.DETAIL, displayName = "neededVirtualColumns",
          explainLevels = { Level.DEFAULT, Level.EXTENDED })
      public String getNeededVirtualColumns() {
        VirtualColumn[] neededVirtualColumns = vectorizedRowBatchCtx.getNeededVirtualColumns();
        if (neededVirtualColumns == null || neededVirtualColumns.length == 0) {
          return null;
        }
        return Arrays.toString(neededVirtualColumns);
      }
    }
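
    // Returning null here hides the entire rowBatchContext section from EXPLAIN output
    // for work that was not vectorized.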
    @Explain(vectorization = Vectorization.DETAIL, displayName = "rowBatchContext",
        explainLevels = { Level.DEFAULT, Level.EXTENDED })
    public RowBatchContextExplainVectorization vectorizedRowBatchContext() {
      if (!baseWork.getVectorMode()) {
        return null;
      }
      return new RowBatchContextExplainVectorization(baseWork.getVectorizedRowBatchCtx());
    }
  }
  // ---------------------------------------------------------------------------------------------

  /**
   * @return the mapredLocalWork
   */
  @Explain(displayName = "Local Work", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
  public MapredLocalWork getMapRedLocalWork() {
    return mrLocalWork;
  }

  /**
   * @param mapLocalWork
   *          the mapredLocalWork to set
   */
  public void setMapRedLocalWork(final MapredLocalWork mapLocalWork) {
    this.mrLocalWork = mapLocalWork;
  }

  public void setUberMode(boolean uberMode) {
    this.uberMode = uberMode;
  }

  public boolean getUberMode() {
    return uberMode;
  }

  public void setLlapMode(boolean llapMode) {
    this.llapMode = llapMode;
  }

  public boolean getLlapMode() {
    return llapMode;
  }

  public int getReservedMemoryMB() {
    return reservedMemoryMB;
  }

  public void setReservedMemoryMB(int memoryMB) {
    reservedMemoryMB = memoryMB;
  }

  public abstract void configureJobConf(JobConf job);

  public void setTag(int tag) {
    this.tag = tag;
  }

  @Explain(displayName = "tag", explainLevels = { Level.USER })
  public int getTag() {
    return tag;
  }

  public void addSortCols(List<String> sortCols) {
    this.sortColNames.addAll(sortCols);
  }

  public List<String> getSortCols() {
    return sortColNames;
  }
  public Map<String, RuntimeValuesInfo> getInputSourceToRuntimeValuesInfo() {
    return inputSourceToRuntimeValuesInfo;
  }
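
  // Despite the "set" prefix, this registers a single source-to-values mapping rather than
  // replacing the whole map.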
  public void setInputSourceToRuntimeValuesInfo(
      String workName, RuntimeValuesInfo runtimeValuesInfo) {
    inputSourceToRuntimeValuesInfo.put(workName, runtimeValuesInfo);
  }
}