/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql;
import java.io.DataInput;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicInteger;
import org.antlr.runtime.TokenRewriteStream;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.ContentSummary;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.hive.common.BlobStorageUtils;
import org.apache.hadoop.hive.common.FileUtils;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.exec.TaskRunner;
import org.apache.hadoop.hive.ql.exec.Utilities;
import org.apache.hadoop.hive.ql.hooks.WriteEntity;
import org.apache.hadoop.hive.ql.lockmgr.DbTxnManager.Heartbeater;
import org.apache.hadoop.hive.ql.lockmgr.HiveLock;
import org.apache.hadoop.hive.ql.lockmgr.HiveLockObj;
import org.apache.hadoop.hive.ql.lockmgr.HiveTxnManager;
import org.apache.hadoop.hive.ql.lockmgr.LockException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.parse.ASTNode;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration;
import org.apache.hadoop.hive.ql.parse.ExplainConfiguration.AnalyzeState;
import org.apache.hadoop.hive.ql.parse.HiveParser;
import org.apache.hadoop.hive.ql.parse.QB;
import org.apache.hadoop.hive.ql.plan.LoadTableDesc;
import org.apache.hadoop.hive.ql.plan.mapper.EmptyStatsSource;
import org.apache.hadoop.hive.ql.plan.mapper.PlanMapper;
import org.apache.hadoop.hive.ql.plan.mapper.StatsSource;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.wm.WmContext;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.util.StringUtils;
import io.trino.hive.$internal.org.slf4j.Logger;
import io.trino.hive.$internal.org.slf4j.LoggerFactory;
/**
 * Context for Semantic Analyzers. Usage: not reusable; construct a new one for
 * each query. Callers should call clear() at the end of use to remove temporary folders.
 */
public class Context {
private boolean isHDFSCleanup;
private Path resFile;
private Path resDir;
private FileSystem resFs;
private static final Logger LOG = LoggerFactory.getLogger("hive.ql.Context");
private Path[] resDirPaths;
private int resDirFilesNum;
boolean initialized;
String originalTracker = null;
private CompilationOpContext opContext;
private final Map<String, ContentSummary> pathToCS = new ConcurrentHashMap<>();
// scratch path to use for all non-local (ie. hdfs) file system tmp folders
private final Path nonLocalScratchPath;
// scratch directory to use for local file system tmp folders
private final String localScratchDir;
// the permission to scratch directory (local and hdfs)
private final String scratchDirPermission;
// Keeps track of scratch directories created for different scheme/authority
private final Map<String, Path> fsScratchDirs = new HashMap<>();
private Configuration conf;
protected int pathid = 10000;
protected ExplainConfiguration explainConfig = null;
protected String cboInfo;
protected boolean cboSucceeded;
protected String cmd = "";
private TokenRewriteStream tokenRewriteStream;
// Holds the qualified name to tokenRewriteStream for the views
// referenced by the query. This is used to rewrite the view AST
// with column masking and row filtering policies.
private final Map<String, TokenRewriteStream> viewsTokenRewriteStreams;
private final String executionId;
// Some statements, e.g., UPDATE, DELETE, or MERGE, get rewritten into different
// subqueries that create new contexts. We keep them here so we can clean them
// up when we are done.
private final Set<Context> rewrittenStatementContexts;
// List of Locks for this query
protected List<HiveLock> hiveLocks;
// Transaction manager for this query
protected HiveTxnManager hiveTxnManager;
private boolean needLockMgr;
private AtomicInteger sequencer = new AtomicInteger();
private final Map<String, Table> cteTables = new HashMap<>();
// Keep track of the mapping from load table desc to the output and the lock
private final Map<LoadTableDesc, WriteEntity> loadTableOutputMap =
new HashMap<>();
private final Map<WriteEntity, List<HiveLockObj>> outputLockObjects =
new HashMap<>();
private final String stagingDir;
private Heartbeater heartbeater;
private boolean skipTableMasking;
// Identify whether the query involves an UPDATE, DELETE or MERGE
private boolean isUpdateDeleteMerge;
// Whether the analyzer has been instantiated to read and load materialized view plans
private boolean isLoadingMaterializedView;
/**
 * This determines the prefix of the
 * {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer.Phase1Ctx#dest}
 * name for a given subtree of the AST. Most of the time there is only one destination in a
 * given tree, but a multi-insert has several, and a multi-insert representing a MERGE must use
 * different prefixes to encode the purpose of the different insert branches.
 */
private Map<Integer, DestClausePrefix> insertBranchToNamePrefix = new HashMap<>();
private Operation operation = Operation.OTHER;
private WmContext wmContext;
private boolean isExplainPlan = false;
private PlanMapper planMapper = new PlanMapper();
private StatsSource statsSource;
private int executionIndex;
// Load data rewrite
private Table tempTableForLoad;
public void setOperation(Operation operation) {
this.operation = operation;
}
public WmContext getWmContext() {
return wmContext;
}
public void setWmContext(final WmContext wmContext) {
this.wmContext = wmContext;
}
/**
* These ops require special handling in various places
* (note that Insert into Acid table is in OTHER category)
*/
public enum Operation {UPDATE, DELETE, MERGE, OTHER};
public enum DestClausePrefix {
INSERT("insclause-"), UPDATE("updclause-"), DELETE("delclause-");
private final String prefix;
DestClausePrefix(String prefix) {
this.prefix = prefix;
}
@Override
public String toString() {
return prefix;
}
}
private String getMatchedText(ASTNode n) {
return getTokenRewriteStream().toString(n.getTokenStartIndex(), n.getTokenStopIndex() + 1).trim();
}
/**
* The suffix is always relative to a given ASTNode
*/
public DestClausePrefix getDestNamePrefix(ASTNode curNode, QB queryBlock) {
assert curNode != null : "must supply curNode";
if(queryBlock.isInsideView() || queryBlock.getParseInfo().getIsSubQ()) {
/**
* Views get inlined in the logical plan but not in the AST
* {@link org.apache.hadoop.hive.ql.parse.SemanticAnalyzer#replaceViewReferenceWithDefinition(QB, Table, String, String)}
* Since here we only care to identify clauses representing Update/Delete which are not
* possible inside a view/subquery, we can immediately return the default {@link DestClausePrefix.INSERT}
*/
return DestClausePrefix.INSERT;
}
if(curNode.getType() != HiveParser.TOK_INSERT_INTO) {
//select statement
assert curNode.getType() == HiveParser.TOK_DESTINATION;
if(operation == Operation.OTHER) {
//not an 'interesting' op
return DestClausePrefix.INSERT;
}
//if it is an 'interesting' op but it's a select it must be a sub-query or a derived table
//it doesn't require a special Acid code path - the rest of the code here is to ensure
//the tree structure is what we expect
boolean thisIsInASubquery = false;
parentLoop: while(curNode.getParent() != null) {
curNode = (ASTNode) curNode.getParent();
switch (curNode.getType()) {
case HiveParser.TOK_SUBQUERY_EXPR:
//this is a real subquery (foo IN (select ...))
case HiveParser.TOK_SUBQUERY:
//this is a derived table: Select * from (select a from ...)
//strictly speaking SetOps should have a TOK_SUBQUERY parent so next 6 items are redundant
case HiveParser.TOK_UNIONALL:
case HiveParser.TOK_UNIONDISTINCT:
case HiveParser.TOK_EXCEPTALL:
case HiveParser.TOK_EXCEPTDISTINCT:
case HiveParser.TOK_INTERSECTALL:
case HiveParser.TOK_INTERSECTDISTINCT:
thisIsInASubquery = true;
break parentLoop;
}
}
if(!thisIsInASubquery) {
throw new IllegalStateException("Expected '" + getMatchedText(curNode) + "' to be in sub-query or set operation.");
}
return DestClausePrefix.INSERT;
}
switch (operation) {
case OTHER:
return DestClausePrefix.INSERT;
case UPDATE:
return DestClausePrefix.UPDATE;
case DELETE:
return DestClausePrefix.DELETE;
case MERGE:
/* This is the structure expected here
HiveParser.TOK_QUERY;
HiveParser.TOK_FROM
HiveParser.TOK_INSERT;
HiveParser.TOK_INSERT_INTO;
HiveParser.TOK_INSERT;
HiveParser.TOK_INSERT_INTO;
.....*/
ASTNode insert = (ASTNode) curNode.getParent();
assert insert != null && insert.getType() == HiveParser.TOK_INSERT;
ASTNode query = (ASTNode) insert.getParent();
assert query != null && query.getType() == HiveParser.TOK_QUERY;
for(int childIdx = 1; childIdx < query.getChildCount(); childIdx++) {//1st child is TOK_FROM
assert query.getChild(childIdx).getType() == HiveParser.TOK_INSERT;
if(insert == query.getChild(childIdx)) {
DestClausePrefix prefix = insertBranchToNamePrefix.get(childIdx);
if(prefix == null) {
throw new IllegalStateException("Found a node w/o branch mapping: '" +
getMatchedText(insert) + "'");
}
return prefix;
}
}
throw new IllegalStateException("Could not locate '" + getMatchedText(insert) + "'");
default:
throw new IllegalStateException("Unexpected operation: " + operation);
}
}
/**
 * Will make SemanticAnalyzer.Phase1Ctx#dest in the subtree rooted at 'tree' use 'prefix'. This is
 * to handle a multi-insert statement that represents a MERGE statement and has insert branches
 * representing update/delete/insert.
 * @param pos ordinal index of the specific TOK_INSERT as a child of TOK_QUERY
 * @param prefix the prefix to use for that insert branch
 * @return previous prefix for 'tree' or null
 */
public DestClausePrefix addDestNamePrefix(int pos, DestClausePrefix prefix) {
return insertBranchToNamePrefix.put(pos, prefix);
}
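// Illustrative sketch (not from the Hive sources): for a MERGE rewritten into a multi-insert,
// the semantic analyzer is expected to register one DestClausePrefix per TOK_INSERT branch via
// addDestNamePrefix(), and getDestNamePrefix() later resolves an insert branch back to that
// prefix. Assuming a hypothetical MERGE whose rewritten TOK_QUERY has three insert branches
// (children 1..3 of TOK_QUERY, child 0 being TOK_FROM), the registration could look like:
//
//   ctx.setOperation(Context.Operation.MERGE);
//   ctx.addDestNamePrefix(1, Context.DestClausePrefix.INSERT);   // WHEN NOT MATCHED THEN INSERT
//   ctx.addDestNamePrefix(2, Context.DestClausePrefix.UPDATE);   // WHEN MATCHED THEN UPDATE
//   ctx.addDestNamePrefix(3, Context.DestClausePrefix.DELETE);   // WHEN MATCHED THEN DELETE
//
// The resulting Phase1Ctx#dest names then carry the "insclause-"/"updclause-"/"delclause-"
// prefixes that the Acid-specific code paths key off.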
public Context(Configuration conf) throws IOException {
this(conf, generateExecutionId());
}
/**
* Create a Context with a given executionId. ExecutionId, together with
* user name and conf, will determine the temporary directory locations.
*/
private Context(Configuration conf, String executionId) {
this.conf = conf;
this.executionId = executionId;
this.rewrittenStatementContexts = new HashSet<>();
// local & non-local tmp location is configurable. however it is the same across
// all external file systems
nonLocalScratchPath = new Path(SessionState.getHDFSSessionPath(conf), executionId);
localScratchDir = new Path(SessionState.getLocalSessionPath(conf), executionId).toUri().getPath();
scratchDirPermission = HiveConf.getVar(conf, HiveConf.ConfVars.SCRATCHDIRPERMISSION);
stagingDir = HiveConf.getVar(conf, HiveConf.ConfVars.STAGINGDIR);
opContext = new CompilationOpContext();
viewsTokenRewriteStreams = new HashMap<>();
}
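// Illustrative usage sketch (caller-side code, not part of this class): per the class Javadoc,
// a Context is not reusable and clear() must be called to remove temporary folders. Assuming a
// HiveConf that has been configured elsewhere, the lifecycle might look like:
//
//   HiveConf conf = new HiveConf();                  // assumed to be populated elsewhere
//   Context ctx = new Context(conf);                 // one Context per query
//   try {
//     Path mrTmp = ctx.getMRTmpPath();               // request scratch space as needed
//     // ... compile and run the query ...
//   } finally {
//     ctx.clear();                                   // removes scratch dirs and materialized CTEs
//   }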
protected Context(Context ctx) {
// This method creates a deep copy of context, but the copy is partial,
// hence it needs to be used carefully. In particular, following objects
// are ignored:
// opContext, pathToCS, cboInfo, cboSucceeded, tokenRewriteStream, viewsTokenRewriteStreams,
// rewrittenStatementContexts, cteTables, loadTableOutputMap, planMapper, insertBranchToNamePrefix
this.isHDFSCleanup = ctx.isHDFSCleanup;
this.resFile = ctx.resFile;
this.resDir = ctx.resDir;
this.resFs = ctx.resFs;
this.resDirPaths = ctx.resDirPaths;
this.resDirFilesNum = ctx.resDirFilesNum;
this.initialized = ctx.initialized;
this.originalTracker = ctx.originalTracker;
this.nonLocalScratchPath = ctx.nonLocalScratchPath;
this.localScratchDir = ctx.localScratchDir;
this.scratchDirPermission = ctx.scratchDirPermission;
this.fsScratchDirs.putAll(ctx.fsScratchDirs);
this.conf = ctx.conf;
this.pathid = ctx.pathid;
this.explainConfig = ctx.explainConfig;
this.cmd = ctx.cmd;
this.executionId = ctx.executionId;
this.hiveLocks = ctx.hiveLocks;
this.hiveTxnManager = ctx.hiveTxnManager;
this.needLockMgr = ctx.needLockMgr;
this.sequencer = ctx.sequencer;
this.outputLockObjects.putAll(ctx.outputLockObjects);
this.stagingDir = ctx.stagingDir;
this.heartbeater = ctx.heartbeater;
this.skipTableMasking = ctx.skipTableMasking;
this.isUpdateDeleteMerge = ctx.isUpdateDeleteMerge;
this.isLoadingMaterializedView = ctx.isLoadingMaterializedView;
this.operation = ctx.operation;
this.wmContext = ctx.wmContext;
this.isExplainPlan = ctx.isExplainPlan;
this.statsSource = ctx.statsSource;
this.executionIndex = ctx.executionIndex;
this.viewsTokenRewriteStreams = new HashMap<>();
this.rewrittenStatementContexts = new HashSet<>();
this.opContext = new CompilationOpContext();
}
public Map<String, Path> getFsScratchDirs() {
return fsScratchDirs;
}
public Map<LoadTableDesc, WriteEntity> getLoadTableOutputMap() {
return loadTableOutputMap;
}
public Map<WriteEntity, List<HiveLockObj>> getOutputLockObjects() {
return outputLockObjects;
}
/**
 * Find whether execution of the current query should be skipped because it is an explain.
 * @return true if execution should be skipped (explain without a running ANALYZE), false otherwise
 */
public boolean isExplainSkipExecution() {
return (explainConfig != null && explainConfig.getAnalyze() != AnalyzeState.RUNNING);
}
/**
* Find whether the current query is a logical explain query
*/
public boolean getExplainLogical() {
return explainConfig != null && explainConfig.isLogical();
}
public AnalyzeState getExplainAnalyze() {
if (explainConfig != null) {
return explainConfig.getAnalyze();
}
return null;
}
/**
* Set the original query command.
* @param cmd the original query command string
*/
public void setCmd(String cmd) {
this.cmd = cmd;
}
/**
* Find the original query command.
* @return the original query command string
*/
public String getCmd () {
return cmd;
}
/**
 * Gets a temporary staging directory related to a path.
 * If the path already contains a staging directory, then the existing directory is returned;
 * otherwise a new one is derived (and created if requested).
 *
 * @param inputPath path for which the staging directory is needed
 * @param mkdir create the directory if true
 * @return a temporary path
 */
private Path getStagingDir(Path inputPath, boolean mkdir) {
final URI inputPathUri = inputPath.toUri();
final String inputPathName = inputPathUri.getPath();
final String fileSystem = inputPathUri.getScheme() + ":" + inputPathUri.getAuthority();
final FileSystem fs;
try {
fs = inputPath.getFileSystem(conf);
} catch (IOException e) {
throw new IllegalStateException("Error getting FileSystem for " + inputPath + ": "+ e, e);
}
String stagingPathName;
if (inputPathName.indexOf(stagingDir) == -1) {
stagingPathName = new Path(inputPathName, stagingDir).toString();
} else {
stagingPathName = inputPathName.substring(0, inputPathName.indexOf(stagingDir) + stagingDir.length());
}
final String key = fileSystem + "-" + stagingPathName + "-" + TaskRunner.getTaskRunnerID();
Path dir = fsScratchDirs.get(key);
if (dir == null) {
// Append task specific info to stagingPathName, instead of creating a sub-directory.
// This way we don't have to worry about deleting the stagingPathName separately at
// end of query execution.
dir = fs.makeQualified(new Path(
stagingPathName + "_" + this.executionId + "-" + TaskRunner.getTaskRunnerID()));
LOG.debug("Created staging dir = " + dir + " for path = " + inputPath);
if (mkdir) {
try {
if (!FileUtils.mkdir(fs, dir, conf)) {
throw new IllegalStateException("Cannot create staging directory '" + dir.toString() + "'");
}
if (isHDFSCleanup) {
fs.deleteOnExit(dir);
}
} catch (IOException e) {
throw new RuntimeException("Cannot create staging directory '" + dir.toString() + "': " + e.getMessage(), e);
}
}
fsScratchDirs.put(key, dir);
}
return dir;
}
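// Worked example for the derivation above (hypothetical values): for inputPath
// hdfs://nn:8020/warehouse/t1 with stagingDir ".hive-staging", executionId
// "hive_2020-01-01_00-00-00_000_123" and task runner id 42, the cache key and resulting
// directory would be of the form:
//
//   key = "hdfs:nn:8020-/warehouse/t1/.hive-staging-42"
//   dir = hdfs://nn:8020/warehouse/t1/.hive-staging_hive_2020-01-01_00-00-00_000_123-42
//
// A second call for the same input path reuses the cached entry in fsScratchDirs instead of
// creating another directory.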
/**
* Get a tmp directory on specified URI
*
* @param scheme Scheme of the target FS
* @param authority Authority of the target FS
* @param mkdir create the directory if true
* @param scratchDir path of tmp directory
*/
private Path getScratchDir(String scheme, String authority,
boolean mkdir, String scratchDir) {
String fileSystem = scheme + ":" + authority;
Path dir = fsScratchDirs.get(fileSystem + "-" + TaskRunner.getTaskRunnerID());
if (dir == null) {
Path dirPath = new Path(scheme, authority,
scratchDir + "-" + TaskRunner.getTaskRunnerID());
if (mkdir) {
try {
FileSystem fs = dirPath.getFileSystem(conf);
dirPath = new Path(fs.makeQualified(dirPath).toString());
FsPermission fsPermission = new FsPermission(scratchDirPermission);
if (!fs.mkdirs(dirPath, fsPermission)) {
throw new RuntimeException("Cannot make directory: "
+ dirPath.toString());
}
if (isHDFSCleanup) {
fs.deleteOnExit(dirPath);
}
} catch (IOException e) {
throw new RuntimeException (e);
}
}
dir = dirPath;
fsScratchDirs.put(fileSystem + "-" + TaskRunner.getTaskRunnerID(), dir);
}
return dir;
}
/**
* Create a local scratch directory on demand and return it.
*/
public Path getLocalScratchDir(boolean mkdir) {
try {
FileSystem fs = FileSystem.getLocal(conf);
URI uri = fs.getUri();
return getScratchDir(uri.getScheme(), uri.getAuthority(),
mkdir, localScratchDir);
} catch (IOException e) {
throw new RuntimeException (e);
}
}
/**
* Create a map-reduce scratch directory on demand and return it.
*
*/
public Path getMRScratchDir() {
// if we are executing entirely on the client side - then
// just (re)use the local scratch directory
if(isLocalOnlyExecutionMode()) {
return getLocalScratchDir(!isExplainSkipExecution());
}
try {
Path dir = FileUtils.makeQualified(nonLocalScratchPath, conf);
URI uri = dir.toUri();
Path newScratchDir = getScratchDir(uri.getScheme(), uri.getAuthority(),
!isExplainSkipExecution(), uri.getPath());
LOG.info("New scratch dir is " + newScratchDir);
return newScratchDir;
} catch (IOException e) {
throw new RuntimeException(e);
} catch (IllegalArgumentException e) {
throw new RuntimeException("Error while making MR scratch "
+ "directory - check filesystem config (" + e.getCause() + ")", e);
}
}
/**
* Create a temporary directory depending on the path specified.
* - If path is an Object store filesystem, then use the default MR scratch directory (HDFS), unless isFinalJob and
* {@link BlobStorageUtils#areOptimizationsEnabled(Configuration)} are both true, then return a path on
* the blobstore.
* - If path is on HDFS, then create a staging directory inside the path
*
* @param path Path used to verify the Filesystem to use for temporary directory
*
* @return A path to the new temporary directory
*/
public Path getTempDirForInterimJobPath(Path path) {
// For better write performance, we use HDFS for temporary data when object store is used.
// Note that the scratch directory configuration variable must use HDFS or any other
// non-blobstorage system to take advantage of this performance.
boolean isBlobstorageOptimized = BlobStorageUtils.isBlobStoragePath(conf, path)
&& !BlobStorageUtils.isBlobStorageAsScratchDir(conf) && BlobStorageUtils.areOptimizationsEnabled(conf);
if (isPathLocal(path) || isBlobstorageOptimized) {
return getMRTmpPath();
}
return getExtTmpPathRelTo(path);
}
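// Illustrative decision sketch for the logic above (hypothetical paths, default configuration):
//
//   - file:///tmp/out             -> isPathLocal() is true           -> getMRTmpPath()
//   - s3a://bucket/warehouse/t1   -> blobstore path, optimizations on,
//                                    scratch dir not on blobstore    -> getMRTmpPath() (HDFS scratch)
//   - hdfs://nn:8020/warehouse/t1 (or a blobstore path with
//     optimizations off)          -> getExtTmpPathRelTo(path), i.e. a staging dir next to the target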
/**
* Create a temporary directory depending on the path specified.
* - If path is an Object store filesystem, then use the default MR scratch directory (HDFS)
* - If path is on HDFS, then create a staging directory inside the path
*
* @param path Path used to verify the Filesystem to use for temporary directory
* @return A path to the new temporary directory
*/
public Path getTempDirForFinalJobPath(Path path) {
return getExtTmpPathRelTo(path);
}
/*
* Checks if the path is for the local filesystem or not
*/
private boolean isPathLocal(Path path) {
boolean isLocal = false;
if (path != null) {
String scheme = path.toUri().getScheme();
if (scheme != null) {
isLocal = scheme.equals(Utilities.HADOOP_LOCAL_FS_SCHEME);
}
}
return isLocal;
}
private Path getExternalScratchDir(URI extURI) {
return getStagingDir(new Path(extURI.getScheme(), extURI.getAuthority(), extURI.getPath()), !isExplainSkipExecution());
}
/**
* Remove any created scratch directories.
*/
public void removeScratchDir() {
for (Map.Entry<String, Path> entry : fsScratchDirs.entrySet()) {
try {
Path p = entry.getValue();
FileSystem fs = p.getFileSystem(conf);
LOG.debug("Deleting scratch dir: {}", p);
fs.delete(p, true);
fs.cancelDeleteOnExit(p);
} catch (Exception e) {
LOG.warn("Error Removing Scratch: "
+ StringUtils.stringifyException(e));
}
}
fsScratchDirs.clear();
}
/**
* Remove any created directories for CTEs.
*/
public void removeMaterializedCTEs() {
// clean CTE tables
for (Table materializedTable : cteTables.values()) {
Path location = materializedTable.getDataLocation();
try {
FileSystem fs = location.getFileSystem(conf);
boolean status = fs.delete(location, true);
LOG.info("Removed " + location + " for materialized "
+ materializedTable.getTableName() + ", status=" + status);
} catch (IOException e) {
// ignore
LOG.warn("Error removing " + location + " for materialized " + materializedTable.getTableName() +
": " + StringUtils.stringifyException(e));
}
}
cteTables.clear();
}
private String nextPathId() {
return Integer.toString(pathid++);
}
private static final String MR_PREFIX = "-mr-";
public static final String EXT_PREFIX = "-ext-";
private static final String LOCAL_PREFIX = "-local-";
/**
* Check if path is for intermediate data
* @return true if a uri is a temporary uri for map-reduce intermediate data,
* false otherwise
*/
public boolean isMRTmpFileURI(String uriStr) {
return (uriStr.indexOf(executionId) != -1) &&
(uriStr.indexOf(MR_PREFIX) != -1);
}
public Path getMRTmpPath(URI uri) {
return new Path(getStagingDir(new Path(uri), !isExplainSkipExecution()), MR_PREFIX + nextPathId());
}
/**
* Get a path to store map-reduce intermediate data in.
*
* @return next available path for map-red intermediate data
*/
public Path getMRTmpPath() {
return new Path(getMRScratchDir(), MR_PREFIX +
nextPathId());
}
/**
* Get a tmp path on local host to store intermediate data.
*
* @return next available tmp path on local fs
*/
public Path getLocalTmpPath() {
return new Path(getLocalScratchDir(true), LOCAL_PREFIX + nextPathId());
}
/**
* Get a path to store tmp data destined for external Path.
*
* @param path external Path to which the tmp data has to be eventually moved
* @return next available tmp path on the file system corresponding to extURI
*/
public Path getExternalTmpPath(Path path) {
URI extURI = path.toUri();
if (extURI.getScheme().equals("viewfs")) {
// if we are on viewfs we don't want to use /tmp as tmp dir since rename from /tmp/..
// to final /user/hive/warehouse/ will fail later, so instead pick tmp dir
// on same namespace as tbl dir.
return getExtTmpPathRelTo(path.getParent());
}
return new Path(getExternalScratchDir(extURI), EXT_PREFIX +
nextPathId());
}
/**
 * This is similar to getExternalTmpPath(), the difference being that this method returns a temp
 * path within the passed-in path, whereas getExternalTmpPath() ignores the passed-in path and
 * returns a temp path within /tmp.
 */
public Path getExtTmpPathRelTo(Path path) {
return new Path(getStagingDir(path, !isExplainSkipExecution()), EXT_PREFIX + nextPathId());
}
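// Illustrative path shapes (hypothetical scratch locations): the MR_PREFIX / EXT_PREFIX /
// LOCAL_PREFIX markers plus the monotonically increasing pathid (starting at 10000) make every
// tmp path unique within one executionId, which is also what isMRTmpFileURI() keys off:
//
//   ctx.getMRTmpPath()        -> <hdfs scratch dir>/-mr-10000
//   ctx.getLocalTmpPath()     -> <local scratch dir>/-local-10001
//   ctx.getExternalTmpPath(p) -> <staging dir for p>/-ext-10002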
/**
* @return the resFile
*/
public Path getResFile() {
return resFile;
}
/**
* @param resFile
* the resFile to set
*/
public void setResFile(Path resFile) {
this.resFile = resFile;
resDir = null;
resDirPaths = null;
resDirFilesNum = 0;
}
/**
* @return the resDir
*/
public Path getResDir() {
return resDir;
}
/**
* @param resDir
* the resDir to set
*/
public void setResDir(Path resDir) {
this.resDir = resDir;
resFile = null;
resDirFilesNum = 0;
resDirPaths = null;
}
public void clear() throws IOException {
// First clear the other contexts created by this query
for (Context subContext : rewrittenStatementContexts) {
subContext.clear();
}
// Then clear this context
if (resDir != null) {
try {
FileSystem fs = resDir.getFileSystem(conf);
LOG.debug("Deleting result dir: {}", resDir);
fs.delete(resDir, true);
} catch (IOException e) {
LOG.info("Context clear error: " + StringUtils.stringifyException(e));
}
}
if (resFile != null) {
try {
FileSystem fs = resFile.getFileSystem(conf);
LOG.debug("Deleting result file: {}", resFile);
fs.delete(resFile, false);
} catch (IOException e) {
LOG.info("Context clear error: " + StringUtils.stringifyException(e));
}
}
removeMaterializedCTEs();
removeScratchDir();
originalTracker = null;
setNeedLockMgr(false);
}
public DataInput getStream() {
try {
if (!initialized) {
initialized = true;
if ((resFile == null) && (resDir == null)) {
return null;
}
if (resFile != null) {
return resFile.getFileSystem(conf).open(resFile);
}
resFs = resDir.getFileSystem(conf);
FileStatus status = resFs.getFileStatus(resDir);
assert status.isDir();
FileStatus[] resDirFS = resFs.globStatus(new Path(resDir + "/*"), FileUtils.HIDDEN_FILES_PATH_FILTER);
resDirPaths = new Path[resDirFS.length];
int pos = 0;
for (FileStatus resFS : resDirFS) {
if (!resFS.isDir()) {
resDirPaths[pos++] = resFS.getPath();
}
}
if (pos == 0) {
return null;
}
return resFs.open(resDirPaths[resDirFilesNum++]);
} else {
return getNextStream();
}
} catch (FileNotFoundException e) {
LOG.info("getStream error: " + StringUtils.stringifyException(e));
return null;
} catch (IOException e) {
LOG.info("getStream error: " + StringUtils.stringifyException(e));
return null;
}
}
private DataInput getNextStream() {
try {
if (resDir != null && resDirFilesNum < resDirPaths.length
&& (resDirPaths[resDirFilesNum] != null)) {
return resFs.open(resDirPaths[resDirFilesNum++]);
}
} catch (FileNotFoundException e) {
LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
return null;
} catch (IOException e) {
LOG.info("getNextStream error: " + StringUtils.stringifyException(e));
return null;
}
return null;
}
public void resetStream() {
if (initialized) {
resDirFilesNum = 0;
initialized = false;
}
}
/**
* Little abbreviation for StringUtils.
*/
private static boolean strEquals(String str1, String str2) {
return io.trino.hive.$internal.org.apache.commons.lang.StringUtils.equals(str1, str2);
}
/**
* Set the token rewrite stream being used to parse the current top-level SQL
* statement. Note that this should not be used for other parsing
* activities; for example, when we encounter a reference to a view, we switch
* to a new stream for parsing the stored view definition from the catalog,
* but we don't clobber the top-level stream in the context.
*
* @param tokenRewriteStream
* the stream being used
*/
public void setTokenRewriteStream(TokenRewriteStream tokenRewriteStream) {
assert (this.tokenRewriteStream == null || this.getExplainAnalyze() == AnalyzeState.RUNNING);
this.tokenRewriteStream = tokenRewriteStream;
}
/**
* @return the token rewrite stream being used to parse the current top-level
* SQL statement, or null if it isn't available (e.g. for parser
* tests)
*/
public TokenRewriteStream getTokenRewriteStream() {
return tokenRewriteStream;
}
public void addViewTokenRewriteStream(String viewFullyQualifiedName,
TokenRewriteStream tokenRewriteStream) {
viewsTokenRewriteStreams.put(viewFullyQualifiedName, tokenRewriteStream);
}
public TokenRewriteStream getViewTokenRewriteStream(String viewFullyQualifiedName) {
return viewsTokenRewriteStreams.get(viewFullyQualifiedName);
}
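// Illustrative sketch (names are hypothetical): when a query references a view, the analyzer
// keeps the top-level rewrite stream untouched and registers a separate stream for the view so
// that masking/row-filter rewrites can be applied to the view AST independently:
//
//   ctx.setTokenRewriteStream(topLevelStream);                     // stream for the user query
//   ctx.addViewTokenRewriteStream("db.masked_view", viewStream);   // stream for the view text
//   TokenRewriteStream s = ctx.getViewTokenRewriteStream("db.masked_view");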
/**
* Generate a unique executionId. An executionId, together with user name and
* the configuration, will determine the temporary locations of all intermediate
* files.
*
* In the future, users can use the executionId to resume a query.
*/
public static String generateExecutionId() {
Random rand = new Random();
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd_HH-mm-ss_SSS");
String executionId = "hive_" + format.format(new Date()) + "_"
+ Math.abs(rand.nextLong());
return executionId;
}
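// Example of the generated id shape (the timestamp and random suffix are run-dependent):
//
//   hive_2024-05-17_13-45-12_987_6407970393203186768
//
// i.e. "hive_" + yyyy-MM-dd_HH-mm-ss_SSS + "_" + a non-negative random long.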
/**
 * Does Hive want to run tasks entirely on the local machine
 * (where the query is being compiled)?
 *
 * Today this translates into running hadoop jobs locally.
 */
public boolean isLocalOnlyExecutionMode() {
// Always allow spark to run in a cluster mode. Without this, depending on
// user's local hadoop settings, true may be returned, which causes plan to be
// stored in local path.
if (HiveConf.getVar(conf, HiveConf.ConfVars.HIVE_EXECUTION_ENGINE).equals("spark")) {
return false;
}
return ShimLoader.getHadoopShims().isLocalMode(conf);
}
public List<HiveLock> getHiveLocks() {
return hiveLocks;
}
public void setHiveLocks(List<HiveLock> hiveLocks) {
this.hiveLocks = hiveLocks;
}
public HiveTxnManager getHiveTxnManager() {
return hiveTxnManager;
}
public void setHiveTxnManager(HiveTxnManager txnMgr) {
hiveTxnManager = txnMgr;
}
public void setOriginalTracker(String originalTracker) {
this.originalTracker = originalTracker;
}
public void restoreOriginalTracker() {
if (originalTracker != null) {
ShimLoader.getHadoopShims().setJobLauncherRpcAddress(conf, originalTracker);
originalTracker = null;
}
}
public void addRewrittenStatementContext(Context context) {
rewrittenStatementContexts.add(context);
}
public void addCS(String path, ContentSummary cs) {
pathToCS.put(path, cs);
}
public ContentSummary getCS(Path path) {
return getCS(path.toString());
}
public ContentSummary getCS(String path) {
return pathToCS.get(path);
}
public Map<String, ContentSummary> getPathToCS() {
return pathToCS;
}
public Configuration getConf() {
return conf;
}
/**
* @return the isHDFSCleanup
*/
public boolean isHDFSCleanup() {
return isHDFSCleanup;
}
/**
* @param isHDFSCleanup the isHDFSCleanup to set
*/
public void setHDFSCleanup(boolean isHDFSCleanup) {
this.isHDFSCleanup = isHDFSCleanup;
}
public boolean isNeedLockMgr() {
return needLockMgr;
}
public void setNeedLockMgr(boolean needLockMgr) {
this.needLockMgr = needLockMgr;
}
public String getCboInfo() {
return cboInfo;
}
public void setCboInfo(String cboInfo) {
this.cboInfo = cboInfo;
}
public boolean isCboSucceeded() {
return cboSucceeded;
}
public void setCboSucceeded(boolean cboSucceeded) {
this.cboSucceeded = cboSucceeded;
}
public Table getMaterializedTable(String cteName) {
return cteTables.get(cteName);
}
public void addMaterializedTable(String cteName, Table table) {
cteTables.put(cteName, table);
}
public AtomicInteger getSequencer() {
return sequencer;
}
public CompilationOpContext getOpContext() {
return opContext;
}
public void setOpContext(CompilationOpContext opContext) {
this.opContext = opContext;
}
public Heartbeater getHeartbeater() {
return heartbeater;
}
public void setHeartbeater(Heartbeater heartbeater) {
this.heartbeater = heartbeater;
}
public void checkHeartbeaterLockException() throws LockException {
if (getHeartbeater() != null && getHeartbeater().getLockException() != null) {
throw getHeartbeater().getLockException();
}
}
public boolean isSkipTableMasking() {
return skipTableMasking;
}
public void setSkipTableMasking(boolean skipTableMasking) {
this.skipTableMasking = skipTableMasking;
}
public ExplainConfiguration getExplainConfig() {
return explainConfig;
}
public boolean isExplainPlan() {
return isExplainPlan;
}
public void setExplainPlan(boolean t) {
this.isExplainPlan = t;
}
public void setExplainConfig(ExplainConfiguration explainConfig) {
this.explainConfig = explainConfig;
}
public void resetOpContext() {
opContext = new CompilationOpContext();
sequencer = new AtomicInteger();
}
public boolean getIsUpdateDeleteMerge() {
return isUpdateDeleteMerge;
}
public void setIsUpdateDeleteMerge(boolean isUpdate) {
this.isUpdateDeleteMerge = isUpdate;
}
public boolean isLoadingMaterializedView() {
return isLoadingMaterializedView;
}
public void setIsLoadingMaterializedView(boolean isLoadingMaterializedView) {
this.isLoadingMaterializedView = isLoadingMaterializedView;
}
public String getExecutionId() {
return executionId;
}
public PlanMapper getPlanMapper() {
return planMapper;
}
public void setStatsSource(StatsSource statsSource) {
this.statsSource = statsSource;
}
public StatsSource getStatsSource() {
if (statsSource != null) {
return statsSource;
} else {
// hierarchical; add def stats also here
return EmptyStatsSource.INSTANCE;
}
}
public int getExecutionIndex() {
return executionIndex;
}
public void setExecutionIndex(int executionIndex) {
this.executionIndex = executionIndex;
}
public void setConf(HiveConf conf) {
this.conf = conf;
}
public Table getTempTableForLoad() {
return tempTableForLoad;
}
public void setTempTableForLoad(Table tempTableForLoad) {
this.tempTableForLoad = tempTableForLoad;
}
}