org.apache.hadoop.hive.ql.exec.MapredLocalTask (hive-exec)
Hive is a data warehouse infrastructure built on top of Hadoop; see
http://wiki.apache.org/hadoop/Hive
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hive.ql.exec;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.Serializable;
import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import org.apache.commons.lang.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.FileUtil;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.common.io.CachingPrintStream;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hadoop.hive.ql.Context;
import org.apache.hadoop.hive.ql.DriverContext;
import org.apache.hadoop.hive.ql.QueryPlan;
import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter;
import org.apache.hadoop.hive.ql.exec.persistence.AbstractMapJoinKey;
import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.plan.BucketMapJoinContext;
import org.apache.hadoop.hive.ql.plan.FetchWork;
import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
import org.apache.hadoop.hive.ql.plan.OperatorDesc;
import org.apache.hadoop.hive.ql.plan.api.StageType;
import org.apache.hadoop.hive.ql.session.SessionState;
import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.shims.HadoopShims;
import org.apache.hadoop.hive.shims.ShimLoader;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.util.ReflectionUtils;
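/**
 * MapredLocalTask runs the local ("small table") side of a map join. On the parent side,
 * execute() spawns a child JVM via "hadoop jar ... ExecDriver -localtask"; inside that JVM,
 * executeFromChildJVM() fetches the small-table rows and forwards them through the operator
 * tree (ending in a HashTableSinkOperator) so the resulting hash tables can be dumped to
 * local files for the subsequent map-join job.
 */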
public class MapredLocalTask extends Task<MapredLocalWork> implements Serializable {
private Map<String, FetchOperator> fetchOperators;
protected HadoopJobExecHelper jobExecHelper;
private JobConf job;
public static transient final Log l4j = LogFactory.getLog(MapredLocalTask.class);
static final String HADOOP_MEM_KEY = "HADOOP_HEAPSIZE";
static final String HADOOP_OPTS_KEY = "HADOOP_OPTS";
static final String[] HIVE_SYS_PROP = {"build.dir", "build.dir.hive"};
public static MemoryMXBean memoryMXBean;
private static final Log LOG = LogFactory.getLog(MapredLocalTask.class);
// not sure we need this exec context; but all the operators in the work
// will pass this context through
private final ExecMapperContext execContext = new ExecMapperContext();
private Process executor;
public MapredLocalTask() {
super();
}
public MapredLocalTask(MapredLocalWork plan, JobConf job, boolean isSilent) throws HiveException {
setWork(plan);
this.job = job;
console = new LogHelper(LOG, isSilent);
}
@Override
public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext driverContext) {
super.initialize(conf, queryPlan, driverContext);
job = new JobConf(conf, ExecDriver.class);
//we don't use the HadoopJobExecHooks for local tasks
this.jobExecHelper = new HadoopJobExecHelper(job, console, this, null);
}
public static String now() {
Calendar cal = Calendar.getInstance();
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
return sdf.format(cal.getTime());
}
@Override
public boolean requireLock() {
return true;
}
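// Builds and launches the child JVM that runs the local work. The assembled command
// (paths illustrative) looks roughly like:
//   <hadoop bin> jar <hive-exec jar> org.apache.hadoop.hive.ql.exec.ExecDriver \
//       -localtask -plan <local tmp dir>/plan.xml [-nolog] <generated hive conf args>
// Added resource files are symlinked into the working directory, selected environment
// variables (heap size, HADOOP_OPTS, HADOOP_USER_NAME, debug settings) are passed down,
// and the child's stdout/stderr are streamed back while progressLocal() waits for it.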
@Override
public int execute(DriverContext driverContext) {
try {
// generate the cmd line to run in the child jvm
Context ctx = driverContext.getCtx();
String hiveJar = conf.getJar();
String hadoopExec = conf.getVar(HiveConf.ConfVars.HADOOPBIN);
String libJarsOption;
// write out the plan to a local file
Path planPath = new Path(ctx.getLocalTmpFileURI(), "plan.xml");
OutputStream out = FileSystem.getLocal(conf).create(planPath);
MapredLocalWork plan = getWork();
LOG.info("Generating plan file " + planPath.toString());
Utilities.serializeMapRedLocalWork(plan, out);
String isSilent = "true".equalsIgnoreCase(System.getProperty("test.silent")) ? "-nolog" : "";
String jarCmd;
jarCmd = hiveJar + " " + ExecDriver.class.getName();
String hiveConfArgs = ExecDriver.generateCmdLine(conf, ctx);
String cmdLine = hadoopExec + " jar " + jarCmd + " -localtask -plan " + planPath.toString()
+ " " + isSilent + " " + hiveConfArgs;
String workDir = (new File(".")).getCanonicalPath();
String files = Utilities.getResourceFiles(conf, SessionState.ResourceType.FILE);
if (!files.isEmpty()) {
cmdLine = cmdLine + " -files " + files;
workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath();
if (!(new File(workDir)).mkdir()) {
throw new IOException("Cannot create tmp working dir: " + workDir);
}
for (String f : StringUtils.split(files, ',')) {
Path p = new Path(f);
String target = p.toUri().getPath();
String link = workDir + Path.SEPARATOR + p.getName();
if (FileUtil.symLink(target, link) != 0) {
throw new IOException("Cannot link to added file: " + target + " from: " + link);
}
}
}
LOG.info("Executing: " + cmdLine);
// Inherit Java system variables
String hadoopOpts;
StringBuilder sb = new StringBuilder();
Properties p = System.getProperties();
for (String element : HIVE_SYS_PROP) {
if (p.containsKey(element)) {
sb.append(" -D" + element + "=" + p.getProperty(element));
}
}
hadoopOpts = sb.toString();
// Inherit the environment variables
String[] env;
Map<String, String> variables = new HashMap<String, String>(System.getenv());
// The user can specify the hadoop memory
// if ("local".equals(conf.getVar(HiveConf.ConfVars.HADOOPJT))) {
// if we are running in local mode - then the amount of memory used
// by the child jvm can no longer default to the memory used by the
// parent jvm
// int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
int hadoopMem = conf.getIntVar(HiveConf.ConfVars.HIVEHADOOPMAXMEM);
if (hadoopMem == 0) {
// remove env var that would default child jvm to use parent's memory
// as default. child jvm would use default memory for a hadoop client
variables.remove(HADOOP_MEM_KEY);
} else {
// user specified the memory for local mode hadoop run
console.printInfo(" set heap size\t" + hadoopMem + "MB");
variables.put(HADOOP_MEM_KEY, String.valueOf(hadoopMem));
}
// } else {
// nothing to do - we are not running in local mode - only submitting
// the job via a child process. in this case it's appropriate that the
// child jvm use the same memory as the parent jvm
// }
//Set HADOOP_USER_NAME env variable for child process, so that
// it also runs with hadoop permissions for the user the job is running as
// This will be used by hadoop only in unsecure(/non kerberos) mode
HadoopShims shim = ShimLoader.getHadoopShims();
String endUserName = shim.getShortUserName(shim.getUGIForConf(job));
console.printInfo("setting HADOOP_USER_NAME\t" + endUserName);
variables.put("HADOOP_USER_NAME", endUserName);
if (variables.containsKey(HADOOP_OPTS_KEY)) {
variables.put(HADOOP_OPTS_KEY, variables.get(HADOOP_OPTS_KEY) + hadoopOpts);
} else {
variables.put(HADOOP_OPTS_KEY, hadoopOpts);
}
if(variables.containsKey(MapRedTask.HIVE_DEBUG_RECURSIVE)) {
MapRedTask.configureDebugVariablesForChildJVM(variables);
}
env = new String[variables.size()];
int pos = 0;
for (Map.Entry<String, String> entry : variables.entrySet()) {
String name = entry.getKey();
String value = entry.getValue();
env[pos++] = name + "=" + value;
}
// Run ExecDriver in another JVM
executor = Runtime.getRuntime().exec(cmdLine, env, new File(workDir));
CachingPrintStream errPrintStream = new CachingPrintStream(System.err);
StreamPrinter outPrinter = new StreamPrinter(executor.getInputStream(), null, System.out);
StreamPrinter errPrinter = new StreamPrinter(executor.getErrorStream(), null, errPrintStream);
outPrinter.start();
errPrinter.start();
int exitVal = jobExecHelper.progressLocal(executor, getId());
if (exitVal != 0) {
LOG.error("Execution failed with exit status: " + exitVal);
if (SessionState.get() != null) {
SessionState.get().addLocalMapRedErrors(getId(), errPrintStream.getOutput());
}
} else {
LOG.info("Execution completed successfully");
console.printInfo("Mapred Local Task Succeeded . Convert the Join into MapJoin");
}
return exitVal;
} catch (Exception e) {
e.printStackTrace();
LOG.error("Exception: " + e.getMessage());
return (1);
}
}
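// Entry point inside the child JVM (reached via "ExecDriver -localtask"). Creates a
// FetchOperator per small-table alias and forwards the fetched rows through the
// corresponding operator trees; for bucketed map joins the forward pass is repeated
// once per big-table bucket file. Returns 0 on success, -1 if there is no local work,
// 3 on an out-of-memory condition, and 2 on any other failure.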
public int executeFromChildJVM(DriverContext driverContext) {
// check the local work
if (work == null) {
return -1;
}
memoryMXBean = ManagementFactory.getMemoryMXBean();
long startTime = System.currentTimeMillis();
console.printInfo(Utilities.now()
+ "\tStarting to launch local task to process map join;\tmaximum memory = "
+ memoryMXBean.getHeapMemoryUsage().getMax());
fetchOperators = new HashMap<String, FetchOperator>();
Map<FetchOperator, JobConf> fetchOpJobConfMap = new HashMap<FetchOperator, JobConf>();
execContext.setJc(job);
// set the local work, so all the operator can get this context
execContext.setLocalWork(work);
boolean inputFileChangeSenstive = work.getInputFileChangeSensitive();
try {
initializeOperators(fetchOpJobConfMap);
// for each big table's bucket, call the start forward
if (inputFileChangeSenstive) {
for (Map<String, ArrayList<String>> bigTableBucketFiles : work
.getBucketMapjoinContext().getAliasBucketFileNameMapping().values()) {
for (String bigTableBucket : bigTableBucketFiles.keySet()) {
startForward(inputFileChangeSenstive, bigTableBucket);
}
}
} else {
startForward(inputFileChangeSenstive, null);
}
long currentTime = System.currentTimeMillis();
long elapsed = currentTime - startTime;
console.printInfo(Utilities.now() + "\tEnd of local task; Time Taken: "
+ Utilities.showTime(elapsed) + " sec.");
} catch (Throwable e) {
if (e instanceof OutOfMemoryError
|| (e instanceof HiveException && e.getMessage().equals("RunOutOfMeomoryUsage"))) {
// Don't create a new object if we are already out of memory
return 3;
} else {
l4j.error("Hive Runtime Error: Map local work failed");
e.printStackTrace();
return 2;
}
}
return 0;
}
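// Fetches rows for every small-table alias and pushes them through its operator tree.
// For a bucketed map join the fetch context is first re-pointed at the bucket files
// matching the given big-table bucket; an empty table short-circuits to
// generateDummyHashTable() so a dump file still exists for that alias.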
private void startForward(boolean inputFileChangeSenstive, String bigTableBucket)
throws Exception {
for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
int fetchOpRows = 0;
String alias = entry.getKey();
FetchOperator fetchOp = entry.getValue();
if (inputFileChangeSenstive) {
fetchOp.clearFetchContext();
setUpFetchOpContext(fetchOp, alias, bigTableBucket);
}
if (fetchOp.isEmptyTable()) {
//generate empty hashtable for empty table
this.generateDummyHashTable(alias, bigTableBucket);
continue;
}
// get the root operator
Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
// walk through the operator tree
while (true) {
InspectableObject row = fetchOp.getNextRow();
if (row == null) {
if (inputFileChangeSenstive) {
execContext.setCurrentBigBucketFile(bigTableBucket);
forwardOp.reset();
}
forwardOp.close(false);
break;
}
fetchOpRows++;
forwardOp.process(row.o, 0);
// check if any operator had a fatal error or early exit during
// execution
if (forwardOp.getDone()) {
// ExecMapper.setDone(true);
break;
}
}
}
}
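// Creates one FetchOperator per alias, pushing needed-column projection into a cloned
// JobConf when the alias' root is a TableScanOperator, then initializes each forward
// operator with the output ObjectInspector of its FetchOperator.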
private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap)
throws HiveException {
// this mapper operator is used to initialize all the operators
for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
JobConf jobClone = new JobConf(job);
Operator<? extends OperatorDesc> tableScan =
work.getAliasToWork().get(entry.getKey());
boolean setColumnsNeeded = false;
if (tableScan instanceof TableScanOperator) {
ArrayList<Integer> list = ((TableScanOperator) tableScan).getNeededColumnIDs();
if (list != null) {
ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
setColumnsNeeded = true;
}
}
if (!setColumnsNeeded) {
ColumnProjectionUtils.setFullyReadColumns(jobClone);
}
// create a fetch operator
FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
fetchOpJobConfMap.put(fetchOp, jobClone);
fetchOperators.put(entry.getKey(), fetchOp);
l4j.info("fetchoperator for " + entry.getKey() + " created");
}
// initialize all forward operators
for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
// get the forward op
String alias = entry.getKey();
Operator<? extends OperatorDesc> forwardOp = work.getAliasToWork().get(alias);
// put the exec context into all the operators
forwardOp.setExecContext(execContext);
// All the operators need to be initialized before process
FetchOperator fetchOp = entry.getValue();
JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
if (jobConf == null) {
jobConf = job;
}
// initialize the forward operator
ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
forwardOp.initialize(jobConf, new ObjectInspector[] {objectInspector});
l4j.info("fetchoperator for " + entry.getKey() + " initialized");
}
}
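// For an empty small table: walk from the alias' root operator down to the
// HashTableSinkOperator, derive the join tag from the parent's position, and flush an
// empty in-memory hash table to the dump-file path the map-join job will look for.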
private void generateDummyHashTable(String alias, String bigBucketFileName) throws HiveException,IOException {
// find the (byte)tag for the map join(HashTableSinkOperator)
Operator<? extends OperatorDesc> parentOp = work.getAliasToWork().get(alias);
Operator<? extends OperatorDesc> childOp = parentOp.getChildOperators().get(0);
while ((childOp != null) && (!(childOp instanceof HashTableSinkOperator))) {
parentOp = childOp;
assert parentOp.getChildOperators().size() == 1;
childOp = parentOp.getChildOperators().get(0);
}
if (childOp == null) {
throw new HiveException(
"Cannot find HashTableSink op by tracing down the table scan operator tree");
}
byte tag = (byte) childOp.getParentOperators().indexOf(parentOp);
// generate empty hashtable for this (byte)tag
String tmpURI = this.getWork().getTmpFileURI();
HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue> hashTable =
new HashMapWrapper<AbstractMapJoinKey, MapJoinObjectValue>();
String fileName = work.getBucketFileName(bigBucketFileName);
HashTableSinkOperator htso = (HashTableSinkOperator)childOp;
String tmpURIPath = Utilities.generatePath(tmpURI, htso.getConf().getDumpFilePrefix(),
tag, fileName);
console.printInfo(Utilities.now() + "\tDump the hashtable into file: " + tmpURIPath);
Path path = new Path(tmpURIPath);
FileSystem fs = path.getFileSystem(job);
File file = new File(path.toUri().getPath());
fs.create(path);
long fileLength = hashTable.flushMemoryCacheToPersistent(file);
console.printInfo(Utilities.now() + "\tUpload 1 File to: " + tmpURIPath + " File size: "
+ fileLength);
hashTable.close();
}
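// Points the fetch operator at the small-table bucket files that correspond to the
// current big-table bucket, as resolved by the configured BucketMatcher.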
private void setUpFetchOpContext(FetchOperator fetchOp, String alias, String currentInputFile)
throws Exception {
BucketMapJoinContext bucketMatcherCxt = this.work.getBucketMapjoinContext();
Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass();
BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(bucketMatcherCls,
null);
bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping());
List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile, bucketMatcherCxt
.getMapJoinBigTableAlias(), alias);
fetchOp.setupContext(aliasFiles);
}
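// No-op: no map-reduce intermediate files to localize for a local-only task.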
@Override
public void localizeMRTmpFilesImpl(Context ctx) {
}
@Override
public boolean isMapRedLocalTask() {
return true;
}
@Override
public Collection<Operator<? extends OperatorDesc>> getTopOperators() {
return getWork().getAliasToWork().values();
}
@Override
public String getName() {
return "MAPREDLOCAL";
}
@Override
public StageType getType() {
//assert false;
return StageType.MAPREDLOCAL;
}
@Override
public void shutdown() {
super.shutdown();
if (executor != null) {
executor.destroy();
executor = null;
}
}
}