// com.marklogic.contentpump.LocalJobRunner (source listing; originally rendered with "Maven / Gradle / Ivy" site chrome)
/*
* Copyright (c) 2021 MarkLogic Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.marklogic.contentpump;
import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.mapreduce.CounterGroup;
import org.apache.hadoop.mapreduce.InputFormat;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobID;
import org.apache.hadoop.mapreduce.JobStatus;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.OutputCommitter;
import org.apache.hadoop.mapreduce.OutputFormat;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.TaskAttemptID;
import org.apache.hadoop.mapreduce.TaskID;
import org.apache.hadoop.mapreduce.TaskType;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringUtils;
import com.marklogic.contentpump.utilities.ReflectionUtil;
/**
* Runs a job in-process, potentially multi-threaded. Only supports map-only
* jobs.
*
* @author jchen
*
*/
public class LocalJobRunner implements ConfigConstants {
public static final Log LOG = LogFactory.getLog(LocalJobRunner.class);
// The in-process job this runner executes.
private LocalJob job;
// Per-split progress counters, indexed by split number; written by the
// map tasks and read by the Monitor thread.
private AtomicInteger[] progress;
// Wall-clock start of the run, used to report total execution time.
private long startTime;
// Collects counters from all map tasks for the end-of-job report.
private ContentPumpReporter reporter;
// Owns the worker thread pool and command-line driven thread settings.
private ThreadManager threadManager;
// Worker pool obtained from threadManager; null means single-threaded.
private ThreadPoolExecutor pool;
public LocalJobRunner(LocalJob job, CommandLine cmdline, Command cmd) {
this.job = job;
this.threadManager = job.getThreadManager();
threadManager.parseCmdlineOptions(cmdline, cmd);
startTime = System.currentTimeMillis();
}
/**
* Run the job. Get the input splits, create map tasks and submit it to
* the thread pool if there is one; otherwise, runs the the task one by
* one.
*
* @param
* @param
* @param
* @param
* @throws Exception
*/
@SuppressWarnings("unchecked")
public
void run() throws Exception {
Configuration conf = job.getConfiguration();
reporter = new ContentPumpReporter();
InputFormat inputFormat =
(InputFormat)ReflectionUtils.newInstance(
job.getInputFormatClass(), conf);
List splits;
T[] array;
try {
splits = inputFormat.getSplits(job);
array = (T[])splits.toArray(
new org.apache.hadoop.mapreduce.InputSplit[splits.size()]);
// sort the splits into order based on size, so that the biggest
// goes first
Arrays.sort(array, new SplitLengthComparator());
} catch (Exception ex) {
if (LOG.isDebugEnabled()) {
LOG.debug("Error getting input splits: ", ex);
} else {
LOG.error("Error getting input splits: ");
LOG.error(ex.getMessage());
}
job.setJobState(JobStatus.State.FAILED);
return;
}
OutputFormat outputFormat =
(OutputFormat)ReflectionUtils.newInstance(
job.getOutputFormatClass(), conf);
Class extends Mapper,?,?,?>> mapperClass = job.getMapperClass();
Mapper mapper =
(Mapper)ReflectionUtils.newInstance(
mapperClass, conf);
try {
// Set newServerThreads and restrictHosts in ThreadManager for
// initializing thread pool
outputFormat.checkOutputSpecs(job);
} catch (Exception ex) {
if (LOG.isDebugEnabled()) {
LOG.debug("Error checking output specification: ", ex);
} else {
LOG.error("Error checking output specification: ");
LOG.error(ex.getMessage());
}
job.setJobState(JobStatus.State.FAILED);
return;
}
// Initialize thread pool
pool = threadManager.initThreadPool();
threadManager.runThreadPoller();
progress = new AtomicInteger[splits.size()];
for (int i = 0; i < splits.size(); i++) {
progress[i] = new AtomicInteger();
}
job.setJobState(JobStatus.State.RUNNING);
Monitor monitor = new Monitor();
monitor.start();
for (int i = 0; i < array.length && !ContentPump.shutdown; i++) {
InputSplit split = array[i];
if (pool != null) {
LocalMapTask task =
new LocalMapTask<>(
inputFormat, outputFormat, conf, i, split, reporter,
progress[i]);
threadManager.submitTask(task, i, array.length);
} else { // single-threaded
JobID jid = new JobID();
TaskID taskId = new TaskID(jid.getJtIdentifier(), jid.getId(), TaskType.MAP, i);
TaskAttemptID taskAttemptId = new TaskAttemptID(taskId, 0);
TaskAttemptContext context =
ReflectionUtil.createTaskAttemptContext(conf, taskAttemptId);
RecordReader reader =
inputFormat.createRecordReader(split, context);
RecordWriter writer =
outputFormat.getRecordWriter(context);
OutputCommitter committer =
outputFormat.getOutputCommitter(context);
TrackingRecordReader trackingReader =
new TrackingRecordReader(reader, progress[i]);
Mapper.Context mapperContext =
ReflectionUtil.createMapperContext(mapper, conf,
taskAttemptId, trackingReader, writer, committer,
reporter, split);
trackingReader.initialize(split, mapperContext);
// no thread pool (only 1 thread specified)
Class extends Mapper,?,?,?>> mapClass =
job.getMapperClass();
mapperContext.getConfiguration().setClass(
CONF_MAPREDUCE_JOB_MAP_CLASS , mapClass, Mapper.class);
mapper = (Mapper)
ReflectionUtils.newInstance(mapClass,
mapperContext.getConfiguration());
if (LOG.isDebugEnabled()) {
LOG.debug("Running with single thread and will not " +
"auto-scale");
}
try {
mapper.run(mapperContext);
} finally {
try {
trackingReader.close();
} catch (Throwable t) {
LOG.error("Error closing reader: " + t.getMessage());
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
}
try {
writer.close(mapperContext);
} catch (Throwable t) {
LOG.error("Error closing writer: " + t.getMessage());
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
}
try {
committer.commitTask(context);
} catch (Throwable t) {
LOG.error("Error committing task: " + t.getMessage());
if (LOG.isDebugEnabled()) {
LOG.debug(t);
}
}
}
}
}
threadManager.shutdownThreadPool();
job.setJobState(JobStatus.State.SUCCEEDED);
monitor.interrupt();
monitor.join(1000);
// report counters
Iterator groupIt =
reporter.counters.iterator();
while (groupIt.hasNext()) {
CounterGroup group = groupIt.next();
LOG.info(group.getDisplayName() + ": ");
Iterator counterIt = group.iterator();
while (counterIt.hasNext()) {
Counter counter = counterIt.next();
LOG.info(counter.getDisplayName() + ": " +
counter.getValue());
}
}
LOG.info("Total execution time: " +
(System.currentTimeMillis() - startTime) / 1000 + " sec");
}
/**
* A map task to be run in a thread.
*
* @author jchen
*
* @param <INKEY> map input key type
* @param <INVALUE> map input value type
* @param <OUTKEY> map output key type
* @param <OUTVALUE> map output value type
*/
public class LocalMapTask
implements Callable
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy (site footer residue from the source listing)