// org.apache.hadoop.mapreduce.lib.chain.Chain Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.lib.chain;
import java.org.apache.hadoop.shaded.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.DefaultStringifier;
import org.apache.hadoop.shaded.org.apache.hadoop.org.apache.hadoop.shaded.io.Stringifier;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.ReduceContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskInputOutputContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.lib.map.WrappedMapper;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.lib.reduce.WrappedReducer;
import org.apache.hadoop.shaded.org.apache.hadoop.util.ReflectionUtils;
/**
* The Chain class provides all the common functionality for the
* {@link ChainMapper} and the {@link ChainReducer} classes.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
public class Chain {
// Key prefixes under which chain settings are stored in the job
// configuration; getPrefix(boolean) selects one of the two.
protected static final String CHAIN_MAPPER = "mapreduce.chain.mapper";
protected static final String CHAIN_REDUCER = "mapreduce.chain.reducer";
// Suffixes appended to a prefix to build the actual configuration keys.
// The two mapper suffixes end with '.' because the mapper index is appended
// after them; the reducer suffixes are used as-is.
protected static final String CHAIN_MAPPER_SIZE = ".size";
protected static final String CHAIN_MAPPER_CLASS = ".mapper.class.";
protected static final String CHAIN_MAPPER_CONFIG = ".mapper.config.";
protected static final String CHAIN_REDUCER_CLASS = ".reducer.class";
protected static final String CHAIN_REDUCER_CONFIG = ".reducer.config";
// Keys written into each mapper's own Configuration to record the key/value
// classes it was declared with.
protected static final String MAPPER_INPUT_KEY_CLASS =
"mapreduce.chain.mapper.input.key.class";
protected static final String MAPPER_INPUT_VALUE_CLASS =
"mapreduce.chain.mapper.input.value.class";
protected static final String MAPPER_OUTPUT_KEY_CLASS =
"mapreduce.chain.mapper.output.key.class";
protected static final String MAPPER_OUTPUT_VALUE_CLASS =
"mapreduce.chain.mapper.output.value.class";
// Keys written into the reducer's own Configuration to record the key/value
// classes it was declared with.
protected static final String REDUCER_INPUT_KEY_CLASS =
"mapreduce.chain.reducer.input.key.class";
protected static final String REDUCER_INPUT_VALUE_CLASS =
"mapreduce.chain.reducer.input.value.class";
protected static final String REDUCER_OUTPUT_KEY_CLASS =
"mapreduce.chain.reducer.output.key.class";
protected static final String REDUCER_OUTPUT_VALUE_CLASS =
"mapreduce.chain.reducer.output.value.class";
protected boolean isMap;
@SuppressWarnings("unchecked")
private List mappers = new ArrayList();
private Reducer, ?, ?, ?> reducer;
private List confList = new ArrayList();
private Configuration rConf;
private List threads = new ArrayList();
private List> blockingQueues =
new ArrayList>();
private Throwable throwable = null;
/**
 * Builds a Chain bound to one side of the job.
 *
 * @param isMap
 *          TRUE when the chain serves a Mapper, FALSE when it serves a
 *          Reducer.
 */
protected Chain(boolean isMap) {
  this.isMap = isMap;
}
static class KeyValuePair {
K key;
V value;
boolean endOfInput;
KeyValuePair(K key, V value) {
this.key = key;
this.value = value;
this.endOfInput = false;
}
KeyValuePair(boolean eof) {
this.key = null;
this.value = null;
this.endOfInput = eof;
}
}
// ChainRecordReader either reads from blocking queue or task context.
private static class ChainRecordReader extends
RecordReader {
private Class> keyClass;
private Class> valueClass;
private KEYIN key;
private VALUEIN value;
private Configuration conf;
TaskInputOutputContext inputContext = null;
ChainBlockingQueue> inputQueue = null;
// constructor to read from a blocking queue
ChainRecordReader(Class> keyClass, Class> valueClass,
ChainBlockingQueue> inputQueue,
Configuration conf) {
this.keyClass = keyClass;
this.valueClass = valueClass;
this.inputQueue = inputQueue;
this.conf = conf;
}
// constructor to read from the context
ChainRecordReader(TaskInputOutputContext context) {
inputContext = context;
}
public void initialize(InputSplit split, TaskAttemptContext context)
throws IOException, InterruptedException {
}
/**
* Advance to the next key, value pair, returning null if at end.
*
* @return the key object that was read into, or null if no more
*/
public boolean nextKeyValue() throws IOException, InterruptedException {
if (inputQueue != null) {
return readFromQueue();
} else if (inputContext.nextKeyValue()) {
this.key = inputContext.getCurrentKey();
this.value = inputContext.getCurrentValue();
return true;
} else {
return false;
}
}
@SuppressWarnings("unchecked")
private boolean readFromQueue() throws IOException, InterruptedException {
KeyValuePair kv = null;
// wait for input on queue
kv = inputQueue.dequeue();
if (kv.endOfInput) {
return false;
}
key = (KEYIN) ReflectionUtils.newInstance(keyClass, conf);
value = (VALUEIN) ReflectionUtils.newInstance(valueClass, conf);
ReflectionUtils.copy(conf, kv.key, this.key);
ReflectionUtils.copy(conf, kv.value, this.value);
return true;
}
/**
* Get the current key.
*
* @return the current key object or null if there isn't one
* @throws IOException
* @throws InterruptedException
*/
public KEYIN getCurrentKey() throws IOException, InterruptedException {
return this.key;
}
/**
* Get the current value.
*
* @return the value object that was read into
* @throws IOException
* @throws InterruptedException
*/
public VALUEIN getCurrentValue() throws IOException, InterruptedException {
return this.value;
}
@Override
public void close() throws IOException {
}
@Override
public float getProgress() throws IOException, InterruptedException {
return 0;
}
}
// ChainRecordWriter either writes to blocking queue or task context
private static class ChainRecordWriter extends
RecordWriter {
TaskInputOutputContext, ?, KEYOUT, VALUEOUT> outputContext = null;
ChainBlockingQueue> outputQueue = null;
KEYOUT keyout;
VALUEOUT valueout;
Configuration conf;
Class> keyClass;
Class> valueClass;
// constructor to write to context
ChainRecordWriter(TaskInputOutputContext, ?, KEYOUT, VALUEOUT> context) {
outputContext = context;
}
// constructor to write to blocking queue
ChainRecordWriter(Class> keyClass, Class> valueClass,
ChainBlockingQueue> output,
Configuration conf) {
this.keyClass = keyClass;
this.valueClass = valueClass;
this.outputQueue = output;
this.conf = conf;
}
/**
* Writes a key/value pair.
*
* @param key
* the key to write.
* @param value
* the value to write.
* @throws IOException
*/
public void write(KEYOUT key, VALUEOUT value) throws IOException,
InterruptedException {
if (outputQueue != null) {
writeToQueue(key, value);
} else {
outputContext.write(key, value);
}
}
@SuppressWarnings("unchecked")
private void writeToQueue(KEYOUT key, VALUEOUT value) throws IOException,
InterruptedException {
this.keyout = (KEYOUT) ReflectionUtils.newInstance(keyClass, conf);
this.valueout = (VALUEOUT) ReflectionUtils.newInstance(valueClass, conf);
ReflectionUtils.copy(conf, key, this.keyout);
ReflectionUtils.copy(conf, value, this.valueout);
// wait to write output to queuue
outputQueue.enqueue(new KeyValuePair(keyout, valueout));
}
/**
* Close this RecordWriter
to future operations.
*
* @param context
* the context of the task
* @throws IOException
*/
public void close(TaskAttemptContext context) throws IOException,
InterruptedException {
if (outputQueue != null) {
// write end of input
outputQueue.enqueue(new KeyValuePair(true));
}
}
}
/** Returns the recorded failure, or null when none has been recorded. */
private Throwable getThrowable() {
  synchronized (this) {
    return throwable;
  }
}
/**
 * Records the given failure unless one is already recorded.
 *
 * @return true if {@code th} was stored, false if an earlier failure was kept
 */
private synchronized boolean setIfUnsetThrowable(Throwable th) {
  if (throwable != null) {
    return false;
  }
  throwable = th;
  return true;
}
/**
 * Thread that runs one Mapper of the chain against its chain context and
 * then closes the reader and writer it was given.
 */
private class MapRunner<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends Thread {
  private Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapper;
  private Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context chainContext;
  private RecordReader<KEYIN, VALUEIN> rr;
  private RecordWriter<KEYOUT, VALUEOUT> rw;

  public MapRunner(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapper,
      Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext,
      RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw)
      throws IOException, InterruptedException {
    this.mapper = mapper;
    this.rr = rr;
    this.rw = rw;
    this.chainContext = mapperContext;
  }

  @Override
  public void run() {
    // a failure has already been recorded: do not start
    if (getThrowable() != null) {
      return;
    }
    try {
      mapper.run(chainContext);
      rr.close();
      rw.close(chainContext);
    } catch (Throwable th) {
      // only the thread that records the first failure interrupts the others
      if (setIfUnsetThrowable(th)) {
        interruptAllThreads();
      }
    }
  }
}
/**
 * Thread that runs the chain's Reducer against its chain context and then
 * closes the writer it was given.
 */
private class ReduceRunner<KEYIN, VALUEIN, KEYOUT, VALUEOUT> extends Thread {
  private Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer;
  private Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context chainContext;
  private RecordWriter<KEYOUT, VALUEOUT> rw;

  ReduceRunner(Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context context,
      Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reducer,
      RecordWriter<KEYOUT, VALUEOUT> rw) throws IOException,
      InterruptedException {
    this.reducer = reducer;
    this.chainContext = context;
    this.rw = rw;
  }

  @Override
  public void run() {
    try {
      reducer.run(chainContext);
      rw.close(chainContext);
    } catch (Throwable th) {
      // only the thread that records the first failure interrupts the others
      if (setIfUnsetThrowable(th)) {
        interruptAllThreads();
      }
    }
  }
}
/** Returns the configuration stored at the given position of the chain. */
Configuration getConf(int index) {
  final Configuration elementConf = confList.get(index);
  return elementConf;
}
/**
 * Create a map context that is based on ChainMapContextImpl and the given
 * record reader and record writer.
 *
 * @param rr reader handed to the chain map context
 * @param rw writer handed to the chain map context
 * @param context task context handed to the chain map context
 * @param conf configuration handed to the chain map context
 * @return the Mapper context obtained by wrapping the chain map context
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createMapContext(
    RecordReader<KEYIN, VALUEIN> rr, RecordWriter<KEYOUT, VALUEOUT> rw,
    TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
    Configuration conf) {
  MapContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapContext =
      new ChainMapContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rr, rw, conf);
  Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context mapperContext =
      new WrappedMapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getMapContext(mapContext);
  return mapperContext;
}
/**
 * Runs the mapper at the given position directly in the calling thread,
 * reading from and writing to the supplied task context.
 */
@SuppressWarnings("unchecked")
void runMapper(TaskInputOutputContext context, int index) throws IOException,
    InterruptedException {
  final Mapper target = mappers.get(index);
  final RecordReader reader = new ChainRecordReader(context);
  final RecordWriter writer = new ChainRecordWriter(context);
  final Configuration elementConf = getConf(index);
  final Mapper.Context wrapped =
      createMapContext(reader, writer, context, elementConf);
  target.run(wrapped);
  reader.close();
  writer.close(context);
}
/**
* Add mapper(the first mapper) that reads input from the input
* context and writes to queue
*/
@SuppressWarnings("unchecked")
void addMapper(TaskInputOutputContext inputContext,
ChainBlockingQueue> output, int index)
throws IOException, InterruptedException {
Configuration conf = getConf(index);
Class> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
Class> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
Object.class);
RecordReader rr = new ChainRecordReader(inputContext);
RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
conf);
Mapper.Context mapperContext = createMapContext(rr, rw,
(MapContext) inputContext, getConf(index));
MapRunner runner = new MapRunner(mappers.get(index), mapperContext, rr, rw);
threads.add(runner);
}
/**
* Add mapper(the last mapper) that reads input from
* queue and writes output to the output context
*/
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue> input,
TaskInputOutputContext outputContext, int index) throws IOException,
InterruptedException {
Configuration conf = getConf(index);
Class> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
Class> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
RecordWriter rw = new ChainRecordWriter(outputContext);
MapRunner runner = new MapRunner(mappers.get(index), createMapContext(rr,
rw, outputContext, getConf(index)), rr, rw);
threads.add(runner);
}
/**
* Add mapper that reads and writes from/to the queue
*/
@SuppressWarnings("unchecked")
void addMapper(ChainBlockingQueue> input,
ChainBlockingQueue> output,
TaskInputOutputContext context, int index) throws IOException,
InterruptedException {
Configuration conf = getConf(index);
Class> keyClass = conf.getClass(MAPPER_INPUT_KEY_CLASS, Object.class);
Class> valueClass = conf.getClass(MAPPER_INPUT_VALUE_CLASS, Object.class);
Class> keyOutClass = conf.getClass(MAPPER_OUTPUT_KEY_CLASS, Object.class);
Class> valueOutClass = conf.getClass(MAPPER_OUTPUT_VALUE_CLASS,
Object.class);
RecordReader rr = new ChainRecordReader(keyClass, valueClass, input, conf);
RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass, output,
conf);
MapRunner runner = new MapRunner(mappers.get(index), createMapContext(rr,
rw, context, getConf(index)), rr, rw);
threads.add(runner);
}
/**
 * Create a reduce context that is based on ChainReduceContextImpl and the
 * given record writer.
 *
 * @param rw writer handed to the chain reduce context
 * @param context reduce context handed to the chain reduce context
 * @param conf configuration handed to the chain reduce context
 * @return the Reducer context obtained by wrapping the chain reduce context
 */
private <KEYIN, VALUEIN, KEYOUT, VALUEOUT>
    Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context createReduceContext(
    RecordWriter<KEYOUT, VALUEOUT> rw,
    ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context,
    Configuration conf) {
  ReduceContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> reduceContext =
      new ChainReduceContextImpl<KEYIN, VALUEIN, KEYOUT, VALUEOUT>(
          context, rw, conf);
  Reducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>.Context reducerContext =
      new WrappedReducer<KEYIN, VALUEIN, KEYOUT, VALUEOUT>()
          .getReducerContext(reduceContext);
  return reducerContext;
}
/**
 * Run the reducer directly in the calling thread, writing its output to the
 * supplied task context.
 *
 * @param context task context; it is cast to ReduceContext to build the
 *          reducer's chain context
 */
@SuppressWarnings("unchecked")
<KEYIN, VALUEIN, KEYOUT, VALUEOUT> void runReducer(
    TaskInputOutputContext<KEYIN, VALUEIN, KEYOUT, VALUEOUT> context)
    throws IOException, InterruptedException {
  RecordWriter<KEYOUT, VALUEOUT> rw =
      new ChainRecordWriter<KEYOUT, VALUEOUT>(context);
  Reducer.Context reducerContext = createReduceContext(rw,
      (ReduceContext) context, rConf);
  reducer.run(reducerContext);
  rw.close(context);
}
/**
* Add reducer that reads from context and writes to a queue
*/
@SuppressWarnings("unchecked")
void addReducer(TaskInputOutputContext inputContext,
ChainBlockingQueue> outputQueue) throws IOException,
InterruptedException {
Class> keyOutClass = rConf.getClass(REDUCER_OUTPUT_KEY_CLASS,
Object.class);
Class> valueOutClass = rConf.getClass(REDUCER_OUTPUT_VALUE_CLASS,
Object.class);
RecordWriter rw = new ChainRecordWriter(keyOutClass, valueOutClass,
outputQueue, rConf);
Reducer.Context reducerContext = createReduceContext(rw,
(ReduceContext) inputContext, rConf);
ReduceRunner runner = new ReduceRunner(reducerContext, reducer, rw);
threads.add(runner);
}
/** Starts every registered runner thread, in registration order. */
void startAllThreads() {
  for (Thread runner : threads) {
    runner.start();
  }
}
/**
 * Waits for every registered runner thread to finish, then rethrows the
 * recorded failure, if any. IOException and InterruptedException are
 * rethrown as-is; anything else is wrapped in a RuntimeException.
 */
void joinAllThreads() throws IOException, InterruptedException {
  for (Thread runner : threads) {
    runner.join();
  }
  final Throwable failure = getThrowable();
  if (failure == null) {
    return;
  }
  if (failure instanceof IOException) {
    throw (IOException) failure;
  }
  if (failure instanceof InterruptedException) {
    throw (InterruptedException) failure;
  }
  throw new RuntimeException(failure);
}
// interrupt all threads
private synchronized void interruptAllThreads() {
for (Thread th : threads) {
th.interrupt();
}
for (ChainBlockingQueue> queue : blockingQueues) {
queue.interrupt();
}
}
/**
 * Selects the configuration key prefix for the kind of chain in use.
 *
 * @param isMap
 *          TRUE for a Mapper chain, FALSE for a Reducer chain.
 * @return CHAIN_MAPPER for a Mapper chain, CHAIN_REDUCER otherwise.
 */
protected static String getPrefix(boolean isMap) {
  if (isMap) {
    return CHAIN_MAPPER;
  }
  return CHAIN_REDUCER;
}
/**
 * Reads the chain size stored under {@code prefix + CHAIN_MAPPER_SIZE}.
 *
 * @return the stored value, or 0 when the key is not set
 */
protected static int getIndex(Configuration conf, String prefix) {
  final String sizeKey = prefix + CHAIN_MAPPER_SIZE;
  return conf.getInt(sizeKey, 0);
}
/**
 * Creates a {@link Configuration} for the Map or Reduce in the chain.
 *
 * <p>
 * It creates a new Configuration using the chain job's Configuration as base
 * and adds to it the configuration properties for the chain element. The keys
 * of the chain element Configuration have precedence over the given
 * Configuration.
 * </p>
 *
 * @param jobConf
 *          the chain job's Configuration.
 * @param confKey
 *          the key for chain element configuration serialized in the chain
 *          job's Configuration.
 * @return a new Configuration aggregating the chain job's Configuration with
 *         the chain element configuration properties.
 * @throws RuntimeException
 *           wrapping the IOException raised while deserializing the chain
 *           element configuration.
 */
protected static Configuration getChainElementConf(Configuration jobConf,
    String confKey) {
  Configuration conf = null;
  try (Stringifier<Configuration> stringifier =
      new DefaultStringifier<Configuration>(jobConf, Configuration.class)) {
    String confString = jobConf.get(confKey, null);
    if (confString != null) {
      conf = stringifier.fromString(confString);
    }
  } catch (IOException ioex) {
    throw new RuntimeException(ioex);
  }
  // we have to do this because the Writable deserialization clears all
  // values set in the conf making not possible do a
  // new Configuration(jobConf) in the creation of the conf above
  jobConf = new Configuration(jobConf);

  if (conf != null) {
    // chain element values overwrite the copied job values
    for (Map.Entry<String, String> entry : conf) {
      jobConf.set(entry.getKey(), entry.getValue());
    }
  }
  return jobConf;
}
/**
* Adds a Mapper class to the chain job.
*
*
* The configuration properties of the chain job have precedence over the
* configuration properties of the Mapper.
*
* @param isMap
* indicates if the Chain is for a Mapper or for a Reducer.
* @param job
* chain job.
* @param klass
* the Mapper class to add.
* @param inputKeyClass
* mapper input key class.
* @param inputValueClass
* mapper input value class.
* @param outputKeyClass
* mapper output key class.
* @param outputValueClass
* mapper output value class.
* @param mapperConf
* a configuration for the Mapper class. It is recommended to use a
* Configuration without default values using the
* Configuration(boolean loadDefaults)
constructor with
* FALSE.
*/
@SuppressWarnings("unchecked")
protected static void addMapper(boolean isMap, Job job,
Class extends Mapper> klass, Class> inputKeyClass,
Class> inputValueClass, Class> outputKeyClass,
Class> outputValueClass, Configuration mapperConf) {
String prefix = getPrefix(isMap);
Configuration jobConf = job.getConfiguration();
// if a reducer chain check the Reducer has been already set
checkReducerAlreadySet(isMap, jobConf, prefix, true);
// set the mapper class
int index = getIndex(jobConf, prefix);
jobConf.setClass(prefix + CHAIN_MAPPER_CLASS + index, klass, Mapper.class);
validateKeyValueTypes(isMap, jobConf, inputKeyClass, inputValueClass,
outputKeyClass, outputValueClass, index, prefix);
setMapperConf(isMap, jobConf, inputKeyClass, inputValueClass,
outputKeyClass, outputValueClass, mapperConf, index, prefix);
}
/**
 * For a reducer chain, verifies the presence (shouldSet is TRUE) or absence
 * (shouldSet is FALSE) of the Reducer class in the job configuration. Does
 * nothing for a mapper chain.
 *
 * @throws IllegalStateException
 *           if the Reducer class state does not match what is expected.
 */
protected static void checkReducerAlreadySet(boolean isMap,
    Configuration jobConf, String prefix, boolean shouldSet) {
  if (isMap) {
    return;
  }
  final boolean reducerMissing =
      jobConf.getClass(prefix + CHAIN_REDUCER_CLASS, null) == null;
  if (shouldSet && reducerMissing) {
    throw new IllegalStateException(
        "A Mapper can be added to the chain only after the Reducer has "
            + "been set");
  }
  if (!shouldSet && !reducerMissing) {
    throw new IllegalStateException("Reducer has been already set");
  }
}
protected static void validateKeyValueTypes(boolean isMap,
Configuration jobConf, Class> inputKeyClass, Class> inputValueClass,
Class> outputKeyClass, Class> outputValueClass, int index,
String prefix) {
// if it is a reducer chain and the first Mapper is being added check the
// key and value input classes of the mapper match those of the reducer
// output.
if (!isMap && index == 0) {
Configuration reducerConf = getChainElementConf(jobConf, prefix
+ CHAIN_REDUCER_CONFIG);
if (!inputKeyClass.isAssignableFrom(reducerConf.getClass(
REDUCER_OUTPUT_KEY_CLASS, null))) {
throw new IllegalArgumentException("The Reducer output key class does"
+ " not match the Mapper input key class");
}
if (!inputValueClass.isAssignableFrom(reducerConf.getClass(
REDUCER_OUTPUT_VALUE_CLASS, null))) {
throw new IllegalArgumentException("The Reducer output value class"
+ " does not match the Mapper input value class");
}
} else if (index > 0) {
// check the that the new Mapper in the chain key and value input classes
// match those of the previous Mapper output.
Configuration previousMapperConf = getChainElementConf(jobConf, prefix
+ CHAIN_MAPPER_CONFIG + (index - 1));
if (!inputKeyClass.isAssignableFrom(previousMapperConf.getClass(
MAPPER_OUTPUT_KEY_CLASS, null))) {
throw new IllegalArgumentException("The specified Mapper input key class does"
+ " not match the previous Mapper's output key class.");
}
if (!inputValueClass.isAssignableFrom(previousMapperConf.getClass(
MAPPER_OUTPUT_VALUE_CLASS, null))) {
throw new IllegalArgumentException("The specified Mapper input value class"
+ " does not match the previous Mapper's output value class.");
}
}
}
protected static void setMapperConf(boolean isMap, Configuration jobConf,
Class> inputKeyClass, Class> inputValueClass,
Class> outputKeyClass, Class> outputValueClass,
Configuration mapperConf, int index, String prefix) {
// if the Mapper does not have a configuration, create an empty one
if (mapperConf == null) {
// using a Configuration without defaults to make it lightweight.
// still the chain's conf may have all defaults and this conf is
// overlapped to the chain configuration one.
mapperConf = new Configuration(true);
}
// store the input/output classes of the mapper in the mapper conf
mapperConf.setClass(MAPPER_INPUT_KEY_CLASS, inputKeyClass, Object.class);
mapperConf
.setClass(MAPPER_INPUT_VALUE_CLASS, inputValueClass, Object.class);
mapperConf.setClass(MAPPER_OUTPUT_KEY_CLASS, outputKeyClass, Object.class);
mapperConf.setClass(MAPPER_OUTPUT_VALUE_CLASS, outputValueClass,
Object.class);
// serialize the mapper configuration in the chain configuration.
Stringifier stringifier =
new DefaultStringifier(jobConf, Configuration.class);
try {
jobConf.set(prefix + CHAIN_MAPPER_CONFIG + index, stringifier
.toString(new Configuration(mapperConf)));
} catch (IOException org.apache.hadoop.shaded.io.x) {
throw new RuntimeException(org.apache.hadoop.shaded.io.x);
}
// increment the chain counter
jobConf.setInt(prefix + CHAIN_MAPPER_SIZE, index + 1);
}
/**
* Sets the Reducer class to the chain job.
*
*
* The configuration properties of the chain job have precedence over the
* configuration properties of the Reducer.
*
* @param job
* the chain job.
* @param klass
* the Reducer class to add.
* @param inputKeyClass
* reducer input key class.
* @param inputValueClass
* reducer input value class.
* @param outputKeyClass
* reducer output key class.
* @param outputValueClass
* reducer output value class.
* @param reducerConf
* a configuration for the Reducer class. It is recommended to use a
* Configuration without default values using the
* Configuration(boolean loadDefaults)
constructor with
* FALSE.
*/
@SuppressWarnings("unchecked")
protected static void setReducer(Job job, Class extends Reducer> klass,
Class> inputKeyClass, Class> inputValueClass,
Class> outputKeyClass, Class> outputValueClass,
Configuration reducerConf) {
String prefix = getPrefix(false);
Configuration jobConf = job.getConfiguration();
checkReducerAlreadySet(false, jobConf, prefix, false);
jobConf.setClass(prefix + CHAIN_REDUCER_CLASS, klass, Reducer.class);
setReducerConf(jobConf, inputKeyClass, inputValueClass, outputKeyClass,
outputValueClass, reducerConf, prefix);
}
protected static void setReducerConf(Configuration jobConf,
Class> inputKeyClass, Class> inputValueClass,
Class> outputKeyClass, Class> outputValueClass,
Configuration reducerConf, String prefix) {
// if the Reducer does not have a Configuration, create an empty one
if (reducerConf == null) {
// using a Configuration without defaults to make it lightweight.
// still the chain's conf may have all defaults and this conf is
// overlapped to the chain's Configuration one.
reducerConf = new Configuration(false);
}
// store the input/output classes of the reducer in
// the reducer configuration
reducerConf.setClass(REDUCER_INPUT_KEY_CLASS, inputKeyClass, Object.class);
reducerConf.setClass(REDUCER_INPUT_VALUE_CLASS, inputValueClass,
Object.class);
reducerConf
.setClass(REDUCER_OUTPUT_KEY_CLASS, outputKeyClass, Object.class);
reducerConf.setClass(REDUCER_OUTPUT_VALUE_CLASS, outputValueClass,
Object.class);
// serialize the reducer configuration in the chain's configuration.
Stringifier stringifier =
new DefaultStringifier(jobConf, Configuration.class);
try {
jobConf.set(prefix + CHAIN_REDUCER_CONFIG, stringifier
.toString(new Configuration(reducerConf)));
} catch (IOException org.apache.hadoop.shaded.io.x) {
throw new RuntimeException(org.apache.hadoop.shaded.io.x);
}
}
/**
* Setup the chain.
*
* @param jobConf
* chain job's {@link Configuration}.
*/
@SuppressWarnings("unchecked")
void setup(Configuration jobConf) {
String prefix = getPrefix(isMap);
int index = jobConf.getInt(prefix + CHAIN_MAPPER_SIZE, 0);
for (int i = 0; i < index; i++) {
Class extends Mapper> klass = jobConf.getClass(prefix
+ CHAIN_MAPPER_CLASS + i, null, Mapper.class);
Configuration mConf = getChainElementConf(jobConf, prefix
+ CHAIN_MAPPER_CONFIG + i);
confList.add(mConf);
Mapper mapper = ReflectionUtils.newInstance(klass, mConf);
mappers.add(mapper);
}
Class extends Reducer> klass = jobConf.getClass(prefix
+ CHAIN_REDUCER_CLASS, null, Reducer.class);
if (klass != null) {
rConf = getChainElementConf(jobConf, prefix + CHAIN_REDUCER_CONFIG);
reducer = ReflectionUtils.newInstance(klass, rConf);
}
}
/**
 * Returns the Mapper instances of the chain. The returned list is the
 * chain's internal list, not a copy.
 */
@SuppressWarnings("unchecked")
List<Mapper> getAllMappers() {
  return mappers;
}
/**
* Returns the Reducer instance in the chain.
*
* @return the Reducer instance in the chain or NULL if none.
*/
Reducer, ?, ?, ?> getReducer() {
return reducer;
}
/**
* Creates a ChainBlockingQueue with KeyValuePair as element
*
* @return the ChainBlockingQueue
*/
ChainBlockingQueue> createBlockingQueue() {
return new ChainBlockingQueue>();
}
/**
* A blocking queue with one element.
*
* @param
*/
class ChainBlockingQueue {
E element = null;
boolean isInterrupted = false;
ChainBlockingQueue() {
blockingQueues.add(this);
}
synchronized void enqueue(E e) throws InterruptedException {
while (element != null) {
if (isInterrupted) {
throw new InterruptedException();
}
this.wait();
}
element = e;
this.notify();
}
synchronized E dequeue() throws InterruptedException {
while (element == null) {
if (isInterrupted) {
throw new InterruptedException();
}
this.wait();
}
E e = element;
element = null;
this.notify();
return e;
}
synchronized void interrupt() {
isInterrupted = true;
this.notifyAll();
}
}
}