All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.mapreduce.lib.map.MultithreadedMapper Maven / Gradle / Ivy

There is a newer version: 3.4.1
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.lib.map;

import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.shaded.org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.shaded.org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Counter;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.MapContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.RecordWriter;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.StatusReporter;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.task.MapContextImpl;
import org.apache.hadoop.shaded.org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.shaded.org.slf4j.Logger;
import org.apache.hadoop.shaded.org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * Multithreaded implementation for @link org.apache.hadoop.shaded.org.apache.hadoop.mapreduce.Mapper.
 * 

* It can be used instead of the default implementation, * {@link org.apache.hadoop.shaded.org.apache.hadoop.mapred.MapRunner}, when the Map operation is not CPU * bound in order to improve throughput. *

* Mapper implementations using this MapRunnable must be thread-safe. *

* The Map-Reduce job has to be configured with the mapper to use via * {@link #setMapperClass(Job, Class)} and * the number of thread the thread-pool can use with the * {@link #getNumberOfThreads(JobContext)} method. The default * value is 10 threads. *

*/ @InterfaceAudience.Public @InterfaceStability.Stable public class MultithreadedMapper extends Mapper { private static final Logger LOG = LoggerFactory.getLogger(MultithreadedMapper.class); public static String NUM_THREADS = "mapreduce.mapper.multithreadedmapper.threads"; public static String MAP_CLASS = "mapreduce.mapper.multithreadedmapper.mapclass"; private Class> mapClass; private Context outer; private List runners; /** * The number of threads in the thread pool that will run the map function. * @param job the job * @return the number of threads */ public static int getNumberOfThreads(JobContext job) { return job.getConfiguration().getInt(NUM_THREADS, 10); } /** * Set the number of threads in the pool for running maps. * @param job the job to modify * @param threads the new number of threads */ public static void setNumberOfThreads(Job job, int threads) { job.getConfiguration().setInt(NUM_THREADS, threads); } /** * Get the application's mapper class. * @param the map's input key type * @param the map's input value type * @param the map's output key type * @param the map's output value type * @param job the job * @return the mapper class to run */ @SuppressWarnings("unchecked") public static Class> getMapperClass(JobContext job) { return (Class>) job.getConfiguration().getClass(MAP_CLASS, Mapper.class); } /** * Set the application's mapper class. * @param the map input key type * @param the map input value type * @param the map output key type * @param the map output value type * @param job the job to modify * @param cls the class to use as the mapper */ public static void setMapperClass(Job job, Class> cls) { if (MultithreadedMapper.class.isAssignableFrom(cls)) { throw new IllegalArgumentException("Can't have recursive " + "MultithreadedMapper instances."); } job.getConfiguration().setClass(MAP_CLASS, cls, Mapper.class); } /** * Run the application's maps using a thread pool. 
*/ @Override public void run(Context context) throws IOException, InterruptedException { outer = context; int numberOfThreads = getNumberOfThreads(context); mapClass = getMapperClass(context); if (LOG.isDebugEnabled()) { LOG.debug("Configuring multithread runner to use " + numberOfThreads + " threads"); } runners = new ArrayList(numberOfThreads); for(int i=0; i < numberOfThreads; ++i) { MapRunner thread = new MapRunner(context); thread.start(); runners.add(i, thread); } for(int i=0; i < numberOfThreads; ++i) { MapRunner thread = runners.get(i); thread.join(); Throwable th = thread.throwable; if (th != null) { if (th instanceof IOException) { throw (IOException) th; } else if (th instanceof InterruptedException) { throw (InterruptedException) th; } else { throw new RuntimeException(th); } } } } private class SubMapRecordReader extends RecordReader { private K1 key; private V1 value; private Configuration conf; @Override public void close() throws IOException { } @Override public float getProgress() throws IOException, InterruptedException { return 0; } @Override public void initialize(InputSplit split, TaskAttemptContext context ) throws IOException, InterruptedException { conf = context.getConfiguration(); } @Override public boolean nextKeyValue() throws IOException, InterruptedException { synchronized (outer) { if (!outer.nextKeyValue()) { return false; } key = ReflectionUtils.copy(outer.getConfiguration(), outer.getCurrentKey(), key); value = ReflectionUtils.copy(conf, outer.getCurrentValue(), value); return true; } } public K1 getCurrentKey() { return key; } @Override public V1 getCurrentValue() { return value; } } private class SubMapRecordWriter extends RecordWriter { @Override public void close(TaskAttemptContext context) throws IOException, InterruptedException { } @Override public void write(K2 key, V2 value) throws IOException, InterruptedException { synchronized (outer) { outer.write(key, value); } } } private class SubMapStatusReporter extends 
StatusReporter { @Override public Counter getCounter(Enum name) { return outer.getCounter(name); } @Override public Counter getCounter(String group, String name) { return outer.getCounter(group, name); } @Override public void progress() { outer.progress(); } @Override public void setStatus(String status) { outer.setStatus(status); } @Override public float getProgress() { return outer.getProgress(); } } private class MapRunner extends Thread { private Mapper mapper; private Context subcontext; private Throwable throwable; private RecordReader reader = new SubMapRecordReader(); MapRunner(Context context) throws IOException, InterruptedException { mapper = ReflectionUtils.newInstance(mapClass, context.getConfiguration()); MapContext mapContext = new MapContextImpl(outer.getConfiguration(), outer.getTaskAttemptID(), reader, new SubMapRecordWriter(), context.getOutputCommitter(), new SubMapStatusReporter(), outer.getInputSplit()); subcontext = new WrappedMapper().getMapContext(mapContext); reader.initialize(context.getInputSplit(), context); } @Override public void run() { try { mapper.run(subcontext); reader.close(); } catch (Throwable ie) { throwable = ie; } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy