/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.hadoopcompatibility.mapred;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;

import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.functions.RichFlatMapFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyReporter;
import org.apache.flink.hadoopcompatibility.mapred.wrapper.HadoopOutputCollector;
import org.apache.flink.util.Collector;
import org.apache.flink.util.InstantiationUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Mapper;
import org.apache.hadoop.mapred.Reporter;

/**
 * This wrapper maps a Hadoop Mapper (mapred API) to a Flink FlatMapFunction. 
 */
@SuppressWarnings("rawtypes")
@Public
public final class HadoopMapFunction<KEYIN, VALUEIN, KEYOUT, VALUEOUT>
					extends RichFlatMapFunction<Tuple2<KEYIN, VALUEIN>, Tuple2<KEYOUT, VALUEOUT>>
					implements ResultTypeQueryable<Tuple2<KEYOUT, VALUEOUT>>, Serializable {

	private static final long serialVersionUID = 1L;

	private transient Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> mapper;
	private transient JobConf jobConf;

	private transient HadoopOutputCollector<KEYOUT, VALUEOUT> outputCollector;
	private transient Reporter reporter;
	
	/**
	 * Maps a Hadoop Mapper (mapred API) to a Flink FlatMapFunction.
	 * 
	 * @param hadoopMapper The Hadoop Mapper to wrap.
	 */
	public HadoopMapFunction(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopMapper) {
		this(hadoopMapper, new JobConf());
	}
	
	/**
	 * Maps a Hadoop Mapper (mapred API) to a Flink FlatMapFunction.
	 * The Hadoop Mapper is configured with the provided JobConf.
	 * 
	 * @param hadoopMapper The Hadoop Mapper to wrap.
	 * @param conf The JobConf that is used to configure the Hadoop Mapper.
	 */
	public HadoopMapFunction(Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT> hadoopMapper, JobConf conf) {
		if(hadoopMapper == null) {
			throw new NullPointerException("Mapper may not be null.");
		}
		if(conf == null) {
			throw new NullPointerException("JobConf may not be null.");
		}
		
		this.mapper = hadoopMapper;
		this.jobConf = conf;
	}

	@Override
	public void open(Configuration parameters) throws Exception {
		super.open(parameters);
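		// Hand the JobConf to the wrapped Mapper and set up the bridge objects:
		// a no-op Reporter and a collector that forwards emitted pairs to Flink's Collector.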
		this.mapper.configure(jobConf);
		
		this.reporter = new HadoopDummyReporter();
		this.outputCollector = new HadoopOutputCollector<KEYOUT, VALUEOUT>();
	}

	@Override
	public void flatMap(final Tuple2<KEYIN, VALUEIN> value, final Collector<Tuple2<KEYOUT, VALUEOUT>> out)
			throws Exception {
		outputCollector.setFlinkCollector(out);
		mapper.map(value.f0, value.f1, outputCollector, reporter);
	}

	@SuppressWarnings("unchecked")
	@Override
	public TypeInformation<Tuple2<KEYOUT, VALUEOUT>> getProducedType() {
		// The third and fourth type parameters of org.apache.hadoop.mapred.Mapper are its output key and value types.
		Class<KEYOUT> outKeyClass = (Class<KEYOUT>) TypeExtractor.getParameterType(Mapper.class, mapper.getClass(), 2);
		Class<VALUEOUT> outValClass = (Class<VALUEOUT>) TypeExtractor.getParameterType(Mapper.class, mapper.getClass(), 3);
		
		final TypeInformation<KEYOUT> keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
		final TypeInformation<VALUEOUT> valueTypeInfo = TypeExtractor.getForClass(outValClass);
		return new TupleTypeInfo<Tuple2<KEYOUT, VALUEOUT>>(keyTypeInfo, valueTypeInfo);
	}
	}
	
	/**
	 * Custom serialization methods.
	 * @see http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html
	 */
	private void writeObject(final ObjectOutputStream out) throws IOException {
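		// The wrapped Mapper is typically not java.io.Serializable, so only its class is written;
		// readObject re-instantiates it and restores the JobConf via its Writable (de)serialization.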
		out.writeObject(mapper.getClass());
		jobConf.write(out);
	}

	@SuppressWarnings("unchecked")
	private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {
		Class<Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>> mapperClass =
				(Class<Mapper<KEYIN, VALUEIN, KEYOUT, VALUEOUT>>) in.readObject();
		mapper = InstantiationUtil.instantiate(mapperClass);
		
		jobConf = new JobConf();
		jobConf.readFields(in);
	}

}
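
For context, a minimal usage sketch (not part of the source file above): it wraps Hadoop's TokenCountMapper (org.apache.hadoop.mapred.lib) with HadoopMapFunction in a Flink DataSet program. The in-line input and the empty JobConf are placeholders; in practice the pairs usually come from a HadoopInputFormat source and the JobConf from the job's configuration.

import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.ExecutionEnvironment;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.hadoopcompatibility.mapred.HadoopMapFunction;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.lib.TokenCountMapper;

public class HadoopMapFunctionExample {

	public static void main(String[] args) throws Exception {
		final ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();

		// Small in-line input; real jobs usually read these pairs via a HadoopInputFormat.
		DataSet<Tuple2<LongWritable, Text>> input = env.fromElements(
				new Tuple2<>(new LongWritable(0L), new Text("hello world")),
				new Tuple2<>(new LongWritable(1L), new Text("hello flink")));

		// Wrap the Hadoop Mapper; the JobConf is passed to Mapper.configure() in open().
		DataSet<Tuple2<Text, LongWritable>> tokenCounts = input.flatMap(
				new HadoopMapFunction<LongWritable, Text, Text, LongWritable>(
						new TokenCountMapper<LongWritable>(), new JobConf()));

		tokenCounts.print();
	}
}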



