// org.apache.flink.hadoopcompatibility.mapred.HadoopReduceFunction Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.hadoopcompatibility.mapred;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyReporter;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.hadoopcompatibility.mapred.wrapper.HadoopOutputCollector;
import org.apache.flink.hadoopcompatibility.mapred.wrapper.HadoopTupleUnwrappingIterator;
import org.apache.flink.util.Collector;
import org.apache.flink.util.InstantiationUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
/**
* This wrapper maps a Hadoop Reducer (mapred API) to a non-combinable Flink GroupReduceFunction.
*/
@SuppressWarnings("rawtypes")
@Public
public final class HadoopReduceFunction
extends RichGroupReduceFunction, Tuple2>
implements ResultTypeQueryable>, Serializable {
private static final long serialVersionUID = 1L;
private transient Reducer reducer;
private transient JobConf jobConf;
private transient HadoopTupleUnwrappingIterator valueIterator;
private transient HadoopOutputCollector reduceCollector;
private transient Reporter reporter;
/**
* Maps a Hadoop Reducer (mapred API) to a non-combinable Flink GroupReduceFunction.
*
* @param hadoopReducer The Hadoop Reducer to wrap.
*/
public HadoopReduceFunction(Reducer hadoopReducer) {
this(hadoopReducer, new JobConf());
}
/**
* Maps a Hadoop Reducer (mapred API) to a non-combinable Flink GroupReduceFunction.
*
* @param hadoopReducer The Hadoop Reducer to wrap.
* @param conf The JobConf that is used to configure the Hadoop Reducer.
*/
public HadoopReduceFunction(
Reducer hadoopReducer, JobConf conf) {
if (hadoopReducer == null) {
throw new NullPointerException("Reducer may not be null.");
}
if (conf == null) {
throw new NullPointerException("JobConf may not be null.");
}
this.reducer = hadoopReducer;
this.jobConf = conf;
}
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
this.reducer.configure(jobConf);
this.reporter = new HadoopDummyReporter();
this.reduceCollector = new HadoopOutputCollector();
Class inKeyClass =
(Class) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
TypeSerializer keySerializer =
TypeExtractor.getForClass(inKeyClass)
.createSerializer(getRuntimeContext().getExecutionConfig());
this.valueIterator = new HadoopTupleUnwrappingIterator(keySerializer);
}
@Override
public void reduce(
final Iterable> values,
final Collector> out)
throws Exception {
reduceCollector.setFlinkCollector(out);
valueIterator.set(values.iterator());
reducer.reduce(valueIterator.getCurrentKey(), valueIterator, reduceCollector, reporter);
}
@SuppressWarnings("unchecked")
@Override
public TypeInformation> getProducedType() {
Class outKeyClass =
(Class)
TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
Class outValClass =
(Class)
TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);
final TypeInformation keyTypeInfo =
TypeExtractor.getForClass((Class) outKeyClass);
final TypeInformation valueTypleInfo =
TypeExtractor.getForClass((Class) outValClass);
return new TupleTypeInfo>(keyTypeInfo, valueTypleInfo);
}
/**
* Custom serialization methods.
*
* @see http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html
*/
private void writeObject(final ObjectOutputStream out) throws IOException {
out.writeObject(reducer.getClass());
jobConf.write(out);
}
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {
Class> reducerClass =
(Class>) in.readObject();
reducer = InstantiationUtil.instantiate(reducerClass);
jobConf = new JobConf();
jobConf.readFields(in);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy