org.apache.flink.hadoopcompatibility.mapred.HadoopReduceCombineFunction Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.hadoopcompatibility.mapred;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.functions.GroupCombineFunction;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.hadoop.mapred.wrapper.HadoopDummyReporter;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.typeutils.ResultTypeQueryable;
import org.apache.flink.api.java.typeutils.TupleTypeInfo;
import org.apache.flink.api.java.typeutils.TypeExtractor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.hadoopcompatibility.mapred.wrapper.HadoopOutputCollector;
import org.apache.flink.hadoopcompatibility.mapred.wrapper.HadoopTupleUnwrappingIterator;
import org.apache.flink.util.Collector;
import org.apache.flink.util.InstantiationUtil;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.Reducer;
import org.apache.hadoop.mapred.Reporter;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.io.Serializable;
/**
* This wrapper maps a Hadoop Reducer and Combiner (mapred API) to a combinable Flink
* GroupReduceFunction.
*/
@SuppressWarnings("rawtypes")
@Public
public final class HadoopReduceCombineFunction
extends RichGroupReduceFunction, Tuple2>
implements GroupCombineFunction, Tuple2>,
ResultTypeQueryable>,
Serializable {
private static final long serialVersionUID = 1L;
private transient Reducer reducer;
private transient Reducer combiner;
private transient JobConf jobConf;
private transient HadoopTupleUnwrappingIterator valueIterator;
private transient HadoopOutputCollector reduceCollector;
private transient HadoopOutputCollector combineCollector;
private transient Reporter reporter;
/**
* Maps two Hadoop Reducer (mapred API) to a combinable Flink GroupReduceFunction.
*
* @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
* @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
*/
public HadoopReduceCombineFunction(
Reducer hadoopReducer,
Reducer hadoopCombiner) {
this(hadoopReducer, hadoopCombiner, new JobConf());
}
/**
* Maps two Hadoop Reducer (mapred API) to a combinable Flink GroupReduceFunction.
*
* @param hadoopReducer The Hadoop Reducer that is mapped to a GroupReduceFunction.
* @param hadoopCombiner The Hadoop Reducer that is mapped to the combiner function.
* @param conf The JobConf that is used to configure both Hadoop Reducers.
*/
public HadoopReduceCombineFunction(
Reducer hadoopReducer,
Reducer hadoopCombiner,
JobConf conf) {
if (hadoopReducer == null) {
throw new NullPointerException("Reducer may not be null.");
}
if (hadoopCombiner == null) {
throw new NullPointerException("Combiner may not be null.");
}
if (conf == null) {
throw new NullPointerException("JobConf may not be null.");
}
this.reducer = hadoopReducer;
this.combiner = hadoopCombiner;
this.jobConf = conf;
}
@SuppressWarnings("unchecked")
@Override
public void open(Configuration parameters) throws Exception {
super.open(parameters);
this.reducer.configure(jobConf);
this.combiner.configure(jobConf);
this.reporter = new HadoopDummyReporter();
Class inKeyClass =
(Class) TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 0);
TypeSerializer keySerializer =
TypeExtractor.getForClass(inKeyClass)
.createSerializer(getRuntimeContext().getExecutionConfig());
this.valueIterator = new HadoopTupleUnwrappingIterator<>(keySerializer);
this.combineCollector = new HadoopOutputCollector<>();
this.reduceCollector = new HadoopOutputCollector<>();
}
@Override
public void reduce(
final Iterable> values,
final Collector> out)
throws Exception {
reduceCollector.setFlinkCollector(out);
valueIterator.set(values.iterator());
reducer.reduce(valueIterator.getCurrentKey(), valueIterator, reduceCollector, reporter);
}
@Override
public void combine(
final Iterable> values,
final Collector> out)
throws Exception {
combineCollector.setFlinkCollector(out);
valueIterator.set(values.iterator());
combiner.reduce(valueIterator.getCurrentKey(), valueIterator, combineCollector, reporter);
}
@SuppressWarnings("unchecked")
@Override
public TypeInformation> getProducedType() {
Class outKeyClass =
(Class)
TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 2);
Class outValClass =
(Class)
TypeExtractor.getParameterType(Reducer.class, reducer.getClass(), 3);
final TypeInformation keyTypeInfo = TypeExtractor.getForClass(outKeyClass);
final TypeInformation valueTypleInfo = TypeExtractor.getForClass(outValClass);
return new TupleTypeInfo<>(keyTypeInfo, valueTypleInfo);
}
/**
* Custom serialization methods.
*
* @see http://docs.oracle.com/javase/7/docs/api/java/io/Serializable.html
*/
private void writeObject(final ObjectOutputStream out) throws IOException {
out.writeObject(reducer.getClass());
out.writeObject(combiner.getClass());
jobConf.write(out);
}
@SuppressWarnings("unchecked")
private void readObject(final ObjectInputStream in) throws IOException, ClassNotFoundException {
Class> reducerClass =
(Class>) in.readObject();
reducer = InstantiationUtil.instantiate(reducerClass);
Class> combinerClass =
(Class>) in.readObject();
combiner = InstantiationUtil.instantiate(combinerClass);
jobConf = new JobConf();
jobConf.readFields(in);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy