eu.stratosphere.api.java.operators.ReduceOperator Maven / Gradle / Ivy
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.functions.GenericReduce;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.common.operators.base.ReduceOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.ReduceFunction;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.KeyRemovingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingReduceOperator;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.types.TypeInformation;
/**
* This operator represents the application of a "reduce" function on a data set, and the
* result data set produced by the function.
*
* @param The type of the data set reduced by the operator.
*
* @see ReduceFunction
*/
public class ReduceOperator extends SingleInputUdfOperator> {
private final ReduceFunction function;
private final Grouping grouper;
/**
*
* This is the case for a reduce-all case (in contrast to the reduce-per-group case).
*
* @param input
* @param function
*/
public ReduceOperator(DataSet input, ReduceFunction function) {
super(input, input.getType());
if (function == null) {
throw new NullPointerException("Reduce function must not be null.");
}
this.function = function;
this.grouper = null;
extractSemanticAnnotationsFromUdf(function.getClass());
}
public ReduceOperator(Grouping input, ReduceFunction function) {
super(input.getDataSet(), input.getDataSet().getType());
if (function == null) {
throw new NullPointerException("Reduce function must not be null.");
}
this.function = function;
this.grouper = input;
extractSemanticAnnotationsFromUdf(function.getClass());
}
@Override
protected eu.stratosphere.api.common.operators.SingleInputOperator, IN, ?> translateToDataFlow(Operator input) {
String name = getName() != null ? getName() : function.getClass().getName();
// distinguish between grouped reduce and non-grouped reduce
if (grouper == null) {
// non grouped reduce
UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getInputType());
ReduceOperatorBase> po =
new ReduceOperatorBase>(function, operatorInfo, new int[0], name);
// set input
po.setInput(input);
// the degree of parallelism for a non grouped reduce can only be 1
po.setDegreeOfParallelism(1);
return po;
}
if (grouper.getKeys() instanceof Keys.SelectorFunctionKeys) {
// reduce with key selector function
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys = (Keys.SelectorFunctionKeys) grouper.getKeys();
MapOperatorBase, IN, ?> po = translateSelectorFunctionReducer(selectorKeys, function, getInputType(), name, input, this.getParallelism());
return po;
}
else if (grouper.getKeys() instanceof Keys.FieldPositionKeys) {
// reduce with field positions
int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getInputType());
ReduceOperatorBase> po =
new ReduceOperatorBase>(function, operatorInfo, logicalKeyPositions, name);
// set input
po.setInput(input);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
else {
throw new UnsupportedOperationException("Unrecognized key type.");
}
}
// --------------------------------------------------------------------------------------------
private static MapOperatorBase, T, ?> translateSelectorFunctionReducer(Keys.SelectorFunctionKeys rawKeys,
ReduceFunction function, TypeInformation inputType, String name, Operator input, int dop)
{
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys = (Keys.SelectorFunctionKeys) rawKeys;
TypeInformation> typeInfoWithKey = new TupleTypeInfo>(keys.getKeyType(), inputType);
KeyExtractingMapper extractor = new KeyExtractingMapper(keys.getKeyExtractor());
PlanUnwrappingReduceOperator reducer = new PlanUnwrappingReduceOperator(function, keys, name, inputType, typeInfoWithKey);
MapOperatorBase, GenericMap>> keyExtractingMap = new MapOperatorBase, GenericMap>>(extractor, new UnaryOperatorInformation>(inputType, typeInfoWithKey), "Key Extractor");
MapOperatorBase, T, GenericMap, T>> keyRemovingMap = new MapOperatorBase, T, GenericMap, T>>(new KeyRemovingMapper(), new UnaryOperatorInformation, T>(typeInfoWithKey, inputType), "Key Extractor");
keyExtractingMap.setInput(input);
reducer.setInput(keyExtractingMap);
keyRemovingMap.setInput(reducer);
// set dop
keyExtractingMap.setDegreeOfParallelism(input.getDegreeOfParallelism());
reducer.setDegreeOfParallelism(dop);
keyRemovingMap.setDegreeOfParallelism(dop);
return keyRemovingMap;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy