// eu.stratosphere.api.java.operators.ReduceGroupOperator Maven / Gradle / Ivy
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import eu.stratosphere.api.common.functions.GenericCombine;
import eu.stratosphere.api.common.functions.GenericGroupReduce;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.Order;
import eu.stratosphere.api.common.operators.Ordering;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.GroupReduceFunction;
import eu.stratosphere.api.java.functions.GroupReduceFunction.Combinable;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingReduceGroupOperator;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;
/**
* This operator represents the application of a "reduceGroup" function on a data set, and the
* result data set produced by the function.
*
* @param The type of the data set consumed by the operator.
* @param The type of the data set created by the operator.
*/
public class ReduceGroupOperator extends SingleInputUdfOperator> {
private final GroupReduceFunction function;
private final Grouping grouper;
private boolean combinable;
/**
* Constructor for a non-grouped reduce (all reduce).
*
* @param input The input data set to the groupReduce function.
* @param function The user-defined GroupReduce function.
*/
public ReduceGroupOperator(DataSet input, GroupReduceFunction function) {
super(input, TypeExtractor.getGroupReduceReturnTypes(function, input.getType()));
if (function == null) {
throw new NullPointerException("GroupReduce function must not be null.");
}
this.function = function;
this.grouper = null;
checkCombinability();
}
/**
* Constructor for a grouped reduce.
*
* @param input The grouped input to be processed group-wise by the groupReduce function.
* @param function The user-defined GroupReduce function.
*/
public ReduceGroupOperator(Grouping input, GroupReduceFunction function) {
super(input != null ? input.getDataSet() : null, TypeExtractor.getGroupReduceReturnTypes(function, input.getDataSet().getType()));
if (function == null) {
throw new NullPointerException("GroupReduce function must not be null.");
}
this.function = function;
this.grouper = input;
checkCombinability();
extractSemanticAnnotationsFromUdf(function.getClass());
}
private void checkCombinability() {
if (function instanceof GenericCombine && function.getClass().getAnnotation(Combinable.class) != null) {
this.combinable = true;
}
}
// --------------------------------------------------------------------------------------------
// Properties
// --------------------------------------------------------------------------------------------
public boolean isCombinable() {
return combinable;
}
public void setCombinable(boolean combinable) {
// sanity check that the function is a subclass of the combine interface
if (combinable && !(function instanceof GenericCombine)) {
throw new IllegalArgumentException("The function does not implement the combine interface.");
}
this.combinable = combinable;
}
@Override
protected eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase, OUT, ?> translateToDataFlow(Operator input) {
String name = getName() != null ? getName() : function.getClass().getName();
// distinguish between grouped reduce and non-grouped reduce
if (grouper == null) {
// non grouped reduce
UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getResultType());
GroupReduceOperatorBase> po =
new GroupReduceOperatorBase>(function, operatorInfo, new int[0], name);
po.setCombinable(combinable);
// set input
po.setInput(input);
// the degree of parallelism for a non grouped reduce can only be 1
po.setDegreeOfParallelism(1);
return po;
}
if (grouper.getKeys() instanceof Keys.SelectorFunctionKeys) {
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys = (Keys.SelectorFunctionKeys) grouper.getKeys();
PlanUnwrappingReduceGroupOperator po = translateSelectorFunctionReducer(
selectorKeys, function, getInputType(), getResultType(), name, input, isCombinable());
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
else if (grouper.getKeys() instanceof Keys.FieldPositionKeys) {
int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getResultType());
GroupReduceOperatorBase> po =
new GroupReduceOperatorBase>(function, operatorInfo, logicalKeyPositions, name);
po.setCombinable(combinable);
po.setInput(input);
po.setDegreeOfParallelism(this.getParallelism());
// set group order
if (grouper instanceof SortedGrouping) {
SortedGrouping sortedGrouper = (SortedGrouping) grouper;
int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
Order[] sortOrders = sortedGrouper.getGroupSortOrders();
Ordering o = new Ordering();
for(int i=0; i < sortKeyPositions.length; i++) {
o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
}
po.setGroupOrder(o);
}
return po;
}
else {
throw new UnsupportedOperationException("Unrecognized key type.");
}
}
// --------------------------------------------------------------------------------------------
private static PlanUnwrappingReduceGroupOperator translateSelectorFunctionReducer(
Keys.SelectorFunctionKeys rawKeys, GroupReduceFunction function,
TypeInformation inputType, TypeInformation outputType, String name, Operator input,
boolean combinable)
{
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys = (Keys.SelectorFunctionKeys) rawKeys;
TypeInformation> typeInfoWithKey = new TupleTypeInfo>(keys.getKeyType(), inputType);
KeyExtractingMapper extractor = new KeyExtractingMapper(keys.getKeyExtractor());
PlanUnwrappingReduceGroupOperator reducer = new PlanUnwrappingReduceGroupOperator(function, keys, name, outputType, typeInfoWithKey, combinable);
MapOperatorBase, GenericMap>> mapper = new MapOperatorBase, GenericMap>>(extractor, new UnaryOperatorInformation>(inputType, typeInfoWithKey), "Key Extractor");
reducer.setInput(mapper);
mapper.setInput(input);
// set the mapper's parallelism to the input parallelism to make sure it is chained
mapper.setDegreeOfParallelism(input.getDegreeOfParallelism());
return reducer;
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy