All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.api.java.operators.ReduceGroupOperator Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import eu.stratosphere.api.common.functions.GenericCombine;
import eu.stratosphere.api.common.functions.GenericGroupReduce;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.Order;
import eu.stratosphere.api.common.operators.Ordering;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.GroupReduceFunction;
import eu.stratosphere.api.java.functions.GroupReduceFunction.Combinable;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingReduceGroupOperator;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;

/**
 * This operator represents the application of a "reduceGroup" function on a data set, and the
 * result data set produced by the function.
 * 
 * @param  The type of the data set consumed by the operator.
 * @param  The type of the data set created by the operator.
 */
public class ReduceGroupOperator extends SingleInputUdfOperator> {
	
	private final GroupReduceFunction function;
	
	private final Grouping grouper;
	
	private boolean combinable;
	
	
	/**
	 * Constructor for a non-grouped reduce (all reduce).
	 * 
	 * @param input The input data set to the groupReduce function.
	 * @param function The user-defined GroupReduce function.
	 */
	public ReduceGroupOperator(DataSet input, GroupReduceFunction function) {
		super(input, TypeExtractor.getGroupReduceReturnTypes(function, input.getType()));
		
		if (function == null) {
			throw new NullPointerException("GroupReduce function must not be null.");
		}
		
		this.function = function;
		this.grouper = null;
		checkCombinability();
	}
	
	/**
	 * Constructor for a grouped reduce.
	 * 
	 * @param input The grouped input to be processed group-wise by the groupReduce function.
	 * @param function The user-defined GroupReduce function.
	 */
	public ReduceGroupOperator(Grouping input, GroupReduceFunction function) {
		super(input != null ? input.getDataSet() : null, TypeExtractor.getGroupReduceReturnTypes(function, input.getDataSet().getType()));
		
		if (function == null) {
			throw new NullPointerException("GroupReduce function must not be null.");
		}
		
		this.function = function;
		this.grouper = input;
		checkCombinability();
		
		extractSemanticAnnotationsFromUdf(function.getClass());
	}
	
	private void checkCombinability() {
		if (function instanceof GenericCombine && function.getClass().getAnnotation(Combinable.class) != null) {
			this.combinable = true;
		}
	}
	
	// --------------------------------------------------------------------------------------------
	//  Properties
	// --------------------------------------------------------------------------------------------
	
	public boolean isCombinable() {
		return combinable;
	}
	
	public void setCombinable(boolean combinable) {
		// sanity check that the function is a subclass of the combine interface
		if (combinable && !(function instanceof GenericCombine)) {
			throw new IllegalArgumentException("The function does not implement the combine interface.");
		}
		
		this.combinable = combinable;
	}
	
	@Override
	protected eu.stratosphere.api.common.operators.base.GroupReduceOperatorBase translateToDataFlow(Operator input) {
		
		String name = getName() != null ? getName() : function.getClass().getName();
		
		// distinguish between grouped reduce and non-grouped reduce
		if (grouper == null) {
			// non grouped reduce
			UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getResultType());
			GroupReduceOperatorBase> po =
					new GroupReduceOperatorBase>(function, operatorInfo, new int[0], name);

			po.setCombinable(combinable);
			// set input
			po.setInput(input);
			// the degree of parallelism for a non grouped reduce can only be 1
			po.setDegreeOfParallelism(1);
			return po;
		}
	
		if (grouper.getKeys() instanceof Keys.SelectorFunctionKeys) {
		
			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys = (Keys.SelectorFunctionKeys) grouper.getKeys();
			
			PlanUnwrappingReduceGroupOperator po = translateSelectorFunctionReducer(
							selectorKeys, function, getInputType(), getResultType(), name, input, isCombinable());
			
			po.setDegreeOfParallelism(this.getParallelism());
			
			return po;
		}
		else if (grouper.getKeys() instanceof Keys.FieldPositionKeys) {

			int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
			UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getResultType());
			GroupReduceOperatorBase> po =
					new GroupReduceOperatorBase>(function, operatorInfo, logicalKeyPositions, name);

			po.setCombinable(combinable);
			po.setInput(input);
			po.setDegreeOfParallelism(this.getParallelism());
			
			// set group order
			if (grouper instanceof SortedGrouping) {
				SortedGrouping sortedGrouper = (SortedGrouping) grouper;
								
				int[] sortKeyPositions = sortedGrouper.getGroupSortKeyPositions();
				Order[] sortOrders = sortedGrouper.getGroupSortOrders();
				
				Ordering o = new Ordering();
				for(int i=0; i < sortKeyPositions.length; i++) {
					o.appendOrdering(sortKeyPositions[i], null, sortOrders[i]);
				}
				po.setGroupOrder(o);
			}
			
			return po;
		}
		else {
			throw new UnsupportedOperationException("Unrecognized key type.");
		}
		
	}
	
	
	// --------------------------------------------------------------------------------------------
	
	private static  PlanUnwrappingReduceGroupOperator translateSelectorFunctionReducer(
			Keys.SelectorFunctionKeys rawKeys, GroupReduceFunction function,
			TypeInformation inputType, TypeInformation outputType, String name, Operator input,
			boolean combinable)
	{
		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys = (Keys.SelectorFunctionKeys) rawKeys;
		
		TypeInformation> typeInfoWithKey = new TupleTypeInfo>(keys.getKeyType(), inputType);
		
		KeyExtractingMapper extractor = new KeyExtractingMapper(keys.getKeyExtractor());
		
		PlanUnwrappingReduceGroupOperator reducer = new PlanUnwrappingReduceGroupOperator(function, keys, name, outputType, typeInfoWithKey, combinable);
		
		MapOperatorBase, GenericMap>> mapper = new MapOperatorBase, GenericMap>>(extractor, new UnaryOperatorInformation>(inputType, typeInfoWithKey), "Key Extractor");

		reducer.setInput(mapper);
		mapper.setInput(input);
		
		// set the mapper's parallelism to the input parallelism to make sure it is chained
		mapper.setDegreeOfParallelism(input.getDegreeOfParallelism());
		
		return reducer;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy