All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.api.java.operators.ReduceOperator Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.functions.GenericReduce;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.common.operators.base.ReduceOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.ReduceFunction;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.KeyRemovingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingReduceOperator;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.types.TypeInformation;

/**
 * This operator represents the application of a "reduce" function on a data set, and the
 * result data set produced by the function.
 * 
 * @param  The type of the data set reduced by the operator.
 * 
 * @see ReduceFunction
 */
public class ReduceOperator extends SingleInputUdfOperator> {
	
	private final ReduceFunction function;
	
	private final Grouping grouper;
	
	/**
	 * 
	 * This is the case for a reduce-all case (in contrast to the reduce-per-group case).
	 * 
	 * @param input
	 * @param function
	 */
	public ReduceOperator(DataSet input, ReduceFunction function) {
		super(input, input.getType());
		
		if (function == null) {
			throw new NullPointerException("Reduce function must not be null.");
		}
		
		this.function = function;
		this.grouper = null;
		
		extractSemanticAnnotationsFromUdf(function.getClass());
	}
	
	
	public ReduceOperator(Grouping input, ReduceFunction function) {
		super(input.getDataSet(), input.getDataSet().getType());
		
		if (function == null) {
			throw new NullPointerException("Reduce function must not be null.");
		}
		
		this.function = function;
		this.grouper = input;
		
		extractSemanticAnnotationsFromUdf(function.getClass());
	}

	@Override
	protected eu.stratosphere.api.common.operators.SingleInputOperator translateToDataFlow(Operator input) {
		
		String name = getName() != null ? getName() : function.getClass().getName();
		
		// distinguish between grouped reduce and non-grouped reduce
		if (grouper == null) {
			// non grouped reduce
			UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getInputType());
			ReduceOperatorBase> po =
					new ReduceOperatorBase>(function, operatorInfo, new int[0], name);
			// set input
			po.setInput(input);
			
			// the degree of parallelism for a non grouped reduce can only be 1
			po.setDegreeOfParallelism(1);
			
			return po;
		}
		
		if (grouper.getKeys() instanceof Keys.SelectorFunctionKeys) {
			
			// reduce with key selector function
			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys = (Keys.SelectorFunctionKeys) grouper.getKeys();
			
			MapOperatorBase po = translateSelectorFunctionReducer(selectorKeys, function, getInputType(), name, input, this.getParallelism());
			return po;
		}
		else if (grouper.getKeys() instanceof Keys.FieldPositionKeys) {
			
			// reduce with field positions
			int[] logicalKeyPositions = grouper.getKeys().computeLogicalKeyPositions();
			UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation(getInputType(), getInputType());
			ReduceOperatorBase> po =
					new ReduceOperatorBase>(function, operatorInfo, logicalKeyPositions, name);
			
			// set input
			po.setInput(input);
			// set dop
			po.setDegreeOfParallelism(this.getParallelism());
			
			return po;
		}
		else {
			throw new UnsupportedOperationException("Unrecognized key type.");
		}
		
	}
	
	// --------------------------------------------------------------------------------------------
	
	private static  MapOperatorBase, T, ?> translateSelectorFunctionReducer(Keys.SelectorFunctionKeys rawKeys,
			ReduceFunction function, TypeInformation inputType, String name, Operator input, int dop)
	{
		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys = (Keys.SelectorFunctionKeys) rawKeys;
		
		TypeInformation> typeInfoWithKey = new TupleTypeInfo>(keys.getKeyType(), inputType);
		
		KeyExtractingMapper extractor = new KeyExtractingMapper(keys.getKeyExtractor());
		
		PlanUnwrappingReduceOperator reducer = new PlanUnwrappingReduceOperator(function, keys, name, inputType, typeInfoWithKey);
		
		MapOperatorBase, GenericMap>> keyExtractingMap = new MapOperatorBase, GenericMap>>(extractor, new UnaryOperatorInformation>(inputType, typeInfoWithKey), "Key Extractor");
		MapOperatorBase, T, GenericMap, T>> keyRemovingMap = new MapOperatorBase, T, GenericMap, T>>(new KeyRemovingMapper(), new UnaryOperatorInformation, T>(typeInfoWithKey, inputType), "Key Extractor");

		keyExtractingMap.setInput(input);
		reducer.setInput(keyExtractingMap);
		keyRemovingMap.setInput(reducer);
		
		// set dop
		keyExtractingMap.setDegreeOfParallelism(input.getDegreeOfParallelism());
		reducer.setDegreeOfParallelism(dop);
		keyRemovingMap.setDegreeOfParallelism(dop);
		
		return keyRemovingMap;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy