// All downloads are FREE. The search and download functionality uses the official Maven repository.
//
// eu.stratosphere.api.java.operators.CoGroupOperator Maven / Gradle / Ivy
//
// There is a newer version: 0.5.2-hadoop2
// Show newest version
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import java.security.InvalidParameterException;

import eu.stratosphere.api.common.InvalidProgramException;
import eu.stratosphere.api.common.functions.GenericCoGrouper;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.CoGroupOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.CoGroupFunction;
import eu.stratosphere.api.java.functions.KeySelector;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingCoGroupOperator;
import eu.stratosphere.api.java.operators.translation.TupleKeyExtractingMapper;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;

/**
 * A {@link DataSet} that is the result of a CoGroup transformation. 
 * 
 * @param  The type of the first input DataSet of the CoGroup transformation.
 * @param  The type of the second input DataSet of the CoGroup transformation.
 * @param  The type of the result of the CoGroup transformation.
 * 
 * @see DataSet
 */
public class CoGroupOperator extends TwoInputUdfOperator> {

	private final CoGroupFunction function;

	private final Keys keys1;
	private final Keys keys2;


	protected CoGroupOperator(DataSet input1, DataSet input2,
							Keys keys1, Keys keys2,
							CoGroupFunction function,
							TypeInformation returnType)
	{
		super(input1, input2, returnType);

		this.function = function;

		if (keys1 == null || keys2 == null) {
			throw new NullPointerException();
		}

		this.keys1 = keys1;
		this.keys2 = keys2;
		
		extractSemanticAnnotationsFromUdf(function.getClass());
	}

	protected Keys getKeys1() {
		return this.keys1;
	}

	protected Keys getKeys2() {
		return this.keys2;
	}

	@Override
	protected eu.stratosphere.api.common.operators.base.CoGroupOperatorBase translateToDataFlow(Operator input1, Operator input2) {
		
		String name = getName() != null ? getName() : function.getClass().getName();

		if (keys1 instanceof Keys.SelectorFunctionKeys
				&& keys2 instanceof Keys.SelectorFunctionKeys
				&& keys1.areCompatibale(keys2)) {

			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) keys1;
			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) keys2;

			PlanUnwrappingCoGroupOperator po =
					translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function,
					getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

			// set dop
			po.setDegreeOfParallelism(this.getParallelism());

			return po;

		}
		else if (keys1 instanceof Keys.FieldPositionKeys
				&& keys2 instanceof Keys.FieldPositionKeys
				&& keys1.areCompatibale(keys2)
			) {

			int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
			int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
			
			CoGroupOperatorBase> po =
					new CoGroupOperatorBase>(
							function, new BinaryOperatorInformation(getInput1Type(), getInput2Type(), getResultType()),
							logicalKeyPositions1, logicalKeyPositions2, name);
			
			// set inputs
			po.setFirstInput(input1);
			po.setSecondInput(input2);

			// set dop
			po.setDegreeOfParallelism(this.getParallelism());

			return po;

		}
		else if (keys1 instanceof Keys.FieldPositionKeys
				&& keys2 instanceof Keys.SelectorFunctionKeys
				&& keys1.areCompatibale(keys2)
			) {

			int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();

			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) keys2;

			PlanUnwrappingCoGroupOperator po =
					translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function,
					getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

			// set dop
			po.setDegreeOfParallelism(this.getParallelism());

			return po;
		}
		else if (keys1 instanceof Keys.SelectorFunctionKeys
				&& keys2 instanceof Keys.FieldPositionKeys
				&& keys1.areCompatibale(keys2)
			) {

			@SuppressWarnings("unchecked")
			Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) keys1;

			int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();

			PlanUnwrappingCoGroupOperator po =
					translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function,
					getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);

			// set dop
			po.setDegreeOfParallelism(this.getParallelism());

			return po;
		}
		else {
			throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
		}
	}


	private static  PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroup(
			Keys.SelectorFunctionKeys rawKeys1, Keys.SelectorFunctionKeys rawKeys2,
			CoGroupFunction function,
			TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
			Operator input1, Operator input2)
	{
		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;
		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;

		final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1);
		final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);

		final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
		final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());
		
		final MapOperatorBase, GenericMap>> keyMapper1 =
				new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
		final MapOperatorBase, GenericMap>> keyMapper2 =
				new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
		final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, keys1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

		cogroup.setFirstInput(keyMapper1);
		cogroup.setSecondInput(keyMapper2);

		keyMapper1.setInput(input1);
		keyMapper2.setInput(input2);
		// set dop
		keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
		keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

		return cogroup;
	}

	private static  PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroupRight(
			int[] logicalKeyPositions1, Keys.SelectorFunctionKeys rawKeys2,
			CoGroupFunction function,
			TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
			Operator input1, Operator input2)
	{
		if(!inputType1.isTupleType()) {
			throw new InvalidParameterException("Should not happen.");
		}

		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;

		final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys2.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
		final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);

		final TupleKeyExtractingMapper extractor1 = new TupleKeyExtractingMapper(logicalKeyPositions1[0]);
		final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());

		final MapOperatorBase, GenericMap>> keyMapper1 =
				new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
		final MapOperatorBase, GenericMap>> keyMapper2 =
				new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
		
		final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, logicalKeyPositions1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

		cogroup.setFirstInput(keyMapper1);
		cogroup.setSecondInput(keyMapper2);

		keyMapper1.setInput(input1);
		keyMapper2.setInput(input2);
		// set dop
		keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
		keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

		return cogroup;
	}

	private static  PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroupLeft(
			Keys.SelectorFunctionKeys rawKeys1, int[] logicalKeyPositions2,
			CoGroupFunction function,
			TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
			Operator input1, Operator input2)
	{
		if(!inputType2.isTupleType()) {
			throw new InvalidParameterException("Should not happen.");
		}

		@SuppressWarnings("unchecked")
		final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;

		final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
		final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys1.getKeyType(), inputType2);

		final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
		final TupleKeyExtractingMapper extractor2 = new TupleKeyExtractingMapper(logicalKeyPositions2[0]);

		final MapOperatorBase, GenericMap>> keyMapper1 =
				new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
		final MapOperatorBase, GenericMap>> keyMapper2 =
				new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
		
		final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, keys1, logicalKeyPositions2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);

		cogroup.setFirstInput(keyMapper1);
		cogroup.setSecondInput(keyMapper2);

		keyMapper1.setInput(input1);
		keyMapper2.setInput(input2);
		// set dop
		keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
		keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());

		return cogroup;
	}

	// --------------------------------------------------------------------------------------------
	// Builder classes for incremental construction
	// --------------------------------------------------------------------------------------------

	/**
	 * Intermediate step of a CoGroup transformation. 
* To continue the CoGroup transformation, select the grouping key of the first input {@link DataSet} by calling * {@link CoGroupOperatorSets#where(int...)} or {@link CoGroupOperatorSets#where(KeySelector)}. * * @param The type of the first input DataSet of the CoGroup transformation. * @param The type of the second input DataSet of the CoGroup transformation. */ public static final class CoGroupOperatorSets { private final DataSet input1; private final DataSet input2; public CoGroupOperatorSets(DataSet input1, DataSet input2) { if (input1 == null || input2 == null) { throw new NullPointerException(); } this.input1 = input1; this.input2 = input2; } /** * Continues a CoGroup transformation.
* Defines the {@link Tuple} fields of the first co-grouped {@link DataSet} that should be used as grouping keys.
* Note: Fields can only be selected as grouping keys on Tuple DataSets.
* * @param fields The indexes of the Tuple fields of the first co-grouped DataSets that should be used as keys. * @return An incomplete CoGroup transformation. * Call {@link CoGroupOperatorSetsPredicate#equalTo()} to continue the CoGroup. * * @see Tuple * @see DataSet */ public CoGroupOperatorSetsPredicate where(int... fields) { return new CoGroupOperatorSetsPredicate(new Keys.FieldPositionKeys(fields, input1.getType())); } /** * Continues a CoGroup transformation and defines a {@link KeySelector} function for the first co-grouped {@link DataSet}.
* The KeySelector function is called for each element of the first DataSet and extracts a single * key value on which the DataSet is grouped.
* * @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is grouped. * @return An incomplete CoGroup transformation. * Call {@link CoGroupOperatorSetsPredicate#equalTo()} to continue the CoGroup. * * @see KeySelector * @see DataSet */ public CoGroupOperatorSetsPredicate where(KeySelector keyExtractor) { return new CoGroupOperatorSetsPredicate(new Keys.SelectorFunctionKeys(keyExtractor, input1.getType())); } // ---------------------------------------------------------------------------------------- /** * Intermediate step of a CoGroup transformation.
* To continue the CoGroup transformation, select the grouping key of the second input {@link DataSet} by calling * {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)}. * */ public final class CoGroupOperatorSetsPredicate { private final Keys keys1; private CoGroupOperatorSetsPredicate(Keys keys1) { if (keys1 == null) { throw new NullPointerException(); } if (keys1.isEmpty()) { throw new InvalidProgramException("The join keys must not be empty."); } this.keys1 = keys1; } /** * Continues a CoGroup transformation and defines the {@link Tuple} fields of the second co-grouped * {@link DataSet} that should be used as grouping keys.
* Note: Fields can only be selected as grouping keys on Tuple DataSets.
* * @param fields The indexes of the Tuple fields of the second co-grouped DataSet that should be used as keys. * @return An incomplete CoGroup transformation. * Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))} to finalize the CoGroup transformation. */ public CoGroupOperatorWithoutFunction equalTo(int... fields) { return createCoGroupOperator(new Keys.FieldPositionKeys(fields, input2.getType())); } /** * Continues a CoGroup transformation and defines a {@link KeySelector} function for the second co-grouped {@link DataSet}.
* The KeySelector function is called for each element of the second DataSet and extracts a single * key value on which the DataSet is grouped.
* * @param keySelector The KeySelector function which extracts the key values from the second DataSet on which it is grouped. * @return An incomplete CoGroup transformation. * Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))} to finalize the CoGroup transformation. */ public CoGroupOperatorWithoutFunction equalTo(KeySelector keyExtractor) { return createCoGroupOperator(new Keys.SelectorFunctionKeys(keyExtractor, input2.getType())); } /** * Intermediate step of a CoGroup transformation.
* To continue the CoGroup transformation, provide a {@link CoGroupFunction} by calling * {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction))}. * */ private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys keys2) { if (keys2 == null) { throw new NullPointerException(); } if (keys2.isEmpty()) { throw new InvalidProgramException("The join keys must not be empty."); } if (!keys1.areCompatibale(keys2)) { throw new InvalidProgramException("The pair of join keys are not compatible with each other."); } return new CoGroupOperatorWithoutFunction(keys2); } public final class CoGroupOperatorWithoutFunction { private final Keys keys2; private CoGroupOperatorWithoutFunction(Keys keys2) { if (keys2 == null) { throw new NullPointerException(); } if (keys2.isEmpty()) { throw new InvalidProgramException("The join keys must not be empty."); } this.keys2 = keys2; } /** * Finalizes a CoGroup transformation by applying a {@link CoGroupFunction} to groups of elements with identical keys.
* Each CoGroupFunction call returns an arbitrary number of keys. * * @param function The CoGroupFunction that is called for all groups of elements with identical keys. * @return An CoGroupOperator that represents the co-grouped result DataSet. * * @see CoGroupFunction * @see DataSet */ public CoGroupOperator with(CoGroupFunction function) { TypeInformation returnType = TypeExtractor.getCoGroupReturnTypes(function, input1.getType(), input2.getType()); return new CoGroupOperator(input1, input2, keys1, keys2, function, returnType); } } } } }




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy