// eu.stratosphere.api.java.operators.CoGroupOperator Maven / Gradle / Ivy
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import java.security.InvalidParameterException;
import eu.stratosphere.api.common.InvalidProgramException;
import eu.stratosphere.api.common.functions.GenericCoGrouper;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.CoGroupOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.CoGroupFunction;
import eu.stratosphere.api.java.functions.KeySelector;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingCoGroupOperator;
import eu.stratosphere.api.java.operators.translation.TupleKeyExtractingMapper;
import eu.stratosphere.api.java.tuple.Tuple;
import eu.stratosphere.api.java.tuple.Tuple2;
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;
/**
* A {@link DataSet} that is the result of a CoGroup transformation.
*
* @param <I1> The type of the first input DataSet of the CoGroup transformation.
* @param <I2> The type of the second input DataSet of the CoGroup transformation.
* @param <OUT> The type of the result of the CoGroup transformation.
*
* @see DataSet
*/
public class CoGroupOperator extends TwoInputUdfOperator> {
private final CoGroupFunction function;
private final Keys keys1;
private final Keys keys2;
/**
 * Creates a CoGroup operator over two input DataSets.
 *
 * @param input1 The first input DataSet; handed to the superclass constructor.
 * @param input2 The second input DataSet; handed to the superclass constructor.
 * @param keys1 The grouping keys of the first input. Must not be null.
 * @param keys2 The grouping keys of the second input. Must not be null.
 * @param function The CoGroupFunction applied to the co-grouped elements. Must not be null.
 * @param returnType The type information of the result; handed to the superclass constructor.
 *
 * @throws NullPointerException If {@code function}, {@code keys1} or {@code keys2} is null.
 */
protected CoGroupOperator(DataSet input1, DataSet input2,
        Keys keys1, Keys keys2,
        CoGroupFunction function,
        TypeInformation returnType)
{
    super(input1, input2, returnType);

    // Check the function explicitly: previously a null function only surfaced as a
    // message-less NullPointerException from function.getClass() at the end of the constructor.
    if (function == null) {
        throw new NullPointerException("The CoGroup function must not be null.");
    }
    if (keys1 == null) {
        throw new NullPointerException("The grouping keys of the first input must not be null.");
    }
    if (keys2 == null) {
        throw new NullPointerException("The grouping keys of the second input must not be null.");
    }

    this.function = function;
    this.keys1 = keys1;
    this.keys2 = keys2;

    // Derives semantic annotations from the class of the user-defined function.
    extractSemanticAnnotationsFromUdf(function.getClass());
}
/**
 * Returns the grouping keys of the first input.
 *
 * @return The keys stored for the first input of this CoGroup.
 */
protected Keys getKeys1() {
    return keys1;
}
/**
 * Returns the grouping keys of the second input.
 *
 * @return The keys stored for the second input of this CoGroup.
 */
protected Keys getKeys2() {
    return keys2;
}
/**
 * Translates this CoGroup into a common-API CoGroup operator.
 * The translation strategy is chosen by the kinds of the two key sets; every branch additionally
 * requires {@code keys1.areCompatibale(keys2)}:
 * - selector function / selector function: {@code translateSelectorFunctionCoGroup}
 * - field positions / field positions: a {@code CoGroupOperatorBase} built directly on both inputs
 * - field positions / selector function: {@code translateSelectorFunctionCoGroupRight}
 * - selector function / field positions: {@code translateSelectorFunctionCoGroupLeft}
 * In every branch the degree of parallelism of the result is set to {@code getParallelism()}.
 *
 * NOTE(review): the return type and the CoGroupOperatorBase declaration below contain stray
 * ">" tokens, which looks like generic type arguments were stripped from this copy of the file.
 * TODO restore them from the upstream source; the code is intentionally left untouched here.
 *
 * @param input1 The already translated operator producing the first input.
 * @param input2 The already translated operator producing the second input.
 * @return The common-API operator representing this CoGroup.
 *
 * @throws UnsupportedOperationException If the key kinds match none of the four combinations
 *         or the keys are not compatible.
 */
@Override
protected eu.stratosphere.api.common.operators.base.CoGroupOperatorBase, ?, OUT, ?> translateToDataFlow(Operator input1, Operator input2) {
// fall back to the class name of the user function if no name was set
String name = getName() != null ? getName() : function.getClass().getName();
// case 1: both inputs are keyed by selector functions
if (keys1 instanceof Keys.SelectorFunctionKeys
&& keys2 instanceof Keys.SelectorFunctionKeys
&& keys1.areCompatibale(keys2)) {
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) keys1;
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) keys2;
PlanUnwrappingCoGroupOperator po =
translateSelectorFunctionCoGroup(selectorKeys1, selectorKeys2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 2: both inputs are keyed by tuple field positions, no key-extracting mappers are needed
else if (keys1 instanceof Keys.FieldPositionKeys
&& keys2 instanceof Keys.FieldPositionKeys
&& keys1.areCompatibale(keys2)
) {
int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
CoGroupOperatorBase> po =
new CoGroupOperatorBase>(
function, new BinaryOperatorInformation(getInput1Type(), getInput2Type(), getResultType()),
logicalKeyPositions1, logicalKeyPositions2, name);
// set inputs
po.setFirstInput(input1);
po.setSecondInput(input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 3: first input keyed by field positions, second input keyed by a selector function
else if (keys1 instanceof Keys.FieldPositionKeys
&& keys2 instanceof Keys.SelectorFunctionKeys
&& keys1.areCompatibale(keys2)
) {
int[] logicalKeyPositions1 = keys1.computeLogicalKeyPositions();
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) keys2;
PlanUnwrappingCoGroupOperator po =
translateSelectorFunctionCoGroupRight(logicalKeyPositions1, selectorKeys2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 4: first input keyed by a selector function, second input keyed by field positions
else if (keys1 instanceof Keys.SelectorFunctionKeys
&& keys2 instanceof Keys.FieldPositionKeys
&& keys1.areCompatibale(keys2)
) {
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) keys1;
int[] logicalKeyPositions2 = keys2.computeLogicalKeyPositions();
PlanUnwrappingCoGroupOperator po =
translateSelectorFunctionCoGroupLeft(selectorKeys1, logicalKeyPositions2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// any other key kind, or keys that are not compatible with each other
else {
throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
}
}
/**
 * Builds the data flow for a CoGroup in which both inputs are keyed by selector functions.
 * Each input is routed through a map operator ("Key Extractor 1" / "Key Extractor 2") running a
 * KeyExtractingMapper created from the respective key extractor; the mapper output type is a
 * TupleTypeInfo composed of the key type and the input type. The PlanUnwrappingCoGroupOperator
 * is then connected to the two map operators.
 *
 * NOTE(review): several declarations below contain stray ">" tokens, which looks like generic
 * type arguments (including this method's own type parameters) were stripped from this copy of
 * the file. TODO restore them from the upstream source; the code is intentionally left untouched.
 *
 * @param rawKeys1 The selector function keys of the first input.
 * @param rawKeys2 The selector function keys of the second input.
 * @param function The user-defined CoGroupFunction.
 * @param inputType1 The type information of the first input.
 * @param inputType2 The type information of the second input.
 * @param outputType The type information of the CoGroup result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The operator producing the first input.
 * @param input2 The operator producing the second input.
 * @return The CoGroup operator, wired to the two key-extracting map operators.
 */
private static PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroup(
Keys.SelectorFunctionKeys rawKeys1, Keys.SelectorFunctionKeys rawKeys2,
CoGroupFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;
// type information of the mapper outputs: key type combined with the original input type
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1);
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);
final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, keys1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input -> key mapper -> cogroup, for both sides
cogroup.setFirstInput(keyMapper1);
cogroup.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each key mapper takes over the degree of parallelism of the input it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return cogroup;
}
/**
 * Builds the data flow for a CoGroup in which the first input is keyed by tuple field positions
 * and the second (right) input is keyed by a selector function.
 * The first input is routed through a TupleKeyExtractingMapper, the second through a
 * KeyExtractingMapper; both map operators feed the PlanUnwrappingCoGroupOperator.
 *
 * NOTE(review): several declarations below contain stray ">" tokens, which looks like generic
 * type arguments (including this method's own type parameters) were stripped from this copy of
 * the file. TODO restore them from the upstream source; the code is intentionally left untouched.
 *
 * @param logicalKeyPositions1 The key field positions of the first input.
 * @param rawKeys2 The selector function keys of the second input.
 * @param function The user-defined CoGroupFunction.
 * @param inputType1 The type information of the first input; must be a tuple type.
 * @param inputType2 The type information of the second input.
 * @param outputType The type information of the CoGroup result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The operator producing the first input.
 * @param input2 The operator producing the second input.
 * @return The CoGroup operator, wired to the two key-extracting map operators.
 *
 * @throws InvalidParameterException If the first input type is not a tuple type.
 */
private static PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroupRight(
int[] logicalKeyPositions1, Keys.SelectorFunctionKeys rawKeys2,
CoGroupFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
// field position keys are only meaningful on tuple inputs
if(!inputType1.isTupleType()) {
throw new InvalidParameterException("Should not happen.");
}
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;
// the key type of the second input is used for both sides
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys2.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);
// NOTE(review): only the first key position is used here; presumably the compatibility check
// guarantees a single key field when combined with a selector function - TODO confirm
final TupleKeyExtractingMapper extractor1 = new TupleKeyExtractingMapper(logicalKeyPositions1[0]);
final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, logicalKeyPositions1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input -> key mapper -> cogroup, for both sides
cogroup.setFirstInput(keyMapper1);
cogroup.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each key mapper takes over the degree of parallelism of the input it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return cogroup;
}
/**
 * Builds the data flow for a CoGroup in which the first (left) input is keyed by a selector
 * function and the second input is keyed by tuple field positions.
 * The first input is routed through a KeyExtractingMapper, the second through a
 * TupleKeyExtractingMapper; both map operators feed the PlanUnwrappingCoGroupOperator.
 *
 * NOTE(review): several declarations below contain stray ">" tokens, which looks like generic
 * type arguments (including this method's own type parameters) were stripped from this copy of
 * the file. TODO restore them from the upstream source; the code is intentionally left untouched.
 *
 * @param rawKeys1 The selector function keys of the first input.
 * @param logicalKeyPositions2 The key field positions of the second input.
 * @param function The user-defined CoGroupFunction.
 * @param inputType1 The type information of the first input.
 * @param inputType2 The type information of the second input; must be a tuple type.
 * @param outputType The type information of the CoGroup result.
 * @param name The name given to the CoGroup operator.
 * @param input1 The operator producing the first input.
 * @param input2 The operator producing the second input.
 * @return The CoGroup operator, wired to the two key-extracting map operators.
 *
 * @throws InvalidParameterException If the second input type is not a tuple type.
 */
private static PlanUnwrappingCoGroupOperator translateSelectorFunctionCoGroupLeft(
Keys.SelectorFunctionKeys rawKeys1, int[] logicalKeyPositions2,
CoGroupFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
// field position keys are only meaningful on tuple inputs
if(!inputType2.isTupleType()) {
throw new InvalidParameterException("Should not happen.");
}
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;
// the key type of the first input is used for both sides
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys1.getKeyType(), inputType2);
final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
// NOTE(review): only the first key position is used here; presumably the compatibility check
// guarantees a single key field when combined with a selector function - TODO confirm
final TupleKeyExtractingMapper extractor2 = new TupleKeyExtractingMapper(logicalKeyPositions2[0]);
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingCoGroupOperator cogroup = new PlanUnwrappingCoGroupOperator(function, keys1, logicalKeyPositions2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input -> key mapper -> cogroup, for both sides
cogroup.setFirstInput(keyMapper1);
cogroup.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each key mapper takes over the degree of parallelism of the input it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return cogroup;
}
// --------------------------------------------------------------------------------------------
// Builder classes for incremental construction
// --------------------------------------------------------------------------------------------
/**
* Intermediate step of a CoGroup transformation.
* To continue the CoGroup transformation, select the grouping key of the first input {@link DataSet} by calling
* {@link CoGroupOperatorSets#where(int...)} or {@link CoGroupOperatorSets#where(KeySelector)}.
*
* @param <I1> The type of the first input DataSet of the CoGroup transformation.
* @param <I2> The type of the second input DataSet of the CoGroup transformation.
*/
public static final class CoGroupOperatorSets {
private final DataSet input1;
private final DataSet input2;
/**
 * Starts a CoGroup transformation on two input DataSets.
 *
 * @param input1 The first input DataSet. Must not be null.
 * @param input2 The second input DataSet. Must not be null.
 *
 * @throws NullPointerException If {@code input1} or {@code input2} is null.
 */
public CoGroupOperatorSets(DataSet input1, DataSet input2) {
    // Separate checks so the exception message names the offending argument.
    if (input1 == null) {
        throw new NullPointerException("The first input DataSet must not be null.");
    }
    if (input2 == null) {
        throw new NullPointerException("The second input DataSet must not be null.");
    }

    this.input1 = input1;
    this.input2 = input2;
}
/**
 * Continues a CoGroup transformation by selecting the {@link Tuple} fields of the first
 * co-grouped {@link DataSet} that serve as grouping keys.
 * Note: Fields can only be selected as grouping keys on Tuple DataSets.
 *
 * @param fields The indexes of the Tuple fields of the first co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation.
 *         Call {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or
 *         {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)} to continue the CoGroup.
 *
 * @see Tuple
 * @see DataSet
 */
public CoGroupOperatorSetsPredicate where(int... fields) {
    // field position keys are built against the type of the first input
    final Keys firstInputKeys = new Keys.FieldPositionKeys(fields, input1.getType());
    return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
/**
 * Continues a CoGroup transformation by defining a {@link KeySelector} function for the first
 * co-grouped {@link DataSet}.
 * The KeySelector function is called for each element of the first DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the first DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *         Call {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or
 *         {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)} to continue the CoGroup.
 *
 * @see KeySelector
 * @see DataSet
 */
public CoGroupOperatorSetsPredicate where(KeySelector keyExtractor) {
    // selector function keys are built against the type of the first input
    final Keys firstInputKeys = new Keys.SelectorFunctionKeys(keyExtractor, input1.getType());
    return new CoGroupOperatorSetsPredicate(firstInputKeys);
}
// ----------------------------------------------------------------------------------------
/**
* Intermediate step of a CoGroup transformation.
* To continue the CoGroup transformation, select the grouping key of the second input {@link DataSet} by calling
* {@link CoGroupOperatorSetsPredicate#equalTo(int...)} or {@link CoGroupOperatorSetsPredicate#equalTo(KeySelector)}.
*
*/
public final class CoGroupOperatorSetsPredicate {
private final Keys keys1;
/**
 * Creates the intermediate CoGroup step that holds the grouping keys of the first input.
 *
 * @param keys1 The grouping keys of the first input. Must be neither null nor empty.
 *
 * @throws NullPointerException If {@code keys1} is null.
 * @throws InvalidProgramException If {@code keys1} is empty.
 */
private CoGroupOperatorSetsPredicate(Keys keys1) {
    if (keys1 == null) {
        throw new NullPointerException("The grouping keys of the first input must not be null.");
    }
    // This is a CoGroup, not a join: the message previously said "join keys".
    if (keys1.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }

    this.keys1 = keys1;
}
/**
 * Continues a CoGroup transformation by selecting the {@link Tuple} fields of the second
 * co-grouped {@link DataSet} that serve as grouping keys.
 * Note: Fields can only be selected as grouping keys on Tuple DataSets.
 *
 * @param fields The indexes of the Tuple fields of the second co-grouped DataSet that should be used as keys.
 * @return An incomplete CoGroup transformation.
 *         Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)} to finalize the CoGroup transformation.
 */
public CoGroupOperatorWithoutFunction equalTo(int... fields) {
    // field position keys are built against the type of the second input
    final Keys secondInputKeys = new Keys.FieldPositionKeys(fields, input2.getType());
    return createCoGroupOperator(secondInputKeys);
}
/**
 * Continues a CoGroup transformation by defining a {@link KeySelector} function for the second
 * co-grouped {@link DataSet}.
 * The KeySelector function is called for each element of the second DataSet and extracts a single
 * key value on which the DataSet is grouped.
 *
 * @param keyExtractor The KeySelector function which extracts the key values from the second DataSet on which it is grouped.
 * @return An incomplete CoGroup transformation.
 *         Call {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)} to finalize the CoGroup transformation.
 */
public CoGroupOperatorWithoutFunction equalTo(KeySelector keyExtractor) {
    // selector function keys are built against the type of the second input
    final Keys secondInputKeys = new Keys.SelectorFunctionKeys(keyExtractor, input2.getType());
    return createCoGroupOperator(secondInputKeys);
}
/**
 * Validates the grouping keys of the second input against those of the first input and
 * creates the next intermediate step of the CoGroup transformation.
 *
 * @param keys2 The grouping keys of the second input. Must be neither null nor empty.
 * @return An incomplete CoGroup transformation. To continue, provide a {@link CoGroupFunction}
 *         by calling {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)}.
 *
 * @throws NullPointerException If {@code keys2} is null.
 * @throws InvalidProgramException If {@code keys2} is empty or not compatible with the keys
 *         of the first input.
 */
private CoGroupOperatorWithoutFunction createCoGroupOperator(Keys keys2) {
    if (keys2 == null) {
        throw new NullPointerException("The grouping keys of the second input must not be null.");
    }
    // This is a CoGroup, not a join: the messages previously said "join keys".
    if (keys2.isEmpty()) {
        throw new InvalidProgramException("The co-group keys must not be empty.");
    }
    if (!keys1.areCompatibale(keys2)) {
        throw new InvalidProgramException("The pair of co-group keys are not compatible with each other.");
    }

    return new CoGroupOperatorWithoutFunction(keys2);
}
/**
 * Intermediate step of a CoGroup transformation in which the grouping keys of both inputs are defined.
 * To continue the CoGroup transformation, provide a {@link CoGroupFunction} by calling
 * {@link CoGroupOperatorWithoutFunction#with(CoGroupFunction)}.
 */
public final class CoGroupOperatorWithoutFunction {

    private final Keys keys2;

    /**
     * Creates the intermediate step that holds the grouping keys of the second input.
     *
     * @param keys2 The grouping keys of the second input. Must be neither null nor empty.
     *
     * @throws NullPointerException If {@code keys2} is null.
     * @throws InvalidProgramException If {@code keys2} is empty.
     */
    private CoGroupOperatorWithoutFunction(Keys keys2) {
        if (keys2 == null) {
            throw new NullPointerException("The grouping keys of the second input must not be null.");
        }
        // This is a CoGroup, not a join: the message previously said "join keys".
        if (keys2.isEmpty()) {
            throw new InvalidProgramException("The co-group keys must not be empty.");
        }

        this.keys2 = keys2;
    }

    /**
     * Finalizes a CoGroup transformation by applying a {@link CoGroupFunction} to groups of elements with identical keys.
     * Each CoGroupFunction call returns an arbitrary number of elements.
     *
     * @param function The CoGroupFunction that is called for all groups of elements with identical keys.
     * @return A CoGroupOperator that represents the co-grouped result DataSet.
     *
     * @throws NullPointerException If {@code function} is null.
     *
     * @see CoGroupFunction
     * @see DataSet
     */
    public CoGroupOperator with(CoGroupFunction function) {
        // Reject a null function up front with a clear message, before it reaches type extraction.
        if (function == null) {
            throw new NullPointerException("The CoGroup function must not be null.");
        }

        // the result type is derived from the function and the types of both inputs
        TypeInformation returnType = TypeExtractor.getCoGroupReturnTypes(function, input1.getType(), input2.getType());
        return new CoGroupOperator(input1, input2, keys1, keys2, function, returnType);
    }
}
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy