Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.operators;
import java.security.InvalidParameterException;
import java.util.Arrays;
import eu.stratosphere.api.common.InvalidProgramException;
import eu.stratosphere.api.common.functions.GenericJoiner;
import eu.stratosphere.api.common.functions.GenericMap;
import eu.stratosphere.api.common.operators.BinaryOperatorInformation;
import eu.stratosphere.api.common.operators.Operator;
import eu.stratosphere.api.common.operators.UnaryOperatorInformation;
import eu.stratosphere.api.common.operators.base.JoinOperatorBase;
import eu.stratosphere.api.common.operators.base.MapOperatorBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.DeltaIteration.SolutionSetPlaceHolder;
import eu.stratosphere.api.java.functions.JoinFunction;
import eu.stratosphere.api.java.functions.KeySelector;
import eu.stratosphere.api.java.operators.Keys.FieldPositionKeys;
import eu.stratosphere.api.java.operators.translation.KeyExtractingMapper;
import eu.stratosphere.api.java.operators.translation.PlanUnwrappingJoinOperator;
import eu.stratosphere.api.java.operators.translation.TupleKeyExtractingMapper;
//CHECKSTYLE.OFF: AvoidStarImport - Needed for TupleGenerator
import eu.stratosphere.api.java.tuple.*;
//CHECKSTYLE.ON: AvoidStarImport
import eu.stratosphere.api.java.typeutils.TupleTypeInfo;
import eu.stratosphere.api.java.typeutils.TypeExtractor;
import eu.stratosphere.types.TypeInformation;
/**
* A {@link DataSet} that is the result of a Join transformation.
*
* @param The type of the first input DataSet of the Join transformation.
* @param The type of the second input DataSet of the Join transformation.
* @param The type of the result of the Join transformation.
*
* @see DataSet
*/
public abstract class JoinOperator extends TwoInputUdfOperator> {
/**
 * An enumeration of hints, optionally usable to tell the system exactly how to execute the join.
 *
 * The declaration order of the constants is part of the contract (ordinals / {@code values()} order)
 * and must not be changed.
 */
public enum JoinHint {

    /**
     * Leaves the choice of how to perform the join to the optimizer. If in doubt, the
     * optimizer will choose a repartitioning join.
     */
    OPTIMIZER_CHOOSES,

    /**
     * Hint that the first join input is much smaller than the second. This results in
     * broadcasting and hashing the first input, unless the optimizer infers that
     * prior existing partitioning is available that is even cheaper to exploit.
     */
    BROADCAST_HASH_FIRST,

    /**
     * Hint that the second join input is much smaller than the first. This results in
     * broadcasting and hashing the second input, unless the optimizer infers that
     * prior existing partitioning is available that is even cheaper to exploit.
     */
    BROADCAST_HASH_SECOND,

    /**
     * Hint that the first join input is a bit smaller than the second. This results in
     * repartitioning both inputs and hashing the first input, unless the optimizer infers that
     * prior existing partitioning and orders are available that are even cheaper to exploit.
     */
    REPARTITION_HASH_FIRST,

    /**
     * Hint that the second join input is a bit smaller than the first. This results in
     * repartitioning both inputs and hashing the second input, unless the optimizer infers that
     * prior existing partitioning and orders are available that are even cheaper to exploit.
     */
    REPARTITION_HASH_SECOND,

    /**
     * Hint that the join should repartition both inputs and use sorting and merging
     * as the join strategy.
     */
    REPARTITION_SORT_MERGE
}
/** Key definition of the first join input; checked to be non-null in the constructor. */
private final Keys keys1;

/** Key definition of the second join input; checked to be non-null in the constructor. */
private final Keys keys2;

/** Hint describing how the join should be executed; stored as given, without a null check. */
private JoinHint joinHint;

/**
 * Creates the join operator over two inputs and remembers the key definitions and the join hint.
 *
 * @param input1 The first join input, forwarded to the super constructor.
 * @param input2 The second join input, forwarded to the super constructor.
 * @param keys1 The key definition of the first input.
 * @param keys2 The key definition of the second input.
 * @param returnType The type information of the join result, forwarded to the super constructor.
 * @param hint The join hint to store.
 * @throws NullPointerException If {@code keys1} or {@code keys2} is {@code null}.
 */
protected JoinOperator(DataSet input1, DataSet input2,
        Keys keys1, Keys keys2,
        TypeInformation returnType, JoinHint hint) {
    super(input1, input2, returnType);

    // Reject missing key definitions one at a time; both cases raise the same message-less exception.
    if (keys1 == null) {
        throw new NullPointerException();
    }
    if (keys2 == null) {
        throw new NullPointerException();
    }

    this.joinHint = hint;
    this.keys1 = keys1;
    this.keys2 = keys2;
}
/**
 * Gives access to the key definition of the first join input.
 *
 * @return The keys that were passed to the constructor for the first input.
 */
protected Keys getKeys1() {
    return keys1;
}
/**
 * Gives access to the key definition of the second join input.
 *
 * @return The keys that were passed to the constructor for the second input.
 */
protected Keys getKeys2() {
    return keys2;
}
/**
 * Gives access to the join hint of this operator.
 *
 * @return The hint that was passed to the constructor, exactly as given.
 */
protected JoinHint getJoinHint() {
    return joinHint;
}
// --------------------------------------------------------------------------------------------
// special join types
// --------------------------------------------------------------------------------------------
/**
* A Join transformation that applies a {@link JoinFunction} on each pair of joining elements.
* It also represents the {@link DataSet} that is the result of a Join transformation.
*
* NOTE(review): generic type arguments appear to be missing throughout this listing (see the
* nameless param tags below and several declarations in the body) - confirm against the
* original file before compiling.
*
* @param The type of the first input DataSet of the Join transformation.
* @param The type of the second input DataSet of the Join transformation.
* @param The type of the result of the Join transformation.
*
* @see JoinFunction
* @see DataSet
*/
public static class EquiJoin extends JoinOperator {
// The user function applied to each pair of joining elements; checked to be non-null in the constructor.
private final JoinFunction function;
// preserve1 / preserve2 are not read anywhere in the visible code; they are only assigned by the
// commented-out leftOuter() / rightOuter() / fullOuter() methods below.
@SuppressWarnings("unused")
private boolean preserve1;
@SuppressWarnings("unused")
private boolean preserve2;
/**
* Creates an equi-join over the two inputs with the given keys, join function, result type and hint.
* After storing the function, its class is passed to extractSemanticAnnotationsFromUdf.
*
* @param input1 The first join input.
* @param input2 The second join input.
* @param keys1 The key definition of the first input.
* @param keys2 The key definition of the second input.
* @param function The join function applied to each pair of joining elements.
* @param returnType The type information of the join result.
* @param hint The join hint, forwarded to the super constructor.
* @throws NullPointerException If function is null (the super constructor throws the same for null keys).
*/
protected EquiJoin(DataSet input1, DataSet input2,
Keys keys1, Keys keys2, JoinFunction function,
TypeInformation returnType, JoinHint hint)
{
super(input1, input2, keys1, keys2, returnType, hint);
if (function == null) {
throw new NullPointerException();
}
this.function = function;
extractSemanticAnnotationsFromUdf(function.getClass());
}
// TODO
// public EquiJoin leftOuter() {
// this.preserve1 = true;
// return this;
// }
// TODO
// public EquiJoin rightOuter() {
// this.preserve2 = true;
// return this;
// }
// TODO
// public EquiJoin fullOuter() {
// this.preserve1 = true;
// this.preserve2 = true;
// return this;
// }
/**
* Translates this join into a common-API join operator wired to the two given input operators.
*
* The translation depends on the combination of key kinds:
* selector/selector, field-positions/field-positions, field-positions/selector and
* selector/field-positions. Every branch sets the degree of parallelism of the returned operator
* to this operator's parallelism.
*
* NOTE(review): the join hint (getJoinHint()) is not referenced in this method - confirm whether it
* is applied elsewhere.
*
* @param input1 The translated operator producing the first input.
* @param input2 The translated operator producing the second input.
* @return The join operator representing this transformation.
* @throws InvalidProgramException If both keys are field position keys but are not compatible.
* @throws UnsupportedOperationException If the key combination matches none of the handled cases.
*/
@Override
protected eu.stratosphere.api.common.operators.base.JoinOperatorBase, ?, OUT, ?> translateToDataFlow(Operator input1, Operator input2) {
// fall back to the function's class name when no explicit operator name was set
String name = getName() != null ? getName() : function.getClass().getName();
// case 1: both inputs are keyed by selector functions
if (super.keys1 instanceof Keys.SelectorFunctionKeys
&& super.keys2 instanceof Keys.SelectorFunctionKeys
&& super.keys1.areCompatibale(super.keys2)) {
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) super.keys1;
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) super.keys2;
PlanUnwrappingJoinOperator po =
translateSelectorFunctionJoin(selectorKeys1, selectorKeys2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 2: both inputs are keyed by tuple field positions; no key-extracting mappers are needed
else if (super.keys1 instanceof Keys.FieldPositionKeys
&& super.keys2 instanceof Keys.FieldPositionKeys)
{
// unlike the other cases, incompatible keys are reported here with a dedicated exception
if (!super.keys1.areCompatibale(super.keys2)) {
throw new InvalidProgramException("The types of the key fields do not match.");
}
int[] logicalKeyPositions1 = super.keys1.computeLogicalKeyPositions();
int[] logicalKeyPositions2 = super.keys2.computeLogicalKeyPositions();
JoinOperatorBase> po =
new JoinOperatorBase>(function,
new BinaryOperatorInformation(getInput1Type(), getInput2Type(), getResultType()),
logicalKeyPositions1, logicalKeyPositions2,
name);
// set inputs
po.setFirstInput(input1);
po.setSecondInput(input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 3: first input keyed by field positions, second input keyed by a selector function
else if (super.keys1 instanceof Keys.FieldPositionKeys
&& super.keys2 instanceof Keys.SelectorFunctionKeys
&& super.keys1.areCompatibale(super.keys2)
) {
int[] logicalKeyPositions1 = super.keys1.computeLogicalKeyPositions();
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys2 = (Keys.SelectorFunctionKeys) super.keys2;
PlanUnwrappingJoinOperator po =
translateSelectorFunctionJoinRight(logicalKeyPositions1, selectorKeys2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// case 4: first input keyed by a selector function, second input keyed by field positions
else if (super.keys1 instanceof Keys.SelectorFunctionKeys
&& super.keys2 instanceof Keys.FieldPositionKeys
&& super.keys1.areCompatibale(super.keys2)
) {
@SuppressWarnings("unchecked")
Keys.SelectorFunctionKeys selectorKeys1 = (Keys.SelectorFunctionKeys) super.keys1;
int[] logicalKeyPositions2 = super.keys2.computeLogicalKeyPositions();
PlanUnwrappingJoinOperator po =
translateSelectorFunctionJoinLeft(selectorKeys1, logicalKeyPositions2, function,
getInput1Type(), getInput2Type(), getResultType(), name, input1, input2);
// set dop
po.setDegreeOfParallelism(this.getParallelism());
return po;
}
// anything else, including selector-based combinations whose keys are not compatible
else {
throw new UnsupportedOperationException("Unrecognized or incompatible key types.");
}
}
/**
* Builds the data flow for a join in which both inputs are keyed by selector functions.
*
* Each input is routed through a map operator ("Key Extractor 1" / "Key Extractor 2") that wraps a
* KeyExtractingMapper created from the respective key extractor. The declared output type of each
* mapper is a TupleTypeInfo built from the key type and the input type. The returned
* PlanUnwrappingJoinOperator consumes the two mappers.
*
* @param rawKeys1 The selector function keys of the first input.
* @param rawKeys2 The selector function keys of the second input.
* @param function The join function.
* @param inputType1 The type information of the first input.
* @param inputType2 The type information of the second input.
* @param outputType The type information of the join result.
* @param name The name given to the join operator.
* @param input1 The operator producing the first input.
* @param input2 The operator producing the second input.
* @return The join operator with both key-extracting mappers attached as inputs.
*/
private static PlanUnwrappingJoinOperator translateSelectorFunctionJoin(
Keys.SelectorFunctionKeys rawKeys1, Keys.SelectorFunctionKeys rawKeys2,
JoinFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1);
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);
final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingJoinOperator join = new PlanUnwrappingJoinOperator(function, keys1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input1 -> keyMapper1 -> join <- keyMapper2 <- input2
join.setFirstInput(keyMapper1);
join.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each mapper takes the degree of parallelism of the operator it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return join;
}
/**
* Builds the data flow for a join in which the first input is keyed by tuple field positions and the
* second input is keyed by a selector function.
*
* The first input is routed through a TupleKeyExtractingMapper and the second through a
* KeyExtractingMapper. Both mappers declare a TupleTypeInfo built from the key type of the second
* input's keys and the respective input type.
*
* NOTE(review): only logicalKeyPositions1[0] is passed to the tuple key extractor - confirm that
* a single key field is the intended limit for this mixed case.
*
* @param logicalKeyPositions1 The logical key positions of the first input.
* @param rawKeys2 The selector function keys of the second input.
* @param function The join function.
* @param inputType1 The type information of the first input; must be a tuple type.
* @param inputType2 The type information of the second input.
* @param outputType The type information of the join result.
* @param name The name given to the join operator.
* @param input1 The operator producing the first input.
* @param input2 The operator producing the second input.
* @return The join operator with both key-extracting mappers attached as inputs.
* @throws InvalidParameterException If inputType1 is not a tuple type.
*/
private static PlanUnwrappingJoinOperator translateSelectorFunctionJoinRight(
int[] logicalKeyPositions1, Keys.SelectorFunctionKeys rawKeys2,
JoinFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
if(!inputType1.isTupleType()) {
throw new InvalidParameterException("Should not happen.");
}
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys2 = (Keys.SelectorFunctionKeys) rawKeys2;
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys2.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys2.getKeyType(), inputType2);
final TupleKeyExtractingMapper extractor1 = new TupleKeyExtractingMapper(logicalKeyPositions1[0]);
final KeyExtractingMapper extractor2 = new KeyExtractingMapper(keys2.getKeyExtractor());
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingJoinOperator join = new PlanUnwrappingJoinOperator(function, logicalKeyPositions1, keys2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input1 -> keyMapper1 -> join <- keyMapper2 <- input2
join.setFirstInput(keyMapper1);
join.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each mapper takes the degree of parallelism of the operator it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return join;
}
/**
* Builds the data flow for a join in which the first input is keyed by a selector function and the
* second input is keyed by tuple field positions.
*
* The first input is routed through a KeyExtractingMapper and the second through a
* TupleKeyExtractingMapper. Both mappers declare a TupleTypeInfo built from the key type of the first
* input's keys and the respective input type.
*
* NOTE(review): only logicalKeyPositions2[0] is passed to the tuple key extractor - confirm that
* a single key field is the intended limit for this mixed case.
*
* @param rawKeys1 The selector function keys of the first input.
* @param logicalKeyPositions2 The logical key positions of the second input.
* @param function The join function.
* @param inputType1 The type information of the first input.
* @param inputType2 The type information of the second input; must be a tuple type.
* @param outputType The type information of the join result.
* @param name The name given to the join operator.
* @param input1 The operator producing the first input.
* @param input2 The operator producing the second input.
* @return The join operator with both key-extracting mappers attached as inputs.
* @throws InvalidParameterException If inputType2 is not a tuple type.
*/
private static PlanUnwrappingJoinOperator translateSelectorFunctionJoinLeft(
Keys.SelectorFunctionKeys rawKeys1, int[] logicalKeyPositions2,
JoinFunction function,
TypeInformation inputType1, TypeInformation inputType2, TypeInformation outputType, String name,
Operator input1, Operator input2)
{
if(!inputType2.isTupleType()) {
throw new InvalidParameterException("Should not happen.");
}
@SuppressWarnings("unchecked")
final Keys.SelectorFunctionKeys keys1 = (Keys.SelectorFunctionKeys) rawKeys1;
final TypeInformation> typeInfoWithKey1 = new TupleTypeInfo>(keys1.getKeyType(), inputType1); // assume same key, checked by Key.areCompatibale() before
final TypeInformation> typeInfoWithKey2 = new TupleTypeInfo>(keys1.getKeyType(), inputType2);
final KeyExtractingMapper extractor1 = new KeyExtractingMapper(keys1.getKeyExtractor());
final TupleKeyExtractingMapper extractor2 = new TupleKeyExtractingMapper(logicalKeyPositions2[0]);
final MapOperatorBase, GenericMap>> keyMapper1 =
new MapOperatorBase, GenericMap>>(extractor1, new UnaryOperatorInformation>(inputType1, typeInfoWithKey1), "Key Extractor 1");
final MapOperatorBase, GenericMap>> keyMapper2 =
new MapOperatorBase, GenericMap>>(extractor2, new UnaryOperatorInformation>(inputType2, typeInfoWithKey2), "Key Extractor 2");
final PlanUnwrappingJoinOperator join = new PlanUnwrappingJoinOperator(function, keys1, logicalKeyPositions2, name, outputType, typeInfoWithKey1, typeInfoWithKey2);
// wiring: input1 -> keyMapper1 -> join <- keyMapper2 <- input2
join.setFirstInput(keyMapper1);
join.setSecondInput(keyMapper2);
keyMapper1.setInput(input1);
keyMapper2.setInput(input2);
// set dop
// each mapper takes the degree of parallelism of the operator it reads from
keyMapper1.setDegreeOfParallelism(input1.getDegreeOfParallelism());
keyMapper2.setDegreeOfParallelism(input2.getDegreeOfParallelism());
return join;
}
}
/**
* A Join transformation that wraps pairs of joining elements into {@link Tuple2}.
* It also represents the {@link DataSet} that is the result of a Join transformation.
*
* NOTE(review): generic type arguments appear to be missing in this listing (see the nameless
* param tags below and the class header) - confirm against the original file before compiling.
*
* @param The type of the first input DataSet of the Join transformation.
* @param The type of the second input DataSet of the Join transformation.
*
* @see Tuple2
* @see DataSet
*/
public static final class DefaultJoin extends EquiJoin> {
/**
* Creates a join that uses a {@link DefaultJoinFunction} as its join function and a tuple type built
* from the types of the two inputs as its result type.
*
* @param input1 The first join input.
* @param input2 The second join input.
* @param keys1 The key definition of the first input.
* @param keys2 The key definition of the second input.
* @param hint The join hint, forwarded to the super constructor.
*/
protected DefaultJoin(DataSet input1, DataSet input2,
Keys keys1, Keys keys2, JoinHint hint)
{
super(input1, input2, keys1, keys2,
(JoinFunction>) new DefaultJoinFunction(),
new TupleTypeInfo>(input1.getType(), input2.getType()), hint);
}
/**
* Finalizes a Join transformation by applying a {@link JoinFunction} to each pair of joined elements.
* Each JoinFunction call returns exactly one element.
*
* @param function The JoinFunction that is called for each pair of joined elements.
* @return An EquiJoin that represents the joined result DataSet
*
* @see JoinFunction
* @see EquiJoin
* @see DataSet
*/
public EquiJoin with(JoinFunction function) {
// the result type is derived from the function and the two input types
TypeInformation returnType = TypeExtractor.getJoinReturnTypes(function, getInput1Type(), getInput2Type());
// a new EquiJoin is created from this join's inputs, keys and hint; this instance is not modified
return new EquiJoin(getInput1(), getInput2(), getKeys1(), getKeys2(), function, returnType, getJoinHint());
}
/**
* Initiates a ProjectJoin transformation and projects the first join input.
* If the first join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
* If the first join input is not a Tuple DataSet, no parameters should be passed.
*
* Fields of the first and second input can be added by chaining the method calls of
* {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
*
* @param firstFieldIndexes If the first input is a Tuple DataSet, the indexes of the selected fields.
* For a non-Tuple DataSet, do not provide parameters.
* The order of fields in the output tuple is defined by the order of field indexes.
* @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the
* Join transformation by calling {@link JoinProjection#types()}.
*
* @see Tuple
* @see DataSet
* @see JoinProjection
* @see ProjectJoin
*/
public JoinProjection projectFirst(int... firstFieldIndexes) {
// null is passed for the field indexes of the second input
return new JoinProjection(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint(), firstFieldIndexes, null);
}
/**
* Initiates a ProjectJoin transformation and projects the second join input.
* If the second join input is a {@link Tuple} {@link DataSet}, fields can be selected by their index.
* If the second join input is not a Tuple DataSet, no parameters should be passed.
*
* Fields of the first and second input can be added by chaining the method calls of
* {@link JoinProjection#projectFirst(int...)} and {@link JoinProjection#projectSecond(int...)}.
*
* @param secondFieldIndexes If the second input is a Tuple DataSet, the indexes of the selected fields.
* For a non-Tuple DataSet, do not provide parameters.
* The order of fields in the output tuple is defined by the order of field indexes.
* @return A JoinProjection that needs to be converted into a {@link ProjectJoin} to complete the
* Join transformation by calling {@link JoinProjection#types()}.
*
* @see Tuple
* @see DataSet
* @see JoinProjection
* @see ProjectJoin
*/
public JoinProjection projectSecond(int... secondFieldIndexes) {
// null is passed for the field indexes of the first input
return new JoinProjection(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint(), null, secondFieldIndexes);
}
// public JoinOperator leftSemiJoin() {
// return new LeftSemiJoin(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint());
// }
// public JoinOperator rightSemiJoin() {
// return new RightSemiJoin(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint());
// }
// public JoinOperator leftAntiJoin() {
// return new LeftAntiJoin(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint());
// }
// public JoinOperator rightAntiJoin() {
// return new RightAntiJoin(getInput1(), getInput2(), getKeys1(), getKeys2(), getJoinHint());
// }
}
/**
* A Join transformation that projects joining elements or fields of joining {@link Tuple Tuples}
* into result {@link Tuple Tuples}.
* It also represents the {@link DataSet} that is the result of a Join transformation.
*
* NOTE(review): generic type arguments appear to be missing in this listing (see the nameless
* param tags below) - confirm against the original file before compiling.
*
* @param The type of the first input DataSet of the Join transformation.
* @param The type of the second input DataSet of the Join transformation.
* @param The type of the result of the Join transformation.
*
* @see Tuple
* @see DataSet
*/
private static final class ProjectJoin extends EquiJoin {
/**
* Creates a projecting join whose join function is a {@link ProjectJoinFunction}.
*
* The output tuple instance handed to the ProjectJoinFunction is obtained from
* returnType.createSerializer().createInstance().
*
* @param input1 The first join input.
* @param input2 The second join input.
* @param keys1 The key definition of the first input.
* @param keys2 The key definition of the second input.
* @param hint The join hint, forwarded to the super constructor.
* @param fields The field indexes passed on to the ProjectJoinFunction.
* @param isFromFirst The per-field input flags passed on to the ProjectJoinFunction.
* @param returnType The tuple type information of the join result.
*/
protected ProjectJoin(DataSet input1, DataSet input2, Keys keys1, Keys keys2, JoinHint hint, int[] fields, boolean[] isFromFirst, TupleTypeInfo returnType) {
super(input1, input2, keys1, keys2,
new ProjectJoinFunction(fields, isFromFirst, returnType.createSerializer().createInstance()),
returnType, hint);
}
}
// @SuppressWarnings("unused")
// private static final class LeftAntiJoin extends JoinOperator {
//
// protected LeftAntiJoin(DataSet input1, DataSet input2, Keys keys1, Keys keys2, JoinHint hint) {
// super(input1, input2, keys1, keys2, input1.getType(), hint);
// }
//
// @Override
// protected Operator translateToDataFlow(Operator input1, Operator input2) {
// throw new UnsupportedOperationException("LeftAntiJoin operator currently not supported.");
// }
// }
// @SuppressWarnings("unused")
// private static final class RightAntiJoin extends JoinOperator {
//
// protected RightAntiJoin(DataSet input1, DataSet input2, Keys keys1, Keys keys2, JoinHint hint) {
// super(input1, input2, keys1, keys2, input2.getType(), hint);
// }
//
// @Override
// protected Operator translateToDataFlow(Operator input1, Operator input2) {
// throw new UnsupportedOperationException("RightAntiJoin operator currently not supported.");
// }
// }
// @SuppressWarnings("unused")
// private static final class LeftSemiJoin extends EquiJoin {
//
// protected LeftSemiJoin(DataSet input1, DataSet input2, Keys keys1, Keys keys2, JoinHint hint) {
// super(input1, input2, keys1, keys2, new LeftSemiJoinFunction(), input1.getType(), hint);
// }
//
// @Override
// protected Operator translateToDataFlow(Operator input1, Operator input2) {
// // TODO: Runtime support required. Each left tuple may be returned only once.
// // Special exec strategy (runtime + optimizer) based on hash join required.
// // Either no duplicates of right side in HT or left tuples removed from HT after first match.
// throw new UnsupportedOperationException("LeftSemiJoin operator currently not supported.");
// }
// }
// @SuppressWarnings("unused")
// private static final class RightSemiJoin extends EquiJoin {
//
// protected RightSemiJoin(DataSet input1, DataSet input2, Keys keys1, Keys keys2, JoinHint hint) {
// super(input1, input2, keys1, keys2, new RightSemiJoinFunction(), input2.getType(), hint);
// }
//
// @Override
// protected Operator translateToDataFlow(Operator input1, Operator input2) {
// // TODO: Runtime support required. Each right tuple may be returned only once.
// // Special exec strategy (runtime + optimizer) based on hash join required.
// // Either no duplicates of left side in HT or right tuples removed from HT after first match.
// throw new UnsupportedOperationException("RightSemiJoin operator currently not supported.");
// }
// }
// --------------------------------------------------------------------------------------------
// Builder classes for incremental construction
// --------------------------------------------------------------------------------------------
/**
* Intermediate step of a Join transformation.
* To continue the Join transformation, select the join key of the first input {@link DataSet} by calling
* {@link JoinOperatorSets#where(int...)} or {@link JoinOperatorSets#where(KeySelector)}.
*
* NOTE(review): generic type arguments appear to be missing in this listing (see the nameless
* param tags below and the signature of where(KeySelector)) - confirm against the original file.
*
* @param The type of the first input DataSet of the Join transformation.
* @param The type of the second input DataSet of the Join transformation.
*/
public static final class JoinOperatorSets {
// The two join inputs; both are checked to be non-null in the constructor.
private final DataSet input1;
private final DataSet input2;
// The join hint handed to the DefaultJoin that is eventually created; stored without a null check.
private final JoinHint joinHint;
/**
* Creates the builder for the two inputs with the hint {@link JoinHint#OPTIMIZER_CHOOSES}.
*
* @param input1 The first join input.
* @param input2 The second join input.
* @throws NullPointerException If input1 or input2 is null.
*/
public JoinOperatorSets(DataSet input1, DataSet input2) {
this(input1, input2, JoinHint.OPTIMIZER_CHOOSES);
}
/**
* Creates the builder for the two inputs with the given join hint.
*
* @param input1 The first join input.
* @param input2 The second join input.
* @param hint The join hint to pass on to the resulting join; stored as given.
* @throws NullPointerException If input1 or input2 is null.
*/
public JoinOperatorSets(DataSet input1, DataSet input2, JoinHint hint) {
if (input1 == null || input2 == null) {
throw new NullPointerException();
}
this.input1 = input1;
this.input2 = input2;
this.joinHint = hint;
}
/**
* Continues a Join transformation.
* Defines the {@link Tuple} fields of the first join {@link DataSet} that should be used as join keys.
* Note: Fields can only be selected as join keys on Tuple DataSets.
*
* @param fields The indexes of the Tuple fields of the first join DataSets that should be used as keys.
* @return An incomplete Join transformation.
* Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
* to continue the Join.
*
* @see Tuple
* @see DataSet
*/
public JoinOperatorSetsPredicate where(int... fields) {
return new JoinOperatorSetsPredicate(new Keys.FieldPositionKeys(fields, input1.getType()));
}
/**
* Continues a Join transformation and defines a {@link KeySelector} function for the first join {@link DataSet}.
* The KeySelector function is called for each element of the first DataSet and extracts a single
* key value on which the DataSet is joined.
*
* @param keySelector The KeySelector function which extracts the key values from the DataSet on which it is joined.
* @return An incomplete Join transformation.
* Call {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}
* to continue the Join.
*
* @see KeySelector
* @see DataSet
*/
public > JoinOperatorSetsPredicate where(KeySelector keySelector) {
return new JoinOperatorSetsPredicate(new Keys.SelectorFunctionKeys(keySelector, input1.getType()));
}
// ----------------------------------------------------------------------------------------
/**
* Intermediate step of a Join transformation.
* To continue the Join transformation, select the join key of the second input {@link DataSet} by calling
* {@link JoinOperatorSetsPredicate#equalTo(int...)} or {@link JoinOperatorSetsPredicate#equalTo(KeySelector)}.
*
* This is a non-static inner class: it reads input1, input2 and joinHint of the enclosing
* JoinOperatorSets instance.
*/
public class JoinOperatorSetsPredicate {
// The key definition of the first input, validated in the constructor.
private final Keys keys1;
/**
* Creates the predicate step for the given keys of the first input.
*
* @param keys1 The key definition of the first input.
* @throws NullPointerException If keys1 is null.
* @throws InvalidProgramException If keys1 is empty.
*/
private JoinOperatorSetsPredicate(Keys keys1) {
if (keys1 == null) {
throw new NullPointerException();
}
if (keys1.isEmpty()) {
throw new InvalidProgramException("The join keys must not be empty.");
}
this.keys1 = keys1;
}
/**
* Continues a Join transformation and defines the {@link Tuple} fields of the second join
* {@link DataSet} that should be used as join keys.
* Note: Fields can only be selected as join keys on Tuple DataSets.
*
* The resulting {@link DefaultJoin} wraps each pair of joining elements into a {@link Tuple2}, with
* the element of the first input being the first field of the tuple and the element of the
* second input being the second field of the tuple.
*
* @param fields The indexes of the Tuple fields of the second join DataSet that should be used as keys.
* @return A DefaultJoin that represents the joined DataSet.
*/
public DefaultJoin equalTo(int... fields) {
return createJoinOperator(new Keys.FieldPositionKeys(fields, input2.getType()));
}
/**
* Continues a Join transformation and defines a {@link KeySelector} function for the second join {@link DataSet}.
* The KeySelector function is called for each element of the second DataSet and extracts a single
* key value on which the DataSet is joined.
*
* The resulting {@link DefaultJoin} wraps each pair of joining elements into a {@link Tuple2}, with
* the element of the first input being the first field of the tuple and the element of the
* second input being the second field of the tuple.
*
* @param keySelector The KeySelector function which extracts the key values from the second DataSet on which it is joined.
* @return A DefaultJoin that represents the joined DataSet.
*/
public DefaultJoin equalTo(KeySelector keySelector) {
return createJoinOperator(new Keys.SelectorFunctionKeys(keySelector, input2.getType()));
}
/**
* Validates the keys of the second input against the keys of the first input and creates the
* {@link DefaultJoin} for the enclosing builder's inputs and join hint.
*
* If either input is a SolutionSetPlaceHolder, its keys must be field position keys; the computed
* logical key positions are then passed to the placeholder's checkJoinKeyFields.
*
* @param keys2 The key definition of the second input.
* @return A DefaultJoin over both inputs with the validated keys.
* @throws NullPointerException If keys2 is null.
* @throws InvalidProgramException If keys2 is empty, if the two key definitions are not compatible,
* or if a solution set input is joined with keys other than field position keys.
*/
protected DefaultJoin createJoinOperator(Keys keys2) {
if (keys2 == null) {
throw new NullPointerException("The join keys may not be null.");
}
if (keys2.isEmpty()) {
throw new InvalidProgramException("The join keys may not be empty.");
}
if (!keys1.areCompatibale(keys2)) {
throw new InvalidProgramException("The pair of join keys are not compatible with each other.");
}
// sanity check solution set key mismatches
if (input1 instanceof SolutionSetPlaceHolder) {
if (keys1 instanceof FieldPositionKeys) {
int[] positions = ((FieldPositionKeys>) keys1).computeLogicalKeyPositions();
((SolutionSetPlaceHolder>) input1).checkJoinKeyFields(positions);
} else {
throw new InvalidProgramException("Currently, the solution set may only be joined with using tuple field positions.");
}
}
// same check for the second input
if (input2 instanceof SolutionSetPlaceHolder) {
if (keys2 instanceof FieldPositionKeys) {
int[] positions = ((FieldPositionKeys>) keys2).computeLogicalKeyPositions();
((SolutionSetPlaceHolder>) input2).checkJoinKeyFields(positions);
} else {
throw new InvalidProgramException("Currently, the solution set may only be joined with using tuple field positions.");
}
}
return new DefaultJoin(input1, input2, keys1, keys2, joinHint);
}
}
}
// --------------------------------------------------------------------------------------------
// default join functions
// --------------------------------------------------------------------------------------------
public static final class DefaultJoinFunction extends JoinFunction> {
private static final long serialVersionUID = 1L;
private final Tuple2 outTuple = new Tuple2();
@Override
public Tuple2 join(T1 first, T2 second) throws Exception {
outTuple.f0 = first;
outTuple.f1 = second;
return outTuple;
}
}
public static final class ProjectJoinFunction extends JoinFunction {
private static final long serialVersionUID = 1L;
private final int[] fields;
private final boolean[] isFromFirst;
private final R outTuple;
/**
* Instantiates and configures a ProjectJoinFunction.
* Creates output tuples by copying fields of joined input tuples (or a full input object) into an output tuple.
*
* @param fields List of indexes fields that should be copied to the output tuple.
* If the full input object should be copied (for example in case of a non-tuple input) the index should be -1.
* @param isFromFirst List of flags indicating whether the field should be copied from the first (true) or the second (false) input.
* @param outTupleInstance An instance of an output tuple.
*/
private ProjectJoinFunction(int[] fields, boolean[] isFromFirst, R outTupleInstance) {
if(fields.length != isFromFirst.length) {
throw new IllegalArgumentException("Fields and isFromFirst arrays must have same length!");
}
this.fields = fields;
this.isFromFirst = isFromFirst;
this.outTuple = outTupleInstance;
}
public R join(T1 in1, T2 in2) {
for(int i=0; i= 0) {
outTuple.setField(((Tuple)in1).getField(fields[i]), i);
} else {
outTuple.setField(in1, i);
}
} else {
if(fields[i] >= 0) {
outTuple.setField(((Tuple)in2).getField(fields[i]), i);
} else {
outTuple.setField(in2, i);
}
}
}
return outTuple;
}
}
public static final class LeftSemiJoinFunction