// eu.stratosphere.api.java.functions.JoinFunction (Maven / Gradle / Ivy)
/***********************************************************************************************************************
*
* Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
* an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
* specific language governing permissions and limitations under the License.
*
**********************************************************************************************************************/
package eu.stratosphere.api.java.functions;
import eu.stratosphere.api.common.functions.AbstractFunction;
import eu.stratosphere.api.common.functions.GenericJoiner;
import eu.stratosphere.util.Collector;
/**
 * The abstract base class for Join functions. Join functions combine two data sets by joining their
 * elements on specified keys and calling this function with each pair of joining elements.
 * By default, this strictly follows the semantics of an "inner join" in SQL, meaning that elements
 * are filtered out if their key is not contained in the other data set.
 * <p>
 * Per the semantics of an inner join, the function is called only with pairs of elements whose
 * keys match; elements without a matching partner in the other input never reach the function.
 * <p>
 * The basic syntax for using Join on two data sets is as follows:
 * <pre>{@code
 * DataSet<X> set1 = ...;
 * DataSet<Y> set2 = ...;
 *
 * set1.join(set2).where(<key-definition>).equalTo(<key-definition>).with(new MyJoinFunction());
 * }</pre>
 * <p>
 * {@code set1} is here considered the first input, {@code set2} the second input.
 * The keys can be defined through tuple field positions or key extractors.
 * See {@link eu.stratosphere.api.java.operators.Keys} for details.
 * <p>
 * The Join function is actually not a necessary part of a join operation. If no JoinFunction is provided,
 * the result of the operation is a sequence of Tuple2, where the elements in the tuple are those that
 * the JoinFunction would have been invoked with.
 * <p>
 * Note: You can use a {@link CoGroupFunction} to perform an outer join.
 * <p>
 * All functions need to be serializable, as defined in {@link java.io.Serializable}.
 *
 * @param <IN1> The type of the elements in the first input.
 * @param <IN2> The type of the elements in the second input.
 * @param <OUT> The type of the result elements.
 */
public abstract class JoinFunction<IN1, IN2, OUT> extends AbstractFunction implements GenericJoiner<IN1, IN2, OUT> {

    private static final long serialVersionUID = 1L;

    /**
     * The user-defined method for performing transformations after a join.
     * The method is called with matching pairs of elements from the inputs.
     *
     * @param first The element from first input.
     * @param second The element from second input.
     * @return The resulting element.
     *
     * @throws Exception This method may throw exceptions. Throwing an exception will cause the operation
     *                   to fail and may trigger recovery.
     */
    public abstract OUT join(IN1 first, IN2 second) throws Exception;

    /**
     * The user-defined method for performing transformations after a join, for operations that
     * produce zero elements, or more than one element.
     * By default, this method delegates to the method {@link #join(Object, Object)} and emits its
     * single result through the collector. If this method is overridden, that method will no
     * longer be called.
     *
     * @param value1 The element from first input.
     * @param value2 The element from second input.
     * @param out A collector to emit resulting element (zero, one, or many).
     *
     * @throws Exception This method may throw exceptions. Throwing an exception will cause the operation
     *                   to fail and may trigger recovery.
     */
    @Override
    public void join(IN1 value1, IN2 value2, Collector<OUT> out) throws Exception {
        // Default behavior: exactly one output element per joined pair.
        out.collect(join(value1, value2));
    }
}