// com.google.cloud.dataflow.sdk.transforms.FlatMapElements Maven / Gradle / Ivy
// Show all versions of google-cloud-dataflow-java-sdk-all Show documentation
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
import java.lang.reflect.ParameterizedType;
/**
* {@code PTransform}s for mapping a simple function that returns iterables over the elements of a
* {@link PCollection} and merging the results.
*/
public class FlatMapElements
extends PTransform, PCollection> {
/**
* For a {@code SerializableFunction>} {@code fn},
* returns a {@link PTransform} that applies {@code fn} to every element of the input
* {@code PCollection} and outputs all of the elements to the output
* {@code PCollection}.
*
* Example of use in Java 8:
*
{@code
* PCollection words = lines.apply(
* FlatMapElements.via((String line) -> Arrays.asList(line.split(" ")))
* .withOutputType(new TypeDescriptor(){});
* }
*
* In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
* descriptor need not be provided.
*/
public static MissingOutputTypeDescriptor
via(SerializableFunction> fn) {
return new MissingOutputTypeDescriptor<>(fn);
}
/**
* For a {@code SimpleFunction>} {@code fn},
* return a {@link PTransform} that applies {@code fn} to every element of the input
* {@code PCollection} and outputs all of the elements to the output
* {@code PCollection}.
*
* This overload is intended primarily for use in Java 7. In Java 8, the overload
* {@link #via(SerializableFunction)} supports use of lambda for greater concision.
*
*
Example of use in Java 7:
*
{@code
* PCollection lines = ...;
* PCollection words = lines.apply(FlatMapElements.via(
* new SimpleFunction>() {
* public Integer apply(String line) {
* return Arrays.asList(line.split(" "));
* }
* });
* }
*
* To use a Java 8 lambda, see {@link #via(SerializableFunction)}.
*/
public static FlatMapElements
via(SimpleFunction> fn) {
@SuppressWarnings({"rawtypes", "unchecked"}) // safe by static typing
TypeDescriptor> iterableType = (TypeDescriptor) fn.getOutputTypeDescriptor();
@SuppressWarnings("unchecked") // safe by correctness of getIterableElementType
TypeDescriptor outputType =
(TypeDescriptor) getIterableElementType(iterableType);
return new FlatMapElements<>(fn, outputType);
}
/**
* An intermediate builder for a {@link FlatMapElements} transform. To complete the transform,
* provide an output type descriptor to {@link MissingOutputTypeDescriptor#withOutputType}. See
* {@link #via(SerializableFunction)} for a full example of use.
*/
public static final class MissingOutputTypeDescriptor {
private final SerializableFunction> fn;
private MissingOutputTypeDescriptor(
SerializableFunction> fn) {
this.fn = fn;
}
public FlatMapElements withOutputType(TypeDescriptor outputType) {
return new FlatMapElements<>(fn, outputType);
}
}
private static TypeDescriptor> getIterableElementType(
TypeDescriptor> iterableTypeDescriptor) {
// If a rawtype was used, the type token may be for Object, not a subtype of Iterable.
// In this case, we rely on static typing of the function elsewhere to ensure it is
// at least some kind of iterable, and grossly overapproximate the element type to be Object.
if (!iterableTypeDescriptor.isSubtypeOf(new TypeDescriptor>() {})) {
return new TypeDescriptor