// Class: com.google.cloud.dataflow.sdk.transforms.Flatten
// Artifact: google-cloud-dataflow-java-sdk-all
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms;
import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.IterableLikeCoder;
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
import com.google.cloud.dataflow.sdk.values.PCollectionList;
import java.util.ArrayList;
import java.util.List;
/**
* {@code Flatten} takes multiple {@code PCollection}s bundled
* into a {@code PCollectionList} and returns a single
* {@code PCollection} containing all the elements in all the input
* {@code PCollection}s. The name "Flatten" suggests taking a list of
* lists and flattening them into a single list.
*
* Example of use:
*
{@code
* PCollection pc1 = ...;
* PCollection pc2 = ...;
* PCollection pc3 = ...;
* PCollectionList pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
* PCollection merged = pcs.apply(Flatten.pCollections());
* }
*
* By default, the {@code Coder} of the output {@code PCollection}
* is the same as the {@code Coder} of the first {@code PCollection}
* in the input {@code PCollectionList} (if the
* {@code PCollectionList} is non-empty).
*
*/
public class Flatten {
/**
* Returns a {@link PTransform} that flattens a {@link PCollectionList}
* into a {@link PCollection} containing all the elements of all
* the {@link PCollection}s in its input.
*
*
All inputs must have equal {@link WindowFn}s.
* The output elements of {@code Flatten} are in the same windows and
* have the same timestamps as their corresponding input elements. The output
* {@code PCollection} will have the same
* {@link WindowFn} as all of the inputs.
*
* @param the type of the elements in the input and output
* {@code PCollection}s.
*/
public static FlattenPCollectionList pCollections() {
return new FlattenPCollectionList<>();
}
/**
* Returns a {@code PTransform} that takes a {@code PCollection>}
* and returns a {@code PCollection} containing all the elements from
* all the {@code Iterable}s.
*
* Example of use:
*
{@code
* PCollection> pcOfIterables = ...;
* PCollection pc = pcOfIterables.apply(Flatten.iterables());
* }
*
* By default, the output {@code PCollection} encodes its elements
* using the same {@code Coder} that the input uses for
* the elements in its {@code Iterable}.
*
* @param the type of the elements of the input {@code Iterable} and
* the output {@code PCollection}
*/
public static FlattenIterables iterables() {
return new FlattenIterables<>();
}
/**
* A {@link PTransform} that flattens a {@link PCollectionList}
* into a {@link PCollection} containing all the elements of all
* the {@link PCollection}s in its input.
* Implements {@link #pCollections}.
*
* @param the type of the elements in the input and output
* {@code PCollection}s.
*/
public static class FlattenPCollectionList
extends PTransform, PCollection> {
private FlattenPCollectionList() { }
@Override
public PCollection apply(PCollectionList inputs) {
WindowingStrategy, ?> windowingStrategy;
IsBounded isBounded = IsBounded.BOUNDED;
if (!inputs.getAll().isEmpty()) {
windowingStrategy = inputs.get(0).getWindowingStrategy();
for (PCollection> input : inputs.getAll()) {
WindowingStrategy, ?> other = input.getWindowingStrategy();
if (!windowingStrategy.getWindowFn().isCompatible(other.getWindowFn())) {
throw new IllegalStateException(
"Inputs to Flatten had incompatible window windowFns: "
+ windowingStrategy.getWindowFn() + ", " + other.getWindowFn());
}
if (!windowingStrategy.getTrigger().getSpec()
.isCompatible(other.getTrigger().getSpec())) {
throw new IllegalStateException(
"Inputs to Flatten had incompatible triggers: "
+ windowingStrategy.getTrigger() + ", " + other.getTrigger());
}
isBounded = isBounded.and(input.isBounded());
}
} else {
windowingStrategy = WindowingStrategy.globalDefault();
}
return PCollection.createPrimitiveOutputInternal(
inputs.getPipeline(),
windowingStrategy,
isBounded);
}
@Override
protected Coder> getDefaultOutputCoder(PCollectionList input)
throws CannotProvideCoderException {
// Take coder from first collection
for (PCollection pCollection : input.getAll()) {
return pCollection.getCoder();
}
// No inputs
throw new CannotProvideCoderException(
this.getClass().getSimpleName() + " cannot provide a Coder for"
+ " empty " + PCollectionList.class.getSimpleName());
}
}
/**
* {@code FlattenIterables} takes a {@code PCollection>} and returns a
* {@code PCollection} that contains all the elements from each iterable.
* Implements {@link #iterables}.
*
* @param the type of the elements of the input {@code Iterable}s and
* the output {@code PCollection}
*/
public static class FlattenIterables
extends PTransform>, PCollection> {
@Override
public PCollection apply(PCollection extends Iterable> in) {
Coder extends Iterable> inCoder = in.getCoder();
if (!(inCoder instanceof IterableLikeCoder)) {
throw new IllegalArgumentException(
"expecting the input Coder to be an IterableLikeCoder");
}
@SuppressWarnings("unchecked")
Coder elemCoder = ((IterableLikeCoder) inCoder).getElemCoder();
return in.apply(ParDo.named("FlattenIterables").of(
new DoFn, T>() {
@Override
public void processElement(ProcessContext c) {
for (T i : c.element()) {
c.output(i);
}
}
}))
.setCoder(elemCoder);
}
}
/////////////////////////////////////////////////////////////////////////////
static {
DirectPipelineRunner.registerDefaultTransformEvaluator(
FlattenPCollectionList.class,
new DirectPipelineRunner.TransformEvaluator() {
@Override
public void evaluate(
FlattenPCollectionList transform,
DirectPipelineRunner.EvaluationContext context) {
evaluateHelper(transform, context);
}
});
}
private static void evaluateHelper(
FlattenPCollectionList transform,
DirectPipelineRunner.EvaluationContext context) {
List> outputElems = new ArrayList<>();
PCollectionList inputs = context.getInput(transform);
for (PCollection input : inputs.getAll()) {
outputElems.addAll(context.getPCollectionValuesWithMetadata(input));
}
context.setPCollectionValuesWithMetadata(context.getOutput(transform), outputElems);
}
}