All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.dataflow.sdk.transforms.Flatten Maven / Gradle / Ivy

Go to download

Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.transforms;

import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.IterableLikeCoder;
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
import com.google.cloud.dataflow.sdk.values.PCollectionList;

import java.util.ArrayList;
import java.util.List;

/**
 * {@code Flatten} takes multiple {@code PCollection}s bundled
 * into a {@code PCollectionList} and returns a single
 * {@code PCollection} containing all the elements in all the input
 * {@code PCollection}s.  The name "Flatten" suggests taking a list of
 * lists and flattening them into a single list.
 *
 * 

Example of use: *

 {@code
 * PCollection pc1 = ...;
 * PCollection pc2 = ...;
 * PCollection pc3 = ...;
 * PCollectionList pcs = PCollectionList.of(pc1).and(pc2).and(pc3);
 * PCollection merged = pcs.apply(Flatten.pCollections());
 * } 
* *

By default, the {@code Coder} of the output {@code PCollection} * is the same as the {@code Coder} of the first {@code PCollection} * in the input {@code PCollectionList} (if the * {@code PCollectionList} is non-empty). * */ public class Flatten { /** * Returns a {@link PTransform} that flattens a {@link PCollectionList} * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * *

All inputs must have equal {@link WindowFn}s. * The output elements of {@code Flatten} are in the same windows and * have the same timestamps as their corresponding input elements. The output * {@code PCollection} will have the same * {@link WindowFn} as all of the inputs. * * @param the type of the elements in the input and output * {@code PCollection}s. */ public static FlattenPCollectionList pCollections() { return new FlattenPCollectionList<>(); } /** * Returns a {@code PTransform} that takes a {@code PCollection>} * and returns a {@code PCollection} containing all the elements from * all the {@code Iterable}s. * *

Example of use: *

 {@code
   * PCollection> pcOfIterables = ...;
   * PCollection pc = pcOfIterables.apply(Flatten.iterables());
   * } 
* *

By default, the output {@code PCollection} encodes its elements * using the same {@code Coder} that the input uses for * the elements in its {@code Iterable}. * * @param the type of the elements of the input {@code Iterable} and * the output {@code PCollection} */ public static FlattenIterables iterables() { return new FlattenIterables<>(); } /** * A {@link PTransform} that flattens a {@link PCollectionList} * into a {@link PCollection} containing all the elements of all * the {@link PCollection}s in its input. * Implements {@link #pCollections}. * * @param the type of the elements in the input and output * {@code PCollection}s. */ public static class FlattenPCollectionList extends PTransform, PCollection> { private FlattenPCollectionList() { } @Override public PCollection apply(PCollectionList inputs) { WindowingStrategy windowingStrategy; IsBounded isBounded = IsBounded.BOUNDED; if (!inputs.getAll().isEmpty()) { windowingStrategy = inputs.get(0).getWindowingStrategy(); for (PCollection input : inputs.getAll()) { WindowingStrategy other = input.getWindowingStrategy(); if (!windowingStrategy.getWindowFn().isCompatible(other.getWindowFn())) { throw new IllegalStateException( "Inputs to Flatten had incompatible window windowFns: " + windowingStrategy.getWindowFn() + ", " + other.getWindowFn()); } if (!windowingStrategy.getTrigger().getSpec() .isCompatible(other.getTrigger().getSpec())) { throw new IllegalStateException( "Inputs to Flatten had incompatible triggers: " + windowingStrategy.getTrigger() + ", " + other.getTrigger()); } isBounded = isBounded.and(input.isBounded()); } } else { windowingStrategy = WindowingStrategy.globalDefault(); } return PCollection.createPrimitiveOutputInternal( inputs.getPipeline(), windowingStrategy, isBounded); } @Override protected Coder getDefaultOutputCoder(PCollectionList input) throws CannotProvideCoderException { // Take coder from first collection for (PCollection pCollection : input.getAll()) { return pCollection.getCoder(); } 
// No inputs throw new CannotProvideCoderException( this.getClass().getSimpleName() + " cannot provide a Coder for" + " empty " + PCollectionList.class.getSimpleName()); } } /** * {@code FlattenIterables} takes a {@code PCollection>} and returns a * {@code PCollection} that contains all the elements from each iterable. * Implements {@link #iterables}. * * @param the type of the elements of the input {@code Iterable}s and * the output {@code PCollection} */ public static class FlattenIterables extends PTransform>, PCollection> { @Override public PCollection apply(PCollection> in) { Coder> inCoder = in.getCoder(); if (!(inCoder instanceof IterableLikeCoder)) { throw new IllegalArgumentException( "expecting the input Coder to be an IterableLikeCoder"); } @SuppressWarnings("unchecked") Coder elemCoder = ((IterableLikeCoder) inCoder).getElemCoder(); return in.apply(ParDo.named("FlattenIterables").of( new DoFn, T>() { @Override public void processElement(ProcessContext c) { for (T i : c.element()) { c.output(i); } } })) .setCoder(elemCoder); } } ///////////////////////////////////////////////////////////////////////////// static { DirectPipelineRunner.registerDefaultTransformEvaluator( FlattenPCollectionList.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( FlattenPCollectionList transform, DirectPipelineRunner.EvaluationContext context) { evaluateHelper(transform, context); } }); } private static void evaluateHelper( FlattenPCollectionList transform, DirectPipelineRunner.EvaluationContext context) { List> outputElems = new ArrayList<>(); PCollectionList inputs = context.getInput(transform); for (PCollection input : inputs.getAll()) { outputElems.addAll(context.getPCollectionValuesWithMetadata(input)); } context.setPCollectionValuesWithMetadata(context.getOutput(transform), outputElems); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy