All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.transforms.MapElements Maven / Gradle / Ivy

Go to download

The Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.transforms;

import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.TypeDescriptor;

/**
 * {@code PTransform}s for mapping a simple function over the elements of a {@link PCollection}.
 */
public class MapElements
extends PTransform, PCollection> {

  /**
   * For a {@code SerializableFunction} {@code fn} and output type descriptor,
   * returns a {@code PTransform} that takes an input {@code PCollection} and returns
   * a {@code PCollection} containing {@code fn.apply(v)} for every element {@code v} in
   * the input.
   *
   * 

Example of use in Java 8: *

{@code
   * PCollection wordLengths = words.apply(
   *     MapElements.via((String word) -> word.length())
   *         .withOutputType(new TypeDescriptor() {});
   * }
* *

In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type * descriptor need not be provided. */ public static MissingOutputTypeDescriptor via(SerializableFunction fn) { return new MissingOutputTypeDescriptor<>(fn); } /** * For a {@code SimpleFunction} {@code fn}, returns a {@code PTransform} that * takes an input {@code PCollection} and returns a {@code PCollection} * containing {@code fn.apply(v)} for every element {@code v} in the input. * *

This overload is intended primarily for use in Java 7. In Java 8, the overload * {@link #via(SerializableFunction)} supports use of lambda for greater concision. * *

Example of use in Java 7: *

{@code
   * PCollection words = ...;
   * PCollection wordsPerLine = words.apply(MapElements.via(
   *     new SimpleFunction() {
   *       public Integer apply(String word) {
   *         return word.length();
   *       }
   *     }));
   * }
*/ public static MapElements via(final SimpleFunction fn) { return new MapElements<>(fn, fn.getOutputTypeDescriptor()); } /** * An intermediate builder for a {@link MapElements} transform. To complete the transform, provide * an output type descriptor to {@link MissingOutputTypeDescriptor#withOutputType}. See * {@link #via(SerializableFunction)} for a full example of use. */ public static final class MissingOutputTypeDescriptor { private final SerializableFunction fn; private MissingOutputTypeDescriptor(SerializableFunction fn) { this.fn = fn; } public MapElements withOutputType(TypeDescriptor outputType) { return new MapElements<>(fn, outputType); } } /////////////////////////////////////////////////////////////////// private final SerializableFunction fn; private final transient TypeDescriptor outputType; private MapElements( SerializableFunction fn, TypeDescriptor outputType) { this.fn = fn; this.outputType = outputType; } @Override public PCollection apply(PCollection input) { return input.apply(ParDo.named("Map").of(new DoFn() { @Override public void processElement(ProcessContext c) { c.output(fn.apply(c.element())); } @Override public void populateDisplayData(DisplayData.Builder builder) { MapElements.this.populateDisplayData(builder); } })).setTypeDescriptorInternal(outputType); } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder.add(DisplayData.item("mapFn", fn.getClass()) .withLabel("Map Function")); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy