// Source listing: com.google.cloud.dataflow.sdk.transforms.MapElements
// (from the google-cloud-dataflow-java-sdk-all artifact; Maven / Gradle / Ivy)
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.transforms;
import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.TypeDescriptor;
/**
* {@code PTransform}s for mapping a simple function over the elements of a {@link PCollection}.
*/
public class MapElements
extends PTransform, PCollection> {
/**
* For a {@code SerializableFunction} {@code fn} and output type descriptor,
* returns a {@code PTransform} that takes an input {@code PCollection} and returns
* a {@code PCollection} containing {@code fn.apply(v)} for every element {@code v} in
* the input.
*
* Example of use in Java 8:
*
{@code
* PCollection wordLengths = words.apply(
* MapElements.via((String word) -> word.length())
* .withOutputType(new TypeDescriptor() {});
* }
*
* In Java 7, the overload {@link #via(SimpleFunction)} is more concise as the output type
* descriptor need not be provided.
*/
public static MissingOutputTypeDescriptor
via(SerializableFunction fn) {
return new MissingOutputTypeDescriptor<>(fn);
}
/**
* For a {@code SimpleFunction} {@code fn}, returns a {@code PTransform} that
* takes an input {@code PCollection} and returns a {@code PCollection}
* containing {@code fn.apply(v)} for every element {@code v} in the input.
*
* This overload is intended primarily for use in Java 7. In Java 8, the overload
* {@link #via(SerializableFunction)} supports use of lambda for greater concision.
*
*
Example of use in Java 7:
*
{@code
* PCollection words = ...;
* PCollection wordsPerLine = words.apply(MapElements.via(
* new SimpleFunction() {
* public Integer apply(String word) {
* return word.length();
* }
* }));
* }
*/
public static MapElements
via(final SimpleFunction fn) {
return new MapElements<>(fn, fn.getOutputTypeDescriptor());
}
/**
* An intermediate builder for a {@link MapElements} transform. To complete the transform, provide
* an output type descriptor to {@link MissingOutputTypeDescriptor#withOutputType}. See
* {@link #via(SerializableFunction)} for a full example of use.
*/
public static final class MissingOutputTypeDescriptor {
private final SerializableFunction fn;
private MissingOutputTypeDescriptor(SerializableFunction fn) {
this.fn = fn;
}
public MapElements withOutputType(TypeDescriptor outputType) {
return new MapElements<>(fn, outputType);
}
}
///////////////////////////////////////////////////////////////////
private final SerializableFunction fn;
private final transient TypeDescriptor outputType;
private MapElements(
SerializableFunction fn,
TypeDescriptor outputType) {
this.fn = fn;
this.outputType = outputType;
}
@Override
public PCollection apply(PCollection input) {
return input.apply(ParDo.named("Map").of(new DoFn() {
@Override
public void processElement(ProcessContext c) {
c.output(fn.apply(c.element()));
}
@Override
public void populateDisplayData(DisplayData.Builder builder) {
MapElements.this.populateDisplayData(builder);
}
})).setTypeDescriptorInternal(outputType);
}
@Override
public void populateDisplayData(DisplayData.Builder builder) {
super.populateDisplayData(builder);
builder.add(DisplayData.item("mapFn", fn.getClass())
.withLabel("Map Function"));
}
}