All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.dataflow.sdk.io.Read Maven / Gradle / Ivy

Go to download

The Google Cloud Dataflow Java SDK provides a simple, Java-based interface for processing data of virtually any size using Google Cloud resources. This artifact includes the entire Dataflow Java SDK.

There is a newer version: 2.5.0
Show newest version
/*
 * Copyright (C) 2015 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package com.google.cloud.dataflow.sdk.io;

import static com.google.cloud.dataflow.sdk.util.StringUtils.approximateSimpleName;

import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.transforms.display.DisplayData;
import com.google.cloud.dataflow.sdk.util.SerializableUtils;
import com.google.cloud.dataflow.sdk.util.WindowedValue;
import com.google.cloud.dataflow.sdk.util.WindowingStrategy;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollection.IsBounded;
import com.google.cloud.dataflow.sdk.values.PInput;

import org.joda.time.Duration;

import java.util.ArrayList;
import java.util.List;

import javax.annotation.Nullable;

/**
 * A {@link PTransform} for reading from a {@link Source}.
 *
 * 

Usage example: *

 * Pipeline p = Pipeline.create();
 * p.apply(Read.from(new MySource().withFoo("foo").withBar("bar"))
 *             .named("foobar"));
 * 
*/ public class Read { /** * Returns a new {@code Read} {@code PTransform} builder with the given name. */ public static Builder named(String name) { return new Builder(name); } /** * Returns a new {@code Read.Bounded} {@code PTransform} reading from the given * {@code BoundedSource}. */ public static Bounded from(BoundedSource source) { return new Bounded<>(null, source); } /** * Returns a new {@code Read.Unbounded} {@code PTransform} reading from the given * {@code UnboundedSource}. */ public static Unbounded from(UnboundedSource source) { return new Unbounded<>(null, source); } /** * Helper class for building {@code Read} transforms. */ public static class Builder { private final String name; private Builder(String name) { this.name = name; } /** * Returns a new {@code Read.Bounded} {@code PTransform} reading from the given * {@code BoundedSource}. */ public Bounded from(BoundedSource source) { return new Bounded<>(name, source); } /** * Returns a new {@code Read.Unbounded} {@code PTransform} reading from the given * {@code UnboundedSource}. */ public Unbounded from(UnboundedSource source) { return new Unbounded<>(name, source); } } /** * {@link PTransform} that reads from a {@link BoundedSource}. */ public static class Bounded extends PTransform> { private final BoundedSource source; private Bounded(@Nullable String name, BoundedSource source) { super(name); this.source = SerializableUtils.ensureSerializable(source); } /** * Returns a new {@code Bounded} {@code PTransform} that's like this one but * has the given name. * *

Does not modify this object. */ public Bounded named(String name) { return new Bounded(name, source); } @Override protected Coder getDefaultOutputCoder() { return source.getDefaultOutputCoder(); } @Override public final PCollection apply(PInput input) { source.validate(); return PCollection.createPrimitiveOutputInternal(input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.BOUNDED) .setCoder(getDefaultOutputCoder()); } /** * Returns the {@code BoundedSource} used to create this {@code Read} {@code PTransform}. */ public BoundedSource getSource() { return source; } @Override public String getKindString() { return "Read(" + approximateSimpleName(source.getClass()) + ")"; } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder .add(DisplayData.item("source", source.getClass()) .withLabel("Read Source")) .include(source); } static { registerDefaultTransformEvaluator(); } @SuppressWarnings({"rawtypes", "unchecked"}) private static void registerDefaultTransformEvaluator() { DirectPipelineRunner.registerDefaultTransformEvaluator( Bounded.class, new DirectPipelineRunner.TransformEvaluator() { @Override public void evaluate( Bounded transform, DirectPipelineRunner.EvaluationContext context) { evaluateReadHelper(transform, context); } private void evaluateReadHelper( Read.Bounded transform, DirectPipelineRunner.EvaluationContext context) { try { List> output = new ArrayList<>(); BoundedSource source = transform.getSource(); try (BoundedSource.BoundedReader reader = source.createReader(context.getPipelineOptions())) { for (boolean available = reader.start(); available; available = reader.advance()) { output.add( DirectPipelineRunner.ValueWithMetadata.of( WindowedValue.timestampedValueInGlobalWindow( reader.getCurrent(), reader.getCurrentTimestamp()))); } } context.setPCollectionValuesWithMetadata(context.getOutput(transform), output); } catch (Exception e) { throw new RuntimeException(e); } } }); } } 
/** * {@link PTransform} that reads from a {@link UnboundedSource}. */ public static class Unbounded extends PTransform> { private final UnboundedSource source; private Unbounded(@Nullable String name, UnboundedSource source) { super(name); this.source = SerializableUtils.ensureSerializable(source); } /** * Returns a new {@code Unbounded} {@code PTransform} that's like this one but * has the given name. * *

Does not modify this object. */ public Unbounded named(String name) { return new Unbounded(name, source); } /** * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount * of data from the given {@link UnboundedSource}. The bound is specified as a number * of records to read. * *

This may take a long time to execute if the splits of this source are slow to read * records. */ public BoundedReadFromUnboundedSource withMaxNumRecords(long maxNumRecords) { return new BoundedReadFromUnboundedSource(source, maxNumRecords, null); } /** * Returns a new {@link BoundedReadFromUnboundedSource} that reads a bounded amount * of data from the given {@link UnboundedSource}. The bound is specified as an amount * of time to read for. Each split of the source will read for this much time. */ public BoundedReadFromUnboundedSource withMaxReadTime(Duration maxReadTime) { return new BoundedReadFromUnboundedSource(source, Long.MAX_VALUE, maxReadTime); } @Override protected Coder getDefaultOutputCoder() { return source.getDefaultOutputCoder(); } @Override public final PCollection apply(PInput input) { source.validate(); return PCollection.createPrimitiveOutputInternal( input.getPipeline(), WindowingStrategy.globalDefault(), IsBounded.UNBOUNDED); } /** * Returns the {@code UnboundedSource} used to create this {@code Read} {@code PTransform}. */ public UnboundedSource getSource() { return source; } @Override public String getKindString() { return "Read(" + approximateSimpleName(source.getClass()) + ")"; } @Override public void populateDisplayData(DisplayData.Builder builder) { super.populateDisplayData(builder); builder .add(DisplayData.item("source", source.getClass()) .withLabel("Read Source")) .include(source); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy