Downloading a project consumes considerable server resources. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download or modify it as often as you want.
The Google Cloud Dataflow Java SDK provides a simple, Java-based
interface for processing data of virtually any size using Google Cloud
resources. This artifact includes the entire Dataflow Java SDK.
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.util;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.IterableCoder;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
import com.google.cloud.dataflow.sdk.transforms.Combine.CombineFn;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.DoFn.RequiresWindowAccess;
import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindow;
import com.google.cloud.dataflow.sdk.transforms.windowing.GlobalWindows;
import com.google.cloud.dataflow.sdk.transforms.windowing.PaneInfo;
import com.google.cloud.dataflow.sdk.transforms.windowing.WindowFn;
import com.google.cloud.dataflow.sdk.util.DoFnRunners.OutputManager;
import com.google.cloud.dataflow.sdk.util.ExecutionContext.StepContext;
import com.google.cloud.dataflow.sdk.util.common.CounterSet;
import com.google.cloud.dataflow.sdk.util.state.StateInternals;
import com.google.cloud.dataflow.sdk.values.PCollectionView;
import com.google.cloud.dataflow.sdk.values.TupleTag;
import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.joda.time.Instant;
import org.joda.time.format.PeriodFormat;
import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
/**
* A base implementation of {@link DoFnRunner}.
*
* <p>Sub-classes should override {@link #invokeProcessElement}.
*/
public abstract class DoFnRunnerBase implements DoFnRunner {
/**
* The DoFn being run. Its {@code startBundle} and {@code finishBundle} methods are invoked by
* this runner with {@link #context} as the argument.
*
* NOTE(review): the generic type arguments of this declaration look like they were lost when
* the file was extracted - confirm against the upstream source.
*/
public final DoFn fn;
/**
* The context used for running the DoFn. It is created once in the constructor and handed to
* the DoFn's bundle lifecycle calls.
*
* NOTE(review): the generic type arguments of this declaration look like they were lost when
* the file was extracted - confirm against the upstream source.
*/
public final DoFnContext context;
/**
 * Creates a runner for {@code fn} and builds the {@link DoFnContext} that is later passed to
 * the DoFn's bundle lifecycle methods.
 *
 * <p>NOTE(review): {@code fn} and {@code mainOutputTag} are presumably parameterized by the
 * class's input/output type variables; those are not recoverable from the visible class
 * header, so both are left as they were - confirm against the upstream source.
 *
 * @param options pipeline options; returned by the context's {@code getPipelineOptions()}
 * @param fn the DoFn to run
 * @param sideInputReader side input reader forwarded to the context
 * @param outputManager output manager forwarded to the context
 * @param mainOutputTag tag of the main output; registered as a known output tag
 * @param sideOutputTags side output tags; each is registered as a known output tag
 * @param stepContext step context forwarded to the context
 * @param addCounterMutator counter mutator forwarded to the context
 * @param windowingStrategy windowing strategy whose {@code WindowFn} is forwarded to the
 *     context; may be {@code null}, in which case a {@code null} window function is forwarded
 */
protected DoFnRunnerBase(
    PipelineOptions options,
    DoFn fn,
    SideInputReader sideInputReader,
    OutputManager outputManager,
    TupleTag mainOutputTag,
    List<TupleTag<?>> sideOutputTags,
    StepContext stepContext,
    CounterSet.AddCounterMutator addCounterMutator,
    WindowingStrategy<?, ?> windowingStrategy) {
  this.fn = fn;
  this.context = new DoFnContext<>(
      options,
      fn,
      sideInputReader,
      outputManager,
      mainOutputTag,
      sideOutputTags,
      stepContext,
      addCounterMutator,
      // A missing windowing strategy is tolerated; the context then receives no WindowFn.
      windowingStrategy == null ? null : windowingStrategy.getWindowFn());
}
/**
* An implementation of {@code OutputManager} using simple lists, for testing and in-memory
* contexts such as the {@link DirectPipelineRunner}.
*/
public static class ListOutputManager implements OutputManager {
private Map, List>> outputLists = Maps.newHashMap();
@Override
public void output(TupleTag tag, WindowedValue output) {
@SuppressWarnings({"rawtypes", "unchecked"})
List> outputList = (List) outputLists.get(tag);
if (outputList == null) {
outputList = Lists.newArrayList();
@SuppressWarnings({"rawtypes", "unchecked"})
List> untypedList = (List) outputList;
outputLists.put(tag, untypedList);
}
outputList.add(output);
}
public List> getOutput(TupleTag tag) {
// Safe cast by design, inexpressible in Java without rawtypes
@SuppressWarnings({"rawtypes", "unchecked"})
List> outputList = (List) outputLists.get(tag);
return (outputList != null) ? outputList : Collections.>emptyList();
}
}
@Override
public void startBundle() {
  try {
    // The DoFn's startBundle is user code and may throw anything.
    this.fn.startBundle(this.context);
  } catch (Throwable userCodeFailure) {
    // Whatever the user code threw is rethrown through the shared wrapper.
    throw wrapUserCodeException(userCodeFailure);
  }
}
@Override
public void processElement(WindowedValue elem) {
  // Per-window invocation is needed only when the element is in more than one window AND
  // either the DoFn is marked RequiresWindowAccess or the side input reader is non-empty.
  boolean invokePerWindow =
      elem.getWindows().size() > 1
          && (RequiresWindowAccess.class.isAssignableFrom(fn.getClass())
              || !context.sideInputReader.isEmpty());
  if (!invokePerWindow) {
    invokeProcessElement(elem);
    return;
  }

  // A fresh single-window value is allocated for every window. Mutating one value in place
  // would save allocations, but this form is more straightforward.
  for (BoundedWindow window : elem.getWindows()) {
    invokeProcessElement(
        WindowedValue.of(elem.getValue(), elem.getTimestamp(), window, elem.getPane()));
  }
}
/**
* Invokes {@link DoFn#processElement} after the pre-processing in
* {@link DoFnRunnerBase#processElement} has been done.
*
* {@link DoFnRunnerBase#processElement} calls this either once with the element unchanged, or
* once per window with a copy of the element that carries only that window.
*
* @param elem the windowed value to hand to the DoFn
*/
protected abstract void invokeProcessElement(WindowedValue elem);
@Override
public void finishBundle() {
  try {
    // The DoFn's finishBundle is user code and may throw anything.
    this.fn.finishBundle(this.context);
  } catch (Throwable userCodeFailure) {
    // Whatever the user code threw is rethrown through the shared wrapper.
    throw wrapUserCodeException(userCodeFailure);
  }
}
/**
* A concrete implementation of {@code DoFn.Context} used for running a {@link DoFn}.
*
* @param the type of the DoFn's (main) input elements
* @param the type of the DoFn's (main) output elements
*/
private static class DoFnContext
extends DoFn.Context {
private static final int MAX_SIDE_OUTPUTS = 1000;
final PipelineOptions options;
final DoFn fn;
final SideInputReader sideInputReader;
final OutputManager outputManager;
final TupleTag mainOutputTag;
final StepContext stepContext;
final CounterSet.AddCounterMutator addCounterMutator;
final WindowFn, ?> windowFn;
/**
* The set of known output tags, some of which may be undeclared, so we can throw an
* exception when it exceeds {@link #MAX_SIDE_OUTPUTS}.
*/
private Set> outputTags;
public DoFnContext(PipelineOptions options,
DoFn fn,
SideInputReader sideInputReader,
OutputManager outputManager,
TupleTag mainOutputTag,
List> sideOutputTags,
StepContext stepContext,
CounterSet.AddCounterMutator addCounterMutator,
WindowFn, ?> windowFn) {
fn.super();
this.options = options;
this.fn = fn;
this.sideInputReader = sideInputReader;
this.outputManager = outputManager;
this.mainOutputTag = mainOutputTag;
this.outputTags = Sets.newHashSet();
outputTags.add(mainOutputTag);
for (TupleTag> sideOutputTag : sideOutputTags) {
outputTags.add(sideOutputTag);
}
this.stepContext = stepContext;
this.addCounterMutator = addCounterMutator;
this.windowFn = windowFn;
super.setupDelegateAggregators();
}
//////////////////////////////////////////////////////////////////////////////
@Override
public PipelineOptions getPipelineOptions() {
return options;
}
WindowedValue makeWindowedValue(
T output, Instant timestamp, Collection windows, PaneInfo pane) {
final Instant inputTimestamp = timestamp;
if (timestamp == null) {
timestamp = BoundedWindow.TIMESTAMP_MIN_VALUE;
}
if (windows == null) {
try {
// The windowFn can never succeed at accessing the element, so its type does not
// matter here
@SuppressWarnings("unchecked")
WindowFn