
// com.google.cloud.dataflow.sdk.runners.DirectPipelineRunner
/*
* Copyright (C) 2015 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.runners;
import static com.google.common.base.Preconditions.checkArgument;
import com.google.cloud.dataflow.sdk.Pipeline;
import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
import com.google.cloud.dataflow.sdk.PipelineResult;
import com.google.cloud.dataflow.sdk.coders.CannotProvideCoderException;
import com.google.cloud.dataflow.sdk.coders.Coder;
import com.google.cloud.dataflow.sdk.coders.ListCoder;
import com.google.cloud.dataflow.sdk.options.DirectPipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptions;
import com.google.cloud.dataflow.sdk.options.PipelineOptions.CheckEnabled;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsFactory;
import com.google.cloud.dataflow.sdk.options.PipelineOptionsValidator;
import com.google.cloud.dataflow.sdk.runners.dataflow.MapAggregatorValues;
import com.google.cloud.dataflow.sdk.transforms.Aggregator;
import com.google.cloud.dataflow.sdk.transforms.AppliedPTransform;
import com.google.cloud.dataflow.sdk.transforms.Combine;
import com.google.cloud.dataflow.sdk.transforms.Combine.KeyedCombineFn;
import com.google.cloud.dataflow.sdk.transforms.DoFn;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.transforms.ParDo;
import com.google.cloud.dataflow.sdk.transforms.windowing.BoundedWindow;
import com.google.cloud.dataflow.sdk.util.AppliedCombineFn;
import com.google.cloud.dataflow.sdk.util.IOChannelUtils;
import com.google.cloud.dataflow.sdk.util.SerializableUtils;
import com.google.cloud.dataflow.sdk.util.TestCredential;
import com.google.cloud.dataflow.sdk.util.WindowedValue;
import com.google.cloud.dataflow.sdk.util.common.Counter;
import com.google.cloud.dataflow.sdk.util.common.CounterSet;
import com.google.cloud.dataflow.sdk.values.KV;
import com.google.cloud.dataflow.sdk.values.PCollection;
import com.google.cloud.dataflow.sdk.values.PCollectionList;
import com.google.cloud.dataflow.sdk.values.PCollectionView;
import com.google.cloud.dataflow.sdk.values.PInput;
import com.google.cloud.dataflow.sdk.values.POutput;
import com.google.cloud.dataflow.sdk.values.PValue;
import com.google.cloud.dataflow.sdk.values.TypedPValue;
import com.google.common.base.Function;
import com.google.common.collect.Lists;
import org.joda.time.Instant;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
/**
* Executes the operations in the pipeline directly, in this process, without
* any optimization. Useful for small local execution and tests.
*
 * <p>Throws an exception from {@link #run} if execution fails.
 *
 * <h3>Permissions</h3>
 * When reading from a Dataflow source or writing to a Dataflow sink using
 * {@code DirectPipelineRunner}, the Cloud Platform account that you configured with the
 * gcloud executable will need access to the
 * corresponding source/sink.
 *
 * <p>Please see <a href="https://cloud.google.com/dataflow/security-and-permissions">Google Cloud
 * Dataflow Security and Permissions</a> for more details.
*/
@SuppressWarnings({"rawtypes", "unchecked"})
public class DirectPipelineRunner
    extends PipelineRunner<DirectPipelineRunner.EvaluationResults> {
  private static final Logger LOG = LoggerFactory.getLogger(DirectPipelineRunner.class);

  /**
   * A source of random data, which can be seeded if determinism is desired.
   */
  private Random rand;

  /**
   * A map from PTransform class to the corresponding
   * TransformEvaluator to use to evaluate that transform.
   *
   * <p>A static map that contains system-wide defaults.
   */
  // Raw-typed on purpose (class-level @SuppressWarnings); keys are PTransform subclasses.
  private static Map<Class, TransformEvaluator> defaultTransformEvaluators =
      new HashMap<>();

  /**
   * A map from PTransform class to the corresponding
   * TransformEvaluator to use to evaluate that transform.
   *
   * <p>An instance map that contains bindings for this DirectPipelineRunner.
   * Bindings in this map override those in the default map.
   */
  private Map<Class, TransformEvaluator> localTransformEvaluators =
      new HashMap<>();
/**
* Records that instances of the specified PTransform class
* should be evaluated by default by the corresponding
* TransformEvaluator.
*/
public static >
void registerDefaultTransformEvaluator(
Class transformClass,
TransformEvaluator super TransformT> transformEvaluator) {
if (defaultTransformEvaluators.put(transformClass, transformEvaluator)
!= null) {
throw new IllegalArgumentException(
"defining multiple evaluators for " + transformClass);
}
}
/**
* Records that instances of the specified PTransform class
* should be evaluated by the corresponding TransformEvaluator.
* Overrides any bindings specified by
* {@link #registerDefaultTransformEvaluator}.
*/
public >
void registerTransformEvaluator(
Class transformClass,
TransformEvaluator transformEvaluator) {
if (localTransformEvaluators.put(transformClass, transformEvaluator)
!= null) {
throw new IllegalArgumentException(
"defining multiple evaluators for " + transformClass);
}
}
/**
* Returns the TransformEvaluator to use for instances of the
* specified PTransform class, or null if none registered.
*/
public >
TransformEvaluator getTransformEvaluator(Class transformClass) {
TransformEvaluator transformEvaluator =
localTransformEvaluators.get(transformClass);
if (transformEvaluator == null) {
transformEvaluator = defaultTransformEvaluators.get(transformClass);
}
return transformEvaluator;
}
/**
* Constructs a DirectPipelineRunner from the given options.
*/
public static DirectPipelineRunner fromOptions(PipelineOptions options) {
DirectPipelineOptions directOptions =
PipelineOptionsValidator.validate(DirectPipelineOptions.class, options);
LOG.debug("Creating DirectPipelineRunner");
return new DirectPipelineRunner(directOptions);
}
/**
* Constructs a runner with default properties for testing.
*
* @return The newly created runner.
*/
public static DirectPipelineRunner createForTest() {
DirectPipelineOptions options = PipelineOptionsFactory.as(DirectPipelineOptions.class);
options.setStableUniqueNames(CheckEnabled.ERROR);
options.setGcpCredential(new TestCredential());
return new DirectPipelineRunner(options);
}
/**
* Enable runtime testing to verify that all functions and {@link Coder}
* instances can be serialized.
*
* Enabled by default.
*
*
This method modifies the {@code DirectPipelineRunner} instance and
* returns itself.
*/
public DirectPipelineRunner withSerializabilityTesting(boolean enable) {
this.testSerializability = enable;
return this;
}
/**
* Enable runtime testing to verify that all values can be encoded.
*
*
Enabled by default.
*
*
This method modifies the {@code DirectPipelineRunner} instance and
* returns itself.
*/
public DirectPipelineRunner withEncodabilityTesting(boolean enable) {
this.testEncodability = enable;
return this;
}
/**
* Enable runtime testing to verify that functions do not depend on order
* of the elements.
*
*
This is accomplished by randomizing the order of elements.
*
*
Enabled by default.
*
*
This method modifies the {@code DirectPipelineRunner} instance and
* returns itself.
*/
public DirectPipelineRunner withUnorderednessTesting(boolean enable) {
this.testUnorderedness = enable;
return this;
}
@Override
public OutputT apply(
PTransform transform, InputT input) {
if (transform instanceof Combine.GroupedValues) {
return (OutputT) applyTestCombine((Combine.GroupedValues) transform, (PCollection) input);
} else {
return super.apply(transform, input);
}
}
private PCollection> applyTestCombine(
Combine.GroupedValues transform,
PCollection>> input) {
PCollection> output = input
.apply(ParDo.of(TestCombineDoFn.create(transform, input, testSerializability, rand)));
try {
output.setCoder(transform.getDefaultOutputCoder(input));
} catch (CannotProvideCoderException exc) {
// let coder inference occur later, if it can
}
return output;
}
/**
* The implementation may split the {@link KeyedCombineFn} into ADD, MERGE and EXTRACT phases (
* see {@code com.google.cloud.dataflow.sdk.runners.worker.CombineValuesFn}). In order to emulate
* this for the {@link DirectPipelineRunner} and provide an experience closer to the service, go
* through heavy serializability checks for the equivalent of the results of the ADD phase, but
* after the {@link com.google.cloud.dataflow.sdk.transforms.GroupByKey} shuffle, and the MERGE
* phase. Doing these checks ensure that not only is the accumulator coder serializable, but
* the accumulator coder can actually serialize the data in question.
*/
public static class TestCombineDoFn
extends DoFn>, KV> {
private static final long serialVersionUID = 0L;
private final KeyedCombineFn super K, ? super InputT, AccumT, OutputT> fn;
private final Coder accumCoder;
private final boolean testSerializability;
private final Random rand;
public static TestCombineDoFn create(
Combine.GroupedValues transform,
PCollection>> input,
boolean testSerializability,
Random rand) {
AppliedCombineFn super K, ? super InputT, ?, OutputT> fn = transform.getAppliedFn(
input.getPipeline().getCoderRegistry(), input.getCoder());
return new TestCombineDoFn(
fn.getFn(),
fn.getAccumulatorCoder(),
testSerializability,
rand);
}
public TestCombineDoFn(
KeyedCombineFn super K, ? super InputT, AccumT, OutputT> fn,
Coder accumCoder,
boolean testSerializability,
Random rand) {
this.fn = fn;
this.accumCoder = accumCoder;
this.testSerializability = testSerializability;
this.rand = rand;
// Check that this does not crash, specifically to catch anonymous CustomCoder subclasses.
this.accumCoder.getEncodingId();
}
@Override
public void processElement(ProcessContext c) throws Exception {
K key = c.element().getKey();
Iterable values = c.element().getValue();
List groupedPostShuffle =
ensureSerializableByCoder(ListCoder.of(accumCoder),
addInputsRandomly(fn, key, values, rand),
"After addInputs of KeyedCombineFn " + fn.toString());
AccumT merged =
ensureSerializableByCoder(accumCoder,
fn.mergeAccumulators(key, groupedPostShuffle),
"After mergeAccumulators of KeyedCombineFn " + fn.toString());
// Note: The serializability of KV is ensured by the
// runner itself, since it's a transform output.
c.output(KV.of(key, fn.extractOutput(key, merged)));
}
/**
* Create a random list of accumulators from the given list of values.
*
* Visible for testing purposes only.
*/
public static List addInputsRandomly(
KeyedCombineFn super K, ? super InputT, AccumT, ?> fn,
K key,
Iterable values,
Random random) {
List out = new ArrayList();
int i = 0;
AccumT accumulator = fn.createAccumulator(key);
boolean hasInput = false;
for (InputT value : values) {
accumulator = fn.addInput(key, accumulator, value);
hasInput = true;
// For each index i, flip a 1/2^i weighted coin for whether to
// create a new accumulator after index i is added, i.e. [0]
// is guaranteed, [1] is an even 1/2, [2] is 1/4, etc. The
// goal is to partition the inputs into accumulators, and make
// the accumulators potentially lumpy.
if (i == 0 || random.nextInt(1 << Math.min(i, 30)) == 0) {
out.add(accumulator);
accumulator = fn.createAccumulator(key);
hasInput = false;
}
i++;
}
if (hasInput) {
out.add(accumulator);
}
Collections.shuffle(out, random);
return out;
}
public T ensureSerializableByCoder(
Coder coder, T value, String errorContext) {
if (testSerializability) {
return SerializableUtils.ensureSerializableByCoder(
coder, value, errorContext);
}
return value;
}
}
@Override
public EvaluationResults run(Pipeline pipeline) {
LOG.info("Executing pipeline using the DirectPipelineRunner.");
Evaluator evaluator = new Evaluator(rand);
evaluator.run(pipeline);
// Log all counter values for debugging purposes.
for (Counter counter : evaluator.getCounters()) {
LOG.info("Final aggregator value: {}", counter);
}
LOG.info("Pipeline execution complete.");
return evaluator;
}
/**
* An evaluator of a PTransform.
*/
public interface TransformEvaluator {
public void evaluate(TransformT transform,
EvaluationContext context);
}
/**
* The interface provided to registered callbacks for interacting
* with the {@code DirectPipelineRunner}, including reading and writing the
* values of {@link PCollection}s and {@link PCollectionView}s.
*/
public interface EvaluationResults extends PipelineResult {
/**
* Retrieves the value of the given PCollection.
* Throws an exception if the PCollection's value hasn't already been set.
*/
List getPCollection(PCollection pc);
/**
* Retrieves the windowed value of the given PCollection.
* Throws an exception if the PCollection's value hasn't already been set.
*/
List> getPCollectionWindowedValues(PCollection pc);
/**
* Retrieves the values of each PCollection in the given
* PCollectionList. Throws an exception if the PCollectionList's
* value hasn't already been set.
*/
List> getPCollectionList(PCollectionList pcs);
/**
* Retrieves the values indicated by the given {@link PCollectionView}.
* Note that within the {@link com.google.cloud.dataflow.sdk.transforms.DoFn.Context}
* implementation a {@link PCollectionView} should convert from this representation to a
* suitable side input value.
*/
Iterable> getPCollectionView(PCollectionView view);
}
/**
* An immutable (value, timestamp) pair, along with other metadata necessary
* for the implementation of {@code DirectPipelineRunner}.
*/
public static class ValueWithMetadata {
/**
* Returns a new {@code ValueWithMetadata} with the {@code WindowedValue}.
* Key is null.
*/
public static ValueWithMetadata of(WindowedValue windowedValue) {
return new ValueWithMetadata<>(windowedValue, null);
}
/**
* Returns a new {@code ValueWithMetadata} with the implicit key associated
* with this value set. The key is the last key grouped by in the chain of
* productions that produced this element.
* These keys are used internally by {@link DirectPipelineRunner} for keeping
* persisted state separate across keys.
*/
public ValueWithMetadata withKey(Object key) {
return new ValueWithMetadata<>(windowedValue, key);
}
/**
* Returns a new {@code ValueWithMetadata} that is a copy of this one, but with
* a different value.
*/
public ValueWithMetadata withValue(T value) {
return new ValueWithMetadata(windowedValue.withValue(value), getKey());
}
/**
* Returns the {@code WindowedValue} associated with this element.
*/
public WindowedValue getWindowedValue() {
return windowedValue;
}
/**
* Returns the value associated with this element.
*
* @see #withValue
*/
public V getValue() {
return windowedValue.getValue();
}
/**
* Returns the timestamp associated with this element.
*/
public Instant getTimestamp() {
return windowedValue.getTimestamp();
}
/**
* Returns the collection of windows this element has been placed into. May
* be null if the {@code PCollection} this element is in has not yet been
* windowed.
*
* @see #getWindows()
*/
public Collection extends BoundedWindow> getWindows() {
return windowedValue.getWindows();
}
/**
* Returns the key associated with this element. May be null if the
* {@code PCollection} this element is in is not keyed.
*
* @see #withKey
*/
public Object getKey() {
return key;
}
////////////////////////////////////////////////////////////////////////////
private final Object key;
private final WindowedValue windowedValue;
private ValueWithMetadata(WindowedValue windowedValue,
Object key) {
this.windowedValue = windowedValue;
this.key = key;
}
}
/**
* The interface provided to registered callbacks for interacting
* with the {@code DirectPipelineRunner}, including reading and writing the
* values of {@link PCollection}s and {@link PCollectionView}s.
*/
public interface EvaluationContext extends EvaluationResults {
/**
* Returns the configured pipeline options.
*/
DirectPipelineOptions getPipelineOptions();
/**
* Returns the input of the currently being processed transform.
*/
InputT getInput(PTransform transform);
/**
* Returns the output of the currently being processed transform.
*/
OutputT getOutput(PTransform, OutputT> transform);
/**
* Sets the value of the given PCollection, where each element also has a timestamp
* and collection of windows.
* Throws an exception if the PCollection's value has already been set.
*/
void setPCollectionValuesWithMetadata(
PCollection pc, List> elements);
/**
* Sets the value of the given PCollection, where each element also has a timestamp
* and collection of windows.
* Throws an exception if the PCollection's value has already been set.
*/
void setPCollectionWindowedValue(PCollection pc, List> elements);
/**
* Shorthand for setting the value of a PCollection where the elements do not have
* timestamps or windows.
* Throws an exception if the PCollection's value has already been set.
*/
void setPCollection(PCollection pc, List elements);
/**
* Retrieves the value of the given PCollection, along with element metadata
* such as timestamps and windows.
* Throws an exception if the PCollection's value hasn't already been set.
*/
List> getPCollectionValuesWithMetadata(PCollection pc);
/**
* Sets the value associated with the given {@link PCollectionView}.
* Throws an exception if the {@link PCollectionView}'s value has already been set.
*/
void setPCollectionView(
PCollectionView pc,
Iterable> value);
/**
* Ensures that the element is encodable and decodable using the
* TypePValue's coder, by encoding it and decoding it, and
* returning the result.
*/
T ensureElementEncodable(TypedPValue pvalue, T element);
/**
* If the evaluation context is testing unorderedness,
* randomly permutes the order of the elements, in a
* copy if !inPlaceAllowed, and returns the permuted list,
* otherwise returns the argument unchanged.
*/
List randomizeIfUnordered(List elements,
boolean inPlaceAllowed);
/**
* If the evaluation context is testing serializability, ensures
* that the argument function is serializable and deserializable
* by encoding it and then decoding it, and returning the result.
* Otherwise returns the argument unchanged.
*/
FunctionT ensureSerializable(FunctionT fn);
/**
* If the evaluation context is testing serializability, ensures
* that the argument Coder is serializable and deserializable
* by encoding it and then decoding it, and returning the result.
* Otherwise returns the argument unchanged.
*/
Coder ensureCoderSerializable(Coder coder);
/**
* If the evaluation context is testing serializability, ensures
* that the given data is serializable and deserializable with the
* given Coder by encoding it and then decoding it, and returning
* the result. Otherwise returns the argument unchanged.
*
* Error context is prefixed to any thrown exceptions.
*/
T ensureSerializableByCoder(Coder coder,
T data, String errorContext);
/**
* Returns a mutator, which can be used to add additional counters to
* this EvaluationContext.
*/
CounterSet.AddCounterMutator getAddCounterMutator();
/**
* Gets the step name for this transform.
*/
public String getStepName(PTransform, ?> transform);
}
/////////////////////////////////////////////////////////////////////////////
class Evaluator implements PipelineVisitor, EvaluationContext {
/**
* A map from PTransform to the step name of that transform. This is the internal name for the
* transform (e.g. "s2").
*/
private final Map, String> stepNames = new HashMap<>();
private final Map store = new HashMap<>();
private final CounterSet counters = new CounterSet();
private AppliedPTransform, ?, ?> currentTransform;
private Map, Collection>> aggregatorSteps = null;
/**
* A map from PTransform to the full name of that transform. This is the user name of the
* transform (e.g. "RemoveDuplicates/Combine/GroupByKey").
*/
private final Map, String> fullNames = new HashMap<>();
private Random rand;
public Evaluator() {
this(new Random());
}
public Evaluator(Random rand) {
this.rand = rand;
}
public void run(Pipeline pipeline) {
pipeline.traverseTopologically(this);
aggregatorSteps = new AggregatorPipelineExtractor(pipeline).getAggregatorSteps();
}
@Override
public DirectPipelineOptions getPipelineOptions() {
return options;
}
@Override
public InputT getInput(PTransform transform) {
checkArgument(currentTransform != null && currentTransform.getTransform() == transform,
"can only be called with current transform");
return (InputT) currentTransform.getInput();
}
@Override
public OutputT getOutput(PTransform, OutputT> transform) {
checkArgument(currentTransform != null && currentTransform.getTransform() == transform,
"can only be called with current transform");
return (OutputT) currentTransform.getOutput();
}
@Override
public void enterCompositeTransform(TransformTreeNode node) {
}
@Override
public void leaveCompositeTransform(TransformTreeNode node) {
}
@Override
public void visitTransform(TransformTreeNode node) {
PTransform, ?> transform = node.getTransform();
fullNames.put(transform, node.getFullName());
TransformEvaluator evaluator =
getTransformEvaluator(transform.getClass());
if (evaluator == null) {
throw new IllegalStateException(
"no evaluator registered for " + transform);
}
LOG.debug("Evaluating {}", transform);
currentTransform = AppliedPTransform.of(
node.getFullName(), node.getInput(), node.getOutput(), (PTransform) transform);
evaluator.evaluate(transform, this);
currentTransform = null;
}
@Override
public void visitValue(PValue value, TransformTreeNode producer) {
LOG.debug("Checking evaluation of {}", value);
if (value.getProducingTransformInternal() == null) {
throw new RuntimeException(
"internal error: expecting a PValue " +
"to have a producingTransform");
}
if (!producer.isCompositeNode()) {
// Verify that primitive transform outputs are already computed.
getPValue(value);
}
}
/**
* Sets the value of the given PValue.
* Throws an exception if the PValue's value has already been set.
*/
void setPValue(PValue pvalue, Object contents) {
if (store.containsKey(pvalue)) {
throw new IllegalStateException(
"internal error: setting the value of " + pvalue +
" more than once");
}
store.put(pvalue, contents);
}
/**
* Retrieves the value of the given PValue.
* Throws an exception if the PValue's value hasn't already been set.
*/
Object getPValue(PValue pvalue) {
if (!store.containsKey(pvalue)) {
throw new IllegalStateException(
"internal error: getting the value of " + pvalue +
" before it has been computed");
}
return store.get(pvalue);
}
/**
* Convert a list of T to a list of {@code ValueWithMetadata}, with a timestamp of 0
* and null windows.
*/
List> toValueWithMetadata(List values) {
List> result = new ArrayList<>(values.size());
for (T value : values) {
result.add(ValueWithMetadata.of(WindowedValue.valueInGlobalWindow(value)));
}
return result;
}
/**
* Convert a list of {@code WindowedValue} to a list of {@code ValueWithMetadata}.
*/
List> toValueWithMetadataFromWindowedValue(
List> values) {
List> result = new ArrayList<>(values.size());
for (WindowedValue value : values) {
result.add(ValueWithMetadata.of(value));
}
return result;
}
@Override
public void setPCollection(PCollection pc, List elements) {
setPCollectionValuesWithMetadata(pc, toValueWithMetadata(elements));
}
@Override
public void setPCollectionWindowedValue(
PCollection pc, List> elements) {
setPCollectionValuesWithMetadata(pc, toValueWithMetadataFromWindowedValue(elements));
}
@Override
public void setPCollectionValuesWithMetadata(
PCollection pc, List> elements) {
LOG.debug("Setting {} = {}", pc, elements);
setPValue(pc, ensurePCollectionEncodable(pc, elements));
}
@Override
public void setPCollectionView(
PCollectionView view,
Iterable> value) {
LOG.debug("Setting {} = {}", view, value);
setPValue(view, value);
}
/**
* Retrieves the value of the given PCollection.
* Throws an exception if the PCollection's value hasn't already been set.
*/
@Override
public List getPCollection(PCollection pc) {
List result = new ArrayList<>();
for (ValueWithMetadata elem : getPCollectionValuesWithMetadata(pc)) {
result.add(elem.getValue());
}
return result;
}
@Override
public List> getPCollectionWindowedValues(PCollection pc) {
return Lists.transform(
getPCollectionValuesWithMetadata(pc),
new Function, WindowedValue>() {
@Override
public WindowedValue apply(ValueWithMetadata input) {
return input.getWindowedValue();
}});
}
@Override
public List> getPCollectionValuesWithMetadata(PCollection pc) {
List> elements = (List>) getPValue(pc);
elements = randomizeIfUnordered(elements, false /* not inPlaceAllowed */);
LOG.debug("Getting {} = {}", pc, elements);
return elements;
}
@Override
public List> getPCollectionList(PCollectionList pcs) {
List> elementsList = new ArrayList<>();
for (PCollection pc : pcs.getAll()) {
elementsList.add(getPCollection(pc));
}
return elementsList;
}
/**
* Retrieves the value indicated by the given {@link PCollectionView}.
* Note that within the {@link DoFnContext} a {@link PCollectionView}
* converts from this representation to a suitable side input value.
*/
@Override
public Iterable> getPCollectionView(PCollectionView view) {
Iterable> value = (Iterable>) getPValue(view);
LOG.debug("Getting {} = {}", view, value);
return value;
}
/**
* If testEncodability, ensures that the PCollection's coder and elements
* are encodable and decodable by encoding them and decoding them,
* and returning the result. Otherwise returns the argument elements.
*/
List> ensurePCollectionEncodable(
PCollection pc, List> elements) {
ensureCoderSerializable(pc.getCoder());
if (!testEncodability) {
return elements;
}
List> elementsCopy = new ArrayList<>(elements.size());
for (ValueWithMetadata element : elements) {
elementsCopy.add(
element.withValue(ensureElementEncodable(pc, element.getValue())));
}
return elementsCopy;
}
@Override
public T ensureElementEncodable(TypedPValue pvalue, T element) {
return ensureSerializableByCoder(
pvalue.getCoder(), element, "Within " + pvalue.toString());
}
@Override
public List randomizeIfUnordered(List elements,
boolean inPlaceAllowed) {
if (!testUnorderedness) {
return elements;
}
List elementsCopy = new ArrayList<>(elements);
Collections.shuffle(elementsCopy, rand);
return elementsCopy;
}
@Override
public FunctionT ensureSerializable(FunctionT fn) {
if (!testSerializability) {
return fn;
}
return SerializableUtils.ensureSerializable(fn);
}
@Override
public Coder ensureCoderSerializable(Coder coder) {
if (testSerializability) {
SerializableUtils.ensureSerializable(coder);
}
return coder;
}
@Override
public T ensureSerializableByCoder(
Coder coder, T value, String errorContext) {
if (testSerializability) {
return SerializableUtils.ensureSerializableByCoder(
coder, value, errorContext);
}
return value;
}
@Override
public CounterSet.AddCounterMutator getAddCounterMutator() {
return counters.getAddCounterMutator();
}
@Override
public String getStepName(PTransform, ?> transform) {
String stepName = stepNames.get(transform);
if (stepName == null) {
stepName = "s" + (stepNames.size() + 1);
stepNames.put(transform, stepName);
}
return stepName;
}
/**
* Returns the CounterSet generated during evaluation, which includes
* user-defined Aggregators and may include system-defined counters.
*/
public CounterSet getCounters() {
return counters;
}
/**
* Returns JobState.DONE in all situations. The Evaluator is not returned
* until the pipeline has been traversed, so it will either be returned
* after a successful run or the run call will terminate abnormally.
*/
@Override
public State getState() {
return State.DONE;
}
@Override
public AggregatorValues getAggregatorValues(Aggregator, T> aggregator) {
Map stepValues = new HashMap<>();
for (PTransform, ?> step : aggregatorSteps.get(aggregator)) {
String stepName = String.format("user-%s-%s", stepNames.get(step), aggregator.getName());
String fullName = fullNames.get(step);
Counter> counter = counters.getExistingCounter(stepName);
if (counter == null) {
throw new IllegalArgumentException(
"Aggregator " + aggregator + " is not used in this pipeline");
}
stepValues.put(fullName, (T) counter.getAggregate());
}
return new MapAggregatorValues<>(stepValues);
}
}
/////////////////////////////////////////////////////////////////////////////
private final DirectPipelineOptions options;
private boolean testSerializability = true;
private boolean testEncodability = true;
private boolean testUnorderedness = true;
/** Returns a new DirectPipelineRunner. */
private DirectPipelineRunner(DirectPipelineOptions options) {
this.options = options;
// (Re-)register standard IO factories. Clobbers any prior credentials.
IOChannelUtils.registerStandardIOFactories(options);
long randomSeed;
if (options.getDirectPipelineRunnerRandomSeed() != null) {
randomSeed = options.getDirectPipelineRunnerRandomSeed();
} else {
randomSeed = new Random().nextLong();
}
LOG.debug("DirectPipelineRunner using random seed {}.", randomSeed);
rand = new Random(randomSeed);
}
public DirectPipelineOptions getPipelineOptions() {
return options;
}
@Override
public String toString() {
return "DirectPipelineRunner#" + hashCode();
}
}