// Source listing: com.google.cloud.dataflow.sdk.runners.inprocess.KeyedPValueTrackingVisitor
// (from the google-cloud-dataflow-java-sdk-all artifact)
/*
* Copyright (C) 2016 Google Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package com.google.cloud.dataflow.sdk.runners.inprocess;
import static com.google.common.base.Preconditions.checkState;
import com.google.cloud.dataflow.sdk.Pipeline.PipelineVisitor;
import com.google.cloud.dataflow.sdk.runners.TransformTreeNode;
import com.google.cloud.dataflow.sdk.transforms.GroupByKey;
import com.google.cloud.dataflow.sdk.transforms.PTransform;
import com.google.cloud.dataflow.sdk.values.PValue;
import java.util.HashSet;
import java.util.Set;
/**
* A pipeline visitor that tracks all keyed {@link PValue PValues}. A {@link PValue} is keyed if it
* is the result of a {@link PTransform} that produces keyed outputs. A {@link PTransform} that
* produces keyed outputs is assumed to colocate output elements that share a key.
*
* <p>All {@link GroupByKey} transforms, or their runner-specific implementation primitive, produce
* keyed output.
*/
// TODO: Handle Key-preserving transforms when appropriate and more aggressively make PTransforms
// unkeyed
class KeyedPValueTrackingVisitor implements PipelineVisitor {
@SuppressWarnings("rawtypes")
private final Set> producesKeyedOutputs;
private final Set keyedValues;
private boolean finalized;
public static KeyedPValueTrackingVisitor create(
@SuppressWarnings("rawtypes") Set> producesKeyedOutputs) {
return new KeyedPValueTrackingVisitor(producesKeyedOutputs);
}
private KeyedPValueTrackingVisitor(
@SuppressWarnings("rawtypes") Set> producesKeyedOutputs) {
this.producesKeyedOutputs = producesKeyedOutputs;
this.keyedValues = new HashSet<>();
}
@Override
public void enterCompositeTransform(TransformTreeNode node) {
checkState(
!finalized,
"Attempted to use a %s that has already been finalized on a pipeline (visiting node %s)",
KeyedPValueTrackingVisitor.class.getSimpleName(),
node);
}
@Override
public void leaveCompositeTransform(TransformTreeNode node) {
checkState(
!finalized,
"Attempted to use a %s that has already been finalized on a pipeline (visiting node %s)",
KeyedPValueTrackingVisitor.class.getSimpleName(),
node);
if (node.isRootNode()) {
finalized = true;
} else if (producesKeyedOutputs.contains(node.getTransform().getClass())) {
keyedValues.addAll(node.getExpandedOutputs());
}
}
@Override
public void visitTransform(TransformTreeNode node) {}
@Override
public void visitValue(PValue value, TransformTreeNode producer) {
if (producesKeyedOutputs.contains(producer.getTransform().getClass())) {
keyedValues.addAll(value.expand());
}
}
public Set getKeyedPValues() {
checkState(
finalized, "can't call getKeyedPValues before a Pipeline has been completely traversed");
return keyedValues;
}
}