// Source listing: org.apache.beam.runners.fnexecution.control.SdkHarnessClient (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.beam.runners.fnexecution.control;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkArgument;
import static org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.base.Preconditions.checkState;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletionStage;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Phaser;
import java.util.concurrent.atomic.AtomicBoolean;
import org.apache.beam.model.fnexecution.v1.BeamFnApi;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.InstructionResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleDescriptor;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleProgressResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleResponse;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitRequest;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitRequest.DesiredSplit;
import org.apache.beam.model.fnexecution.v1.BeamFnApi.ProcessBundleSplitResponse;
import org.apache.beam.model.pipeline.v1.Endpoints;
import org.apache.beam.runners.fnexecution.control.ProcessBundleDescriptors.TimerSpec;
import org.apache.beam.runners.fnexecution.data.FnDataService;
import org.apache.beam.runners.fnexecution.data.RemoteInputDestination;
import org.apache.beam.runners.fnexecution.state.StateDelegator;
import org.apache.beam.runners.fnexecution.state.StateRequestHandler;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.fn.IdGenerator;
import org.apache.beam.sdk.fn.IdGenerators;
import org.apache.beam.sdk.fn.data.BeamFnDataInboundObserver;
import org.apache.beam.sdk.fn.data.BeamFnDataOutboundAggregator;
import org.apache.beam.sdk.fn.data.CloseableFnDataReceiver;
import org.apache.beam.sdk.fn.data.DataEndpoint;
import org.apache.beam.sdk.fn.data.FnDataReceiver;
import org.apache.beam.sdk.fn.data.LogicalEndpoint;
import org.apache.beam.sdk.fn.data.TimerEndpoint;
import org.apache.beam.sdk.util.MoreFutures;
import org.apache.beam.sdk.util.construction.Timer;
import org.apache.beam.sdk.values.KV;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * A high-level client for an SDK harness.
 *
 * <p>This provides a Java-friendly wrapper around {@link InstructionRequestHandler} and {@link
 * CloseableFnDataReceiver}, which handle lower-level gRPC message wrangling.
 */
@SuppressWarnings({
"rawtypes", // TODO(https://github.com/apache/beam/issues/20447)
"nullness" // TODO(https://github.com/apache/beam/issues/20497)
})
public class SdkHarnessClient implements AutoCloseable {
private static final Logger LOG = LoggerFactory.getLogger(SdkHarnessClient.class);
/**
* A processor capable of creating bundles for some registered {@link ProcessBundleDescriptor}.
*/
public class BundleProcessor {
/** The descriptor whose id is sent with every bundle request created by this processor. */
private final ProcessBundleDescriptor processBundleDescriptor;

/** Remote inputs of the descriptor; stored as given, without a defensive copy. */
private final List<RemoteInputDestination> remoteInputs;

/**
 * Timer specifications in a two-level map with string keys (presumably transform id, then timer
 * id; confirm against the code that builds this map).
 */
private final Map<String, Map<String, TimerSpec>> timerSpecs;

/** Delegator for state requests; stored as given. */
private final StateDelegator stateDelegator;

/**
 * Creates a processor bound to a single {@link ProcessBundleDescriptor}.
 *
 * <p>The constructor only stores its arguments; it performs no validation and no copying, so
 * later mutation of the passed collections is visible through this instance.
 *
 * @param processBundleDescriptor the descriptor bundles will be created for
 * @param remoteInputs the remote input destinations of the descriptor
 * @param timerSpecs the timer specifications associated with the descriptor
 * @param stateDelegator the delegator used for state requests
 */
private BundleProcessor(
    ProcessBundleDescriptor processBundleDescriptor,
    List<RemoteInputDestination> remoteInputs,
    Map<String, Map<String, TimerSpec>> timerSpecs,
    StateDelegator stateDelegator) {
  this.processBundleDescriptor = processBundleDescriptor;
  this.remoteInputs = remoteInputs;
  this.timerSpecs = timerSpecs;
  this.stateDelegator = stateDelegator;
}
/**
* Start a new bundle for the given {@link BeamFnApi.ProcessBundleDescriptor} identifier.
*
* The input channels for the returned {@link ActiveBundle} are derived from the instructions
* in the {@link BeamFnApi.ProcessBundleDescriptor}.
*
*
NOTE: It is important to {@link #close()} each bundle after all elements are emitted.
*
*
{@code
* try (ActiveBundle bundle = SdkHarnessClient.newBundle(...)) {
* FnDataReceiver inputReceiver =
* (FnDataReceiver) bundle.getInputReceivers().get(mainPCollectionId);
* // send all main input elements ...
* }
* }
*
* An exception during {@link #close()} will be thrown if the bundle requests finalization or
* attempts to checkpoint by returning a {@link BeamFnApi.DelayedBundleApplication}.
*/
public ActiveBundle newBundle(
Map> outputReceivers,
BundleProgressHandler progressHandler) {
return newBundle(
outputReceivers,
request -> {
throw new UnsupportedOperationException(
String.format(
"The %s does not have a registered state handler.",
ActiveBundle.class.getSimpleName()));
},
progressHandler);
}
/**
* Start a new bundle for the given {@link BeamFnApi.ProcessBundleDescriptor} identifier.
*
* The input channels for the returned {@link ActiveBundle} are derived from the instructions
* in the {@link BeamFnApi.ProcessBundleDescriptor}.
*
*
NOTE: It is important to {@link #close()} each bundle after all elements are emitted.
*
*
{@code
* try (ActiveBundle bundle = SdkHarnessClient.newBundle(...)) {
* FnDataReceiver inputReceiver =
* (FnDataReceiver) bundle.getInputReceivers().get(mainPCollectionId);
* // send all main input elements ...
* }
* }
*
* An exception during {@link #close()} will be thrown if the bundle requests finalization or
* attempts to checkpoint by returning a {@link BeamFnApi.DelayedBundleApplication}.
*/
public ActiveBundle newBundle(
Map> outputReceivers,
StateRequestHandler stateRequestHandler,
BundleProgressHandler progressHandler) {
return newBundle(
outputReceivers,
Collections.emptyMap(),
stateRequestHandler,
progressHandler,
BundleSplitHandler.unsupported(),
request -> {
throw new UnsupportedOperationException(
String.format(
"The %s does not have a registered bundle checkpoint handler.",
ActiveBundle.class.getSimpleName()));
},
bundleId -> {
throw new UnsupportedOperationException(
String.format(
"The %s does not have a registered bundle finalization handler.",
ActiveBundle.class.getSimpleName()));
});
}
/**
* Start a new bundle for the given {@link BeamFnApi.ProcessBundleDescriptor} identifier.
*
* The input channels for the returned {@link ActiveBundle} are derived from the instructions
* in the {@link BeamFnApi.ProcessBundleDescriptor}.
*
*
NOTE: It is important to {@link #close()} each bundle after all elements are emitted.
*
*
{@code
* try (ActiveBundle bundle = SdkHarnessClient.newBundle(...)) {
* FnDataReceiver inputReceiver =
* (FnDataReceiver) bundle.getInputReceivers().get(mainPCollectionId);
* // send all main input elements ...
* }
* }
*
* An exception during {@link #close()} will be thrown if the bundle requests finalization if
* {@link BundleFinalizationHandler} is {@code null} or attempts to checkpoint by returning a
* {@link BeamFnApi.DelayedBundleApplication} .
*/
public ActiveBundle newBundle(
Map> outputReceivers,
Map, RemoteOutputReceiver>> timerReceivers,
StateRequestHandler stateRequestHandler,
BundleProgressHandler progressHandler,
BundleFinalizationHandler finalizationHandler,
BundleCheckpointHandler checkpointHandler) {
return newBundle(
outputReceivers,
timerReceivers,
stateRequestHandler,
progressHandler,
BundleSplitHandler.unsupported(),
checkpointHandler == null
? request -> {
throw new UnsupportedOperationException(
String.format(
"The %s does not have a registered bundle checkpoint handler.",
ActiveBundle.class.getSimpleName()));
}
: checkpointHandler,
finalizationHandler == null
? bundleId -> {
throw new UnsupportedOperationException(
String.format(
"The %s does not have a registered bundle finalization handler.",
ActiveBundle.class.getSimpleName()));
}
: finalizationHandler);
}
/**
 * Start a new bundle for the given {@link BeamFnApi.ProcessBundleDescriptor} identifier.
 *
 * <p>The input channels for the returned {@link ActiveBundle} are derived from the instructions
 * in the {@link BeamFnApi.ProcessBundleDescriptor}.
 *
 * <p>NOTE: It is important to {@link #close()} each bundle after all elements are emitted.
 *
 * <pre>{@code
 * try (ActiveBundle bundle = SdkHarnessClient.newBundle(...)) {
 *   FnDataReceiver inputReceiver =
 *       (FnDataReceiver) bundle.getInputReceivers().get(mainPCollectionId);
 *   // send all elements ...
 * }
 * }</pre>
 */
public ActiveBundle newBundle(
Map> outputReceivers,
Map, RemoteOutputReceiver>> timerReceivers,
StateRequestHandler stateRequestHandler,
BundleProgressHandler progressHandler,
BundleSplitHandler splitHandler,
BundleCheckpointHandler checkpointHandler,
BundleFinalizationHandler finalizationHandler) {
String bundleId = idGenerator.getId();
final CompletionStage genericResponse =
fnApiControlClient.handle(
BeamFnApi.InstructionRequest.newBuilder()
.setInstructionId(bundleId)
.setProcessBundle(
BeamFnApi.ProcessBundleRequest.newBuilder()
.setProcessBundleDescriptorId(processBundleDescriptor.getId())
.addAllCacheTokens(stateRequestHandler.getCacheTokens()))
.build());
LOG.debug(
"Sent {} with ID {} for {} with ID {}",
ProcessBundleRequest.class.getSimpleName(),
bundleId,
ProcessBundleDescriptor.class.getSimpleName(),
processBundleDescriptor.getId());
CompletionStage specificResponse =
genericResponse.thenApply(InstructionResponse::getProcessBundle);
Optional beamFnDataInboundObserver;
if (outputReceivers.isEmpty() && timerReceivers.isEmpty()) {
beamFnDataInboundObserver = Optional.empty();
} else {
List> dataEndpoints = new ArrayList<>(outputReceivers.size());
for (Map.Entry> receiver : outputReceivers.entrySet()) {
dataEndpoints.add(
DataEndpoint.create(
receiver.getKey(),
(Coder