Please wait. This can take a few minutes ...
Downloading a project requires significant server resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
com.spotify.scio.grpc.GrpcBatchDoFn Maven / Gradle / Ivy
/*
* Copyright 2023 Spotify AB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.spotify.scio.grpc;
import static java.util.Objects.requireNonNull;
import com.google.common.base.Preconditions;
import com.google.common.util.concurrent.ListenableFuture;
import com.spotify.scio.grpc.GrpcDoFn.ChannelSupplier;
import com.spotify.scio.transforms.BaseAsyncLookupDoFn;
import com.spotify.scio.transforms.BaseAsyncLookupDoFn.CacheSupplier;
import com.spotify.scio.transforms.GuavaAsyncBatchLookupDoFn;
import io.grpc.Channel;
import io.grpc.stub.AbstractStub;
import java.io.Serializable;
import java.util.List;
import org.apache.beam.sdk.transforms.SerializableBiFunction;
import org.apache.beam.sdk.transforms.SerializableFunction;
import org.apache.commons.lang3.tuple.Pair;
/**
* DoFn that makes API calls that can be batched and individually cached over a managed GRPC
* channel.
*
* @param input element type.
* @param batched input element type
* @param batched output element type
* @param client lookup value type.
* @param client type.
*/
public class GrpcBatchDoFn<
Input, BatchRequest, BatchResponse, Output, Client extends AbstractStub>
extends GuavaAsyncBatchLookupDoFn {
private final ChannelSupplier channelSupplier;
private final SerializableFunction newClientFn;
private final SerializableBiFunction>
lookupFn;
public GrpcBatchDoFn(
ChannelSupplier channelSupplier,
SerializableFunction newClientFn,
int batchSize,
SerializableFunction, BatchRequest> batchRequestFn,
SerializableFunction>> batchResponseFn,
SerializableFunction idExtractorFn,
SerializableBiFunction> lookupFn,
int maxPendingRequests) {
super(batchSize, batchRequestFn, batchResponseFn, idExtractorFn, maxPendingRequests);
this.channelSupplier = channelSupplier;
this.newClientFn = newClientFn;
this.lookupFn = lookupFn;
}
public GrpcBatchDoFn(
ChannelSupplier channelSupplier,
SerializableFunction newClientFn,
int batchSize,
SerializableFunction, BatchRequest> batchRequestFn,
SerializableFunction>> batchResponseFn,
SerializableFunction idExtractorFn,
SerializableBiFunction> lookupFn,
int maxPendingRequests,
CacheSupplier cacheSupplier) {
super(
batchSize,
batchRequestFn,
batchResponseFn,
idExtractorFn,
maxPendingRequests,
cacheSupplier);
this.channelSupplier = channelSupplier;
this.newClientFn = newClientFn;
this.lookupFn = lookupFn;
}
@Override
public ResourceType getResourceType() {
return ResourceType.PER_INSTANCE;
}
@Override
public ListenableFuture asyncLookup(Client client, BatchRequest request) {
return lookupFn.apply(client, request);
}
@Override
protected Client newClient() {
return newClientFn.apply(channelSupplier.get());
}
public static <
Input, BatchRequest, BatchResponse, Output, ClientType extends AbstractStub>
Builder newBuilder() {
return new Builder<>();
}
public static class Builder<
Input, BatchRequest, BatchResponse, Output, ClientType extends AbstractStub>
implements Serializable {
private ChannelSupplier channelSupplier;
private SerializableFunction newClientFn;
private SerializableBiFunction>
lookupFn;
private SerializableFunction, BatchRequest> batchRequestFn;
private SerializableFunction>> batchResponseFn;
private SerializableFunction idExtractorFn;
private int maxPendingRequests = GrpcDoFn.DEFAULT_MAX_PENDING_REQUESTS;
private Integer batchSize;
private CacheSupplier cacheSupplier =
new BaseAsyncLookupDoFn.NoOpCacheSupplier<>();
/**
* Sets the {@link ChannelSupplier} for creating gRPC channels.
*
* @param channelSupplier The {@link ChannelSupplier} to use for creating gRPC channels.
* @return The updated {@link Builder} instance.
*/
public Builder withChannelSupplier(
ChannelSupplier channelSupplier) {
this.channelSupplier = channelSupplier;
return this;
}
/**
* Sets a new client function. This method takes a {@link SerializableFunction} that creates a
* gRPC async stub of type {@code } from the provided {@link Channel}. The new
* client function will be used to create the client for making gRPC requests.
*
* @param newClientFn The {@link SerializableFunction} that creates the gRPC async stub.
* @return The updated {@link Builder} instance.
*/
public Builder withNewClientFn(
SerializableFunction newClientFn) {
this.newClientFn = newClientFn;
return this;
}
/**
* Sets the lookup function to be used for performing batch requests. This provided {@link
* SerializableBiFunction} should take a gRPC {@code } and a {@code }
* as input and returns a {@link ListenableFuture} of a {@code }.
*
* @param lookupFn The lookup function to be used for performing batch requests.
* @return The updated {@link Builder} instance.
*/
public Builder withLookupFn(
SerializableBiFunction>
lookupFn) {
this.lookupFn = lookupFn;
return this;
}
/**
* Sets the batch request function. This method takes a {@link SerializableFunction} that takes
* a {@link List} of {@code } objects representing the elements that will go into the
* {@code }. The {@link SerializableFunction} should return the {@code
* } that will be sent via gRPC.
*
* @param batchRequestFn The batch request function.
* @return The updated {@link Builder} instance.
*/
public Builder withBatchRequestFn(
SerializableFunction, BatchRequest> batchRequestFn) {
this.batchRequestFn = batchRequestFn;
return this;
}
/**
* Sets the batch response function.
* The batch response function is a {@link SerializableFunction} that takes the
* {@code } coming from the gRPC endpoint and returns a {@link List} of a
* {@link Pair} containing the ID of the {@code } and the corresponding {@code }.
* The ID returned by the function must match the ID of the {@code } that resulted in
* that {@code .
* If the ID returned does not match any {@code } ID, the pipeline will fail.
*
* @param batchResponseFn The batch response function to be set.
* @return The updated {@link Builder} instance.
*/
public Builder withBatchResponseFn(
SerializableFunction>> batchResponseFn) {
this.batchResponseFn = batchResponseFn;
return this;
}
/**
* Sets the ID extractor function. The ID extractor function is a {@link SerializableFunction}
* that takes a single {@code } as a parameter and returns a String. The returned {@link
* String} represents a unique ID identifying the {@code }. This ID is used to match the
* {@code } inside the {@code }. Additionally, it is passed to the {@link
* com.spotify.scio.util.Cache} as the key to if a {@link CacheSupplier} is provided.
*
* @param idExtractorFn the ID extractor function to set
* @return The updated {@link Builder} instance.
*/
public Builder withIdExtractorFn(
SerializableFunction idExtractorFn) {
this.idExtractorFn = idExtractorFn;
return this;
}
/**
* Sets the maximum number of pending requests allowed. This number represents the maximum
* number of parallel batch requests that can be created per DoFn instance.
*
* @param maxPendingRequests The maximum number of pending requests for the batch processing.
* @return The updated {@link Builder} instance.
*/
public Builder withMaxPendingRequests(
int maxPendingRequests) {
Preconditions.checkArgument(maxPendingRequests > 0, "maxPendingRequests must be positive");
this.maxPendingRequests = maxPendingRequests;
return this;
}
/**
* Sets the batch size for batching elements. The batch size determines the maximum number of
* elements that can be batched into a single {@code }. Batches are created from
* the bundle elements, and we do not batch across bundles.
*
* @param batchSize The batch size to set.
* @return The updated {@link Builder} instance.
*/
public Builder withBatchSize(
int batchSize) {
Preconditions.checkArgument(batchSize > 0, "batchSize must be positive");
this.batchSize = batchSize;
return this;
}
/**
* Sets the cache supplier for the Builder. This method allows you to set a {@link
* CacheSupplier} that is capable of supplying a {@link com.spotify.scio.util.Cache} of type
* {@link String} and {@code }. Where the {@link String} is the ID returned from the
* IdExtractorFn and is matched to a specific {@code } from the {@code }.
*
* @param cacheSupplier the {@link CacheSupplier} to set for the Builder
* @return The updated {@link Builder} instance.
*/
public Builder withCacheSupplier(
CacheSupplier cacheSupplier) {
this.cacheSupplier = cacheSupplier;
return this;
}
public GrpcBatchDoFn build() {
requireNonNull(channelSupplier, "channelSupplier must not be null");
requireNonNull(newClientFn, "newClientFn must not be null");
requireNonNull(lookupFn, "lookupFn must not be null");
requireNonNull(batchRequestFn, "batchRequestFn must not be null");
requireNonNull(batchResponseFn, "batchResponseFn must not be null");
requireNonNull(idExtractorFn, "idExtractorFn must not be null");
requireNonNull(batchSize, "batchSize must not be null");
requireNonNull(cacheSupplier, "cacheSupplier must not be null");
return new GrpcBatchDoFn<>(
channelSupplier,
newClientFn,
batchSize,
batchRequestFn,
batchResponseFn,
idExtractorFn,
lookupFn,
maxPendingRequests,
cacheSupplier);
}
}
}