dev.responsive.kafka.api.async.AsyncProcessorSupplier Maven / Gradle / Ivy
/*
* Copyright 2024 Responsive Computing, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.responsive.kafka.api.async;
import static dev.responsive.kafka.api.async.internals.AsyncProcessor.createAsyncProcessor;
import static dev.responsive.kafka.api.async.internals.AsyncUtils.initializeAsyncBuilders;
import dev.responsive.kafka.api.async.internals.AsyncProcessor;
import dev.responsive.kafka.api.async.internals.stores.AbstractAsyncStoreBuilder;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.ConnectedStoreProvider;
import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.ProcessorSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;
/**
* Instructions:
* 1) Simply wrap your regular {@link ProcessorSupplier} or {@link FixedKeyProcessorSupplier}
* in the async supplier by passing it into the static constructor for the corresponding
* async processor supplier class, ie
* {@link #createAsyncProcessorSupplier(ProcessorSupplier)} or
* {@link AsyncFixedKeyProcessorSupplier#createAsyncProcessorSupplier(FixedKeyProcessorSupplier)}
* You can then turn on async processing by passing in the {@link AsyncProcessorSupplier}
* or {@link AsyncFixedKeyProcessorSupplier} to your application and
* substituting it into the topology wherever you were previously
* passing in a {@link ProcessorSupplier} or {@link FixedKeyProcessorSupplier}.
* The async framework will take care of the rest, and no further code changes are required to
* enable async processing!
* Please review the requirements and current limits for what kind of features
* and semantics are supported at this time. Contact us if you need something
* that is currently not compatible with async processing to discuss adding it
* to the framework.
*
*
*
* Requirements/Setup:
* 1) To use state stores within an async processor, you must connect the state stores via
* automatic connection. In other words, you must have your ProcessorSupplier override the
* {@link ConnectedStoreProvider#stores()} method and supply your store builders there,
* rather than connecting them to the processor manually via APIs like
* {@link StreamsBuilder#addStateStore(StoreBuilder)} (DSL) and
* {@link Topology#addStateStore} (PAPI)
* 2) As is the case with regular, non-async processors, it is strongly recommended to
* use only "safe forwarding" in your processor, as "unsafe forwarding" can break causality
* and lead to unexpected results. In other words, you should avoid mutating the input record
* and forwarding the mutated record as the output. The "safe forwarding" method
* entails creating a new {@link org.apache.kafka.streams.processor.api.Record} object with
* the desired key, value, and timestamp, as well as making a copy of the input record's
* {@link Headers} if you use headers and need to modify them or do anything other than passing
* them as-is. Headers are inherently mutable and do not protect their backing array by making
* copies of it in the constructor or anywhere else. Therefore, if you ever add, remove, or
* update the input record's Headers, you must protect them by making a copy of the backing
* array and then creating a new instance of {@link Headers} with the new clone of the array.
* See the {@link ProcessorContext} javadocs for more details and examples of safe vs
* unsafe forwarding techniques.
* 3) It is required to initialize any state stores connected to this processor inside of its
* {@link Processor#init(ProcessorContext)} method. Attempting to call
* {@link ProcessorContext#getStateStore(String)} after #init, for example inside the
* {@link Processor#process} method instead, will result in an exception being thrown.
*
*
*
* Current limitations:
* 0) Does not support read-your-write semantics WITHIN a single invocation of #process -- in
* other words, a #get after a #put on the same key is not necessarily guaranteed to return
* the value that was just inserted with #put, or include the new record in the results of
* a range scan.
* Note that this is only true within an invocation of #process: not from one invocation
* of #process to another, ie between different input records. The async processing framework
* guarantees that all previous input records of the same key will be processed in full
* before a new record with that key is picked up for async execution. This means any #put
* calls made while processing an input record at offset N will be reflected in the results
* of any #get calls on the same key when processing any input record at offset N + 1 or
* beyond.
* 1) Proceed with caution when using range queries or performing any operations that affect,
* or depend on, the results or state of input records associated with a different key
* than that of the record which was passed into the current iteration of #process.
* Cross-key range scans, for example, will not necessarily include all records with
* a lower offset that have a different key. These may behave in a non-deterministic fashion
* as input records are only guaranteed to be processed in offset order relative to other
* input records with the same key. Since by definition, records of different keys are
* processed in parallel during async processing, you may see unpredictable results when
* attempting any operation that affects/uses other keys (such as {@link KeyValueStore#range}).
* 2) *Not compatible with punctuators or input records that originate in upstream punctuators
* 3) Key-Value stores only at this time: async window and session stores coming soon
* 4) Cannot be used for global state stores
* 5) Async processors with multiple state stores cannot have state stores of different types,
* ie they must all use the same type of keySerde and the same type of valueSerde (though
* the keySerde and valueSerde themselves can be any type and don't need to match each other)
* 6) Async processing will not be compatible with the new/upcoming "shareable state stores"
* feature -- an async processor must be the sole owner of any state stores connected
* to it. You can still access these stores from outside the async processor by using IQ,
* you just cannot access them from another processor or app by "sharing" them.
* 7) Only single stateful processors are supported at this time. In other words, you can only
* enable async processing for a single node in the topology, which must be expressed as a
* {@link Processor} and requires at least one state store be connected.
*/
public final class AsyncProcessorSupplier
implements ProcessorSupplier {
// NOTE(review): this copy of the file appears to have lost its generic type
// parameters/arguments -- see the dangling ">" in the "Map>" and "Set>" declarations
// below, and the diamond operator used on a class declared without type parameters.
// Restore them from the upstream source before compiling -- TODO confirm.
//
// The user's original supplier; #get asks it for a processor on every invocation.
private final ProcessorSupplier userProcessorSupplier;
// Whatever AsyncUtils#initializeAsyncBuilders returned for the user's store builders.
// The values of this map are what #stores hands back.
// Presumably keyed by store name -- TODO confirm against AsyncUtils.
private final Map> asyncStoreBuilders;
/**
* Create an AsyncProcessorSupplier that wraps a custom {@link ProcessorSupplier}
* to enable async processing. If you have a fixed-key processor, use
* {@link AsyncFixedKeyProcessorSupplier#createAsyncProcessorSupplier} instead.
* <p>
* The store builders are read from the wrapped supplier's
* {@link ProcessorSupplier#stores()} method at the time this factory is called.
*
* @param processorSupplier the {@link ProcessorSupplier} that returns a (new) instance
* of your custom {@link Processor} on each invocation of
* {@link ProcessorSupplier#get}
* @return a new AsyncProcessorSupplier wrapping {@code processorSupplier}
* @throws UnsupportedOperationException if {@code processorSupplier.stores()} returns
* null or an empty set
*/
@SuppressWarnings("checkstyle:linelength")
public static AsyncProcessorSupplier createAsyncProcessorSupplier(
final ProcessorSupplier processorSupplier
) {
return new AsyncProcessorSupplier<>(processorSupplier, processorSupplier.stores());
}
/**
* Validates that at least one store builder was supplied, then keeps a reference to the
* user's supplier and passes the store builders to
* {@code AsyncUtils#initializeAsyncBuilders}, saving the result.
* Private: instances are created through {@link #createAsyncProcessorSupplier}.
*
* @param userProcessorSupplier the supplier being wrapped
* @param userStoreBuilders the store builders reported by the wrapped supplier
* @throws UnsupportedOperationException if {@code userStoreBuilders} is null or empty
*/
private AsyncProcessorSupplier(
final ProcessorSupplier userProcessorSupplier,
final Set> userStoreBuilders
) {
// Reject suppliers with no stores up front: a null or empty set means the user either
// has no state stores or did not connect them via the #stores method.
if (userStoreBuilders == null || userStoreBuilders.isEmpty()) {
throw new UnsupportedOperationException(
"Async processing currently requires at least one state store be "
+ "connected to the async processor, and that stores be connected "
+ "by implementing the #stores method in your processor supplier");
}
this.userProcessorSupplier = userProcessorSupplier;
this.asyncStoreBuilders = initializeAsyncBuilders(userStoreBuilders);
}
/**
* Builds an async processor by invoking the wrapped supplier's {@code get()} and handing
* the result, together with the async store builders, to
* {@code AsyncProcessor#createAsyncProcessor}.
*
* @return the processor returned by {@code createAsyncProcessor}
*/
@Override
public AsyncProcessor get() {
return createAsyncProcessor(userProcessorSupplier.get(), asyncStoreBuilders);
}
/**
* Exposes the async store builders (not the user's original builders) as the stores
* connected to this supplier.
*
* @return a newly allocated {@link HashSet} containing the values of the async store
* builder map; a fresh set is created on every call
*/
@Override
public Set> stores() {
return new HashSet<>(asyncStoreBuilders.values());
}
}