All downloads are free. Search and download functionality uses the official Maven repository.

dev.responsive.kafka.api.async.AsyncProcessorSupplier Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2024 Responsive Computing, Inc.
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package dev.responsive.kafka.api.async;

import static dev.responsive.kafka.api.async.internals.AsyncProcessor.createAsyncProcessor;
import static dev.responsive.kafka.api.async.internals.AsyncUtils.initializeAsyncBuilders;

import dev.responsive.kafka.api.async.internals.AsyncProcessor;
import dev.responsive.kafka.api.async.internals.stores.AbstractAsyncStoreBuilder;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import org.apache.kafka.common.header.Headers;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.processor.ConnectedStoreProvider;
import org.apache.kafka.streams.processor.api.FixedKeyProcessorSupplier;
import org.apache.kafka.streams.processor.api.Processor;
import org.apache.kafka.streams.processor.api.ProcessorContext;
import org.apache.kafka.streams.processor.api.ProcessorSupplier;
import org.apache.kafka.streams.state.KeyValueStore;
import org.apache.kafka.streams.state.StoreBuilder;

/**
 * Instructions:
 * 1) Simply wrap your regular {@link ProcessorSupplier} or {@link FixedKeyProcessorSupplier}
 *    in the async supplier by passing it into the static constructor for the corresponding
 *    async processor supplier class, ie
 *    {@link #createAsyncProcessorSupplier(ProcessorSupplier)} or
 *    {@link AsyncFixedKeyProcessorSupplier#createAsyncProcessorSupplier(FixedKeyProcessorSupplier)}
 *    You can then turn on async processing by passing in the {@link AsyncProcessorSupplier}
 *    or {@link AsyncFixedKeyProcessorSupplier} to your application and
 *    substituting it into the topology wherever you were previously
 *    passing in a {@link ProcessorSupplier} or {@link FixedKeyProcessorSupplier}.
 *    The async framework will take care of the rest, and no further code changes are required to
 *    enable async processing!
 *    Please review the requirements and current limits for what kind of features
 *    and semantics are supported at this time. Contact us if you need something
 *    that is currently not compatible with async processing to discuss adding it
 *    to the framework.
 *
 * 

* * Requirements/Setup: * 1) To use state stores within an async processor, you must connect the state stores via * automatic connection. In other words, you must have your ProcessorSupplier override the * {@link ConnectedStoreProvider#stores()} method and supply your store builders there, * rather than connecting them to the processor manually via APIs like * {@link StreamsBuilder#addStateStore(StoreBuilder)} (DSL) and * {@link Topology#addStateStore} (PAPI) * 2) As is the case with regular, non-async processors, it is strongly recommended to * use only "safe forwarding" in your processor, as "unsafe forwarding" can break casuality * and lead to unexpected results. In other words, you should avoid mutating the input record * and forwarding the mutated record as the output. The "safe forwarding" method * entails creating a new {@link org.apache.kafka.streams.processor.api.Record} object with * the desired key, value, and timestamp, as well as making a copy of the input record's * {@link Headers} if you use headers and need to modify them or do anything other than passing * them as-is. Headers are inherently mutatable and do not protect their backing array by making * copies of it in the constructor or anywhere else. Therefore, if you ever add, remove, or * update the input record's Headers, you must protect them by making a copy of the backing * array and then creating a new instance of {@link Headers} with the new clone of the array. * See the {@link ProcessorContext} javadocs for more details and examples of safe vs * unsafe forwarding techniques. * 3) It is required to initialize any state stores connected to this processor inside of its * {@link Processor#init(ProcessorContext)} method. Attempting to call * {@link ProcessorContext#getStateStore(String)} after #init, for example inside the * {@link Processor#process} method instead, will result in an exception being thrown. * *

* * Current limitations: * 0) Does not support read-your-write semantics WITHIN a single invocation of #process -- in * other words, a #get after a #put on the same key is not necessarily guaranteed to return * the value that was just inserted with #put, or include the new record in the results of * a range scan. * Note that this is only true within an invocation of #process: not from one invocation * of #process to another, ie between different input records. The async processing framework * guarantees that all previous input records of the same key will be processed in full * before a new record with that key is picked up for async execution. This means any #put * calls made while processing an input record at offset N will be reflected in the results * of any #get calls on the same key when processing any input record at offset N + 1 or * beyond. * 1) Proceed with caution when using range queries or performing any operations that affect, * or depend on, the results or state of input records associated with a different key * than that of the record which was passed into the current iteration of #process. * Cross-key range scans, for example, will not necessarily include all records with * a lower offset that have a different key. These may behave in a non-deterministic fashion * as input records are only guaranteed to be processed in offset order relative to other * input records with the same key. 
Since by definition, records of different keys are * processed in parallel during async processing, you may see unpredictable results when * attempting any operation that affects/uses other keys (such as {@link KeyValueStore#range} * 2) *Not compatible with punctuators or input records that originate in upstream punctuators * 3) Key-Value stores only at this time: async window and session stores coming soon * 4) Cannot be used for global state stores * 5) Async processors with multiple state stores cannot have state stores of different types, * ie they must all use the same type of keySerde and the same type of valueSerde (though * the keySerde and valueSerde themselves can be any type and don't need to match each other) * 6) Async processing will not be compatible with the new/upcoming "shareable state stores" * feature -- an async processor must be the sole owner of any state stores connected * to it. You can still access these stores from outside the async processor by using IQ, * you just cannot access them from another processor or app by "sharing" them. * 7) Only single stateful processors are supported at this time. In other words, you can only * enable async processing for a single node in the topology, which must be expressed as a * {@link Processor} and requires at least one state store be connected. */ public final class AsyncProcessorSupplier implements ProcessorSupplier { private final ProcessorSupplier userProcessorSupplier; private final Map> asyncStoreBuilders; /** * Create an AsyncProcessorSupplier that wraps a custom {@link ProcessorSupplier} * to enable async processing. 
If you have a fixed-key processor, use * {@link AsyncFixedKeyProcessorSupplier#createAsyncProcessorSupplier} instead * * @param processorSupplier the {@link ProcessorSupplier} that returns a (new) instance * of your custom {@link Processor} on each invocation of * {@link ProcessorSupplier#get} */ @SuppressWarnings("checkstyle:linelength") public static AsyncProcessorSupplier createAsyncProcessorSupplier( final ProcessorSupplier processorSupplier ) { return new AsyncProcessorSupplier<>(processorSupplier, processorSupplier.stores()); } private AsyncProcessorSupplier( final ProcessorSupplier userProcessorSupplier, final Set> userStoreBuilders ) { if (userStoreBuilders == null || userStoreBuilders.isEmpty()) { throw new UnsupportedOperationException( "Async processing currently requires at least one state store be " + "connected to the async processor, and that stores be connected " + "by implementing the #stores method in your processor supplier"); } this.userProcessorSupplier = userProcessorSupplier; this.asyncStoreBuilders = initializeAsyncBuilders(userStoreBuilders); } @Override public AsyncProcessor get() { return createAsyncProcessor(userProcessorSupplier.get(), asyncStoreBuilders); } @Override public Set> stores() { return new HashSet<>(asyncStoreBuilders.values()); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy