// org.apache.kafka.streams.kstream.BranchedKStream Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.util.Map;
/**
* Branches the records in the original stream based on the predicates supplied for the branch definitions.
* <p>
* Branches are defined with {@link BranchedKStream#branch(Predicate, Branched)} or
* {@link BranchedKStream#defaultBranch(Branched)} methods. Each record is evaluated against the {@code predicate}
* supplied via {@link Branched} parameters, and is routed to the first branch for which its respective predicate
* evaluates to {@code true}. If a record does not match any predicates, it will be routed to the default branch,
* or dropped if no default branch is created.
* <p>
* Each branch (which is a {@link KStream} instance) then can be processed either by
* a {@link java.util.function.Function} or a {@link java.util.function.Consumer} provided via a {@link Branched}
* parameter. If certain conditions are met, it also can be accessed from the {@link Map} returned by an optional
* {@link BranchedKStream#defaultBranch(Branched)} or {@link BranchedKStream#noDefaultBranch()} method call
* (see usage examples).
* <p>
* The branching happens on a first-match basis: A record in the original stream is assigned to the corresponding result
* stream for the first predicate that evaluates to {@code true}, and is assigned to this stream only. If you need
* to route a record to multiple streams, you can apply multiple {@link KStream#filter(Predicate)} operators
* to the same {@link KStream} instance, one for each predicate, instead of branching.
* <p>
* The process of routing the records to different branches is a stateless record-by-record operation.
*
* <h2>Rules of forming the resulting map</h2>
* The keys of the {@code Map<String, KStream<K, V>>} entries returned by {@link BranchedKStream#defaultBranch(Branched)} or
* {@link BranchedKStream#noDefaultBranch()} are defined by the following rules:
* <ul>
* <li>If a {@link Named} parameter was provided for {@link KStream#split(Named)}, its value is used as
* a prefix for each key. By default, no prefix is used</li>
* <li>If a branch name is provided in {@link BranchedKStream#branch(Predicate, Branched)} via the
* {@link Branched} parameter, its value is appended to the prefix to form the {@code Map} key</li>
* <li>If a name is not provided for the branch, then the key defaults to {@code prefix + position} of the branch
* as a decimal number, starting from {@code "1"}</li>
* <li>If a name is not provided for the {@link BranchedKStream#defaultBranch()}, then the key defaults
* to {@code prefix + "0"}</li>
* </ul>
* The values of the respective {@code Map<String, KStream<K, V>>} entries are formed as follows:
* <ul>
* <li>If no chain function or consumer is provided in {@link BranchedKStream#branch(Predicate, Branched)} via
* the {@link Branched} parameter, then the branch itself is added to the {@code Map}</li>
* <li>If a chain function is provided and it returns a non-null value for a given branch, then the value
* is the result returned by this function</li>
* <li>If a chain function returns {@code null} for a given branch, then no entry is added to the map</li>
* <li>If a consumer is provided for a given branch, then no entry is added to the map</li>
* </ul>
* For example:
* <pre> {@code
* Map<String, KStream<K, V>> result =
*     source.split(Named.as("foo-"))
*         .branch(predicate1, Branched.as("bar"))                      // "foo-bar"
*         .branch(predicate2, Branched.withConsumer(ks -> ks.to("A"))) // no entry: a Consumer is provided
*         .branch(predicate3, Branched.withFunction(ks -> null))       // no entry: chain function returns null
*         .branch(predicate4, Branched.withFunction(ks -> ks))         // "foo-4": chain function returns non-null value
*         .branch(predicate5)                                          // "foo-5": name defaults to the branch position
*         .defaultBranch();                                            // "foo-0": "0" is the default name for the default branch
* }</pre>
*
* <h2>Usage examples</h2>
*
* <h3>Direct Branch Consuming</h3>
* In many cases we do not need to have a single scope for all the branches, each branch being processed completely
* independently of others. Then we can use 'consuming' lambdas or method references in the {@link Branched} parameter:
*
* <pre> {@code
* source.split()
*     .branch(predicate1, Branched.withConsumer(ks -> ks.to("A")))
*     .branch(predicate2, Branched.withConsumer(ks -> ks.to("B")))
*     .defaultBranch(Branched.withConsumer(ks -> ks.to("C")));
* }</pre>
*
* <h3>Collecting branches in a single scope</h3>
* In other cases we want to combine branches again after splitting. The map returned by
* {@link BranchedKStream#defaultBranch()} or {@link BranchedKStream#noDefaultBranch()} methods provides
* access to all the branches in the same scope:
*
* <pre> {@code
* Map<String, KStream<String, String>> branches = source.split(Named.as("split-"))
*     .branch((key, value) -> value == null, Branched.withFunction(s -> s.mapValues(v -> "NULL"), "null"))
*     .defaultBranch(Branched.as("non-null"));
*
* KStream<String, String> merged = branches.get("split-non-null").merge(branches.get("split-null"));
* }</pre>
*
* <h3>Dynamic branching</h3>
* There is also a case when we might need to create branches dynamically, e.g. one per enum value:
*
* <pre> {@code
* BranchedKStream<K, V> branched = stream.split();
* for (RecordType recordType : RecordType.values())
*     branched.branch((k, v) -> v.getRecType() == recordType,
*         Branched.withConsumer(recordType::processRecords));
* }</pre>
*
* @param <K> Type of keys
* @param <V> Type of values
*
* @see KStream
*/
public interface BranchedKStream {
/**
* Define a branch for records that match the predicate.
*
* @param predicate
* A {@link Predicate} instance, against which each record will be evaluated.
* If this predicate returns {@code true} for a given record, the record will be
* routed to the current branch and will not be evaluated against the predicates
* for the remaining branches.
*
* @return {@code this} to facilitate method chaining
*/
BranchedKStream branch(Predicate super K, ? super V> predicate);
/**
* Define a branch for records that match the predicate.
*
* @param predicate
* A {@link Predicate} instance, against which each record will be evaluated.
* If this predicate returns {@code true} for a given record, the record will be
* routed to the current branch and will not be evaluated against the predicates
* for the remaining branches.
* @param branched
* A {@link Branched} parameter, that allows to define a branch name, an in-place
* branch consumer or branch mapper (see code examples
* for {@link BranchedKStream})
*
* @return {@code this} to facilitate method chaining
*/
BranchedKStream branch(Predicate super K, ? super V> predicate, Branched branched);
/**
* Finalize the construction of branches and defines the default branch for the messages not intercepted
* by other branches. Calling {@code defaultBranch} or {@link #noDefaultBranch()} is optional.
*
* @return {@link Map} of named branches. For rules of forming the resulting map, see {@code BranchedKStream}
* description.
*/
Map> defaultBranch();
/**
* Finalize the construction of branches and defines the default branch for the messages not intercepted
* by other branches. Calling {@code defaultBranch} or {@link #noDefaultBranch()} is optional.
*
* @param branched
* A {@link Branched} parameter, that allows to define a branch name, an in-place
* branch consumer or branch mapper (see code examples
* for {@link BranchedKStream})
*
* @return {@link Map} of named branches. For rules of forming the resulting map, see {@link BranchedKStream}
* description.
*/
Map> defaultBranch(Branched branched);
/**
* Finalize the construction of branches without forming a default branch. Calling {@code #noDefaultBranch()}
* or {@link #defaultBranch()} is optional.
*
* @return {@link Map} of named branches. For rules of forming the resulting map, see {@link BranchedKStream}
* description.
*/
Map> noDefaultBranch();
}