org.apache.kafka.streams.kstream.Transformer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.kafka.streams.kstream;
import java.time.Duration;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.processor.ProcessorContext;
import org.apache.kafka.streams.processor.PunctuationType;
import org.apache.kafka.streams.processor.Punctuator;
import org.apache.kafka.streams.processor.StateStore;
import org.apache.kafka.streams.processor.To;
/**
* The {@code Transformer} interface is for stateful mapping of an input record to zero, one, or multiple new output
* records (both key and value type can be altered arbitrarily).
* This is a stateful record-by-record operation, i.e, {@link #transform(Object, Object)} is invoked individually for
* each record of a stream and can access and modify a state that is available beyond a single call of
* {@link #transform(Object, Object)} (cf. {@link KeyValueMapper} for stateless record transformation).
* Additionally, this {@code Transformer} can {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule}
* a method to be {@link Punctuator#punctuate(long) called periodically} with the provided context.
*
* Use {@link TransformerSupplier} to provide new instances of {@code Transformer} to Kafka Stream's runtime.
*
* If only a record's value should be modified {@link ValueTransformer} can be used.
*
* @param key type
* @param value type
* @param {@link KeyValue} return type (both key and value type can be set
* arbitrarily)
* @see TransformerSupplier
* @see KStream#transform(TransformerSupplier, String...)
* @see ValueTransformer
* @see KStream#map(KeyValueMapper)
* @see KStream#flatMap(KeyValueMapper)
*/
public interface Transformer {
/**
* Initialize this transformer.
* This is called once per instance when the topology gets initialized.
* When the framework is done with the transformer, {@link #close()} will be called on it; the
* framework may later re-use the transformer by calling {@link #init(ProcessorContext)} again.
*
* The provided {@link ProcessorContext context} can be used to access topology and record meta data, to
* {@link ProcessorContext#schedule(Duration, PunctuationType, Punctuator) schedule} a method to be
* {@link Punctuator#punctuate(long) called periodically} and to access attached {@link StateStore}s.
*
* Note, that {@link ProcessorContext} is updated in the background with the current record's meta data.
* Thus, it only contains valid record meta data when accessed within {@link #transform(Object, Object)}.
*
* @param context the context
*/
void init(final ProcessorContext context);
/**
* Transform the record with the given key and value.
* Additionally, any {@link StateStore state} that is {@link KStream#transform(TransformerSupplier, String...)
* attached} to this operator can be accessed and modified
* arbitrarily (cf. {@link ProcessorContext#getStateStore(String)}).
*
* If only one record should be forward downstream, {@code transform} can return a new {@link KeyValue}. If
* more than one output record should be forwarded downstream, {@link ProcessorContext#forward(Object, Object)}
* and {@link ProcessorContext#forward(Object, Object, To)} can be used.
* If no record should be forwarded downstream, {@code transform} can return {@code null}.
*
* Note that returning a new {@link KeyValue} is merely for convenience. The same can be achieved by using
* {@link ProcessorContext#forward(Object, Object)} and returning {@code null}.
*
* @param key the key for the record
* @param value the value for the record
* @return new {@link KeyValue} pair—if {@code null} no key-value pair will
* be forwarded to down stream
*/
R transform(final K key, final V value);
/**
* Close this transformer and clean up any resources. The framework may
* later re-use this transformer by calling {@link #init(ProcessorContext)} on it again.
*
* To generate new {@link KeyValue} pairs {@link ProcessorContext#forward(Object, Object)} and
* {@link ProcessorContext#forward(Object, Object, To)} can be used.
*/
void close();
}