All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.pincette.jes.util.Streams Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
package net.pincette.jes.util;

import static java.lang.Runtime.getRuntime;
import static net.pincette.util.Util.tryToGetWithRethrow;
import static org.apache.kafka.streams.KafkaStreams.State.ERROR;
import static org.apache.kafka.streams.StreamsConfig.AT_LEAST_ONCE;
import static org.apache.kafka.streams.StreamsConfig.DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG;
import static org.apache.kafka.streams.StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG;
import static org.apache.kafka.streams.StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG;
import static org.apache.kafka.streams.StreamsConfig.PROCESSING_GUARANTEE_CONFIG;

import com.typesafe.config.Config;
import java.time.Duration;
import java.util.Optional;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;
import java.util.function.BiFunction;
import net.pincette.function.SideEffect;
import net.pincette.util.TimedCache;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.KeyValue;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.errors.LogAndContinueExceptionHandler;
import org.apache.kafka.streams.kstream.KStream;

/**
 * A few Kafka Streams utilities.
 *
 * @author Werner Donn\u00e9
 * @since 1.0
 */
public class Streams {
  private Streams() {}

  /**
   * Filters the stream leaving out duplicates according to the criterion
   * over the time window. The stream will be marked for re-partitioning, but the keys
   * won't change.
   *
   * @param stream the given stream.
   * @param criterion the given criterion.
   * @param window the time window to look at.
   * @param  the key type.
   * @param  the value type.
   * @param  the criterion type.
   * @return The filtered stream.
   * @since 1.0.2
   */
  public static  KStream duplicateFilter(
      final KStream stream, final BiFunction criterion, final Duration window) {
    final TimedCache cache = new TimedCache<>(window);

    return stream
        .filterNot((k, v) -> cache.get(criterion.apply(k, v)).isPresent())
        .map(
            (k, v) ->
                Optional.of(criterion.apply(k, v))
                    .map(
                        c ->
                            SideEffect.>run(() -> cache.put(c, c))
                                .andThenGet(() -> new KeyValue<>(k, v)))
                    .orElse(null));
  }

  private static Properties copy(final Properties properties) {
    final Properties result = new Properties();

    properties
        .stringPropertyNames()
        .forEach(name -> result.setProperty(name, properties.getProperty(name)));

    return result;
  }

  /**
   * Starting from path all underlying properties are collected in a properties object.
   * If there is structure in the underlying properties then they are flattened, resulting in
   * dot-separated keys.
   *
   * @param config the configuration object to read from.
   * @param path the path in the configuration from which to start reading.
   * @return The collected properties object.
   * @since 1.0
   */
  public static Properties fromConfig(final Config config, final String path) {
    return config.getConfig(path).entrySet().stream()
        .reduce(
            new Properties(),
            (p, e) -> {
              p.put(e.getKey(), e.getValue().unwrapped().toString());
              return p;
            },
            (p1, p2) -> p1);
  }

  /**
   * This starts the topology and waits for its completion. When the JVM is interrupted
   * the streams are closed.
   *
   * @param topology the topology to run.
   * @param properties the Kafka properties to run it with. The keys should be strings and the
   *     values JSON strings. In the KStreams and KTables the type for the values is always 
   *     javax.json.JsonObjectThe processing guarantee will be set to exactly once.
   * @return Indicates success of failure.
   * @see JsonObject
   * @since 1.0
   */
  public static boolean start(final Topology topology, final Properties properties) {
    final boolean[] error = new boolean[1];
    final CountDownLatch latch = new CountDownLatch(1);

    return tryToGetWithRethrow(
            () -> new KafkaStreams(topology, streamsConfig(properties)),
            streams -> {
              streams.setStateListener(
                  (newState, oldState) -> {
                    if (newState.equals(ERROR)) {
                      error[0] = true;
                      latch.countDown();
                    }
                  });

              getRuntime()
                  .addShutdownHook(
                      new Thread(
                          () -> {
                            streams.close();
                            latch.countDown();
                          }));

              streams.start();
              latch.await();

              return !error[0];
            })
        .orElse(false);
  }

  /**
   * Creates a new properties object with the default values for the streams configuration. It sets
   * the JSON serializer. The processing guarantee is set to "at least once". This is done only when
   * those properties are not yet present in kafkaConfig.
   *
   * @param kafkaConfig the given Kafka configuration.
   * @return The new configuration with the default values.
   * @see JsonSerde
   * @since 1.1
   */
  public static Properties streamsConfig(final Properties kafkaConfig) {
    final Properties result = copy(kafkaConfig);

    if (result.getProperty(DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG) == null) {
      result.put(
          DEFAULT_DESERIALIZATION_EXCEPTION_HANDLER_CLASS_CONFIG,
          LogAndContinueExceptionHandler.class);
    }

    if (result.getProperty(DEFAULT_KEY_SERDE_CLASS_CONFIG) == null) {
      result.put(DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
    }

    if (result.getProperty(DEFAULT_VALUE_SERDE_CLASS_CONFIG) == null) {
      result.put(DEFAULT_VALUE_SERDE_CLASS_CONFIG, JsonSerde.class);
    }

    if (result.getProperty(PROCESSING_GUARANTEE_CONFIG) == null) {
      result.put(PROCESSING_GUARANTEE_CONFIG, AT_LEAST_ONCE);
    }

    return result;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy