All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.atleon.core.DeduplicatingTransformer Maven / Gradle / Ivy

package io.atleon.core;

import org.reactivestreams.Publisher;
import reactor.core.publisher.Flux;
import reactor.core.publisher.GroupedFlux;
import reactor.core.publisher.Mono;
import reactor.core.scheduler.Scheduler;
import reactor.core.scheduler.Schedulers;

import java.util.List;
import java.util.function.BinaryOperator;
import java.util.function.Function;

final class DeduplicatingTransformer implements Function, Publisher> {

    private final DeduplicationConfig config;

    private final Deduplicator deduplicator;

    private final Scheduler sourceScheduler;

    private DeduplicatingTransformer(
        DeduplicationConfig config,
        Deduplicator deduplicator,
        Scheduler sourceScheduler
    ) {
        this.config = config;
        this.deduplicator = deduplicator;
        this.sourceScheduler = sourceScheduler;
    }

    static  DeduplicatingTransformer
    identity(DeduplicationConfig config, Deduplication deduplication, Scheduler sourceScheduler) {
        return new DeduplicatingTransformer<>(config, Deduplicator.identity(deduplication), sourceScheduler);
    }

    static  DeduplicatingTransformer>
    alo(DeduplicationConfig config, Deduplication deduplication, Scheduler sourceScheduler) {
        return new DeduplicatingTransformer<>(config, Deduplicator.alo(deduplication), sourceScheduler);
    }

    @Override
    public Publisher apply(Publisher publisher) {
        // When enabled, apply transformation on occurrence of the first signal. This is needed (in
        // comparison to a simple transform) to avoid changing the initial downstream subscription
        // thread which would otherwise be switched due to required subscribeOn on in the transform
        return config.isEnabled()
            ? Flux.from(publisher).switchOnFirst((signal, flux) -> flux.transform(this::applyDeduplication))
            : publisher;
    }

    private Flux applyDeduplication(Publisher publisher) {
        // - Use Scheduler with single worker for publishing, buffering, and subscribing
        //   (https://github.com/reactor/reactor-core/issues/2352)
        // - Each deduplication key gets its own Group
        // - Buffer max in-flight groups bounded in Duration and size
        Scheduler scheduler = Schedulers.single(sourceScheduler);
        return Flux.from(publisher)
            .publishOn(scheduler, config.getDeduplicationSourcePrefetch())
            .groupBy(deduplicator::extractKey)
            .flatMap(groupedFlux -> deduplicateGroup(groupedFlux, scheduler), config.getDeduplicationConcurrency())
            .subscribeOn(scheduler);
    }

    private Mono deduplicateGroup(GroupedFlux groupedFlux, Scheduler scheduler) {
        return groupedFlux.take(config.getDeduplicationDuration(), scheduler)
            .take(config.getMaxDeduplicationSize())
            .collectList()
            .map(deduplicator::deduplicate);
    }

    private static final class Deduplicator {

        private final Function dataExtractor;

        private final Function keyExtractor;

        private final Function, T> reducer;

        private Deduplicator(Function dataExtractor, Function keyExtractor, Function, T> reducer) {
            this.dataExtractor = dataExtractor;
            this.keyExtractor = keyExtractor;
            this.reducer = reducer;
        }

        public static  Deduplicator identity(Deduplication deduplication) {
            Function, T> reducer = group -> reduceToSingle(group, deduplication::reduceDuplicates);
            return new Deduplicator<>(Function.identity(), deduplication::extractKey, reducer);
        }

        public static  Deduplicator, T> alo(Deduplication deduplication) {
            Function>, Alo> aloReducer = group -> reduceToSingleAlo(group, deduplication::reduceDuplicates);
            return new Deduplicator<>(Alo::get, deduplication::extractKey, aloReducer);
        }

        public Object extractKey(T t) {
            return keyExtractor.apply(dataExtractor.apply(t));
        }

        public T deduplicate(List list) {
            return reducer.apply(list);
        }

        private static  Alo reduceToSingleAlo(List> group, BinaryOperator accumulator) {
            if (group.isEmpty()) {
                throw newEmptyDeduplicationGroupException();
            } else {
                return group.size() == 1 ? group.get(0) : AloOps.fanIn(group).map(it -> reduceToSingle(it, accumulator));
            }
        }

        private static  T reduceToSingle(List group, BinaryOperator accumulator) {
            return group.stream().reduce(accumulator).orElseThrow(Deduplicator::newEmptyDeduplicationGroupException);
        }

        private static IllegalStateException newEmptyDeduplicationGroupException() {
            return new IllegalStateException("Something bad has happened. Deduplication group was empty.");
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy