com.hazelcast.jet.Observable Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of hazelcast-jdbc Show documentation
Hazelcast JDBC Driver
The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet;

import com.hazelcast.config.RingbufferConfig;
import com.hazelcast.jet.function.Observer;
import com.hazelcast.jet.impl.observer.BlockingIteratorObserver;
import com.hazelcast.jet.pipeline.Sinks;
import com.hazelcast.ringbuffer.Ringbuffer;

import javax.annotation.Nonnull;
import java.util.Iterator;
import java.util.Objects;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

/**
 * Represents a flowing sequence of events produced by {@linkplain
 * Sinks#observable(String) observable sinks}. To observe the events, call
 * {@link #addObserver jet.getObservable(name).addObserver(myObserver)}.
 * 
 * The {@code Observable} is backed by a {@link Ringbuffer}, which, once
 * created, has a fixed capacity for storing messages. It supports reading
 * by multiple {@code Observer Observers}, which will all observe the same
 * sequence of messages. A new {@code Observer} will start reading
 * automatically from the oldest sequence available. Once the capacity is
 * full, the oldest messages will be overwritten as new ones arrive.
 * 

 * The {@code Ringbuffer}'s capacity defaults to {@value
 * RingbufferConfig#DEFAULT_CAPACITY}, but can be changed (via the {@link
 * #configureCapacity(int)} method), as long as the {@code Ringbuffer} hasn't
 * been created yet (see the "Lifecycle" section below).
 * 

 * In addition to data events, the {@code Observer} can also observe
 * completion and failure events. Completion means that no further values
 * will appear in the sequence. Failure means that something went wrong
 * during the job execution .
 * 

 * Lifecycle
 * 

 * When talking about the lifecycle of an {@code Observable} (which is
 * basically just a client side object and has a lifecycle just like any
 * other POJO) it's better to actually consider the lifecycle of the
 * underlying {@code Ringbuffer}, since that is the significant
 * distributed entity.
 * 

 * The lifecycle of the {@code Ringbuffer} is decoupled from the lifecycle
 * of the job. The {@code Ringbuffer} is created either when the user
 * gets a reference to its equivalent {@code Observable} (through
 * {@link JetService#getObservable(String) JetService.getObservable()})
 * and registers the first {@link Observer} on it (through
 * {@link Observable#addObserver(Observer) Observable.addObserver()})
 * or when the job containing the sink for it starts executing.
 * 

 * The {@code Ringbuffer} must be explicitly destroyed when it's no longer
 * in use, or data will be retained in the cluster. This is done via the
 * {@link #destroy() Observable.destroy()} method. Note: even if the
 * {@code Observable} POJO gets lost and its underlying {@code Ringbuffer}
 * is leaked in the cluster, it's still possible to manually destroy
 * it later by creating another {@code Observable} instance with the same
 * name and calling {@code destroy()} on that.
 * 

 * Important: The same {@code Observable} must
 * not be used again in a new job since this will cause
 * completion events interleaving and causing data loss or other unexpected
 * behaviour. Using one observable name in multiple
 * {@link Sinks#observable(String) observable sinks} in the same job is
 * allowed, this will not produce multiple completion or error events (just
 * an intermingling of the results from the two sinks, but that should be
 * fine in some use cases).
 *
 * @param  type of the values in the sequence
 *
 * @since Jet 4.0
 */
public interface Observable extends Iterable {

    /**
     * Name of this instance.
     */
    @Nonnull
    String name();

    /**
     * Registers an {@link Observer} to this {@code Observable}. It will
     * receive all events currently in the backing {@link Ringbuffer} and
     * then continue receiving any future events.
     *
     * @return registration ID associated with the added {@code Observer},
     * can be used to remove the {@code Observer} later
     */
    @Nonnull
    UUID addObserver(@Nonnull Observer observer);

    /**
     * Removes a previously added {@link Observer} identified by its
     * assigned registration ID. A removed {@code Observer} will not get
     * notified about further events.
     */
    void removeObserver(@Nonnull UUID registrationId);

    /**
     * Set the capacity of the underlying {@link Ringbuffer}, which defaults to
     * {@value RingbufferConfig#DEFAULT_CAPACITY}.
     * 

     * This method can be called only before the {@code Ringbuffer} gets
     * created. This means before any {@link Observer Observers} are added
     * to the {@code Observable} and before any jobs containing
     * {@link com.hazelcast.jet.pipeline.Sinks#observable(String) observable
     * sinks} (with the same observable name) are submitted for execution.
     * 

     * Important: only configure capacity once, multiple
     * configuration are currently not supported.
     *
     * @throws IllegalStateException if the {@code Ringbuffer} has already been
     * created
     */
    Observable configureCapacity(int capacity);

    /**
     * Returns the configured capacity of the underlying {@link Ringbuffer}..
     * 

     * This method only works if the backing {@code Ringbuffer} has already
     * been created. If so, it will be queried for its actual capacity,
     * which can't be changed any longer. (Reminder: the {@code Ringbuffer}
     * gets created either when the first {@link Observer} is added or when
     * the job containing the {@link com.hazelcast.jet.pipeline.Sinks#observable(String)
     * observable sink} (with the same observable name) is submitted for
     * execution.)
     *
     * @throws IllegalStateException if the backing {@code Ringbuffer} has not
     * yet been created
     */
    int getConfiguredCapacity();

    /**
     * Returns an iterator over the sequence of events produced by this
     * {@code Observable}. If there are currently no events to observe,
     * the iterator's {@code hasNext()} and {@code next()} methods will block.
     * A completion event completes the iterator ({@code hasNext()} will return
     * false) and a failure event makes the iterator's methods throw the
     * underlying exception.
     * 

     * If used against an {@code Observable} populated from a streaming job,
     * the iterator will complete only in the case of an error or job
     * cancellation.
     * 

     * The iterator is not thread-safe.
     * 

     * The iterator is backed by a blocking concurrent queue which stores all
     * events until consumed.
     */
    @Nonnull @Override
    default Iterator iterator() {
        BlockingIteratorObserver observer = new BlockingIteratorObserver<>();
        addObserver(observer);
        return observer;
    }

    /**
     * Allows you to post-process the results of a Jet job on the client side
     * using the standard Java {@link java.util.stream Stream API}. You provide
     * a function that will receive the job results as a {@code Stream} and
     * return a single result (which can in fact be another {@code Stream},
     * if so desired).
     * 

     * Returns a {@link CompletableFuture CompletableFuture} that will become
     * completed once your function has received all the job results through
     * its {@code Stream} and returned the final result.
     * 

     * A trivial example is counting, like this: {@code observable.toFuture(Stream::count)},
     * however the Stream API is quite rich and you can perform arbitrary
     * transformations and aggregations.
     * 

     * This feature is intended to be used only on the results of a batch job.
     * On an unbounded streaming job the stream-collecting operation will never
     * reach the final result.
     *
     * @param fn transform function which takes the stream of observed values
     *           and produces an altered value from it, which could also
     *           be a stream
     */
    @Nonnull
    default  CompletableFuture toFuture(@Nonnull Function, R> fn) {
        Objects.requireNonNull(fn, "fn");

        Iterator iterator = iterator();
        return CompletableFuture.supplyAsync(() -> {
            Spliterator spliterator = Spliterators.spliteratorUnknownSize(iterator, 0);
            return fn.apply(StreamSupport.stream(spliterator, false));
        });
    }

    /**
     * Removes all previously registered observers and destroys the backing
     * {@link Ringbuffer}.
     * 
     * Note: if you call this while a job that publishes to this
     * {@code Observable} is still active, it will silently create a new {@code
     * Ringbuffer} and go on publishing to it.
     */
    void destroy();

}