All downloads are free. Search and download functionality uses the official Maven repository.

com.cognite.client.stream.RawPublisher Maven / Gradle / Ivy

package com.cognite.client.stream;

import com.cognite.client.RawRows;
import com.cognite.client.Request;
import com.cognite.client.dto.RawRow;
import com.google.auto.value.AutoValue;
import com.google.common.base.Preconditions;
import org.apache.commons.lang3.RandomStringUtils;

import javax.annotation.Nullable;
import java.time.Duration;
import java.time.Instant;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.*;
import java.util.function.Consumer;

/**
 * This class produces a continuous data stream of rows from a raw table. The raw table is monitored for changes and
 * all new or changed rows are streamed.
 *
 * The publisher polls Raw for updates at the {@code pollingInterval} (default every 5 sec.) and push the resulting
 * batch of {@link RawRow} to the registered {@code Consumer}.
 *
 */
@AutoValue
public abstract class RawPublisher extends AbstractPublisher {

    // Defaults and boundary values
    private static final Duration DEFAULT_POLLING_OFFSET = Duration.ofSeconds(2L);

    private static Builder builder() {
        return new AutoValue_RawPublisher.Builder()
                .setPollingInterval(DEFAULT_POLLING_INTERVAL)
                .setPollingOffset(DEFAULT_POLLING_OFFSET)
                .setStartTime(MIN_START_TIME.plusSeconds(1))
                .setEndTime(MAX_END_TIME)
                ;
    }

    /**
     * For internal use.
     *
     * Configures a publisher to stream rows from the specified raw table.
     *
     * @param rawRows The read raw rows api to use for querying Raw.
     * @param rawDbName The raw database to read from.
     * @param rawTableName The raw table to read from.
     * @return The configured {@link RawPublisher}
     */
    public static RawPublisher of(RawRows rawRows,
                                  String rawDbName,
                                  String rawTableName) {
        return RawPublisher.builder()
                .setRawRows(rawRows)
                .setRawDbName(rawDbName)
                .setRawTableName(rawTableName)
                .build();
    }

    abstract Builder toBuilder();

    abstract RawRows getRawRows();
    abstract String getRawDbName();
    abstract String getRawTableName();
    @Nullable
    abstract Consumer> getConsumer();

    /**
     * Add the consumer of the data stream.
     *
     * The consumer will be called for each batch of {@link RawRow}. This is potentially a blocking operation,
     * so you should take care to process the batch efficiently (or spin off processing to a separate thread).
     *
     * @param consumer The function to call for each batch of {@link RawRow}.
     * @return The {@link RawPublisher} with the consumer configured.
     */
    public RawPublisher withConsumer(Consumer> consumer) {
        return toBuilder().setConsumer(consumer).build();
    }

    /**
     * Sets the start time (i.e. the earliest possible created/changed time of the CDF Raw Row) of the data stream.
     *
     * The default start time is at Unix epoch. I.e. the publisher will read all existing rows (if any) in the raw table.
     * @param startTime The start time instant
     * @return The {@link RawPublisher} with the consumer configured.
     */
    public RawPublisher withStartTime(Instant startTime) {
        Preconditions.checkArgument(startTime.isAfter(MIN_START_TIME) && startTime.isBefore(MAX_END_TIME),
                "Start time must be after Unix Epoch and before Instant.MAX.minus(1, ChronoUnit.YEARS).");
        return toBuilder().setStartTime(startTime).build();
    }

    /**
     * Sets the end time (i.e. the latest possible created/changed time of the CDF Raw Row) of the data stream.
     *
     * The default end time is {@code Instant.MAX}. I.e. the publisher will stream data indefinitely, or until
     * aborted.
     * @param endTime The end time instant
     * @return The {@link RawPublisher} with the consumer configured.
     */
    public RawPublisher withEndTime(Instant endTime) {
        Preconditions.checkArgument(endTime.isAfter(MIN_START_TIME) && endTime.isBefore(MAX_END_TIME),
                "End time must be after Unix Epoch and before Instant.MAX.minus(1, ChronoUnit.YEARS).");
        return toBuilder().setEndTime(endTime).build();
    }

    /**
     * Sets the polling interval to check for updates to the source raw table. The default polling interval is
     * every 5 seconds. You can configure a more or less frequent interval, down to every 0.5 seconds.
     *
     * @param interval The interval to check the source raw table for updates.
     * @return The {@link RawPublisher} with the consumer configured.
     */
    public RawPublisher withPollingInterval(Duration interval) {
        Preconditions.checkArgument(interval.compareTo(MIN_POLLING_INTERVAL) > 0
                        && interval.compareTo(MAX_POLLING_INTERVAL) < 0,
                String.format("Polling interval must be greater than %s and less than %s.",
                        MIN_POLLING_INTERVAL,
                        MAX_POLLING_INTERVAL));
        return toBuilder().setPollingInterval(interval).build();
    }

    /**
     * Sets the polling offset. The offset is a time window "buffer" subtracted from the current time when polling
     * for data from CDF Raw. It is intended as a safeguard for clock differences between the client (running this
     * publisher) and the CDF service.
     *
     * For example, if the polling offset is 2 seconds, then this publisher will look for data updates up to (and including)
     * T-2 seconds. That is, data will be streamed with a 2 second fixed latency/delay.
     *
     * @param interval The interval to check the source raw table for updates.
     * @return The {@link RawPublisher} with the consumer configured.
     */
    public RawPublisher withPollingOffset(Duration interval) {
        Preconditions.checkArgument(interval.compareTo(MIN_POLLING_OFFSET) > 0
                        && interval.compareTo(MAX_POLLING_OFFSET) < 0,
                String.format("Polling offset must be greater than %s and less than %s.",
                        MIN_POLLING_OFFSET,
                        MAX_POLLING_OFFSET));
        return toBuilder().setPollingOffset(interval).build();
    }

    /**
     * Starts the streaming job.
     *
     * The job is executed on a separate thread and this method will immediately return to the caller. It returns
     * a {@link Future} that you can use to block the execution of your own code if you want to explicitly
     * wait for completion of the streaming job.
     *
     * @return A Future hosting the end state of the streaming job. The future returns {@code true} when the
     * polling loop completes (at its specified end time). {@code false} if the job is aborted before the
     * specified end time.
     */
    public Future start() {
        ExecutorService executorService = Executors.newSingleThreadExecutor();
        Future future = executorService.submit(this::run);
        executorService.shutdown();
        return future;
    }

    /**
     * Start the main polling loop for reading rows from a raw table.
     *
     * @return {@code true} when the polling loop completes (at the specified end time). {@code false} if the
     * job is aborted before the specified end time.
     * @throws Exception
     */
    boolean run() throws Exception {
        final String loggingPrefix = "streaming() [" + RandomStringUtils.randomAlphanumeric(4) + "] - ";
        Preconditions.checkNotNull(getConsumer(),
                loggingPrefix + "You must specify a Consumer via withConsumer(Consumer>)");
        Preconditions.checkState(getStartTime().isBefore(getEndTime()),
                String.format(loggingPrefix + "Start time must be before end time. Start time: %s. End time: %s",
                        getStartTime(),
                        getEndTime()));
        LOG.info(loggingPrefix + "Setting up streaming read from CDF.Raw: [{}.{}]. Time window start: {}. End: {}",
                getRawDbName(),
                getRawTableName(),
                getStartTime().toString(),
                getEndTime().toString());
        state = State.RUNNING;

        // Set the time range for the first query
        long startRange = getStartTime().toEpochMilli();
        long endRange = Instant.now().minus(getPollingOffset()).toEpochMilli();

        while (Instant.now().isBefore(getEndTime().plus(getPollingOffset())) && !abortStream.get()) {
            endRange = Instant.now().minus(getPollingOffset()).toEpochMilli();
            LOG.debug(loggingPrefix + "Enter polling loop with startRange: [{}] and endRange: [{}]",
                    startRange,
                    endRange);
            if (startRange < endRange) {
                Request query = Request.create()
                        .withRootParameter("minLastUpdatedTime", startRange)
                        .withRootParameter("maxLastUpdatedTime", endRange);
                LOG.debug(loggingPrefix + "Send request to read CDF Raw: {}",
                        query);

                Iterator> iterator = getRawRows().list(getRawDbName(), getRawTableName(), query);
                while (iterator.hasNext() && !abortStream.get()) {
                    List batch = iterator.next();
                    if (batch.size() > 0) {
                        getConsumer().accept(batch);
                    }
                }
            }

            LOG.debug(loggingPrefix + "Finished polling loop with startRange: [{}] and endRange: [{}]. Sleeping for {}",
                    startRange,
                    endRange,
                    getPollingInterval().toString());

            startRange = endRange + 1; // endRange is inclusive in the raw request, so we must bump the startRange
            // Sleep for a polling interval
            try {
                Thread.sleep(getPollingInterval().toMillis());
            } catch (Exception e) {
                LOG.warn(loggingPrefix + "Exception when reading: " + e.toString());
                abortStream.set(true);
            }
        }
        state = State.STOPPED;
        return !abortStream.get();
    }

    @AutoValue.Builder
    abstract static class Builder extends AbstractPublisher.Builder {
        abstract Builder setRawRows(RawRows value);
        abstract Builder setRawDbName(String value);
        abstract Builder setRawTableName(String value);
        abstract Builder setConsumer(Consumer> value);

        abstract RawPublisher build();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy