All downloads are free. Search and download functionality uses the official Maven repository.

org.finos.tracdap.common.storage.CommonFileReader Maven / Gradle / Ivy

Go to download

TRAC D.A.P. data library, interfaces and core functionality for working with primary data

There is a newer version: 0.7.0
Show newest version
/*
 * Copyright 2023 Accenture Global Solutions Limited
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.finos.tracdap.common.storage;

import org.finos.tracdap.common.data.IDataContext;

import org.apache.arrow.memory.ArrowBuf;
import org.apache.arrow.memory.BufferAllocator;

import org.finos.tracdap.common.exception.ETracInternal;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Queue;
import java.util.concurrent.Flow;
import java.util.concurrent.atomic.AtomicBoolean;

import static org.finos.tracdap.common.storage.CommonFileStorage.READ_OPERATION;
import static org.finos.tracdap.common.storage.StorageErrors.ExplicitError.DUPLICATE_SUBSCRIPTION;


/**
 * Base class for storage readers that publish the content of a single file as a stream of Arrow buffers.
 *
 * <p>Subclasses supply the storage-specific part by implementing {@link #clientStart()},
 * {@link #clientRequest(long)} and {@link #clientCancel()}, and feed results back through the
 * protected callbacks {@code onChunk()}, {@code onComplete()} and {@code onError()}.
 * This class takes care of the {@link Flow.Publisher} contract: it accepts a single subscriber,
 * tracks subscriber demand, buffers chunks that arrive before they are requested and releases
 * any undelivered buffers when the stream is cancelled or fails.</p>
 *
 * <p>Subscription signals (start, request, cancel) are always dispatched through the event loop
 * executor of the data context. The state in this class is not synchronized, so the protected
 * callbacks are presumably expected to be invoked on that same executor - confirm in subclasses.</p>
 */
public abstract class CommonFileReader implements Flow.Publisher<ArrowBuf> {

    private static final long DEFAULT_CHUNK_SIZE = 2 * 1048576;  // 2 MB
    private static final int DEFAULT_CHUNK_BUFFER_TARGET = 2;
    private static final int DEFAULT_CLIENT_BUFFER_TARGET = 32;

    protected final Logger log = LoggerFactory.getLogger(getClass());

    private final IDataContext dataContext;
    private final BufferAllocator allocator;
    private final StorageErrors errors;

    private final String storageKey;
    private final String storagePath;

    // Size of the Arrow buffers used to aggregate ByteBuffer data from the client
    private final long chunkSize;
    // More data is requested from the client only while fewer than this many chunks are buffered
    private final int chunkBufferTarget;
    // Number of client chunks requested in each batch (the initial request is twice this amount)
    private final int clientBufferTarget;

    private final AtomicBoolean subscriberSet;
    private Flow.Subscriber<? super ArrowBuf> subscriber;

    // Chunks received from the client that the subscriber has not requested yet
    private final Queue<ArrowBuf> pendingChunks;
    // Partially filled aggregation buffer, used by onChunk(ByteBuffer)
    private ArrowBuf currentChunk;

    private long nRequested;        // Total demand signalled by the subscriber
    private long nDelivered;        // Chunks passed to subscriber.onNext()
    private long clientRequested;   // Chunks requested from the storage client
    private long clientReceived;    // Chunks received from the storage client
    private long bytesReceived;     // Bytes received from the storage client

    private boolean gotComplete;    // The client has signalled the end of the data
    private boolean completeSent;   // subscriber.onComplete() has been delivered
    private boolean gotCancel;      // The subscriber cancelled the subscription
    private boolean gotError;       // The read failed, or failed while cancelling

    /** Begin the read operation in the storage client, called once before any data is requested. */
    protected abstract void clientStart();

    /** Ask the storage client to supply up to {@code n} more chunks. */
    protected abstract void clientRequest(long n);

    /** Abandon the read operation in the storage client. */
    protected abstract void clientCancel();


    /**
     * Create a reader with explicit buffering parameters.
     *
     * @param dataContext supplies the Arrow allocator and the event loop executor
     * @param errors used to translate and construct storage errors
     * @param storageKey storage key, used in log messages
     * @param storagePath path of the file being read, used in log and error messages
     * @param chunkSize size in bytes of the buffers used to aggregate {@link ByteBuffer} data
     * @param chunkBufferTarget stop requesting client data once this many chunks are buffered
     * @param clientBufferTarget number of chunks to request from the client in each batch
     */
    protected CommonFileReader(
            IDataContext dataContext, StorageErrors errors,
            String storageKey, String storagePath,
            long chunkSize, int chunkBufferTarget, int clientBufferTarget) {

        this.dataContext = dataContext;
        this.allocator = dataContext.arrowAllocator();
        this.errors = errors;

        this.storageKey = storageKey;
        this.storagePath = storagePath;

        this.chunkSize = chunkSize;
        this.chunkBufferTarget = chunkBufferTarget;
        this.clientBufferTarget = clientBufferTarget;

        this.subscriberSet = new AtomicBoolean(false);
        this.pendingChunks = new ArrayDeque<>();
    }

    /**
     * Create a reader using the default chunk size (2 MB) and default buffer targets.
     *
     * @param dataContext supplies the Arrow allocator and the event loop executor
     * @param errors used to translate and construct storage errors
     * @param storageKey storage key, used in log messages
     * @param storagePath path of the file being read, used in log and error messages
     */
    protected CommonFileReader(
            IDataContext dataContext, StorageErrors errors,
            String storageKey, String storagePath) {

        this(dataContext, errors, storageKey, storagePath,
                DEFAULT_CHUNK_SIZE,
                DEFAULT_CHUNK_BUFFER_TARGET,
                DEFAULT_CLIENT_BUFFER_TARGET);
    }

    /**
     * Attach the one and only subscriber and schedule the start of the read operation.
     *
     * <p>A second call reports a duplicate subscription error to the new subscriber
     * and leaves the existing subscription untouched.</p>
     *
     * @param subscriber the subscriber that will receive the file content
     */
    @Override
    public final void subscribe(Flow.Subscriber<? super ArrowBuf> subscriber) {

        var subscribeOk = subscriberSet.compareAndSet(false, true);

        if (!subscribeOk) {

            // According to Java API docs, errors in subscribe() should be reported as IllegalStateException
            // https://docs.oracle.com/en/java/javase/11/docs/api/java.base/java/util/concurrent/Flow.Publisher.html#subscribe(java.util.concurrent.Flow.Subscriber)

            var eStorage = errors.explicitError(READ_OPERATION, storagePath, DUPLICATE_SUBSCRIPTION);
            var eFlowState = new IllegalStateException(eStorage.getMessage(), eStorage);
            subscriber.onError(eFlowState);
            return;
        }

        this.subscriber = subscriber;

        // Make sure the start() action goes into the event loop before calling subscriber.onSubscribe()
        // This makes sure that start() is called before any requests from the subscription get processed

        dataContext.eventLoopExecutor().submit(this::start);

        // Now activate the subscription, before start() gets executed
        // This approach allows errors to be reported normally during start() (e.g. file not found)
        // Otherwise, if the subscription is not yet active, errors should be reported with IllegalStateException
        // File not found is an expected error, reporting it with EStorage makes for cleaner error handling

        subscriber.onSubscribe(new Subscription());
    }

    /** Subscription handed to the subscriber, every signal is re-dispatched onto the event loop. */
    private class Subscription implements Flow.Subscription {

        @Override
        public void request(long n) {
            dataContext.eventLoopExecutor().submit(() -> CommonFileReader.this.request(n));
        }

        @Override
        public void cancel() {
            dataContext.eventLoopExecutor().submit(CommonFileReader.this::cancel);
        }
    }

    /** Start the client and pre-fetch an initial batch of twice the client buffer target. */
    private void start() {

        try {

            clientStart();

            var initialRequest = (long) 2 * clientBufferTarget;

            clientRequested = initialRequest;
            clientRequest(initialRequest);
        }
        catch (Exception e) {
            throw new ETracInternal(e.getMessage(), e);  // todo
        }
    }

    /** Record new subscriber demand, deliver whatever is buffered and top up the client requests. */
    private void request(long n) {

        // Do not accept the request once a terminal signal has been sent, or after cancel / error
        // Demand must still be honoured while completion is pending (gotComplete && !completeSent),
        // otherwise the buffered chunks and the final onComplete() would never be delivered
        if (gotError || gotCancel || completeSent)
            return;

        try {

            nRequested = addDemand(nRequested, n);

            sendPendingChunks();

            // There is nothing more to ask for once the client has reported the end of the data
            if (!gotComplete)
                askForMore();
        }
        catch (Exception e) {
            throw new ETracInternal(e.getMessage(), e);  // todo
        }
    }

    /** Add {@code n} to the current demand, saturating at Long.MAX_VALUE instead of overflowing. */
    private static long addDemand(long current, long n) {

        var total = current + n;

        if (n > 0 && total < current)
            return Long.MAX_VALUE;

        return total;
    }

    /** Handle cancellation from the subscriber: stop the client and release undelivered buffers. */
    private void cancel() {

        // Do not process cancellation once a terminal signal has been sent, or after cancel / error
        // A cancel while completion is pending is still processed so that buffered chunks get released
        if (gotError || gotCancel || completeSent)
            return;

        try {

            log.info("READ CANCELLED: [{}]", storagePath);

            gotCancel = true;

            // If the client already reported completion there is no client operation left to cancel
            if (!gotComplete)
                clientCancel();

            // Do not send any signal to the subscriber for a clean cancel
        }
        catch (Exception e) {

            log.error("There was an error cancelling the read operation: {} [{}]", e.getMessage(), storagePath, e);

            gotError = true;

            // If the cancel results in an error closing the file, do not send the onError message
        }
        finally {

            releasePendingChunks();
        }
    }

    /**
     * Check whether the read operation has finished for any reason.
     *
     * @return true if the client has completed, the subscriber has cancelled or an error has occurred
     */
    protected final boolean isDone() {

        return gotError || gotCancel || gotComplete;
    }

    /**
     * Allocate an Arrow buffer from the allocator of the data context.
     *
     * @param size requested buffer size in bytes
     * @return the newly allocated buffer
     */
    protected final ArrowBuf allocateChunk(long size) {

        return allocator.buffer(size);
    }

    /**
     * Accept data from the client as a {@link ByteBuffer}.
     *
     * <p>The remaining bytes are copied into Arrow buffers of the configured chunk size.
     * Each buffer is passed on as soon as it is full, a partially filled buffer is held
     * until more data arrives or the client completes. The position of {@code chunk}
     * is advanced past the bytes that were consumed.</p>
     *
     * @param chunk data received from the storage client
     */
    protected final void onChunk(ByteBuffer chunk) {

        // Data arriving after the read has finished is discarded
        // Copying it would allocate a buffer that is never delivered or released
        if (isDone())
            return;

        try {

            clientReceived += 1;
            bytesReceived += chunk.remaining();

            while (chunk.remaining() > 0) {

                if (currentChunk == null)
                    currentChunk = allocateChunk(chunkSize);

                var nBytes = (int) Math.min(chunk.remaining(), currentChunk.writableBytes());
                var newPosition = chunk.position() + nBytes;

                currentChunk.setBytes(currentChunk.writerIndex(), chunk, chunk.position(), nBytes);
                currentChunk.writerIndex(currentChunk.writerIndex() + nBytes);

                chunk.position(newPosition);

                if (currentChunk.writableBytes() == 0) {
                    sendChunk(currentChunk);
                    currentChunk = null;
                }
            }

            askForMore();
        }
        catch (Exception e) {
            throw new ETracInternal(e.getMessage(), e);  // todo
        }
    }

    /**
     * Accept data from the client that is already held in an Arrow buffer.
     *
     * <p>The buffer is passed to the subscriber, or queued until it is requested.
     * If the read has already finished the buffer is closed here instead.</p>
     *
     * @param chunk data received from the storage client
     */
    protected final void onChunk(ArrowBuf chunk) {

        // Data arriving after the read has finished cannot be delivered, release it straight away
        if (isDone()) {
            chunk.close();
            return;
        }

        try {

            clientReceived += 1;
            bytesReceived += chunk.readableBytes();

            sendChunk(chunk);

            askForMore();
        }
        catch (Exception e) {
            throw new ETracInternal(e.getMessage(), e);  // todo
        }
    }

    /**
     * Signal that the client has supplied all the data for this file.
     *
     * <p>Any partially filled buffer is flushed. The subscriber is notified immediately if
     * nothing is left in the queue, otherwise the notification is sent once the subscriber
     * has requested the remaining chunks.</p>
     */
    protected final void onComplete() {

        if (isDone())
            return;

        try {

            if (currentChunk != null) {
                sendChunk(currentChunk);
                currentChunk = null;
            }

            // Always record completion, so later client signals and requests are handled as finished
            gotComplete = true;

            if (pendingChunks.isEmpty()) {
                completeSent = true;
                subscriber.onComplete();
            }
        }
        catch (Exception e) {
            throw new ETracInternal(e.getMessage(), e);  // todo
        }
    }

    /**
     * Signal that the read operation has failed.
     *
     * <p>The error is translated by the storage error handler and passed to the subscriber,
     * unless a terminal state was already reached, in which case it is only logged.
     * Undelivered buffers are released in every case.</p>
     *
     * @param error the error reported by the storage client
     */
    protected final void onError(Throwable error) {

        try {

            var tracError = errors.handleException(READ_OPERATION, storagePath, error);

            if (gotError) {
                log.warn("{} {} [{}]: Read operation already failed, then another error occurred",
                        READ_OPERATION, storageKey, storagePath, tracError);
            }
            else if (gotCancel) {
                log.warn("{} {} [{}]: Read operation was cancelled, then an error occurred",
                        READ_OPERATION, storageKey, storagePath, tracError);
            }
            else if (completeSent) {
                // The subscriber has already received onComplete(), no further signals are allowed
                log.warn("{} {} [{}]: Read operation already completed, then an error occurred",
                        READ_OPERATION, storageKey, storagePath, tracError);
            }
            else {
                log.error("{} {} [{}]: {}",
                        READ_OPERATION, storageKey, storagePath, tracError.getMessage(), tracError);

                gotError = true;
                subscriber.onError(tracError);
            }
        }
        finally {
            releasePendingChunks();
        }
    }

    /** Request another batch from the client if both the chunk queue and the client pipeline are running low. */
    private void askForMore() {

        if (pendingChunks.size() < chunkBufferTarget) {
            if (clientRequested - clientReceived < clientBufferTarget) {
                clientRequested += clientBufferTarget;
                clientRequest(clientBufferTarget);
            }
        }
    }

    /** Deliver a chunk directly if there is demand and nothing is queued ahead of it, otherwise queue it. */
    private void sendChunk(ArrowBuf chunk) {

        if (nDelivered < nRequested && pendingChunks.isEmpty()) {
            nDelivered += 1;
            subscriber.onNext(chunk);
        }
        else {
            pendingChunks.add(chunk);
        }
    }

    /** Deliver queued chunks while there is demand, then send a deferred onComplete() if it is due. */
    private void sendPendingChunks() {

        while (nDelivered < nRequested && !pendingChunks.isEmpty()) {
            nDelivered += 1;
            subscriber.onNext(pendingChunks.remove());
        }

        // completeSent guards against a duplicate signal if this method runs again after completion
        if (gotComplete && !completeSent && pendingChunks.isEmpty()) {
            completeSent = true;
            subscriber.onComplete();
        }
    }

    /** Close every undelivered buffer, including the partially filled aggregation buffer. */
    private void releasePendingChunks() {

        while (!pendingChunks.isEmpty())
            pendingChunks.remove().close();

        if (currentChunk != null) {
            currentChunk.close();
            currentChunk = null;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy