All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.jet.impl.execution.TaskletExecutionService Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2008-2024, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.jet.impl.execution;

import com.hazelcast.internal.metrics.MetricDescriptor;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.internal.util.RuntimeAvailableProcessors;
import com.hazelcast.internal.util.concurrent.BackoffIdleStrategy;
import com.hazelcast.internal.util.concurrent.IdleStrategy;
import com.hazelcast.internal.util.counters.Counter;
import com.hazelcast.internal.util.counters.MwCounter;
import com.hazelcast.internal.util.counters.SwCounter;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.core.metrics.MetricTags;
import com.hazelcast.jet.impl.execution.init.Contexts;
import com.hazelcast.jet.impl.util.NonCompletableFuture;
import com.hazelcast.jet.impl.util.ProgressState;
import com.hazelcast.jet.impl.util.ProgressTracker;
import com.hazelcast.jet.impl.util.Util;
import com.hazelcast.logging.ILogger;
import com.hazelcast.spi.impl.NodeEngine;
import com.hazelcast.spi.impl.executionservice.ExecutionService;
import com.hazelcast.spi.properties.HazelcastProperties;
import com.hazelcast.spi.properties.HazelcastProperty;
import com.hazelcast.sql.impl.ResultLimitReachedException;

import javax.annotation.Nonnull;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CancellationException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.Semaphore;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.LockSupport;
import java.util.function.Consumer;

import static com.hazelcast.internal.util.executor.ExecutorType.CACHED;
import static com.hazelcast.jet.impl.util.ExceptionUtil.peel;
import static com.hazelcast.internal.util.ExceptionUtil.sneakyThrow;
import static com.hazelcast.internal.util.ExceptionUtil.withTryCatch;
import static com.hazelcast.jet.impl.util.Util.doWithClassLoader;
import static com.hazelcast.jet.impl.util.Util.uncheckRun;
import static com.hazelcast.spi.properties.ClusterProperty.JET_IDLE_COOPERATIVE_MAX_MICROSECONDS;
import static com.hazelcast.spi.properties.ClusterProperty.JET_IDLE_COOPERATIVE_MIN_MICROSECONDS;
import static com.hazelcast.spi.properties.ClusterProperty.JET_IDLE_NONCOOPERATIVE_MAX_MICROSECONDS;
import static com.hazelcast.spi.properties.ClusterProperty.JET_IDLE_NONCOOPERATIVE_MIN_MICROSECONDS;
import static java.lang.Thread.currentThread;
import static java.util.Collections.emptyList;
import static java.util.concurrent.Executors.newCachedThreadPool;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.stream.Collectors.partitioningBy;
import static java.util.stream.Collectors.toList;

public class TaskletExecutionService {

    /**
     * Name of the Hazelcast executor that the constructor registers and that
     * is used in this class to run the {@code init()} call of cooperative tasklets.
     */
    public static final String TASKLET_INIT_CLOSE_EXECUTOR_NAME = "jet:tasklet_initClose";

    // Cached thread pool on which one BlockingWorker is submitted per non-cooperative tasklet.
    private final ExecutorService blockingTaskletExecutor = newCachedThreadPool(new BlockingTaskThreadFactory());
    private final ExecutionService hzExecutionService;
    // cooperativeWorkers[i] is the Runnable executed by cooperativeThreadPool[i].
    private final CooperativeWorker[] cooperativeWorkers;
    private final Thread[] cooperativeThreadPool;
    // Embedded in the names of the cooperative and blocking worker threads.
    private final String hzInstanceName;
    private final ILogger logger;
    // Index of the cooperative worker that receives the next submitted tasklet
    // (advanced round-robin); read and written while holding `lock`.
    private int cooperativeThreadIndex;
    // Incremented when a BlockingWorker starts running and decremented when it finishes.
    @Probe(name = "blockingWorkerCount")
    private final Counter blockingWorkerCount = MwCounter.newMwCounter();
    // Set to true by shutdown(); polled by the worker loops as their exit condition.
    private volatile boolean isShutdown;
    // Guards the round-robin assignment of cooperative tasklets to workers.
    private final Object lock = new Object();
    // Idle strategies used by the cooperative and the blocking workers, respectively,
    // when a tasklet call made no progress.
    private final IdleStrategy idlerCooperative;
    private final IdleStrategy idlerNonCooperative;

    /**
     * Creates the service, registers the tasklet init/close executor, starts
     * one cooperative worker thread per requested thread and registers the
     * metrics of this service and of each cooperative worker.
     *
     * @param nodeEngine  source of the execution service, instance name, logger and metrics registry
     * @param threadCount number of cooperative worker threads to create and start
     * @param properties  properties from which the idle strategy bounds are read
     */
    public TaskletExecutionService(NodeEngine nodeEngine, int threadCount, HazelcastProperties properties) {
        this.hzExecutionService = nodeEngine.getExecutionService();
        this.hzExecutionService.register(
                TASKLET_INIT_CLOSE_EXECUTOR_NAME, RuntimeAvailableProcessors.get(), Integer.MAX_VALUE, CACHED);
        this.hzInstanceName = nodeEngine.getHazelcastInstance().getName();
        this.cooperativeWorkers = new CooperativeWorker[threadCount];
        this.cooperativeThreadPool = new Thread[threadCount];
        // The logger has to be assigned before createIdler() runs because createIdler() logs.
        this.logger = nodeEngine.getLoggingService().getLogger(TaskletExecutionService.class);

        this.idlerCooperative = createIdler(
            properties, JET_IDLE_COOPERATIVE_MIN_MICROSECONDS, JET_IDLE_COOPERATIVE_MAX_MICROSECONDS
        );
        this.idlerNonCooperative = createIdler(
            properties, JET_IDLE_NONCOOPERATIVE_MIN_MICROSECONDS, JET_IDLE_NONCOOPERATIVE_MAX_MICROSECONDS
        );

        // Build every worker/thread pair first; threads are started only once all of them exist.
        for (int i = 0; i < threadCount; i++) {
            CooperativeWorker worker = new CooperativeWorker();
            cooperativeWorkers[i] = worker;
            cooperativeThreadPool[i] = new CooperativeWorkerThread(worker,
                    String.format("hz.%s.jet.cooperative.thread-%d", hzInstanceName, i));
        }
        for (Thread workerThread : cooperativeThreadPool) {
            workerThread.start();
        }

        // register metrics
        MetricsRegistry registry = nodeEngine.getMetricsRegistry();
        MetricDescriptor descriptor = registry.newMetricDescriptor()
                                              .withTag(MetricTags.MODULE, "jet");
        registry.registerStaticMetrics(descriptor, this);
        for (int workerIndex = 0; workerIndex < cooperativeWorkers.length; workerIndex++) {
            registry.registerStaticMetrics(
                    descriptor.withDiscriminator(MetricTags.COOPERATIVE_WORKER, String.valueOf(workerIndex)),
                    cooperativeWorkers[workerIndex]
            );
        }
    }

    /**
     * Submits the tasklets for execution and returns a future which gets
     * completed when the execution of all the tasklets has completed. If an
     * exception occurs or the execution gets cancelled, the future will be
     * completed exceptionally, but only after all the tasklets have finished
     * executing. The returned future does not support cancellation, instead
     * the supplied {@code cancellationFuture} should be used.
     *
     * @param tasklets            tasklets to run
     * @param cancellationFuture  future that, if cancelled, will cancel the execution of the tasklets
     * @param jobClassLoader      classloader to use when running the tasklets
     */
    CompletableFuture beginExecute(
            @Nonnull List tasklets,
            @Nonnull CompletableFuture cancellationFuture,
            @Nonnull ClassLoader jobClassLoader
    ) {
        final ExecutionTracker executionTracker = new ExecutionTracker(tasklets.size(), cancellationFuture);
        try {
            final Map> byCooperation =
                    tasklets.stream().collect(partitioningBy(
                            tasklet -> doWithClassLoader(jobClassLoader, tasklet::isCooperative)
                    ));
            submitCooperativeTasklets(executionTracker, jobClassLoader, byCooperation.get(true));
            submitBlockingTasklets(executionTracker, jobClassLoader, byCooperation.get(false));
        } catch (Throwable t) {
            executionTracker.future.internalCompleteExceptionally(t);
        }
        return executionTracker.future;
    }

    /**
     * Marks the service as shut down, releases a permit on every cooperative
     * worker's new-tasklet semaphore so that a worker waiting on it can observe
     * the flag, then shuts down the blocking tasklet executor and the tasklet
     * init/close executor.
     */
    public void shutdown() {
        isShutdown = true;
        for (CooperativeWorker worker : cooperativeWorkers) {
            worker.newTaskletSemaphore.release();
        }
        blockingTaskletExecutor.shutdownNow();
        hzExecutionService.shutdownExecutor(TASKLET_INIT_CLOSE_EXECUTOR_NAME);
    }

    /**
     * Submits one {@link BlockingWorker} per tasklet to the blocking tasklet
     * executor, stores the resulting futures in the execution tracker and
     * blocks until every worker has counted down the started latch.
     *
     * @param executionTracker tracker of the execution the tasklets belong to
     * @param jobClassLoader   class loader the workers set as context class loader
     * @param tasklets         the non-cooperative tasklets to run
     */
    private void submitBlockingTasklets(ExecutionTracker executionTracker, ClassLoader jobClassLoader,
                                        List<Tasklet> tasklets) {
        CountDownLatch startedLatch = new CountDownLatch(tasklets.size());
        executionTracker.blockingFutures = tasklets
                .stream()
                .map(t -> new BlockingWorker(new TaskletTracker(t, executionTracker, jobClassLoader), startedLatch))
                .map(blockingTaskletExecutor::submit)
                .collect(toList());

        // Do not return from this method until all workers have started. Otherwise
        // on cancellation there is a race where the executor might not have started
        // the worker yet. This would result in taskletDone() never being called for
        // a worker.
        uncheckRun(startedLatch::await);
    }

    /**
     * Initializes the given cooperative tasklets on the tasklet init/close
     * executor, waits for all initializations to finish and then hands the
     * tasklets over to the cooperative workers.
     *
     * @param executionTracker tracker of the execution the tasklets belong to
     * @param jobClassLoader   class loader set while initializing and running the tasklets
     * @param tasklets         the cooperative tasklets to run
     * @throws JetException if at least one tasklet failed to initialize (thrown by {@code awaitAll})
     */
    private void submitCooperativeTasklets(
            ExecutionTracker executionTracker, ClassLoader jobClassLoader, List<Tasklet> tasklets
    ) {
        @SuppressWarnings("unchecked")
        final List<TaskletTracker>[] trackersByThread = new List[cooperativeWorkers.length];
        Arrays.setAll(trackersByThread, i -> new ArrayList<>());
        List<Future<?>> futures = tasklets
                .stream()
                .map(tasklet -> hzExecutionService.submit(TASKLET_INIT_CLOSE_EXECUTOR_NAME, () ->
                        Util.doWithClassLoader(jobClassLoader, tasklet::init)))
                .collect(toList());
        awaitAll(futures);

        // We synchronize so that no two jobs submit their tasklets in
        // parallel. If two jobs submit in parallel, the tasklets of one of
        // them could happen to not use all threads. When the other one ends,
        // some worker might have no tasklet.
        synchronized (lock) {
            for (Tasklet t : tasklets) {
                trackersByThread[cooperativeThreadIndex].add(new TaskletTracker(t, executionTracker, jobClassLoader));
                cooperativeThreadIndex = (cooperativeThreadIndex + 1) % trackersByThread.length;
            }
        }
        for (int i = 0; i < trackersByThread.length; i++) {
            // New trackers go to the front of the worker's list; the released permit
            // lets a worker that is waiting for tasklets continue.
            cooperativeWorkers[i].trackers.addAll(0, trackersByThread[i]);
            cooperativeWorkers[i].newTaskletSemaphore.release();
        }
        Arrays.stream(cooperativeThreadPool).forEach(LockSupport::unpark);
    }

    private void awaitAll(List> futures) {
        Throwable firstFailure = null;
        int failureCount = 0;
        for (Future future : futures) {
            try {
                future.get();
            } catch (InterruptedException | ExecutionException e) {
                Throwable peeled = peel(e);
                logger.severe("Tasklet initialization failed", peeled);
                firstFailure = firstFailure != null ? firstFailure : peeled;
                failureCount++;
            }
        }
        if (firstFailure != null) {
            String message = String.format(
                    "%,d of %,d tasklets failed to initialize." +
                            " One of the failures is attached as the cause and its summary is %s",
                    failureCount, futures.size(), firstFailure
            );
            TaskletExecutionException silencingException = new TaskletExecutionException(message, firstFailure);
            throw new JetException(message, silencingException);
        }
    }

    /**
     * Blocks until all workers, both cooperative and blocking, have
     * terminated. Must be called only after {@link #shutdown()}. A warning is
     * logged for every minute the blocking tasklet executor fails to
     * terminate. If the calling thread is interrupted while waiting, the
     * {@link InterruptedException} is rethrown as-is.
     */
    public void awaitWorkerTermination() {
        assert isShutdown : "Not shut down";
        try {
            boolean blockingTerminated = blockingTaskletExecutor.awaitTermination(1, TimeUnit.MINUTES);
            while (!blockingTerminated) {
                logger.warning("Blocking tasklet executor did not terminate in 1 minute");
                blockingTerminated = blockingTaskletExecutor.awaitTermination(1, TimeUnit.MINUTES);
            }
            for (Thread workerThread : cooperativeThreadPool) {
                workerThread.join();
            }
        } catch (InterruptedException interrupted) {
            sneakyThrow(interrupted);
        }
    }

    /**
     * Creates a backoff idle strategy whose minimum and maximum idle times
     * are read from the two given properties.
     * <p>
     * If the configured minimum is greater than the maximum, a warning is
     * logged and the minimum is used as the maximum too. Equal values are a
     * valid configuration and do not trigger the warning.
     *
     * @param props   properties to read the values from
     * @param minProp property holding the minimum idle time
     * @param maxProp property holding the maximum idle time
     * @return the configured idle strategy
     */
    private BackoffIdleStrategy createIdler(
        HazelcastProperties props, HazelcastProperty minProp, HazelcastProperty maxProp
    ) {
        int min = props.getInteger(minProp);
        int max = props.getInteger(maxProp);
        String minName = minProp.getName();
        String maxName = maxProp.getName();
        // Strictly greater: min == max satisfies the "less than or equal to" rule stated in the warning.
        if (min > max) {
            logger.warning(
                String.format(
                    "The property %s must be set less than or equal to %s but current values are: %s=%d, %s=%d." +
                        " Using minimum value as maximum instead.",
                    minName, maxName, minName, min, maxName, max));
            max = min;
        }

        logger.finest("Creating idler with %s=%dµs,%s=%dµs", minName, min, maxName, max);
        // Values are converted to nanoseconds using each property's own time unit.
        return new BackoffIdleStrategy(0, 0,
            minProp.getTimeUnit().toNanos(min), maxProp.getTimeUnit().toNanos(max)
        );
    }

    /**
     * Logs a failure thrown while running a tasklet and records it in the
     * tasklet's execution tracker. Cancellations and SQL result-limit signals
     * are logged at fine level and recorded unchanged; any other throwable is
     * logged at info level and recorded wrapped in a {@link JetException}.
     *
     * @param t tracker of the tasklet that failed
     * @param e the throwable that was caught
     */
    private void handleTaskletExecutionError(TaskletTracker t, Throwable e) {
        final Throwable recorded;
        if (e instanceof CancellationException) {
            logger.fine("Job was cancelled by the user.");
            recorded = e;
        } else if (e instanceof ResultLimitReachedException) {
            logger.fine("SQL LIMIT reached.");
            recorded = e;
        } else {
            final String origin = "Exception in " + t.tasklet;
            logger.info(origin, e);
            final String message = origin + ": " + e;
            recorded = new JetException(message, new TaskletExecutionException(message, e));
        }
        t.executionTracker.exception(recorded);
    }

    /**
     * Runs one non-cooperative tasklet on a thread of the blocking tasklet
     * executor: initializes it and keeps calling it until it reports done,
     * the execution has an exception recorded, or the service is shut down.
     */
    private final class BlockingWorker implements Runnable {
        private final TaskletTracker tracker;
        private final CountDownLatch startedLatch;

        private BlockingWorker(TaskletTracker tracker, CountDownLatch startedLatch) {
            this.tracker = tracker;
            this.startedLatch = startedLatch;
        }

        @Override
        public void run() {
            final Thread self = currentThread();
            final ClassLoader previousClassLoader = self.getContextClassLoader();
            final Tasklet tasklet = tracker.tasklet;
            self.setContextClassLoader(tracker.jobClassLoader);
            final IdleStrategy idler = idlerNonCooperative;
            final Contexts.Container contexts = Contexts.container();

            try {
                blockingWorkerCount.inc();
                contexts.setContext(tasklet.getProcessorContext());
                // Signal the submitter that this worker is running before doing any tasklet work.
                startedLatch.countDown();
                tasklet.init();
                long idleCount = 0;
                while (true) {
                    final ProgressState state = tasklet.call();
                    if (state.isMadeProgress()) {
                        idleCount = 0;
                    } else {
                        idler.idle(++idleCount);
                    }
                    if (state.isDone()
                            || tracker.executionTracker.executionCompletedExceptionally()
                            || isShutdown) {
                        break;
                    }
                }
            } catch (Throwable failure) {
                handleTaskletExecutionError(tracker, failure);
            } finally {
                // Undo the per-thread state and always report the tasklet as done.
                blockingWorkerCount.inc(-1L);
                contexts.setContext(null);
                self.setContextClassLoader(previousClassLoader);
                tracker.executionTracker.taskletDone();
            }
        }
    }

    /**
     * The loop executed by one cooperative worker thread. On every iteration
     * it calls each tasklet currently assigned to it once. When no tasklet
     * made progress it either idles using the cooperative idle strategy or,
     * if it has no tasklets at all, waits on {@link #newTaskletSemaphore}
     * until new tasklets are submitted or the service is shut down.
     */
    private final class CooperativeWorker implements Runnable {
        // Threshold in milliseconds above which a single tasklet call is logged at finest level.
        private static final int COOPERATIVE_LOGGING_THRESHOLD = 5;

        @Probe(name = "taskletCount")
        private final CopyOnWriteArrayList<TaskletTracker> trackers;
        @Probe(name = "iterationCount")
        private final Counter iterationCount = SwCounter.newSwCounter();

        private final ProgressTracker progressTracker = new ProgressTracker();
        // prevent lambda allocation on each iteration
        private final Consumer<TaskletTracker> runTasklet = this::runTasklet;

        // Released when tasklets are submitted and on shutdown.
        private final Semaphore newTaskletSemaphore = new Semaphore(0);

        private boolean finestLogEnabled;
        private Thread myThread;
        private Contexts.Container contextContainer;

        CooperativeWorker() {
            this.trackers = new CopyOnWriteArrayList<>();
        }

        @Override
        public void run() {
            myThread = currentThread();
            contextContainer = Contexts.container();

            IdleStrategy idlerLocal = idlerCooperative;
            long idleCount = 0;

            while (!isShutdown) {
                finestLogEnabled = logger.isFinestEnabled();
                progressTracker.reset();
                // garbage-free iteration -- relies on implementation in COWArrayList that doesn't use an Iterator
                trackers.forEach(runTasklet);
                iterationCount.inc();
                // Newly submitted tasklets count as progress so that the worker does not idle.
                if (!progressTracker.isMadeProgress() && newTaskletSemaphore.drainPermits() > 0) {
                    progressTracker.madeProgress();
                }
                if (progressTracker.isMadeProgress()) {
                    idleCount = 0;
                } else {
                    if (trackers.isEmpty()) {
                        // Drain stale permits, then re-check before blocking: a permit released
                        // after the drain makes acquire() return immediately.
                        newTaskletSemaphore.drainPermits();
                        if (trackers.isEmpty() && !isShutdown) {
                            try {
                                newTaskletSemaphore.acquire();
                            } catch (InterruptedException e) {
                                logger.severe("Cooperative worker interrupted", e);
                                return;
                            }
                        }
                    } else {
                        idlerLocal.idle(++idleCount);
                    }
                }
            }
            // Shut down: report every remaining tasklet as done so that their executions can complete.
            trackers.forEach(t -> t.executionTracker.taskletDone());
            trackers.clear();
        }

        /**
         * Calls the tracked tasklet once with the job class loader and
         * processor context set, and dismisses the tasklet if it is done or
         * if its execution has an exception recorded.
         */
        private void runTasklet(TaskletTracker t) {
            long start = 0;
            if (finestLogEnabled) {
                start = System.nanoTime();
            }
            // Guards against dismissing the same tasklet twice, which would decrement
            // the execution's completion latch twice for a single tasklet.
            boolean dismissed = false;
            try {
                myThread.setContextClassLoader(t.jobClassLoader);
                contextContainer.setContext(t.tasklet.getProcessorContext());
                final ProgressState result = t.tasklet.call();
                if (result.isDone()) {
                    dismissTasklet(t);
                    dismissed = true;
                }
                progressTracker.mergeWith(result);
            } catch (Throwable e) {
                handleTaskletExecutionError(t, e);
            } finally {
                contextContainer.setContext(null);
            }
            if (!dismissed && t.executionTracker.executionCompletedExceptionally()) {
                dismissTasklet(t);
            }

            if (finestLogEnabled) {
                long elapsedMs = NANOSECONDS.toMillis((System.nanoTime() - start));
                if (elapsedMs > COOPERATIVE_LOGGING_THRESHOLD) {
                    logger.finest("Cooperative tasklet call of '" + t.tasklet + "' took more than "
                            + COOPERATIVE_LOGGING_THRESHOLD + " ms: " + elapsedMs + "ms");
                }
            }
        }

        /**
         * Reports the tasklet as done to its execution tracker and removes it
         * from this worker's list.
         */
        private void dismissTasklet(TaskletTracker t) {
            logger.finest("Tasklet %s is done", t.tasklet);
            t.executionTracker.taskletDone();
            trackers.remove(t);
        }
    }

    /**
     * Immutable holder binding a tasklet to the tracker of the execution it
     * belongs to and to the class loader to use while running it.
     */
    private static final class TaskletTracker {
        final Tasklet tasklet;
        final ExecutionTracker executionTracker;
        final ClassLoader jobClassLoader;

        TaskletTracker(Tasklet tasklet, ExecutionTracker executionTracker, ClassLoader jobClassLoader) {
            this.jobClassLoader = jobClassLoader;
            this.executionTracker = executionTracker;
            this.tasklet = tasklet;
        }

        /** Returns the tasklet's own string form prefixed with {@code "Tracking "}. */
        @Override
        public String toString() {
            return "Tracking ".concat(String.valueOf(tasklet));
        }
    }

    /**
     * Thread factory of the blocking tasklet executor. It names each thread
     * after the Hazelcast instance and a sequence number that starts at zero.
     */
    private final class BlockingTaskThreadFactory implements ThreadFactory {
        private final AtomicInteger nextThreadNumber = new AtomicInteger();

        @Override
        public Thread newThread(@Nonnull Runnable r) {
            final int threadNumber = nextThreadNumber.getAndIncrement();
            final String threadName =
                    String.format("hz.%s.jet.blocking.thread-%d", hzInstanceName, threadNumber);
            return new Thread(r, threadName);
        }
    }


    /**
     * Internal utility class to track the overall state of tasklet execution.
     * There's one instance of this class per job.
     */
    private final class ExecutionTracker {

        final NonCompletableFuture future = new NonCompletableFuture();
        volatile List> blockingFutures = emptyList();

        private final AtomicInteger completionLatch;
        private final AtomicReference executionException = new AtomicReference<>();

        ExecutionTracker(int taskletCount, CompletableFuture cancellationFuture) {
            this.completionLatch = new AtomicInteger(taskletCount);
            cancellationFuture.whenComplete(withTryCatch(logger, (r, e) -> {
                if (e == null) {
                    e = new IllegalStateException("cancellationFuture must be completed exceptionally");
                }
                exception(e);
                // Don't interrupt the threads. We require that they do not block for too long,
                // interrupting them might make the termination faster, but can also cause
                // troubles, for example as in https://github.com/hazelcast/hazelcast-jet/issues/1946
                blockingFutures.forEach(f -> f.cancel(false));
            }));
        }

        void exception(Throwable t) {
            executionException.compareAndSet(null, t);
        }

        void taskletDone() {
            if (completionLatch.decrementAndGet() == 0) {
                Throwable ex = executionException.get();
                if (ex == null) {
                    future.internalComplete();
                } else {
                    future.internalCompleteExceptionally(ex);
                }
            }
        }

        boolean executionCompletedExceptionally() {
            return executionException.get() != null;
        }
    }

    /**
     * Thread type used for the cooperative workers. It adds no behavior of its
     * own; it only carries the {@code CooperativeThread} marker interface.
     */
    private static final class CooperativeWorkerThread extends Thread implements CooperativeThread {

        /**
         * @param target the runnable this thread executes
         * @param name   the thread name
         */
        CooperativeWorkerThread(Runnable target, String name) {
            super(target, name);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy