All downloads are free. The search and download functionality uses the official Maven repository.

com.databasesandlife.util.ThreadPool Maven / Gradle / Ivy

There is a newer version: 21.0.1
Show newest version
package com.databasesandlife.util;

import com.databasesandlife.util.gwtsafe.IdentityHashSet;
import org.slf4j.LoggerFactory;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

/**
 * Runs a number of {@link Runnable} tasks in a number of threads (over a number of CPU cores).
 *    

* Usage: *

 *      ThreadPool pool = new ThreadPool();
 *
 *      // optional
 *      pool.setThreadNamePrefix("foo"); // for debugger output
 *      pool.setThreadCount(5);
 *
 *      // add one or more seed tasks
 *      pool.addTask(new Runnable() { ... });
 *
 *      // start threads, execute the seed tasks, and execute any tasks they create
 *      pool.execute();
 * 
*

* In the case that any task throws an exception, this exception is thrown by the {@link #execute()} method. * If all tasks run to completion, the {@link #execute()} method returns with no value. *

* Tasks can depend on other tasks. Use the {@link #addTaskWithDependencies(List, Runnable...)} method to add a new task, * which will only start in the first parameter after all the tasks in the second parameter have run to completion. All tasks in the List should have been * previously added using the normal {@link #addTask(Runnable...)} method, or themselves with {@link #addTaskWithDependencies(List, Runnable...)}. *

* If task A must be executed before task B, normally task A is added first, and B (with dependency on A) is added afterwards. * Therefore, when B is added, if A cannot be found, it is assumed to be already finished and B is scheduled immediately. * However, if it is unknown in which order tasks will be added, then A can implement {@link ScheduleDependencyInAnyOrder}. *

* Tasks can run "off pool". For example, in a thread pool doing CPU-intensive tasks, a long-running HTTP request should not * block the threads from performing their CPU-intensive tasks. An "off pool" task runs in its own thread (not a thread that's * a member of the thread pool). The thread may still participate in dependency relationships, that is to say it's possible * to schedule a normal task to occur after an "off pool" task has completed. See {@link #addTaskOffPool(Runnable...)} * and {@link #addTaskWithDependenciesOffPool(List, Runnable...)}. *

* The difference to an {@link ExecutorService} is: *

    *
  • The processing of tasks can * add additional tasks to the queue (whereas to an {@link ExecutorService} the client adds a fixed number of tasks, * and they are then executed, without the executing tasks being able to add more tasks). * Such functionality is mandatory for web crawlers, which, during the processing of pages, discover links which * point to additional pages which require processing.

    *
  • It is impossible to forget to "shutdown" a ThreadPool and cause a leakage of threads, as is easily possible with {@link ExecutorService}. * If the {@link #execute()} method is never called then no threads are ever started and the object can be garbage collected normally. * If the {@link #execute()} method is called then that method makes sure all threads it creates are destroyed. *
* * @author This source is copyright Adrian Smith and licensed under the LGPL 3. * @see Project on GitHub */ public class ThreadPool { /** A dependency which implements this can be added before or after the task that depends on it */ public interface ScheduleDependencyInAnyOrder extends Runnable { } /** A task that performs no work, but upon which can be waited, and which can be added when some other work is finished. */ public static class SynchronizationPoint implements ScheduleDependencyInAnyOrder { @Override public void run() { } } protected static class TaskWithDependencies { boolean offPool; @Nonnull Runnable task; @Nonnull IdentityHashSet dependencies; } protected @Nonnull String threadNamePrefix = getClass().getSimpleName(); protected int threadCount = Runtime.getRuntime().availableProcessors(); protected int offPoolThreadCounter = 0; protected final IdentityHashSet readyTasks = new IdentityHashSet<>(); protected final IdentityHashSet executingTasks = new IdentityHashSet<>(); protected final Map> blockerTasks = new IdentityHashMap<>(); protected final IdentityHashSet blockedTasks = new IdentityHashSet<>(); protected final IdentityHashSet doneAnyOrderDependencies = new IdentityHashSet<>(); protected @CheckForNull Throwable exceptionOrNull = null; protected synchronized void onTaskCompleted(Runnable task) { LoggerFactory.getLogger(getClass()).debug("--- Processing removal of " + task + ":"); executingTasks.remove(task); if (exceptionOrNull != null) return; if (task instanceof ScheduleDependencyInAnyOrder) doneAnyOrderDependencies.add((ScheduleDependencyInAnyOrder) task); for (var d : blockerTasks.getOrDefault(task, List.of())) { d.dependencies.remove(task); if (d.dependencies.isEmpty()) { if (d.offPool) { LoggerFactory.getLogger(getClass()).debug("Will add "+d.task+" (off pool)"); addTaskOffPool(d.task); } else { LoggerFactory.getLogger(getClass()).debug("Will add "+d.task+" to ready tasks"); readyTasks.add(d.task); } blockedTasks.remove(d.task); } } 
blockerTasks.remove(task); } protected class RunnerRunnable implements Runnable { @Override public void run() { while (true) { final @CheckForNull Runnable nextTaskOrNull; synchronized (ThreadPool.this) { if (executingTasks.isEmpty() && readyTasks.isEmpty()) break; // It's finished successfully if (exceptionOrNull != null) break; // It's failed, no point continuing nextTaskOrNull = readyTasks.isEmpty() ? null : readyTasks.iterator().next(); if (nextTaskOrNull != null) { readyTasks.remove(nextTaskOrNull); executingTasks.add(nextTaskOrNull); } } if (nextTaskOrNull != null) { try { nextTaskOrNull.run(); } // Also catch e.g. StackOverflowExceptions here, // otherwise ThreadPool.execute appears to "succeed" but stuff that should have happened has not happened. catch (Throwable e) { synchronized (ThreadPool.this) { exceptionOrNull = e; } } finally { onTaskCompleted(nextTaskOrNull); } } else { // it might be that other tasks are running, and they will produce lots more tasks // so keep the thread alive and polling until all work is done. try { Thread.sleep(10); } catch (InterruptedException ignored) { } } } } } public void setThreadCount(int count) { threadCount = count; } public void setThreadNamePrefix(String prefix) { threadNamePrefix = prefix; } public synchronized void addTaskWithDependencies(List dependencies, Runnable... after) { List stillScheduledDependencies = dependencies.stream() .filter(dep -> dep instanceof ScheduleDependencyInAnyOrder || executingTasks.contains(dep) || readyTasks.contains(dep) || blockedTasks.contains(dep)) .filter(dep -> ! 
(dep instanceof ScheduleDependencyInAnyOrder && doneAnyOrderDependencies.contains((ScheduleDependencyInAnyOrder) dep))) .collect(Collectors.toList()); if (stillScheduledDependencies.isEmpty()) { readyTasks.addAll(after); } else { for (var job : after) { blockedTasks.add(job); var d = new TaskWithDependencies(); d.task = job; d.dependencies = new IdentityHashSet<>(stillScheduledDependencies); for (var dep : stillScheduledDependencies) { blockerTasks.putIfAbsent(dep, new ArrayList<>()); blockerTasks.get(dep).add(d); } } } } /** @return the runnable that's been scheduled for execution, as convenience */ public @Nonnull R addTaskWithDependencies(@Nonnull List dependencies, @Nonnull R after) { addTaskWithDependencies(dependencies, new Runnable[] { after }); return after; } public void addTask(Runnable... tasks) { for (var r : tasks) addTaskWithDependencies(List.of(), r); } /** @return the runnable that's been scheduled for execution, as convenience */ public @Nonnull R addTask(@Nonnull R after) { addTask(new Runnable[] { after }); return after; } public void addTasks(Collection tasks) { addTask(tasks.toArray(new Runnable[0])); } /** * Add a task which runs in its own thread. * Intended for CPU-bound thread pools which require a task to be completed which does not * consume CPU for example an HTTP request. */ public synchronized void addTaskOffPool(Runnable... tasks) { for (var t : tasks) { executingTasks.add(t); new Thread(() -> { try { t.run(); } catch (Exception e) { synchronized (ThreadPool.this) { exceptionOrNull = e; } } finally { onTaskCompleted(t); } }, threadNamePrefix + "-offPool-" + (++offPoolThreadCounter)).start(); } } /** @return the runnable that's been scheduled for execution, as convenience */ public @Nonnull R addTaskOffPool(@Nonnull R task) { addTaskOffPool(new Runnable[] { task }); return task; } public synchronized void addTaskWithDependenciesOffPool(List dependencies, Runnable... 
after) { List stillScheduledDependencies = dependencies.stream() .filter(dep -> dep instanceof ScheduleDependencyInAnyOrder || executingTasks.contains(dep) || readyTasks.contains(dep) || blockedTasks.contains(dep)) .filter(dep -> ! (dep instanceof ScheduleDependencyInAnyOrder && doneAnyOrderDependencies.contains((ScheduleDependencyInAnyOrder) dep))) .collect(Collectors.toList()); if (stillScheduledDependencies.isEmpty()) { addTaskOffPool(after); } else { for (var job : after) { blockedTasks.add(job); var d = new TaskWithDependencies(); d.offPool = true; d.task = job; d.dependencies = new IdentityHashSet<>(stillScheduledDependencies); for (var dep : stillScheduledDependencies) { blockerTasks.putIfAbsent(dep, new ArrayList<>()); blockerTasks.get(dep).add(d); } } } } public @Nonnull R addTaskWithDependenciesOffPool(List dependencies, R after) { addTaskWithDependenciesOffPool(dependencies, new Runnable[] { after }); return after; } /** See {@link #unwrapException(RuntimeException, Class)} to how to handle checked exceptions */ public void execute() { var threads = IntStream.range(0, threadCount) .mapToObj(i -> new Thread(new RunnerRunnable(), threadNamePrefix+"-"+(i+1))) .collect(Collectors.toList()); for (var t : threads) t.start(); for (var t : threads) try { t.join(); } catch (InterruptedException e) { exceptionOrNull = e; } if (exceptionOrNull != null) throw new RuntimeException(exceptionOrNull); } /** * After {@link #execute()} runs, use this method, once per checked exception that your Runnables might throw. * This can handle the case that you wrap your exception in a RuntimeException, or the case that you use @SneakyThrows. *

* For example: *

     * catch (RuntimeException e) {
     *     unwrapException(e, RequestInvalidException.class);
     *     unwrapException(e, TransformationFailedException.class);
     *     throw e;
     * }
     * 
*

*/ @SuppressWarnings("unchecked") public static void unwrapException(@Nonnull RuntimeException e, @Nonnull Class exceptionClass) throws E { // runnable must wrap checked exception, then threads.execute wraps it again if (e.getCause() != null && e.getCause().getCause() != null && exceptionClass.isAssignableFrom(e.getCause().getCause().getClass())) throw (E) e.getCause().getCause(); // @SneakyThrows doesn't wrap, then threads.execute wraps it if (e.getCause() != null && exceptionClass.isAssignableFrom(e.getCause().getClass())) throw (E) e.getCause(); } protected void addTasksToString(StringBuilder result, String indent, Runnable task, int blockedByCount) { result.append(indent).append(task.toString()); var dependents = blockerTasks.getOrDefault(task, List.of()); if (blockedByCount > 0) result.append(" (blocked by ").append(blockedByCount).append(" tasks)"); if ( ! dependents.isEmpty()) result.append(", and thereafter:"); result.append("\n"); for (var dependent : dependents) addTasksToString(result, indent+" ", dependent.task, dependent.dependencies.size()); } @Override public String toString() { var result = new StringBuilder(); for (var r : executingTasks) addTasksToString(result, "", r, 0); for (var r : readyTasks) addTasksToString(result, "", r, 0); return result.toString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy