// com.databasesandlife.util.ThreadPool Maven / Gradle / Ivy
// Show all versions of java-common Show documentation
package com.databasesandlife.util;
import com.databasesandlife.util.gwtsafe.IdentityHashSet;
import org.slf4j.LoggerFactory;
import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import java.util.*;
import java.util.concurrent.ExecutorService;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
/**
* Runs a number of {@link Runnable} tasks in a number of threads (over a number of CPU cores).
*
* <p>Usage:
* <pre>
* ThreadPool pool = new ThreadPool();
*
* // optional
* pool.setThreadNamePrefix("foo"); // for debugger output
* pool.setThreadCount(5);
*
* // add one or more seed tasks
* pool.addTask(new Runnable() { ... });
*
* // start threads, execute the seed tasks, and execute any tasks they create
* pool.execute();
* </pre>
*
* In the case that any task throws an exception, this exception is thrown by the {@link #execute()} method.
* If all tasks run to completion, the {@link #execute()} method returns with no value.
*
* Tasks can depend on other tasks. Use the {@link #addTaskWithDependencies(List, Runnable...)} method to add new tasks (the second parameter),
* which will only start after all the tasks in the first parameter (the List) have run to completion. All tasks in the List should have been
* previously added using the normal {@link #addTask(Runnable...)} method, or themselves with {@link #addTaskWithDependencies(List, Runnable...)}.
*
* If task A must be executed before task B, normally task A is added first, and B (with dependency on A) is added afterwards.
* Therefore, when B is added, if A cannot be found, it is assumed to be already finished and B is scheduled immediately.
* However, if it is unknown in which order tasks will be added, then A can implement {@link ScheduleDependencyInAnyOrder}.
*
* Tasks can run "off pool". For example, in a thread pool doing CPU-intensive tasks, a long-running HTTP request should not
* block the threads from performing their CPU-intensive tasks. An "off pool" task runs in its own thread (not a thread that's
* a member of the thread pool). The thread may still participate in dependency relationships, that is to say it's possible
* to schedule a normal task to occur after an "off pool" task has completed. See {@link #addTaskOffPool(Runnable...)}
* and {@link #addTaskWithDependenciesOffPool(List, Runnable...)}.
*
* The difference to an {@link ExecutorService} is:
* <ul>
* <li>The processing of tasks can
* add additional tasks to the queue (whereas to an {@link ExecutorService} the client adds a fixed number of tasks,
* and they are then executed, without the executing tasks being able to add more tasks).
* Such functionality is mandatory for web crawlers, which, during the processing of pages, discover links which
* point to additional pages which require processing.</li>
* <li>It is impossible to forget to "shutdown" a ThreadPool and cause a leakage of threads, as is easily possible with {@link ExecutorService}.
* If the {@link #execute()} method is never called then no threads are ever started and the object can be garbage collected normally.
* If the {@link #execute()} method is called then that method makes sure all threads it creates are destroyed.</li>
* </ul>
*
*
* @author This source is copyright Adrian Smith and licensed under the LGPL 3.
* @see "Project on GitHub"
*/
public class ThreadPool {
/** A dependency which implements this can be added before or after the task that depends on it */
public interface ScheduleDependencyInAnyOrder extends Runnable { }
/** A task that performs no work, but upon which can be waited, and which can be added when some other work is finished. */
public static class SynchronizationPoint implements ScheduleDependencyInAnyOrder {
@Override public void run() { }
}
protected static class TaskWithDependencies {
boolean offPool;
@Nonnull Runnable task;
@Nonnull IdentityHashSet dependencies;
}
protected @Nonnull String threadNamePrefix = getClass().getSimpleName();
protected int threadCount = Runtime.getRuntime().availableProcessors();
protected int offPoolThreadCounter = 0;
protected final IdentityHashSet readyTasks = new IdentityHashSet<>();
protected final IdentityHashSet executingTasks = new IdentityHashSet<>();
protected final Map> blockerTasks = new IdentityHashMap<>();
protected final IdentityHashSet blockedTasks = new IdentityHashSet<>();
protected final IdentityHashSet doneAnyOrderDependencies = new IdentityHashSet<>();
protected @CheckForNull Throwable exceptionOrNull = null;
protected synchronized void onTaskCompleted(Runnable task) {
LoggerFactory.getLogger(getClass()).debug("--- Processing removal of " + task + ":");
executingTasks.remove(task);
if (exceptionOrNull != null) return;
if (task instanceof ScheduleDependencyInAnyOrder)
doneAnyOrderDependencies.add((ScheduleDependencyInAnyOrder) task);
for (var d : blockerTasks.getOrDefault(task, List.of())) {
d.dependencies.remove(task);
if (d.dependencies.isEmpty()) {
if (d.offPool) {
LoggerFactory.getLogger(getClass()).debug("Will add "+d.task+" (off pool)");
addTaskOffPool(d.task);
} else {
LoggerFactory.getLogger(getClass()).debug("Will add "+d.task+" to ready tasks");
readyTasks.add(d.task);
}
blockedTasks.remove(d.task);
}
}
blockerTasks.remove(task);
}
protected class RunnerRunnable implements Runnable {
@Override public void run() {
while (true) {
final @CheckForNull Runnable nextTaskOrNull;
synchronized (ThreadPool.this) {
if (executingTasks.isEmpty() && readyTasks.isEmpty()) break; // It's finished successfully
if (exceptionOrNull != null) break; // It's failed, no point continuing
nextTaskOrNull = readyTasks.isEmpty() ? null : readyTasks.iterator().next();
if (nextTaskOrNull != null) { readyTasks.remove(nextTaskOrNull); executingTasks.add(nextTaskOrNull); }
}
if (nextTaskOrNull != null) {
try {
nextTaskOrNull.run();
}
// Also catch e.g. StackOverflowExceptions here,
// otherwise ThreadPool.execute appears to "succeed" but stuff that should have happened has not happened.
catch (Throwable e) {
synchronized (ThreadPool.this) {
exceptionOrNull = e;
}
}
finally {
onTaskCompleted(nextTaskOrNull);
}
} else {
// it might be that other tasks are running, and they will produce lots more tasks
// so keep the thread alive and polling until all work is done.
try { Thread.sleep(10); }
catch (InterruptedException ignored) { }
}
}
}
}
public void setThreadCount(int count) { threadCount = count; }
public void setThreadNamePrefix(String prefix) { threadNamePrefix = prefix; }
public synchronized void addTaskWithDependencies(List extends Runnable> dependencies, Runnable... after) {
List stillScheduledDependencies = dependencies.stream()
.filter(dep -> dep instanceof ScheduleDependencyInAnyOrder ||
executingTasks.contains(dep) || readyTasks.contains(dep) || blockedTasks.contains(dep))
.filter(dep -> ! (dep instanceof ScheduleDependencyInAnyOrder && doneAnyOrderDependencies.contains((ScheduleDependencyInAnyOrder) dep)))
.collect(Collectors.toList());
if (stillScheduledDependencies.isEmpty()) {
readyTasks.addAll(after);
} else {
for (var job : after) {
blockedTasks.add(job);
var d = new TaskWithDependencies();
d.task = job;
d.dependencies = new IdentityHashSet<>(stillScheduledDependencies);
for (var dep : stillScheduledDependencies) {
blockerTasks.putIfAbsent(dep, new ArrayList<>());
blockerTasks.get(dep).add(d);
}
}
}
}
/** @return the runnable that's been scheduled for execution, as convenience */
public @Nonnull R addTaskWithDependencies(@Nonnull List extends Runnable> dependencies, @Nonnull R after) {
addTaskWithDependencies(dependencies, new Runnable[] { after });
return after;
}
public void addTask(Runnable... tasks) {
for (var r : tasks)
addTaskWithDependencies(List.of(), r);
}
/** @return the runnable that's been scheduled for execution, as convenience */
public @Nonnull R addTask(@Nonnull R after) {
addTask(new Runnable[] { after });
return after;
}
public void addTasks(Collection tasks) {
addTask(tasks.toArray(new Runnable[0]));
}
/**
* Add a task which runs in its own thread.
* Intended for CPU-bound thread pools which require a task to be completed which does not
* consume CPU for example an HTTP request.
*/
public synchronized void addTaskOffPool(Runnable... tasks) {
for (var t : tasks) {
executingTasks.add(t);
new Thread(() -> {
try {
t.run();
}
catch (Exception e) {
synchronized (ThreadPool.this) {
exceptionOrNull = e;
}
}
finally {
onTaskCompleted(t);
}
}, threadNamePrefix + "-offPool-" + (++offPoolThreadCounter)).start();
}
}
/** @return the runnable that's been scheduled for execution, as convenience */
public @Nonnull R addTaskOffPool(@Nonnull R task) {
addTaskOffPool(new Runnable[] { task });
return task;
}
public synchronized void addTaskWithDependenciesOffPool(List extends Runnable> dependencies, Runnable... after) {
List stillScheduledDependencies = dependencies.stream()
.filter(dep -> dep instanceof ScheduleDependencyInAnyOrder ||
executingTasks.contains(dep) || readyTasks.contains(dep) || blockedTasks.contains(dep))
.filter(dep -> ! (dep instanceof ScheduleDependencyInAnyOrder && doneAnyOrderDependencies.contains((ScheduleDependencyInAnyOrder) dep)))
.collect(Collectors.toList());
if (stillScheduledDependencies.isEmpty()) {
addTaskOffPool(after);
} else {
for (var job : after) {
blockedTasks.add(job);
var d = new TaskWithDependencies();
d.offPool = true;
d.task = job;
d.dependencies = new IdentityHashSet<>(stillScheduledDependencies);
for (var dep : stillScheduledDependencies) {
blockerTasks.putIfAbsent(dep, new ArrayList<>());
blockerTasks.get(dep).add(d);
}
}
}
}
public @Nonnull R addTaskWithDependenciesOffPool(List extends Runnable> dependencies, R after) {
addTaskWithDependenciesOffPool(dependencies, new Runnable[] { after });
return after;
}
/** See {@link #unwrapException(RuntimeException, Class)} to how to handle checked exceptions */
public void execute() {
var threads = IntStream.range(0, threadCount)
.mapToObj(i -> new Thread(new RunnerRunnable(), threadNamePrefix+"-"+(i+1)))
.collect(Collectors.toList());
for (var t : threads) t.start();
for (var t : threads) try { t.join(); } catch (InterruptedException e) { exceptionOrNull = e; }
if (exceptionOrNull != null) throw new RuntimeException(exceptionOrNull);
}
/**
* After {@link #execute()} runs, use this method, once per checked exception that your Runnables might throw.
* This can handle the case that you wrap your exception in a RuntimeException, or the case that you use @SneakyThrows.
*
* For example:
*
* catch (RuntimeException e) {
* unwrapException(e, RequestInvalidException.class);
* unwrapException(e, TransformationFailedException.class);
* throw e;
* }
*
*
*/
@SuppressWarnings("unchecked")
public static void unwrapException(@Nonnull RuntimeException e, @Nonnull Class exceptionClass) throws E {
// runnable must wrap checked exception, then threads.execute wraps it again
if (e.getCause() != null
&& e.getCause().getCause() != null
&& exceptionClass.isAssignableFrom(e.getCause().getCause().getClass()))
throw (E) e.getCause().getCause();
// @SneakyThrows doesn't wrap, then threads.execute wraps it
if (e.getCause() != null
&& exceptionClass.isAssignableFrom(e.getCause().getClass()))
throw (E) e.getCause();
}
protected void addTasksToString(StringBuilder result, String indent, Runnable task, int blockedByCount) {
result.append(indent).append(task.toString());
var dependents = blockerTasks.getOrDefault(task, List.of());
if (blockedByCount > 0) result.append(" (blocked by ").append(blockedByCount).append(" tasks)");
if ( ! dependents.isEmpty()) result.append(", and thereafter:");
result.append("\n");
for (var dependent : dependents)
addTasksToString(result, indent+" ", dependent.task, dependent.dependencies.size());
}
@Override public String toString() {
var result = new StringBuilder();
for (var r : executingTasks) addTasksToString(result, "", r, 0);
for (var r : readyTasks) addTasksToString(result, "", r, 0);
return result.toString();
}
}