com.bigdata.util.concurrent.ThreadPoolExecutorStatisticsTask Maven / Gradle / Ivy
package com.bigdata.util.concurrent;
import java.lang.ref.WeakReference;
import java.util.concurrent.Callable;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import com.bigdata.counters.CounterSet;
import com.bigdata.counters.Instrument;
import com.bigdata.journal.AbstractTask;
import com.bigdata.journal.WriteExecutorService;
import com.bigdata.util.concurrent.IQueueCounters.ITaskCounters;
import com.bigdata.util.concurrent.IQueueCounters.IThreadPoolExecutorCounters;
import com.bigdata.util.concurrent.IQueueCounters.IThreadPoolExecutorTaskCounters;
import com.bigdata.util.concurrent.IQueueCounters.IWriteServiceExecutorCounters;
/**
* Class tracks a variety of information about a {@link ThreadPoolExecutor}
* including the moving average of its queue length, queuing times, etc.
*
* @todo refactor to layer {@link QueueSizeMovingAverageTask} then
* {@link ThreadPoolExecutorBaseStatisticsTask}, then this class, then a
* derived class for the {@link WriteServiceExecutor}.
*
* @author Bryan Thompson
* @version $Id$
*/
public class ThreadPoolExecutorStatisticsTask implements Runnable {
private static final Logger log = Logger.getLogger(ThreadPoolExecutorStatisticsTask.class);
/**
* The label for the executor service (used in log messages).
*/
private final String serviceName;
/**
* The executor service that is being monitored.
*/
// private final ThreadPoolExecutor service;
private final WeakReference serviceRef;
private ThreadPoolExecutor getService() {
final ThreadPoolExecutor service = serviceRef.get();
if (service == null) {
// Throw exception which should cause the task to stop executing.
throw new RuntimeException("Service was shutdown.");
}
return service;
}
// /**
// * The time when we started to collect data about the {@link #service} (set by the ctor).
// */
// private final long startNanos;
/**
* The weight used to compute the moving average.
*/
private final double w;
/**
* #of samples taken so far.
*/
private long nsamples = 0;
/*
* There are several different moving averages which are computed.
*/
// private double averageQueueSize = 0d;
private double averageActiveCount = 0d;
private double averageQueueLength = 0d;
private double averageActiveCountWithLocksHeld = 0d;
/**
* Data collected about {@link AbstractTask}s run on a service (optional).
*/
private final TaskCounters taskCounters;
/*
* These are moving averages based on the optional TaskCounters.
*
* Note: We also store the old value for these since they are cumulative
* counters but we need only the delta from the last value for the counter
* to compute the moving averages.
*
* @todo there are other data that could be computed and reported out.
*/
/** time waiting on the queue until the task begins to execute. */
private double averageQueueWaitingTime = 0d;
/** time waiting for resource locks. */
private double averageLockWaitingTime = 0d;
/** time doing work (does not include time to acquire resources locks or commit time). */
private double averageServiceTime = 0d;
/** time checkpointing indices (included in the {@link #averageServiceTime}). */
private double averageCheckpointTime = 0d;
/** total time from submit to completion. */
private double averageQueuingTime = 0d;
private double averageCommitWaitingTime = 0d;
private double averageCommitServiceTime = 0d;
private double averageCommitGroupSize = 0d;
private double averageByteCountPerCommit = 0d;
/*
* private variables used to compute the delta in various counters since
* they were last sampled.
*/
private long queueWaitingTime = 0L;
private long lockWaitingTime = 0L;
private long serviceTime = 0L;
private long checkpointTime = 0L; // Note: checkpointTime is included in the serviceTime.
private long queuingTime = 0L;
private long commitWaitingTime = 0L;
private long commitServiceTime = 0L;
private double averageReadyCount;
/**
* Scaling factor converts nanoseconds to milliseconds.
*/
static final double scalingFactor = 1d / TimeUnit.NANOSECONDS.convert(1,
TimeUnit.MILLISECONDS);
/**
* The weight used to compute the moving average.
*/
public double getWeight() {
return w;
}
/**
* #of samples taken so far.
*/
public long getSampleCount() {
return nsamples;
}
/**
* The recommended default weight.
*/
public static final double DEFAULT_WEIGHT = .2d;
/**
* Ctor variant when the {@link ThreadPoolExecutor} does not have hooks for
* an {@link AbstractTask} and therefore does not update
* {@link TaskCounters}s.
*
* @param serviceName
* The label for the service.
*
* @param service
* The service to be monitored.
*/
public ThreadPoolExecutorStatisticsTask(final String serviceName,
final ThreadPoolExecutor service) {
this(serviceName, service, null/* taskCounters */, DEFAULT_WEIGHT);
}
/**
* Ctor variant when the {@link ThreadPoolExecutor} has hooks for an
* {@link AbstractTask} and updates the given {@link TaskCounters}s.
*
* @param serviceName
* The label for the service.
* @param service
* The service to be monitored.
* @param taskCounters
* The per-task counters used to compute the latency data for
* tasks run on that service.
*/
public ThreadPoolExecutorStatisticsTask(final String serviceName,
final ThreadPoolExecutor service, final TaskCounters taskCounters) {
this(serviceName, service, taskCounters, DEFAULT_WEIGHT);
}
/**
* Core impl.
*
* @param serviceName
* The label for the service.
* @param service
* The service to be monitored.
* @param taskCounters
* The per-task counters used to compute the latency data for
* tasks run on that service.
* @param w
* The weight to be used by
* {@link #getMovingAverage(double, double, double)}
*/
public ThreadPoolExecutorStatisticsTask(final String serviceName,
final ThreadPoolExecutor service, final TaskCounters taskCounters,
final double w) {
if (serviceName == null)
throw new IllegalArgumentException();
if (service == null)
throw new IllegalArgumentException();
// Note: MAY be null (useful for generic executor services).
// if (taskCounters == null)
// throw new IllegalArgumentException();
if (w <= 0d || w >= 1d)
throw new IllegalArgumentException();
this.serviceName = serviceName;
// this.service = service;
this.serviceRef = new WeakReference(service);
// this.startNanos = System.nanoTime();
this.taskCounters = taskCounters;
this.w = w;
}
/**
* Compute a moving average: (1 - w) * avg + w * q
*
* @param avg
* The previous average and initially zero (0.0).
* @param q
* The current value (e.g., the instantaneous measurement of the
* #of active tasks plus the length of the queue).
* @param w
* The weight for the moving average in (0:1). Values around .2
* seem appropriate.
*
* @return The updated moving average.
*/
public double getMovingAverage(double avg, double q, double w) {
return (1 - w) * avg + (w * q);
}
/**
* The moving average of the queue size.
*/
private final MovingAverageTask queueSizeTask = new MovingAverageTask(
"queueSize", new Callable() {
public Integer call() {
return getService().getQueue().size();
}
});
/**
* The moving average of the change in the total inter-arrival time.
*
* @see TaskCounters#interArrivalNanoTime
*/
private DeltaMovingAverageTask interArrivalNanoTimeTask = new DeltaMovingAverageTask(
"interArrivalTime", new Callable() {
public Long call() {
return taskCounters.interArrivalNanoTime.get();
}
});
/**
* The moving average of the change in the total task service time.
*
* @see TaskCounters#serviceNanoTime
*/
private DeltaMovingAverageTask serviceNanoTimeTask = new DeltaMovingAverageTask(
"serviceNanoTime", new Callable() {
public Long call() {
return taskCounters.serviceNanoTime.get();
}
});
/**
* This should be invoked once per second to sample various counters in
* order to turn their values into moving averages.
*
* Note: don't throw anything from here or it will cause the scheduled task
* executing this to no longer be run!
*/
public void run() {
/*
* Note: This will throw a RuntimeException if the weak reference has
* been cleared. This decouples the task from the monitored service
* which let's the monitored service shutdown when it is no longer
* referenced by the application (assuming that it implements a
* finalize() method).
*/
final ThreadPoolExecutor service = getService();
try {
{
queueSizeTask.run();
// queueSize := #of tasks in the queue.
final int queueSize = service.getQueue().size();
// activeCount := #of tasks assigned a worker thread
final int activeCount = service.getActiveCount();
//// This is just the tasks that are currently waiting to run (not
//// assigned to any thread).
// averageQueueSize = getMovingAverage(averageQueueSize,
// queueSize, w);
// This is just the tasks that are currently running (assigned
// to a worker thread).
averageActiveCount = getMovingAverage(averageActiveCount,
activeCount, w);
/*
* Note: this is the primary average of interest - it includes
* both the tasks waiting to be run and those that are currently
* running in the definition of the "queue length".
*/
averageQueueLength = getMovingAverage(averageQueueLength,
(activeCount + queueSize), w);
}
if (service instanceof WriteExecutorService) {
/*
* Note: For the WriteExecutorService we compute a variant of
* [activeCount] the which only counts tasks that are currently
* holding their exclusive resource lock(s). This is the real
* concurrency of the write service since tasks without locks
* are waiting on other tasks so that they can obtain their
* lock(s) and "run".
*/
final int activeCountWithLocksHeld = ((WriteExecutorService) service)
.getActiveTaskCountWithLocksHeld();
averageActiveCountWithLocksHeld = getMovingAverage(
averageActiveCountWithLocksHeld, activeCountWithLocksHeld, w);
}
if (taskCounters != null) {
/*
* Compute some latency data that relies on the task counters.
*/
// #of tasks that have been submitted so far.
final long taskCount = taskCounters.taskCompleteCount.get();
if (taskCount > 0) {
/*
* Time waiting on the queue to begin execution.
*/
{
final long newValue = taskCounters.queueWaitingNanoTime.get();
final long delta = newValue - queueWaitingTime;
assert delta >= 0 : "" + delta;
queueWaitingTime = newValue;
averageQueueWaitingTime = getMovingAverage(
averageQueueWaitingTime,
(delta * scalingFactor / taskCounters.taskCompleteCount.get()),
w);
}
/*
* Time waiting on resource lock(s).
*/
if(service instanceof WriteExecutorService) {
final long newValue = ((WriteTaskCounters) taskCounters).lockWaitingNanoTime
.get();
final long delta = newValue - lockWaitingTime;
assert delta >= 0 : "" + delta;
lockWaitingTime = newValue;
averageLockWaitingTime = getMovingAverage(
averageLockWaitingTime,
(delta * scalingFactor / taskCounters.taskCompleteCount.get()),
w);
}
/*
* Time that the task is being serviced (after its obtained
* any locks).
*/
{
final long newValue = taskCounters.serviceNanoTime.get();
final long delta = newValue - serviceTime;
assert delta >= 0 : "" + delta;
serviceTime = newValue;
averageServiceTime = getMovingAverage(
averageServiceTime,
(delta * scalingFactor / taskCounters.taskCompleteCount.get()),
w);
}
/*
* The moving average of the change in the cumulative
* inter-arrival time.
*/
interArrivalNanoTimeTask.run();
/*
* The moving average of the change in the total task
* service time.
*/
serviceNanoTimeTask.run();
/*
* Time that the task is busy checkpoint its indices (this
* is already reported as part of the service time but which
* is broken out here as a detail).
*/
{
final long newValue = taskCounters.checkpointNanoTime.get();
final long delta = newValue - checkpointTime;
assert delta >= 0 : "" + delta;
checkpointTime = newValue;
averageCheckpointTime = getMovingAverage(
averageCheckpointTime,
(delta * scalingFactor / taskCounters.taskCompleteCount.get()),
w);
}
/*
* Queuing time (elapsed time from submit until completion).
*/
{
final long newValue = taskCounters.queuingNanoTime.get();
final long delta = newValue - queuingTime;
assert delta >= 0 : "" + delta;
queuingTime = newValue;
averageQueuingTime = getMovingAverage(
averageQueuingTime,
(delta * scalingFactor / taskCounters.taskCompleteCount.get()),
w);
}
}
if (service instanceof WriteExecutorService) {
final WriteExecutorService tmp = (WriteExecutorService) service;
final WriteTaskCounters writeTaskCounters = (WriteTaskCounters) taskCounters;
final long groupCommitCount = tmp.getGroupCommitCount();
if (groupCommitCount > 0) {
// Time waiting for the commit.
{
final long newValue = writeTaskCounters.commitWaitingNanoTime
.get();
final long delta = newValue - commitWaitingTime;
assert delta >= 0 : "" + delta;
commitWaitingTime = newValue;
averageCommitWaitingTime = getMovingAverage(
averageCommitWaitingTime,
(delta * scalingFactor / groupCommitCount),
w);
}
// Time servicing the commit.
{
final long newValue = writeTaskCounters.commitServiceNanoTime
.get();
final long delta = newValue - commitServiceTime;
assert delta >= 0 : "" + delta;
commitServiceTime = newValue;
averageCommitServiceTime = getMovingAverage(
averageCommitServiceTime,
(delta * scalingFactor / groupCommitCount),
w);
}
}
// moving average of the size nready.
averageReadyCount = getMovingAverage(
averageReadyCount, tmp.getReadyCount(), w);
// moving average of the size of the commit groups.
averageCommitGroupSize = getMovingAverage(
averageCommitGroupSize, tmp.getCommitGroupSize(), w);
// moving average of the #of bytes written since the
// previous commit.
averageByteCountPerCommit = getMovingAverage(
averageByteCountPerCommit, tmp
.getByteCountPerCommit(), w);
} // end (if service instanceof WriteExecutorService )
}
nsamples++;
} catch (Exception ex) {
log.warn(serviceName, ex);
}
}
/**
* Adds counters for all innate variables defined for a
* {@link ThreadPoolExecutor} and for each of the variables computed by this
* class. Note that some variables (e.g., the lock waiting time) are only
* available when the service specified to the ctor is a
* {@link WriteExecutorService}.
*
* @param counterSet
* The counters will be added to this {@link CounterSet}.
*
* @return The caller's counterSet
*/
public CounterSet getCounters() {
final CounterSet counterSet = new CounterSet();
// Reference to the service : MAY have been cleared by GC.
final ThreadPoolExecutor service = serviceRef.get();
/*
* Defined for ThreadPoolExecutor.
*/
// Note: reported as moving average instead.
// counterSet.addCounter("#active",
// new Instrument() {
// public void sample() {
// setValue(service.getActiveCount());
// }
// });
//
// Note: reported as moving average instead.
// counterSet.addCounter("#queued",
// new Instrument() {
// public void sample() {
// setValue(service.getQueue().size());
// }
// });
//
/*
* Computed variables based on just the Queue.
*/
{
// counterSet.addCounter(IQueueCounters.AverageQueueSize,
// new Instrument() {
// @Override
// protected void sample() {
// setValue(averageQueueSize);
// }
// });
counterSet.addCounter(
IQueueCounters.AverageQueueSize,
new Instrument() {
@Override
protected void sample() {
setValue(queueSizeTask
.getMovingAverage());
}
});
}
/*
* Computed variables based on the service itself.
*/
{
if(service != null) {
if (taskCounters == null) {
/*
* Report iff not being reported via the TaskCounters.
*/
counterSet.addCounter(
IThreadPoolExecutorCounters.TaskCompleteCount,
new Instrument() {
public void sample() {
setValue(service.getCompletedTaskCount());
}
});
}
counterSet.addCounter(IThreadPoolExecutorCounters.PoolSize,
new Instrument() {
public void sample() {
setValue(service.getPoolSize());
}
});
counterSet.addCounter(IThreadPoolExecutorCounters.LargestPoolSize,
new Instrument() {
public void sample() {
setValue(service.getLargestPoolSize());
}
});
}
counterSet.addCounter(
IThreadPoolExecutorCounters.AverageActiveCount,
new Instrument() {
@Override
protected void sample() {
setValue(averageActiveCount);
}
});
counterSet.addCounter(
IThreadPoolExecutorCounters.AverageQueueLength,
new Instrument() {
@Override
protected void sample() {
setValue(averageQueueLength);
}
});
}
/*
* Computed variables based on the per-task counters.
*/
if (taskCounters != null) {
/*
* Simple counters.
*/
// count of all tasks submitted to the service.
counterSet.addCounter(
IThreadPoolExecutorTaskCounters.TaskSubmitCount,
new Instrument() {
public void sample() {
setValue(taskCounters.taskSubmitCount.get());
}
});
// count of all tasks completed by the service (failed +
// success).
counterSet.addCounter(
IThreadPoolExecutorCounters.TaskCompleteCount,
new Instrument() {
public void sample() {
setValue(taskCounters.taskCompleteCount.get());
}
});
// count of all tasks which failed during execution.
counterSet.addCounter(
IThreadPoolExecutorTaskCounters.TaskFailCount,
new Instrument() {
public void sample() {
setValue(taskCounters.taskFailCount.get());
}
});
// count of all tasks which were successfully executed.
counterSet.addCounter(
IThreadPoolExecutorTaskCounters.TaskSuccessCount,
new Instrument() {
public void sample() {
setValue(taskCounters.taskSuccessCount.get());
}
});
/*
* Running totals for various things.
*/
{
counterSet.addCounter(ITaskCounters.InterArrivalTime,
new Instrument() {
public void sample() {
setValue(TimeUnit.NANOSECONDS
.toMillis(taskCounters.interArrivalNanoTime
.get()));
}
});
counterSet.addCounter(ITaskCounters.QueueWaitingTime,
new Instrument() {
public void sample() {
setValue(TimeUnit.NANOSECONDS
.toMillis(taskCounters.queueWaitingNanoTime
.get()));
}
});
counterSet.addCounter(ITaskCounters.ServiceTime,
new Instrument() {
public void sample() {
setValue(TimeUnit.NANOSECONDS
.toMillis(taskCounters.serviceNanoTime
.get()));
}
});
counterSet.addCounter(ITaskCounters.CheckpointTime,
new Instrument() {
public void sample() {
setValue(TimeUnit.NANOSECONDS
.toMillis(taskCounters.checkpointNanoTime
.get()));
}
});
counterSet.addCounter(ITaskCounters.QueuingTime,
new Instrument() {
public void sample() {
setValue(TimeUnit.NANOSECONDS
.toMillis(taskCounters.queuingNanoTime
.get()));
}
});
}
/*
* Moving averages.
*/
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageArrivalRate,
new Instrument() {
@Override
protected void sample() {
final double t = interArrivalNanoTimeTask
.getMovingAverage()
* scalingFactor;
if (t != 0d)
setValue(1d / t);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageServiceRate,
new Instrument() {
@Override
protected void sample() {
final double t = serviceNanoTimeTask
.getMovingAverage()
* scalingFactor;
if (t != 0d)
setValue(1d / t);
}
});
counterSet
.addCounter(
IThreadPoolExecutorTaskCounters.AverageQueueWaitingTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageQueueWaitingTime);
}
});
counterSet.addCounter(
IThreadPoolExecutorTaskCounters.AverageServiceTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageServiceTime);
}
});
counterSet.addCounter(
IThreadPoolExecutorTaskCounters.AverageQueuingTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageQueuingTime);
}
});
}
/*
* These data are available only for the write service.
*/
if (service instanceof WriteExecutorService) {
final WriteExecutorService writeService = (WriteExecutorService) service;
/*
* Simple counters.
*/
counterSet.addCounter(IWriteServiceExecutorCounters.CommitCount,
new Instrument() {
public void sample() {
setValue(writeService.getGroupCommitCount());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.AbortCount,
new Instrument() {
public void sample() {
setValue(writeService.getAbortCount());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.OverflowCount,
new Instrument() {
public void sample() {
setValue(writeService.getOverflowCount());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.RejectedExecutionCount,
new Instrument() {
public void sample() {
setValue(writeService
.getRejectedExecutionCount());
}
});
/*
* Maximum observed values.
*/
counterSet.addCounter(IWriteServiceExecutorCounters.MaxCommitWaitingTime,
new Instrument() {
public void sample() {
setValue(writeService.getMaxCommitWaitingTime());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.MaxCommitServiceTime,
new Instrument() {
public void sample() {
setValue(writeService.getMaxCommitServiceTime());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.MaxCommitGroupSize,
new Instrument() {
public void sample() {
setValue((long) writeService
.getMaxCommitGroupSize());
}
});
counterSet.addCounter(IWriteServiceExecutorCounters.MaxRunning,
new Instrument() {
public void sample() {
setValue(writeService.getMaxRunning());
}
});
/*
* Moving averages available only for the write executor
* service.
*/
counterSet
.addCounter(
IWriteServiceExecutorCounters.AverageActiveCountWithLocksHeld,
new Instrument() {
@Override
protected void sample() {
setValue(averageActiveCountWithLocksHeld);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageReadyCount,
new Instrument() {
@Override
protected void sample() {
setValue(averageReadyCount);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageCommitGroupSize,
new Instrument() {
@Override
protected void sample() {
setValue(averageCommitGroupSize);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageLockWaitingTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageLockWaitingTime);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageCheckpointTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageCheckpointTime);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageCommitWaitingTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageCommitWaitingTime);
}
});
counterSet.addCounter(
IWriteServiceExecutorCounters.AverageCommitServiceTime,
new Instrument() {
@Override
protected void sample() {
setValue(averageCommitServiceTime);
}
});
counterSet
.addCounter(
IWriteServiceExecutorCounters.AverageByteCountPerCommit,
new Instrument() {
@Override
protected void sample() {
setValue(averageByteCountPerCommit);
}
});
}
return counterSet;
}
}