![JAR search and dependency download from the Maven repository](/logo.png)
fish.payara.nucleus.healthcheck.preliminary.HoggingThreadsHealthCheck Maven / Gradle / Ivy
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright (c) 2016-2022 Payara Foundation and/or its affiliates. All rights reserved.
*
* The contents of this file are subject to the terms of either the GNU
* General Public License Version 2 only ("GPL") or the Common Development
* and Distribution License("CDDL") (collectively, the "License"). You
* may not use this file except in compliance with the License. You can
* obtain a copy of the License at
* https://github.com/payara/Payara/blob/master/LICENSE.txt
* See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each
* file and include the License file at glassfish/legal/LICENSE.txt.
*
* GPL Classpath Exception:
* The Payara Foundation designates this particular file as subject to the "Classpath"
* exception as provided by the Payara Foundation in the GPL Version 2 section of the License
* file that accompanied this code.
*
* Modifications:
* If applicable, add the following below the License Header, with the fields
* enclosed by brackets [] replaced by your own identifying information:
* "Portions Copyright [year] [name of copyright owner]"
*
* Contributor(s):
* If you wish your version of this file to be governed by only the CDDL or
* only the GPL Version 2, indicate your decision by adding "[Contributor]
* elects to include this software in this distribution under the [CDDL or GPL
* Version 2] license." If you don't indicate a single choice of license, a
* recipient has the option to distribute your version of this file under
* either the CDDL, the GPL Version 2 or to extend the choice of license to
* its licensees as provided above. However, if you add GPL Version 2 code
* and therefore, elected the GPL Version 2 license, then the option applies
* only if the new code is made subject to such option by the copyright
* holder.
*/
package fish.payara.nucleus.healthcheck.preliminary;
import fish.payara.nucleus.healthcheck.HealthCheckHoggingThreadsExecutionOptions;
import fish.payara.nucleus.healthcheck.HealthCheckResult;
import fish.payara.monitoring.collect.MonitoringData;
import fish.payara.monitoring.collect.MonitoringDataCollector;
import fish.payara.monitoring.collect.MonitoringDataSource;
import fish.payara.monitoring.collect.MonitoringWatchCollector;
import fish.payara.monitoring.collect.MonitoringWatchSource;
import fish.payara.notification.healthcheck.HealthCheckResultEntry;
import fish.payara.notification.healthcheck.HealthCheckResultStatus;
import fish.payara.nucleus.healthcheck.configuration.HoggingThreadsChecker;
import org.glassfish.api.StartupRunLevel;
import org.glassfish.hk2.runlevel.RunLevel;
import org.jvnet.hk2.annotations.Service;
import jakarta.annotation.PostConstruct;
import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import static fish.payara.internal.notification.TimeUtil.prettyPrintDuration;
/**
 * Health check and monitoring source that reports "hogging threads".
 *
 * A "hogging thread" is a thread that uses most of the CPU during the measured period: the share of CPU time it
 * consumed between two consecutive measurements exceeds the configured threshold percentage for more than the
 * configured number of consecutive measurements (the retry count).
 *
 * The health check ({@link #doCheckInternal()}) and the monitoring data collection
 * ({@link #collect(MonitoringDataCollector)}) each keep their own per-thread book-keeping so that the two
 * measuring cycles do not overwrite each other's interval start values.
 *
 * @author mertcaliskan (initial version)
 * @author Jan Bernitt (consumer based and monitoring)
 */
@Service(name = "healthcheck-threads")
@RunLevel(StartupRunLevel.VAL)
public class HoggingThreadsHealthCheck
        extends BaseHealthCheck<HealthCheckHoggingThreadsExecutionOptions, HoggingThreadsChecker>
        implements MonitoringDataSource, MonitoringWatchSource {

    /**
     * Callback invoked once for each thread that is currently considered "hogging".
     */
    @FunctionalInterface
    private interface HoggingThreadConsumer {

        /**
         * @param percentage       CPU usage of the thread during the last measuring interval in percent
         * @param threshold        the configured threshold percentage that was exceeded
         * @param totalTimeHogging milliseconds since the start of the interval in which the thread first exceeded
         *                         the threshold
         * @param initialMethod    top stack frame of the thread when it was first identified as hogging
         * @param info             current thread information, never {@code null}
         */
        void accept(int percentage, int threshold, long totalTimeHogging, String initialMethod, ThreadInfo info);
    }

    /**
     * Book-keeping record for each thread. All times are in milliseconds.
     */
    private static final class ThreadCpuTimeRecord {

        ThreadCpuTimeRecord() {
            // make visible
        }

        /**
         * Timestamp in milliseconds from when the measuring interval started
         */
        long startOfIntervalTimestamp;

        /**
         * Total number of milliseconds spent by the thread on the CPU at the start of the measuring interval.
         * This is usually not zero as it only ever increases from zero after the thread has been created.
         */
        long startOfIntervalCpuTime;

        /**
         * Timestamp in milliseconds from when the thread first exceeded the threshold and was identified as
         * "hogging".
         */
        long startOfExceedingThresholdTimestamp;

        /**
         * Number of times in a row the check has identified the thread as "hogging"
         */
        int identifiedAsHoggingCount;

        /**
         * This is the method on top of the stack trace when thread first is identified as "hogging". This method is
         * the most likely candidate. Using the "current" method often is misleading as worker threads at some point
         * get back to idle in the pool which would show the parking as the last method.
         */
        String identifiedAsHoggingMethod;
    }

    /**
     * Whether the JVM can measure the CPU time of arbitrary threads; set once in {@link #postConstruct()}.
     */
    private boolean supported;

    /**
     * Per-thread records used by the health check run ({@link #doCheckInternal()}).
     */
    private final Map<Long, ThreadCpuTimeRecord> checkRecordsByThreadId = new ConcurrentHashMap<>();

    /**
     * Per-thread records used by the monitoring data collection ({@link #collect(MonitoringDataCollector)}).
     */
    private final Map<Long, ThreadCpuTimeRecord> collectionRecordsByThreadId = new ConcurrentHashMap<>();

    /**
     * Runs the inherited initialisation for this check and determines whether thread CPU times are available.
     */
    @PostConstruct
    void postConstruct() {
        postConstruct(this, HoggingThreadsChecker.class);
        // The check reads the CPU time of *other* threads, so support for the current thread alone is not enough:
        // ThreadMXBean#getThreadCpuTime(long) throws UnsupportedOperationException unless CPU time measurement is
        // supported for any thread.
        supported = ManagementFactory.getThreadMXBean().isThreadCpuTimeSupported();
    }

    /**
     * Converts the textual configuration values of the checker into typed execution options.
     *
     * @param checker the configuration to read
     * @return the execution options parsed from the configuration
     * @throws NumberFormatException if the time, threshold percentage or retry count is not a valid number
     */
    @Override
    public HealthCheckHoggingThreadsExecutionOptions constructOptions(HoggingThreadsChecker checker) {
        return new HealthCheckHoggingThreadsExecutionOptions(Boolean.valueOf(checker.getEnabled()),
                Long.parseLong(checker.getTime()), asTimeUnit(checker.getUnit()),
                Boolean.valueOf(checker.getAddToMicroProfileHealth()),
                Long.parseLong(checker.getThresholdPercentage()),
                Integer.parseInt(checker.getRetryCount()));
    }

    /**
     * @return the message key of the description of this check
     */
    @Override
    public String getDescription() {
        return "healthcheck.description.hoggingThreads";
    }

    /**
     * Measures all threads and adds a {@link HealthCheckResultStatus#CRITICAL} entry for each hogging thread.
     * If CPU times cannot be measured a single {@link HealthCheckResultStatus#CHECK_ERROR} entry is returned.
     *
     * @return the result of this check run, without entries if no thread is hogging
     */
    @Override
    protected HealthCheckResult doCheckInternal() {
        HealthCheckResult result = new HealthCheckResult();
        if (!supported) {
            result.add(new HealthCheckResultEntry(HealthCheckResultStatus.CHECK_ERROR, "JVM implementation or OS does" +
                    " not support getting CPU times"));
            return result;
        }
        acceptHoggingThreads(checkRecordsByThreadId,
                (percentage, threshold, totalTimeHogging, initialMethod, info) ->
                        result.add(new HealthCheckResultEntry(HealthCheckResultStatus.CRITICAL,
                                "Thread with : " + info.getThreadId() + "-" + info.getThreadName() +
                                        " is a hogging thread for the last " +
                                        prettyPrintDuration(totalTimeHogging) + "\n"
                                        + prettyPrintStackTrace(info.getStackTrace())))
        );
        return result;
    }

    /**
     * Collects the number of hogging threads and the longest hogging duration and annotates each hogging thread.
     * Nothing is collected while the check is not configured, disabled or unsupported.
     *
     * @param collector the collector receiving the data points and annotations
     */
    @Override
    @MonitoringData(ns = "health", intervalSeconds = 4)
    public void collect(MonitoringDataCollector collector) {
        if (options == null || !options.isEnabled() || !supported) {
            return;
        }
        // Atomic holders are used because the values are updated from within the lambda.
        AtomicInteger hoggingThreadCount = new AtomicInteger(0);
        AtomicLong hoggingThreadMaxDuration = new AtomicLong(0L);
        acceptHoggingThreads(collectionRecordsByThreadId,
                (percentage, threshold, totalTimeHogging, initialMethod, info) -> {
                    String thread = info.getThreadName();
                    if (thread == null || thread.isEmpty()) {
                        thread = String.valueOf(info.getThreadId());
                    }
                    collector.annotate("HoggingThreadDuration", totalTimeHogging, true, //
                            "Thread", thread, //
                            "Usage%", String.valueOf(percentage), //
                            "Threshold%", String.valueOf(threshold), //
                            "Method", initialMethod, //
                            "Exited", String.valueOf(!initialMethod.equals(getMethod(info))));
                    hoggingThreadCount.incrementAndGet();
                    hoggingThreadMaxDuration.updateAndGet(value -> Math.max(value, totalTimeHogging));
                });
        collector
                .collect("HoggingThreadCount", hoggingThreadCount)
                .collect("HoggingThreadDuration", hoggingThreadMaxDuration);
    }

    /**
     * Registers the watch on the number of hogging threads. Nothing is registered while the check is not
     * configured, disabled or unsupported.
     *
     * @param collector the collector receiving the watch definition
     */
    @Override
    public void collect(MonitoringWatchCollector collector) {
        if (options == null || !options.isEnabled() || !supported) {
            return;
        }
        collector.watch("ns:health HoggingThreadCount", "Hogging Threads", "count")
                .green(-1, 1, false, null, null, false)
                .amber(0, -2, false, null, null, false)
                .red(1, -2, false, null, null, false);
    }

    /**
     * Measures the CPU time of all live threads except the calling thread, reports those that are hogging to the
     * consumer and starts a new measuring interval for each measured thread.
     *
     * Threads seen for the first time only get a record; they can be evaluated from the next call on. Records of
     * threads that no longer exist are removed so the book-keeping does not grow with every thread ever created.
     *
     * @param recordsById book-keeping of the measuring cycle this call belongs to, updated in place
     * @param consumer    called for each thread identified as hogging
     */
    private void acceptHoggingThreads(Map<Long, ThreadCpuTimeRecord> recordsById, HoggingThreadConsumer consumer) {
        final ThreadMXBean bean = ManagementFactory.getThreadMXBean();
        final long now = System.currentTimeMillis();
        final long currentThreadId = Thread.currentThread().getId();
        final int retryCount = options.getRetryCount();
        final int threshold = options.getThresholdPercentage().intValue();
        final long[] allThreadIds = bean.getAllThreadIds();
        final Set<Long> liveThreadIds = new HashSet<>(allThreadIds.length * 2);
        for (long threadId : allThreadIds) {
            liveThreadIds.add(threadId);
            if (threadId == currentThreadId) {
                continue;
            }
            final long cpuTimeInNanos = bean.getThreadCpuTime(threadId);
            if (cpuTimeInNanos == -1) {
                // thread is no longer alive or CPU time measurement is disabled
                continue;
            }
            // from here all times are in millis
            final long cpuTime = TimeUnit.NANOSECONDS.toMillis(cpuTimeInNanos);
            ThreadCpuTimeRecord cpuRecord = recordsById.get(threadId);
            if (cpuRecord == null) {
                cpuRecord = new ThreadCpuTimeRecord();
                recordsById.put(threadId, cpuRecord);
            } else {
                acceptHoggingThread(bean, now, retryCount, threshold, threadId, cpuTime, cpuRecord, consumer);
            }
            cpuRecord.startOfIntervalTimestamp = now;
            cpuRecord.startOfIntervalCpuTime = cpuTime;
        }
        // forget threads that have terminated since the last measurement
        recordsById.keySet().retainAll(liveThreadIds);
    }

    /**
     * Evaluates the measuring interval that just ended for a single thread and calls the consumer if the thread
     * exceeded the threshold more than {@code retryCount} times in a row.
     *
     * @param bean       source of the thread information
     * @param now        end of the measuring interval as timestamp in milliseconds
     * @param retryCount number of consecutive times the threshold may be exceeded without reporting the thread
     * @param threshold  CPU usage in percent that has to be exceeded to count as hogging
     * @param threadId   ID of the measured thread
     * @param cpuTime    total CPU time of the thread in milliseconds at the end of the interval
     * @param cpuRecord  book-keeping of the thread holding the start of the interval, updated in place
     * @param consumer   called if the thread is identified as hogging
     */
    private static void acceptHoggingThread(final ThreadMXBean bean, final long now, final int retryCount,
            final int threshold, long threadId, long cpuTime, ThreadCpuTimeRecord cpuRecord,
            HoggingThreadConsumer consumer) {
        final long intervalLength = now - cpuRecord.startOfIntervalTimestamp;
        final long intervalCpuTime = cpuTime - cpuRecord.startOfIntervalCpuTime;
        if (intervalLength <= 0) {
            // no time has passed (or the clock went backwards), a percentage cannot be computed
            return;
        }
        final int percentage = (int) (intervalCpuTime * 100L / intervalLength);
        if (percentage <= threshold) {
            cpuRecord.identifiedAsHoggingCount = 0;
            return;
        }
        ThreadInfo info = null;
        if (cpuRecord.identifiedAsHoggingCount == 0) {
            info = bean.getThreadInfo(threadId, 1);
            if (info == null) {
                // thread terminated after its CPU time was read, nothing left to report
                return;
            }
            cpuRecord.startOfExceedingThresholdTimestamp = cpuRecord.startOfIntervalTimestamp;
            cpuRecord.identifiedAsHoggingMethod = getMethod(info);
        }
        cpuRecord.identifiedAsHoggingCount++;
        if (cpuRecord.identifiedAsHoggingCount > retryCount) {
            if (info == null) {
                info = bean.getThreadInfo(threadId, 1);
            }
            if (info == null) {
                // thread terminated after its CPU time was read, nothing left to report
                return;
            }
            final long totalTimeHogging = now - cpuRecord.startOfExceedingThresholdTimestamp;
            consumer.accept(percentage, threshold, totalTimeHogging, cpuRecord.identifiedAsHoggingMethod, info);
        }
    }

    /**
     * Describes the method on top of the stack trace of the given thread.
     *
     * @param info thread information, may be {@code null} if the thread no longer exists
     * @return {@code className#methodName:lineNumber} of the top stack frame, or {@code "?"} if no stack frame is
     *         available
     */
    static String getMethod(ThreadInfo info) {
        if (info == null) {
            return "?";
        }
        // fetch once: the stack trace array is requested from ThreadInfo only a single time
        final StackTraceElement[] stackTrace = info.getStackTrace();
        if (stackTrace.length == 0) {
            return "?";
        }
        final StackTraceElement frame = stackTrace[0];
        return frame.getClassName() + "#" + frame.getMethodName() + ":" + frame.getLineNumber();
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy