All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.monitor.jvm.HotThreads Maven / Gradle / Ivy

There is a newer version: 8.14.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.monitor.jvm;

import org.apache.logging.log4j.LogManager;
import org.apache.lucene.util.CollectionUtil;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.time.DateFormatter;
import org.elasticsearch.common.unit.ByteSizeValue;
import org.elasticsearch.core.TimeValue;

import java.lang.management.ManagementFactory;
import java.lang.management.ThreadInfo;
import java.lang.management.ThreadMXBean;
import java.time.Clock;
import java.time.LocalDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.TimeUnit;
import java.util.function.ToLongFunction;

public class HotThreads {

    private static final Object mutex = new Object();

    private static final StackTraceElement[] EMPTY = new StackTraceElement[0];
    private static final DateFormatter DATE_TIME_FORMATTER = DateFormatter.forPattern("date_optional_time");
    private static final long INVALID_TIMING = -1L;

    private int busiestThreads = 3;
    private TimeValue interval = new TimeValue(500, TimeUnit.MILLISECONDS);
    private TimeValue threadElementsSnapshotDelay = new TimeValue(10, TimeUnit.MILLISECONDS);
    private int threadElementsSnapshotCount = 10;
    private ReportType type = ReportType.CPU;
    private SortOrder sortOrder = SortOrder.TOTAL;

    private boolean ignoreIdleThreads = true;

    private static final List knownIdleStackFrames = Arrays.asList(
        new String[] { "java.util.concurrent.ThreadPoolExecutor", "getTask" },
        new String[] { "sun.nio.ch.SelectorImpl", "select" },
        new String[] { "org.elasticsearch.threadpool.ThreadPool$CachedTimeThread", "run" },
        new String[] { "org.elasticsearch.indices.ttl.IndicesTTLService$Notifier", "await" },
        new String[] { "java.util.concurrent.LinkedTransferQueue", "poll" },
        new String[] { "com.sun.jmx.remote.internal.ServerCommunicatorAdmin$Timeout", "run" }
    );

    // NOTE: these are JVM dependent and JVM version dependent
    private static final List knownJDKInternalThreads = Arrays.asList(
        "Signal Dispatcher",
        "Finalizer",
        "Reference Handler",
        "Notification Thread",
        "Common-Cleaner",
        "process reaper",
        "DestroyJavaVM"
    );

    public enum ReportType {

        CPU("cpu"),
        WAIT("wait"),
        BLOCK("block"),
        MEM("mem");

        private final String type;

        ReportType(String type) {
            this.type = type;
        }

        public String getTypeValue() {
            return type;
        }

        // Custom enum from string because of legacy support. The standard Enum.valueOf is static
        // and cannot be overriden to show a nice error message in case the enum value isn't found
        public static ReportType of(String type) {
            for (ReportType report : values()) {
                if (report.type.equals(type)) {
                    return report;
                }
            }
            throw new IllegalArgumentException("type not supported [" + type + "]");
        }
    }

    public enum SortOrder {

        TOTAL("total"),
        CPU("cpu");

        private final String order;

        SortOrder(String order) {
            this.order = order;
        }

        public String getOrderValue() {
            return order;
        }

        // Custom enum from string because of legacy support. The standard Enum.valueOf is static
        // and cannot be overriden to show a nice error message in case the enum value isn't found
        public static SortOrder of(String order) {
            for (SortOrder sortOrder : values()) {
                if (sortOrder.order.equals(order)) {
                    return sortOrder;
                }
            }
            throw new IllegalArgumentException("sort order not supported [" + order + "]");
        }
    }

    public HotThreads interval(TimeValue interval) {
        this.interval = interval;
        return this;
    }

    public HotThreads busiestThreads(int busiestThreads) {
        this.busiestThreads = busiestThreads;
        return this;
    }

    public HotThreads ignoreIdleThreads(boolean ignoreIdleThreads) {
        this.ignoreIdleThreads = ignoreIdleThreads;
        return this;
    }

    public HotThreads threadElementsSnapshotDelay(TimeValue threadElementsSnapshotDelay) {
        this.threadElementsSnapshotDelay = threadElementsSnapshotDelay;
        return this;
    }

    public HotThreads threadElementsSnapshotCount(int threadElementsSnapshotCount) {
        this.threadElementsSnapshotCount = threadElementsSnapshotCount;
        return this;
    }

    public HotThreads type(ReportType type) {
        this.type = type;
        return this;
    }

    public HotThreads sortOrder(SortOrder order) {
        this.sortOrder = order;
        return this;
    }

    public String detect() throws Exception {
        synchronized (mutex) {
            return innerDetect(ManagementFactory.getThreadMXBean(), SunThreadInfo.INSTANCE, Thread.currentThread().getId(), (interval) -> {
                Thread.sleep(interval);
                return null;
            });
        }
    }

    static boolean isKnownJDKThread(ThreadInfo threadInfo) {
        return (knownJDKInternalThreads.stream()
            .anyMatch(jvmThread -> threadInfo.getThreadName() != null && threadInfo.getThreadName().equals(jvmThread)));
    }

    static boolean isKnownIdleStackFrame(String className, String methodName) {
        return (knownIdleStackFrames.stream().anyMatch(pair -> pair[0].equals(className) && pair[1].equals(methodName)));
    }

    static boolean isIdleThread(ThreadInfo threadInfo) {
        if (isKnownJDKThread(threadInfo)) {
            return true;
        }

        for (StackTraceElement frame : threadInfo.getStackTrace()) {
            if (isKnownIdleStackFrame(frame.getClassName(), frame.getMethodName())) {
                return true;
            }
        }

        return false;
    }

    Map getAllValidThreadInfos(ThreadMXBean threadBean, SunThreadInfo sunThreadInfo, long currentThreadId) {
        long[] threadIds = threadBean.getAllThreadIds();
        ThreadInfo[] threadInfos = threadBean.getThreadInfo(threadIds);
        Map result = new HashMap<>(threadIds.length);

        for (int i = 0; i < threadIds.length; i++) {
            if (threadInfos[i] == null || threadIds[i] == currentThreadId) {
                continue;
            }
            long cpuTime = threadBean.getThreadCpuTime(threadIds[i]);
            if (cpuTime == INVALID_TIMING) {
                continue;
            }
            long allocatedBytes = type == ReportType.MEM ? sunThreadInfo.getThreadAllocatedBytes(threadIds[i]) : 0;
            result.put(threadIds[i], new ThreadTimeAccumulator(threadInfos[i], interval, cpuTime, allocatedBytes));
        }

        return result;
    }

    ThreadInfo[][] captureThreadStacks(ThreadMXBean threadBean, long[] threadIds) throws InterruptedException {
        ThreadInfo[][] result = new ThreadInfo[threadElementsSnapshotCount][];
        for (int j = 0; j < threadElementsSnapshotCount; j++) {
            // NOTE, javadoc of getThreadInfo says: If a thread of the given ID is not alive or does not exist,
            // null will be set in the corresponding element in the returned array. A thread is alive if it has
            // been started and has not yet died.
            result[j] = threadBean.getThreadInfo(threadIds, Integer.MAX_VALUE);
            Thread.sleep(threadElementsSnapshotDelay.millis());
        }

        return result;
    }

    private boolean isThreadWaitBlockTimeMonitoringEnabled(ThreadMXBean threadBean) {
        if (threadBean.isThreadContentionMonitoringSupported()) {
            return threadBean.isThreadContentionMonitoringEnabled();
        }
        return false;
    }

    private double getTimeSharePercentage(long time) {
        return (((double) time) / interval.nanos()) * 100;
    }

    String innerDetect(ThreadMXBean threadBean, SunThreadInfo sunThreadInfo, long currentThreadId, SleepFunction threadSleep)
        throws Exception {
        if (threadBean.isThreadCpuTimeSupported() == false) {
            throw new ElasticsearchException("thread CPU time is not supported on this JDK");
        }

        if (type == ReportType.MEM && sunThreadInfo.isThreadAllocatedMemorySupported() == false) {
            throw new ElasticsearchException("thread allocated memory is not supported on this JDK");
        }

        // Enabling thread contention monitoring is required for capturing JVM thread wait/blocked times. If we weren't
        // able to enable this functionality during bootstrap, we should not produce HotThreads reports.
        if (isThreadWaitBlockTimeMonitoringEnabled(threadBean) == false) {
            throw new ElasticsearchException("thread wait/blocked time accounting is not supported on this JDK");
        }

        StringBuilder sb = new StringBuilder().append("Hot threads at ")
            .append(DATE_TIME_FORMATTER.format(LocalDateTime.now(Clock.systemUTC())))
            .append(", interval=")
            .append(interval)
            .append(", busiestThreads=")
            .append(busiestThreads)
            .append(", ignoreIdleThreads=")
            .append(ignoreIdleThreads)
            .append(":\n");

        // Capture before and after thread state with timings
        Map previousThreadInfos = getAllValidThreadInfos(threadBean, sunThreadInfo, currentThreadId);
        threadSleep.apply(interval.millis());
        Map latestThreadInfos = getAllValidThreadInfos(threadBean, sunThreadInfo, currentThreadId);

        latestThreadInfos.forEach((threadId, accumulator) -> accumulator.subtractPrevious(previousThreadInfos.get(threadId)));

        // Sort by delta CPU time on thread.
        List topThreads = new ArrayList<>(latestThreadInfos.values());

        // Special comparator for CPU mode with TOTAL sort type only. Otherwise, we simply use the value.
        if (type == ReportType.CPU && sortOrder == SortOrder.TOTAL) {
            CollectionUtil.introSort(
                topThreads,
                Comparator.comparingLong(ThreadTimeAccumulator::getRunnableTime)
                    .thenComparingLong(ThreadTimeAccumulator::getCpuTime)
                    .reversed()
            );
        } else {
            CollectionUtil.introSort(topThreads, Comparator.comparingLong(ThreadTimeAccumulator.valueGetterForReportType(type)).reversed());
        }

        topThreads = topThreads.subList(0, Math.min(busiestThreads, topThreads.size()));
        long[] topThreadIds = topThreads.stream().mapToLong(t -> t.threadId).toArray();

        // analyse N stack traces for the top threads
        ThreadInfo[][] allInfos = captureThreadStacks(threadBean, topThreadIds);

        for (int t = 0; t < topThreads.size(); t++) {
            String threadName = null;
            for (ThreadInfo[] info : allInfos) {
                if (info != null && info[t] != null) {
                    if (ignoreIdleThreads && isIdleThread(info[t])) {
                        info[t] = null;
                        continue;
                    }
                    threadName = info[t].getThreadName();
                    break;
                }
            }
            if (threadName == null) {
                continue; // thread is not alive yet or died before the first snapshot - ignore it!
            }

            ThreadTimeAccumulator topThread = topThreads.get(t);

            switch (type) {
                case MEM:
                    sb.append(
                        String.format(
                            Locale.ROOT,
                            "%n%s memory allocated by thread '%s'%n",
                            new ByteSizeValue(topThread.getAllocatedBytes()),
                            threadName
                        )
                    );
                    break;
                case CPU:
                    double percentCpu = getTimeSharePercentage(topThread.getCpuTime());
                    double percentOther = getTimeSharePercentage(topThread.getOtherTime());
                    sb.append(
                        String.format(
                            Locale.ROOT,
                            "%n%4.1f%% [cpu=%1.1f%%, other=%1.1f%%] (%s out of %s) %s usage by thread '%s'%n",
                            percentOther + percentCpu,
                            percentCpu,
                            percentOther,
                            TimeValue.timeValueNanos(topThread.getCpuTime() + topThread.getOtherTime()),
                            interval,
                            type.getTypeValue(),
                            threadName
                        )
                    );
                    break;
                default:
                    long time = ThreadTimeAccumulator.valueGetterForReportType(type).applyAsLong(topThread);
                    double percent = getTimeSharePercentage(time);
                    sb.append(
                        String.format(
                            Locale.ROOT,
                            "%n%4.1f%% (%s out of %s) %s usage by thread '%s'%n",
                            percent,
                            TimeValue.timeValueNanos(time),
                            interval,
                            type.getTypeValue(),
                            threadName
                        )
                    );
                    break;
            }

            // for each snapshot (2nd array index) find later snapshot for same thread with max number of
            // identical StackTraceElements (starting from end of each)
            boolean[] done = new boolean[threadElementsSnapshotCount];
            for (int i = 0; i < threadElementsSnapshotCount; i++) {
                if (done[i]) continue;
                int maxSim = 1;
                boolean[] similars = new boolean[threadElementsSnapshotCount];
                for (int j = i + 1; j < threadElementsSnapshotCount; j++) {
                    if (done[j]) continue;
                    int similarity = similarity(allInfos[i][t], allInfos[j][t]);
                    if (similarity > maxSim) {
                        maxSim = similarity;
                        similars = new boolean[threadElementsSnapshotCount];
                    }
                    if (similarity == maxSim) similars[j] = true;
                }
                // print out trace maxSim levels of i, and mark similar ones as done
                int count = 1;
                for (int j = i + 1; j < threadElementsSnapshotCount; j++) {
                    if (similars[j]) {
                        done[j] = true;
                        count++;
                    }
                }
                if (allInfos[i][t] != null) {
                    final StackTraceElement[] show = allInfos[i][t].getStackTrace();
                    if (count == 1) {
                        sb.append(String.format(Locale.ROOT, "  unique snapshot%n"));
                        for (StackTraceElement frame : show) {
                            sb.append(String.format(Locale.ROOT, "    %s%n", frame));
                        }
                    } else {
                        sb.append(
                            String.format(
                                Locale.ROOT,
                                "  %d/%d snapshots sharing following %d elements%n",
                                count,
                                threadElementsSnapshotCount,
                                maxSim
                            )
                        );
                        for (int l = show.length - maxSim; l < show.length; l++) {
                            sb.append(String.format(Locale.ROOT, "    %s%n", show[l]));
                        }
                    }
                }
            }
        }

        return sb.toString();
    }

    int similarity(ThreadInfo threadInfo, ThreadInfo threadInfo0) {
        StackTraceElement[] s1 = threadInfo == null ? EMPTY : threadInfo.getStackTrace();
        StackTraceElement[] s2 = threadInfo0 == null ? EMPTY : threadInfo0.getStackTrace();
        int i = s1.length - 1;
        int j = s2.length - 1;
        int rslt = 0;
        while (i >= 0 && j >= 0 && s1[i].equals(s2[j])) {
            rslt++;
            i--;
            j--;
        }
        return rslt;
    }

    static class ThreadTimeAccumulator {
        private final long threadId;
        private final TimeValue interval;

        private long cpuTime;
        private long blockedTime;
        private long waitedTime;
        private long allocatedBytes;

        ThreadTimeAccumulator(ThreadInfo info, TimeValue interval, long cpuTime, long allocatedBytes) {
            this.blockedTime = millisecondsToNanos(info.getBlockedTime()); // Convert to nanos to standardize
            this.waitedTime = millisecondsToNanos(info.getWaitedTime()); // Convert to nanos to standardize
            this.cpuTime = cpuTime;
            this.allocatedBytes = allocatedBytes;
            this.threadId = info.getThreadId();
            this.interval = interval;
        }

        private long millisecondsToNanos(long millis) {
            return millis * 1_000_000;
        }

        void subtractPrevious(ThreadTimeAccumulator previous) {
            // Previous can be null for threads that had started while we were sleeping.
            // In that case the absolute thread timings are the delta.
            if (previous != null) {
                if (previous.getThreadId() != getThreadId()) {
                    throw new IllegalStateException("Thread timing accumulation must be done on the same thread");
                }
                this.blockedTime -= previous.blockedTime;
                this.waitedTime -= previous.waitedTime;
                this.cpuTime -= previous.cpuTime;
                this.allocatedBytes -= previous.allocatedBytes;
            }
        }

        // A thread can migrate a CPU while executing, in which case, on certain processors, the
        // reported timings will be bogus (and likely negative). We cap the reported timings to >= 0 values only.
        public long getCpuTime() {
            return Math.max(cpuTime, 0);
        }

        public long getRunnableTime() {
            // If the thread didn't have any CPU movement, we can't really tell if it's
            // not running, or it has been asleep forever.
            if (getCpuTime() == 0) {
                return 0;
            }
            return Math.max(interval.nanos() - getWaitedTime() - getBlockedTime(), 0);
        }

        public long getOtherTime() {
            // If the thread didn't have any CPU movement, we can't really tell if it's
            // not running, or it has been asleep forever.
            if (getCpuTime() == 0) {
                return 0;
            }

            return Math.max(getRunnableTime() - getCpuTime(), 0);
        }

        public long getBlockedTime() {
            return Math.max(blockedTime, 0);
        }

        public long getWaitedTime() {
            return Math.max(waitedTime, 0);
        }

        public long getAllocatedBytes() {
            return Math.max(allocatedBytes, 0);
        }

        public long getThreadId() {
            return threadId;
        }

        static ToLongFunction valueGetterForReportType(ReportType type) {
            switch (type) {
                case CPU:
                    return ThreadTimeAccumulator::getCpuTime;
                case WAIT:
                    return ThreadTimeAccumulator::getWaitedTime;
                case BLOCK:
                    return ThreadTimeAccumulator::getBlockedTime;
                case MEM:
                    return ThreadTimeAccumulator::getAllocatedBytes;
            }
            throw new IllegalArgumentException("expected thread type to be either 'cpu', 'wait', 'mem', or 'block', but was " + type);
        }
    }

    @FunctionalInterface
    public interface SleepFunction {
        R apply(T t) throws InterruptedException;
    }

    public static void initializeRuntimeMonitoring() {
        // We'll let the JVM boot without this functionality, however certain APIs like HotThreads will not
        // function and report an error.
        if (ManagementFactory.getThreadMXBean().isThreadContentionMonitoringSupported() == false) {
            LogManager.getLogger(HotThreads.class).info("Thread wait/blocked time accounting not supported.");
        } else {
            try {
                ManagementFactory.getThreadMXBean().setThreadContentionMonitoringEnabled(true);
            } catch (UnsupportedOperationException monitoringUnavailable) {
                LogManager.getLogger(HotThreads.class).warn("Thread wait/blocked time accounting cannot be enabled.");
            }
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy