// All downloads are free. Search and download functionality uses the official Maven repository.

// org.glowroot.central.RollupService Maven / Gradle / Ivy

// The newest version!
/*
 * Copyright 2016-2019 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.glowroot.central;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.util.concurrent.Futures;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.glowroot.agent.api.Instrumentation;
import org.glowroot.central.repo.ActiveAgentDao;
import org.glowroot.central.repo.AggregateDao;
import org.glowroot.central.repo.GaugeValueDao;
import org.glowroot.central.repo.SyntheticResultDao;
import org.glowroot.central.util.MoreExecutors2;
import org.glowroot.central.util.MoreFutures;
import org.glowroot.central.util.Session;
import org.glowroot.common.util.Clock;
import org.glowroot.common2.repo.ActiveAgentRepository.AgentRollup;

import static java.util.concurrent.TimeUnit.DAYS;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;

class RollupService implements Runnable {

    // bounds for the number of rollup worker threads; run() grows or shrinks the worker pool
    // between these limits depending on how long the previous rollup pass took
    private static final int MIN_WORKER_THREADS = 1;
    private static final int MAX_WORKER_THREADS = 4;
    // size of the worker pool created when the main loop starts
    private static final int INITIAL_WORKER_THREADS = 2;

    private static final Logger logger = LoggerFactory.getLogger(RollupService.class);

    // collaborators supplied through the constructor
    private final ActiveAgentDao activeAgentDao;
    private final AggregateDao aggregateDao;
    private final GaugeValueDao gaugeValueDao;
    private final SyntheticResultDao syntheticResultDao;
    private final CentralAlertingService centralAlertingService;
    private final Clock clock;

    // executor that runs this service's run() method (submitted from the constructor)
    private final ExecutorService mainLoopExecutor;

    // set to true by close(); volatile because it is written by the closing thread and polled by
    // the main loop in run()
    private volatile boolean closed;

    RollupService(ActiveAgentDao activeAgentDao, AggregateDao aggregateDao,
            GaugeValueDao gaugeValueDao, SyntheticResultDao syntheticResultDao,
            CentralAlertingService centralAlertingService, Clock clock) {
        this.activeAgentDao = activeAgentDao;
        this.aggregateDao = aggregateDao;
        this.gaugeValueDao = gaugeValueDao;
        this.syntheticResultDao = syntheticResultDao;
        this.centralAlertingService = centralAlertingService;
        this.clock = clock;
        mainLoopExecutor = MoreExecutors2.newSingleThreadExecutor("Rollup-Main-Loop");
        mainLoopExecutor.execute(castInitialized(this));
    }

    /**
     * Main rollup loop, submitted to the main loop executor by the constructor.
     *
     * <p>Each iteration sleeps until the next scheduled rollup time (see
     * {@link #millisUntilNextRollup(long)}), reads the recently active agent rollups and hands
     * them to {@code runInternal}. The duration of that pass then drives the size of the worker
     * pool: more than 300 seconds adds a worker thread (or logs a warning once
     * {@code MAX_WORKER_THREADS} is reached), less than 60 seconds removes one (down to
     * {@code MIN_WORKER_THREADS}).
     *
     * <p>The loop ends once {@code closed} is set; the current worker executor is then shut down
     * with {@code shutdownNow()} and awaited for up to 10 seconds.
     */
    @Override
    public void run() {
        Session.setInRollupThread(true);
        int counter = 0;
        int numWorkerThreads = INITIAL_WORKER_THREADS;
        ListeningExecutorService workerExecutor = newWorkerExecutor(numWorkerThreads);
        while (!closed) {
            try {
                MILLISECONDS.sleep(millisUntilNextRollup(clock.currentTimeMillis()));
                // perform larger sweep approx every 100 minutes
                long lastXMillis = counter++ % 100 == 0 ? DAYS.toMillis(7) : MINUTES.toMillis(30);
                Stopwatch stopwatch = Stopwatch.createStarted();
                List<AgentRollup> agentRollups =
                        activeAgentDao.readRecentlyActiveAgentRollups(lastXMillis);
                runInternal(agentRollups, workerExecutor);
                long elapsedInSeconds = stopwatch.elapsed(SECONDS);
                int oldNumWorkerThreads = numWorkerThreads;
                if (elapsedInSeconds > 300) {
                    // the pass was slow: add a worker if allowed, otherwise just report it
                    if (numWorkerThreads < MAX_WORKER_THREADS) {
                        numWorkerThreads++;
                    } else {
                        logger.warn("rolling up data across {} agent rollup took {} seconds (using"
                                + " {} threads)", count(agentRollups), elapsedInSeconds,
                                numWorkerThreads);
                    }
                } else if (elapsedInSeconds < 60 && numWorkerThreads > MIN_WORKER_THREADS) {
                    // the pass was fast: give a worker thread back
                    numWorkerThreads--;
                }
                if (numWorkerThreads != oldNumWorkerThreads) {
                    // swap in a pool of the new size; the old pool is idle at this point because
                    // runInternal waits for all of the futures it collected
                    ExecutorService oldWorkerExecutor = workerExecutor;
                    workerExecutor = newWorkerExecutor(numWorkerThreads);
                    oldWorkerExecutor.shutdown();
                    if (!oldWorkerExecutor.awaitTermination(10, SECONDS)) {
                        logger.error("timed out waiting for old worker rollup thread to terminate");
                    }
                }
            } catch (InterruptedException e) {
                // probably shutdown requested (see close method below)
                // the interrupt flag is intentionally not restored here, since the
                // awaitTermination call after the loop would otherwise fail immediately
                logger.debug(e.getMessage(), e);
                continue;
            } catch (Throwable t) {
                // this probably should never happen since runInternal catches and logs exceptions
                logger.error(t.getMessage(), t);
            }
        }
        // shutdownNow() is needed here to send interrupt to worker rollup thread
        workerExecutor.shutdownNow();
        try {
            if (!workerExecutor.awaitTermination(10, SECONDS)) {
                throw new IllegalStateException(
                        "Timed out waiting for worker rollup thread to terminate");
            }
        } catch (InterruptedException e) {
            // this is unexpected (but not harmful since already closing)
            logger.error(e.getMessage(), e);
        }
    }

    /**
     * Stops the rollup service: flags the main loop as closed, interrupts the main rollup thread
     * and waits up to 10 seconds for it to finish.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     * @throws IllegalStateException if the main rollup thread has not terminated within 10 seconds
     */
    void close() throws InterruptedException {
        closed = true;
        // shutdownNow() (not shutdown()) so that the main rollup thread receives an interrupt
        mainLoopExecutor.shutdownNow();
        boolean terminated = mainLoopExecutor.awaitTermination(10, SECONDS);
        if (terminated) {
            return;
        }
        throw new IllegalStateException("Timed out waiting for main rollup thread to terminate");
    }

    /**
     * Performs one rollup pass: for every given agent rollup (in random order) it submits the
     * aggregate, gauge and synthetic monitor rollups followed by the alert checks, waits for all
     * of the resulting futures, and finally asks the alerting service to check for deleted alerts.
     *
     * @param agentRollups top-level agent rollups to process; children are reached through the
     *        recursive helper methods
     * @param workerExecutor executor that the individual tasks are submitted to
     * @throws Exception if waiting on the submitted futures fails (see
     *         {@code MoreFutures.waitForAll})
     */
    @Instrumentation.Transaction(transactionType = "Background",
            transactionName = "Outer rollup loop", traceHeadline = "Outer rollup loop",
            timer = "outer rollup loop")
    private void runInternal(List<AgentRollup> agentRollups,
            ListeningExecutorService workerExecutor) throws Exception {
        List<Future<?>> futures = new ArrayList<>();
        // randomize order so that multiple central collector nodes will be less likely to perform
        // duplicative work
        for (AgentRollup agentRollup : shuffle(agentRollups)) {
            futures.addAll(rollupAggregates(agentRollup, workerExecutor));
            futures.add(rollupGauges(agentRollup, workerExecutor));
            futures.addAll(rollupSyntheticMonitors(agentRollup, workerExecutor));
            // checking aggregate and gauge alerts after rollup since their calculation can depend
            // on rollups depending on time period length (and alerts on rollups are not checked
            // anywhere else)
            //
            // agent (not rollup) alerts are also checked right after receiving the respective data
            // (aggregate/gauge/heartbeat) from the agent, but need to also check these once a
            // minute in case no data has been received from the agent recently
            futures.addAll(
                    checkAggregateAndGaugeAndHeartbeatAlertsAsync(agentRollup, workerExecutor));
        }
        // none of the futures should fail since they all catch and log exception at the end
        MoreFutures.waitForAll(futures);
        try {
            // FIXME keep this here as fallback, but also resolve alerts immediately when they are
            // deleted (or when their condition is updated)
            centralAlertingService.checkForAllDeletedAlerts();
        } catch (Exception e) {
            // a failure here is logged rather than propagated to the main loop
            logger.error(e.getMessage(), e);
        }
    }

    private List> rollupAggregates(AgentRollup agentRollup,
            ListeningExecutorService workerExecutor) {
        List> futures = new ArrayList<>();
        // randomize order so that multiple central collector nodes will be less likely to perform
        // duplicative work
        for (AgentRollup childAgentRollup : shuffle(agentRollup.children())) {
            futures.addAll(rollupAggregates(childAgentRollup, workerExecutor));
        }
        futures.add(workerExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    aggregateDao.rollup(agentRollup.id());
                } catch (InterruptedException e) {
                    // probably shutdown requested (see close method above)
                } catch (Throwable t) {
                    logger.error("{} - {}", agentRollup.id(), t.getMessage(), t);
                }
            }
        }));
        return futures;
    }

    /**
     * Schedules the gauge value rollup for the given agent rollup. When the agent rollup has
     * children, their gauge rollups are scheduled first (recursively, in random order) and the
     * parent's rollup is chained behind them with {@code Futures.whenAllSucceed}.
     *
     * <p>NOTE(review): {@code RollupGauges.run()} catches and logs every {@code Throwable}, so
     * the child futures look like they always complete successfully; confirm whether a failed
     * child rollup can actually prevent the parent rollup as the comment below intends.
     *
     * @param agentRollup agent rollup whose subtree should be rolled up
     * @param workerExecutor executor the rollup tasks run on
     * @return future for the rollup of {@code agentRollup} itself
     */
    private ListenableFuture<?> rollupGauges(AgentRollup agentRollup,
            ListeningExecutorService workerExecutor) {
        List<AgentRollup> childAgentRollups = agentRollup.children();
        if (childAgentRollups.isEmpty()) {
            // optimization of common case
            return workerExecutor.submit(new RollupGauges(agentRollup.id()));
        }
        // need to roll up children first, since gauge values initial roll up from children is
        // done on the 1-min aggregates of the children
        List<ListenableFuture<?>> futures = new ArrayList<>();
        for (AgentRollup childAgentRollup : shuffle(childAgentRollups)) {
            futures.add(rollupGauges(childAgentRollup, workerExecutor));
        }
        // using _whenAllSucceed_ because need to _not_ roll up parent if exception occurs while
        // rolling up a child, since gauge values initial roll up from children is done on the 1-min
        // aggregates of the children
        return Futures.whenAllSucceed(futures)
                .run(new RollupGauges(agentRollup.id()), workerExecutor);
    }

    private List> rollupSyntheticMonitors(AgentRollup agentRollup,
            ListeningExecutorService workerExecutor) {
        List> futures = new ArrayList<>();
        for (AgentRollup childAgentRollup : shuffle(agentRollup.children())) {
            futures.addAll(rollupSyntheticMonitors(childAgentRollup, workerExecutor));
        }
        futures.add(workerExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    syntheticResultDao.rollup(agentRollup.id());
                } catch (InterruptedException e) {
                    // probably shutdown requested (see close method above)
                } catch (Throwable t) {
                    logger.error("{} - {}", agentRollup.id(), t.getMessage(), t);
                }
            }
        }));
        return futures;
    }

    private List> checkAggregateAndGaugeAndHeartbeatAlertsAsync(AgentRollup agentRollup,
            ListeningExecutorService workerExecutor) {
        List> futures = new ArrayList<>();
        for (AgentRollup childAgentRollup : agentRollup.children()) {
            futures.addAll(checkAggregateAndGaugeAndHeartbeatAlertsAsync(childAgentRollup,
                    workerExecutor));
        }
        futures.add(workerExecutor.submit(new Runnable() {
            @Override
            public void run() {
                try {
                    centralAlertingService.checkAggregateAndGaugeAndHeartbeatAlertsAsync(
                            agentRollup.id(), agentRollup.display(), clock.currentTimeMillis());
                } catch (InterruptedException e) {
                    // probably shutdown requested (see close method above)
                } catch (Throwable t) {
                    logger.error("{} - {}", agentRollup.id(), t.getMessage(), t);
                }
            }
        }));
        return futures;
    }

    /**
     * Creates a worker executor with the requested number of threads: a fixed thread pool from
     * {@code MoreExecutors2} wrapped in a listening decorator.
     *
     * @param numWorkerThreads number of threads passed to the fixed thread pool factory
     * @return the decorated executor
     */
    private static ListeningExecutorService newWorkerExecutor(int numWorkerThreads) {
        ListeningExecutorService listeningWorkers = MoreExecutors.listeningDecorator(
                MoreExecutors2.newFixedThreadPool(numWorkerThreads, "Rollup-Worker-%d"));
        return listeningWorkers;
    }

    /**
     * Returns a new, randomly ordered list containing the elements of the given list. The given
     * list itself is left untouched, so it may be unmodifiable.
     *
     * @param agentRollups elements to shuffle
     * @param <T> element type
     * @return a fresh mutable list holding the same elements in random order
     */
    private static <T> List<T> shuffle(List<T> agentRollups) {
        List<T> mutable = new ArrayList<>(agentRollups);
        Collections.shuffle(mutable);
        return mutable;
    }

    /**
     * Counts the given agent rollups together with all of their descendants.
     *
     * @param agentRollups agent rollups at the current level of the tree
     * @return size of the list plus the recursive count of every element's children
     */
    private static int count(List<AgentRollup> agentRollups) {
        int count = agentRollups.size();
        for (AgentRollup agentRollup : agentRollups) {
            count += count(agentRollup.children());
        }
        return count;
    }

    @VisibleForTesting
    static long millisUntilNextRollup(long currentTimeMillis) {
        // the main loop sleeps for the returned number of milliseconds, which is the time left
        // until the next instant lying 10 seconds past a whole minute (a full 60000 when
        // currentTimeMillis is already exactly on such an instant)
        //
        // floorMod (rather than %) keeps the offset in [0, 60000) even when currentTimeMillis is
        // smaller than 10000, so the result always lies in (0, 60000]
        return 60000 - Math.floorMod(currentTimeMillis - 10000, 60000L);
    }

    /**
     * Identity function that returns its argument unchanged. The constructor uses it to hand
     * {@code this} to the main loop executor while the object is still being constructed.
     *
     * <p>NOTE(review): the annotation comments look like Checker Framework initialization
     * qualifiers; confirm against the build's static analysis configuration.
     *
     * @param obj object to return
     * @param <T> type of the object
     * @return {@code obj} itself
     */
    @SuppressWarnings("return.type.incompatible")
    private static <T> /*@Initialized*/ T castInitialized(/*@UnderInitialization*/ T obj) {
        return obj;
    }

    /**
     * Callback that receives a single agent rollup and may throw a checked exception.
     *
     * <p>NOTE(review): nothing in the visible part of this class references this interface;
     * confirm whether it is still used elsewhere in the package.
     */
    @FunctionalInterface
    interface AgentRollupConsumer {
        /**
         * Processes the given agent rollup.
         *
         * @param agentRollup agent rollup to process
         * @throws Exception if processing fails
         */
        void accept(AgentRollup agentRollup) throws Exception;
    }

    /**
     * Task that rolls up gauge values for a single agent rollup id through the enclosing
     * service's {@code gaugeValueDao}. Failures are logged together with the agent rollup id and
     * never propagate out of {@link #run()}.
     */
    private class RollupGauges implements Runnable {

        private final String agentRollupId;

        private RollupGauges(String agentRollupId) {
            this.agentRollupId = agentRollupId;
        }

        @Override
        public void run() {
            try {
                gaugeValueDao.rollup(agentRollupId);
            } catch (InterruptedException e) {
                // probably shutdown requested (see close method above); restore the interrupt
                // flag instead of silently dropping it
                Thread.currentThread().interrupt();
            } catch (Throwable t) {
                logger.error("{} - {}", agentRollupId, t.getMessage(), t);
            }
        }
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy