All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.glowroot.central.CentralAlertingService Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2017-2019 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.glowroot.central;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.ExecutorService;

import com.google.common.base.Stopwatch;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.glowroot.agent.api.Instrumentation;
import org.glowroot.agent.api.Instrumentation.AlreadyInTransactionBehavior;
import org.glowroot.central.repo.AlertingDisabledDao;
import org.glowroot.central.repo.ConfigRepositoryImpl;
import org.glowroot.central.util.MoreExecutors2;
import org.glowroot.common.util.Clock;
import org.glowroot.common2.repo.ConfigRepository.AgentConfigNotFoundException;
import org.glowroot.common2.repo.util.AlertingService;
import org.glowroot.wire.api.model.AgentConfigOuterClass.AgentConfig.AlertConfig;
import org.glowroot.wire.api.model.AgentConfigOuterClass.AgentConfig.AlertConfig.AlertCondition;

import static java.util.concurrent.TimeUnit.MINUTES;
import static java.util.concurrent.TimeUnit.SECONDS;

class CentralAlertingService {

    private static final Logger logger = LoggerFactory.getLogger(CentralAlertingService.class);

    private final ConfigRepositoryImpl configRepository;
    private final AlertingService alertingService;
    private final HeartbeatAlertingService heartbeatAlertingService;
    private final AlertingDisabledDao alertingDisabledDao;
    private final Clock clock;

    private final ExecutorService workerExecutor;

    private final Stopwatch stopwatch = Stopwatch.createStarted();

    private volatile boolean closed;

    CentralAlertingService(ConfigRepositoryImpl configRepository, AlertingService alertingService,
            HeartbeatAlertingService heartbeatAlertingService,
            AlertingDisabledDao alertingDisabledDao, Clock clock) {
        this.configRepository = configRepository;
        this.alertingService = alertingService;
        this.heartbeatAlertingService = heartbeatAlertingService;
        this.alertingDisabledDao = alertingDisabledDao;
        this.clock = clock;
        workerExecutor = MoreExecutors2.newCachedThreadPool("Alert-Async-Worker-%d");
    }

    void close() throws InterruptedException {
        closed = true;
        // shutdownNow() is needed here to send interrupt to alert checking thread
        workerExecutor.shutdownNow();
        if (!workerExecutor.awaitTermination(10, SECONDS)) {
            throw new IllegalStateException(
                    "Timed out waiting for alert checking thread to terminate");
        }
    }

    void checkForDeletedAlerts(String agentRollupId) throws InterruptedException {
        try {
            alertingService.checkForDeletedAlerts(agentRollupId);
        } catch (InterruptedException e) {
            // probably shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollupId, e.getMessage(), e);
        }
    }

    void checkForAllDeletedAlerts() throws InterruptedException {
        try {
            alertingService.checkForAllDeletedAlerts();
        } catch (InterruptedException e) {
            // probably shutdown requested
            throw e;
        } catch (Exception e) {
            logger.error("{}", e.getMessage(), e);
        }
    }

    void checkAggregateAlertsAsync(String agentId, String agentDisplay, long endTime)
            throws InterruptedException {
        List alertConfigs;
        try {
            if (isCurrentlyDisabled(agentId)) {
                return;
            }
            alertConfigs = configRepository.getAlertConfigs(agentId);
        } catch (InterruptedException e) {
            // probably shutdown requested
            throw e;
        } catch (AgentConfigNotFoundException e) {
            // be lenient if agent_config table is messed up
            logger.debug(e.getMessage(), e);
            return;
        } catch (Exception e) {
            logger.error("{} - {}", agentId, e.getMessage(), e);
            return;
        }
        List aggregateAlertConfigs = new ArrayList<>();
        for (AlertConfig alertConfig : alertConfigs) {
            if (isAggregateCondition(alertConfig.getCondition())) {
                aggregateAlertConfigs.add(alertConfig);
            }
        }
        if (!aggregateAlertConfigs.isEmpty()) {
            checkAlertsAsync(agentId, agentDisplay, endTime, aggregateAlertConfigs);
        }
    }

    void checkGaugeAndHeartbeatAlertsAsync(String agentId, String agentDisplay, long endTime)
            throws InterruptedException {
        List alertConfigs;
        try {
            if (isCurrentlyDisabled(agentId)) {
                return;
            }
            alertConfigs = configRepository.getAlertConfigs(agentId);
        } catch (InterruptedException e) {
            // probably shutdown requested
            throw e;
        } catch (AgentConfigNotFoundException e) {
            // be lenient if agent_config table is messed up
            logger.debug(e.getMessage(), e);
            return;
        } catch (Exception e) {
            logger.error("{} - {}", agentId, e.getMessage(), e);
            return;
        }
        List gaugeAndHeartbeatAlertConfigs = new ArrayList<>();
        for (AlertConfig alertConfig : alertConfigs) {
            AlertCondition condition = alertConfig.getCondition();
            if (isGaugeCondition(condition)
                    || condition.getValCase() == AlertCondition.ValCase.HEARTBEAT_CONDITION) {
                gaugeAndHeartbeatAlertConfigs.add(alertConfig);
            }
        }
        if (!gaugeAndHeartbeatAlertConfigs.isEmpty()) {
            checkAlertsAsync(agentId, agentDisplay, endTime, gaugeAndHeartbeatAlertConfigs);
        }
    }

    @Instrumentation.Transaction(transactionType = "Background", transactionName = "Check alert",
            traceHeadline = "Check alerts: {{0}}", timer = "check alerts",
            alreadyInTransactionBehavior = AlreadyInTransactionBehavior.CAPTURE_NEW_TRANSACTION)
    void checkAggregateAndGaugeAndHeartbeatAlertsAsync(String agentRollupId,
            String agentRollupDisplay, long endTime) throws InterruptedException {
        List alertConfigs;
        try {
            if (isCurrentlyDisabled(agentRollupId)) {
                return;
            }
            alertConfigs = configRepository.getAlertConfigs(agentRollupId);
        } catch (InterruptedException e) {
            // probably shutdown requested
            throw e;
        } catch (AgentConfigNotFoundException e) {
            // be lenient if agent_config table is messed up
            logger.debug(e.getMessage(), e);
            return;
        } catch (Exception e) {
            logger.error("{} - {}", agentRollupId, e.getMessage(), e);
            return;
        }
        List aggregateAndGaugeAndHeartbeatAlertConfigs = new ArrayList<>();
        for (AlertConfig alertConfig : alertConfigs) {
            AlertCondition condition = alertConfig.getCondition();
            if (condition.getValCase() == AlertCondition.ValCase.METRIC_CONDITION
                    || condition.getValCase() == AlertCondition.ValCase.HEARTBEAT_CONDITION) {
                aggregateAndGaugeAndHeartbeatAlertConfigs.add(alertConfig);
            }
        }
        if (!aggregateAndGaugeAndHeartbeatAlertConfigs.isEmpty()) {
            checkAlertsAsync(agentRollupId, agentRollupDisplay, endTime,
                    aggregateAndGaugeAndHeartbeatAlertConfigs);
        }
    }

    private boolean isCurrentlyDisabled(String agentRollupId) throws Exception {
        Long disabledUntilTime =
                alertingDisabledDao.getAlertingDisabledUntilTime(agentRollupId);
        return disabledUntilTime != null && disabledUntilTime > clock.currentTimeMillis();
    }

    private void checkAlertsAsync(String agentRollupId, String agentRollupDisplay, long endTime,
            List alertConfigs) {
        if (closed) {
            return;
        }
        workerExecutor.execute(() -> {
            for (AlertConfig alertConfig : alertConfigs) {
                try {
                    checkAlert(agentRollupId, agentRollupDisplay, endTime, alertConfig);
                } catch (InterruptedException e) {
                    // probably shutdown requested (see close method above)
                    logger.debug(e.getMessage(), e);
                    return;
                } catch (Throwable t) {
                    logger.error("{} - {}", agentRollupId, t.getMessage(), t);
                }
            }
        });
    }

    private void checkAlert(String agentRollupId, String agentDisplay, long endTime,
            AlertConfig alertConfig) throws Exception {
        AlertCondition alertCondition = alertConfig.getCondition();
        switch (alertCondition.getValCase()) {
            case METRIC_CONDITION:
                alertingService.checkMetricAlert(
                        configRepository.getCentralAdminGeneralConfig().centralDisplayName(),
                        agentRollupId, agentDisplay, alertConfig,
                        alertCondition.getMetricCondition(), endTime);
                break;
            case HEARTBEAT_CONDITION:
                if (stopwatch.elapsed(MINUTES) >= 4) {
                    // give agents plenty of time to re-connect after central start-up, needs to be
                    // at least enough time for grpc max reconnect backoff which is 2 minutes
                    // +/- 20% jitter (see io.grpc.internal.ExponentialBackoffPolicy) but better to
                    // give a bit extra (4 minutes above) to avoid false heartbeat alert
                    heartbeatAlertingService.checkHeartbeatAlert(agentRollupId, agentDisplay,
                            alertConfig, alertCondition.getHeartbeatCondition(), endTime);
                }
                break;
            default:
                throw new IllegalStateException(
                        "Unexpected alert condition: " + alertCondition.getValCase().name());
        }
    }

    private static boolean isAggregateCondition(AlertCondition alertCondition) {
        if (alertCondition.getValCase() != AlertCondition.ValCase.METRIC_CONDITION) {
            return false;
        }
        String metric = alertCondition.getMetricCondition().getMetric();
        return metric.startsWith("transaction:") || metric.startsWith("error:");
    }

    private static boolean isGaugeCondition(AlertCondition alertCondition) {
        return alertCondition.getValCase() == AlertCondition.ValCase.METRIC_CONDITION
                && alertCondition.getMetricCondition().getMetric().startsWith("gauge:");
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy