All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.graylog2.bootstrap.preflight.GraylogCertificateProvisioningPeriodical Maven / Gradle / Ivy

There is a newer version: 6.1.4
Show newest version
/*
 * Copyright (C) 2020 Graylog, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 */
package org.graylog2.bootstrap.preflight;

import com.github.rholder.retry.Attempt;
import com.github.rholder.retry.RetryException;
import com.github.rholder.retry.RetryListener;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;
import com.google.common.base.Suppliers;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import okhttp3.OkHttpClient;
import okhttp3.Request;
import okhttp3.Response;
import org.graylog.security.certutil.CaConfiguration;
import org.graylog.security.certutil.CaService;
import org.graylog.security.certutil.ca.exceptions.KeyStoreStorageException;
import org.graylog.security.certutil.cert.CertificateChain;
import org.graylog.security.certutil.cert.storage.CertChainMongoStorage;
import org.graylog.security.certutil.cert.storage.CertChainStorage;
import org.graylog.security.certutil.csr.CsrSigner;
import org.graylog.security.certutil.csr.storage.CsrMongoStorage;
import org.graylog2.Configuration;
import org.graylog2.cluster.NodeService;
import org.graylog2.cluster.preflight.DataNodeProvisioningConfig;
import org.graylog2.cluster.preflight.DataNodeProvisioningService;
import org.graylog2.notifications.Notification;
import org.graylog2.notifications.NotificationService;
import org.graylog2.plugin.certificates.RenewalPolicy;
import org.graylog2.plugin.cluster.ClusterConfigService;
import org.graylog2.plugin.periodical.Periodical;
import org.graylog2.security.CustomCAX509TrustManager;
import org.graylog2.security.IndexerJwtAuthTokenProvider;
import org.jetbrains.annotations.NotNull;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.inject.Inject;
import javax.inject.Named;
import javax.inject.Singleton;
import javax.net.ssl.SSLContext;
import javax.net.ssl.TrustManager;
import javax.net.ssl.X509TrustManager;
import java.security.KeyManagementException;
import java.security.KeyStore;
import java.security.KeyStoreException;
import java.security.NoSuchAlgorithmException;
import java.security.PrivateKey;
import java.security.SecureRandom;
import java.security.UnrecoverableKeyException;
import java.security.cert.X509Certificate;
import java.time.Duration;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.function.Supplier;
import java.util.stream.Collectors;

import static org.graylog.security.certutil.CertConstants.CA_KEY_ALIAS;

/**
 * Periodical that drives the certificate provisioning of data nodes.
 *
 * <p>On every run it loads all data nodes that need attention and, depending on their state:
 * <ul>
 *     <li>{@code UNCONFIGURED}: once preflight has finished, either marks them as configured (automatic
 *     renewal policy) or publishes / resolves a "needs provisioning" notification (any other mode),</li>
 *     <li>{@code CSR}: signs the stored certificate signing request with the CA key and stores the
 *     resulting certificate chain,</li>
 *     <li>{@code STORED}: marks the node as connecting and starts an asynchronous connectivity check.</li>
 * </ul>
 *
 * <p>The periodical runs every two seconds on the leader node only (see {@link #getPeriodSeconds()} and
 * {@link #leaderOnly()}).
 */
@Singleton
public class GraylogCertificateProvisioningPeriodical extends Periodical {
    private static final Logger LOG = LoggerFactory.getLogger(GraylogCertificateProvisioningPeriodical.class);
    private static final int THREADPOOL_THREADS = 5;
    private static final int CONNECTION_ATTEMPTS = 40;
    private static final int WAIT_BETWEEN_CONNECTION_ATTEMPTS = 3;
    private static final Duration DELAY_BEFORE_SHOWING_EXCEPTIONS = Duration.ofMinutes(1);
    private static final String ERROR_MESSAGE_PREFIX = "Error trying to connect to data node ";
    private static final String NOT_REACHABLE_PREFIX = "Data Node not reachable: ";

    private final DataNodeProvisioningService dataNodeProvisioningService;
    private final NodeService nodeService;

    private final CaConfiguration configuration;
    private final CsrMongoStorage csrStorage;
    private final CertChainStorage certMongoStorage;
    private final CaService caService;
    private final CsrSigner csrSigner;
    private final ClusterConfigService clusterConfigService;
    private final String passwordSecret;
    private final Supplier<OkHttpClient> okHttpClient;
    private final PreflightConfigService preflightConfigService;
    private final IndexerJwtAuthTokenProvider indexerJwtAuthTokenProvider;
    private final NotificationService notificationService;
    private final ExecutorService executor;

    @Inject
    public GraylogCertificateProvisioningPeriodical(final DataNodeProvisioningService dataNodeProvisioningService,
                                                    final CsrMongoStorage csrStorage,
                                                    final CertChainMongoStorage certMongoStorage,
                                                    final CaService caService,
                                                    final Configuration configuration,
                                                    final NodeService nodeService,
                                                    final CsrSigner csrSigner,
                                                    final ClusterConfigService clusterConfigService,
                                                    final @Named("password_secret") String passwordSecret,
                                                    final IndexerJwtAuthTokenProvider indexerJwtAuthTokenProvider,
                                                    final PreflightConfigService preflightConfigService,
                                                    final NotificationService notificationService,
                                                    final CustomCAX509TrustManager trustManager) {
        this.dataNodeProvisioningService = dataNodeProvisioningService;
        this.csrStorage = csrStorage;
        this.certMongoStorage = certMongoStorage;
        this.caService = caService;
        this.passwordSecret = passwordSecret;
        this.configuration = configuration;
        this.nodeService = nodeService;
        this.csrSigner = csrSigner;
        this.clusterConfigService = clusterConfigService;
        this.preflightConfigService = preflightConfigService;
        this.indexerJwtAuthTokenProvider = indexerJwtAuthTokenProvider;
        this.notificationService = notificationService;
        // "%d" gives every pool thread a distinct name so they can be told apart in logs and thread dumps.
        this.executor = Executors.newFixedThreadPool(THREADPOOL_THREADS,
                new ThreadFactoryBuilder().setNameFormat("provisioning-connectivity-check-task-%d").build());
        // The client is built lazily on first use and then reused for all connectivity checks.
        this.okHttpClient = Suppliers.memoize(() -> buildConnectivityCheckOkHttpClient(trustManager));
    }

    /**
     * Builds the HTTP client used to check the connectivity to OpenSearch, trusting only what the given
     * trust manager accepts.
     * TODO: maybe replace it with a VersionProbe already?
     *
     * @param trustManager trust manager used for the TLS handshake
     * @return a client whose SSL socket factory is backed by {@code trustManager}
     * @throws RuntimeException if the TLS context cannot be created or initialized
     */
    private static OkHttpClient buildConnectivityCheckOkHttpClient(final X509TrustManager trustManager) {
        try {
            final var clientBuilder = new OkHttpClient.Builder();
            final var sslContext = SSLContext.getInstance("TLS");
            sslContext.init(null, new TrustManager[]{trustManager}, new SecureRandom());
            clientBuilder.sslSocketFactory(sslContext.getSocketFactory(), trustManager);
            return clientBuilder.build();
        } catch (NoSuchAlgorithmException | KeyManagementException ex) {
            LOG.error("Could not set Graylog CA trust manager: {}", ex.getMessage(), ex);
            throw new RuntimeException(ex);
        }
    }

    /**
     * @return the renewal policy stored in the cluster configuration, or {@code null} if there is none
     */
    private RenewalPolicy getRenewalPolicy() {
        return this.clusterConfigService.get(RenewalPolicy.class);
    }

    /**
     * Performs one provisioning pass over all data nodes that need attention.
     *
     * <p>Returns without doing anything if no node needs attention, if no CA keystore is available or if
     * no renewal policy has been configured yet.
     *
     * @throws RuntimeException if the CA keystore or the CA key cannot be loaded
     */
    @Override
    public void doRun() {
        LOG.debug("checking if there are configuration steps to take care of");

        try {
            // only load nodes that are in a state that need sth done
            final var nodes = dataNodeProvisioningService.findAllNodesThatNeedAttention();
            if (nodes.isEmpty()) {
                return;
            }

            // A configured CA is protected by its own password, otherwise the password secret is used.
            final var password = configuration.configuredCaExists()
                    ? configuration.getCaPassword().toCharArray()
                    : passwordSecret.toCharArray();
            final Optional<KeyStore> optKey = caService.loadKeyStore();
            if (optKey.isEmpty()) {
                LOG.debug("No keystore available.");
                return;
            }

            final var renewalPolicy = getRenewalPolicy();
            if (renewalPolicy == null) {
                LOG.debug("No renewal policy available.");
                return;
            }

            // Nodes without a state are treated as unconfigured.
            final var nodesByState = nodes.stream().collect(Collectors.groupingBy(node -> Optional.ofNullable(node.state())
                    .orElse(DataNodeProvisioningConfig.State.UNCONFIGURED)));

            // if we're running in post-preflight and new datanodes arrive, they should configure themselves automatically
            if (PreflightConfigResult.FINISHED.equals(preflightConfigService.getPreflightConfigResult())) {
                handleUnconfiguredNodes(
                        nodesByState.getOrDefault(DataNodeProvisioningConfig.State.UNCONFIGURED, List.of()),
                        renewalPolicy);
            }

            signPendingCsrs(
                    nodesByState.getOrDefault(DataNodeProvisioningConfig.State.CSR, List.of()),
                    optKey.get(), password, renewalPolicy);

            startConnectivityChecks(nodesByState.getOrDefault(DataNodeProvisioningConfig.State.STORED, List.of()));
        } catch (KeyStoreException | NoSuchAlgorithmException | UnrecoverableKeyException |
                 KeyStoreStorageException e) {
            throw new RuntimeException(e);
        }
    }

    /**
     * Handles unconfigured nodes after preflight has finished: with an automatic renewal policy they are
     * saved as configured, otherwise a notification is published while such nodes exist and marked as
     * fixed once there are none.
     */
    private void handleUnconfiguredNodes(final List<DataNodeProvisioningConfig> unconfiguredNodes,
                                         final RenewalPolicy renewalPolicy) {
        if (RenewalPolicy.Mode.AUTOMATIC.equals(renewalPolicy.mode())) {
            unconfiguredNodes.forEach(c -> dataNodeProvisioningService.save(c.asConfigured()));
            return;
        }

        if (unconfiguredNodes.isEmpty()) {
            notificationService.fixed(Notification.Type.DATA_NODE_NEEDS_PROVISIONING);
        } else {
            final var notification = notificationService.buildNow()
                    .addType(Notification.Type.DATA_NODE_NEEDS_PROVISIONING)
                    .addSeverity(Notification.Severity.URGENT);
            notificationService.publishIfFirst(notification);
        }
    }

    /**
     * Signs the stored CSR of every node in CSR state with the CA key and stores the resulting certificate
     * chain. A failure for a single node is recorded as an error on that node and does not affect the others.
     *
     * @throws KeyStoreException         if the CA key or certificate cannot be read from the keystore
     * @throws NoSuchAlgorithmException  if the algorithm for recovering the CA key is not available
     * @throws UnrecoverableKeyException if the CA key cannot be recovered, e.g. because of a wrong password
     */
    private void signPendingCsrs(final List<DataNodeProvisioningConfig> nodesWithCSR,
                                 final KeyStore caKeystore,
                                 final char[] password,
                                 final RenewalPolicy renewalPolicy)
            throws KeyStoreException, NoSuchAlgorithmException, UnrecoverableKeyException {
        if (nodesWithCSR.isEmpty()) {
            // Avoid touching the CA private key if there is nothing to sign.
            return;
        }

        final var caPrivateKey = (PrivateKey) caKeystore.getKey(CA_KEY_ALIAS, password);
        final var caCertificate = (X509Certificate) caKeystore.getCertificate(CA_KEY_ALIAS);
        nodesWithCSR.forEach(c -> {
            try {
                final var csr = csrStorage.readCsr(c.nodeId());
                if (csr.isEmpty()) {
                    LOG.error("Node in CSR state, but no CSR present : {}", c.nodeId());
                    dataNodeProvisioningService.save(c.asError("Node in CSR state, but no CSR present"));
                } else {
                    final var cert = csrSigner.sign(caPrivateKey, caCertificate, csr.get(), renewalPolicy);
                    final List<X509Certificate> caCertificates = List.of(caCertificate);
                    certMongoStorage.writeCertChain(new CertificateChain(cert, caCertificates), c.nodeId());
                }
            } catch (Exception e) {
                LOG.error("Could not sign CSR: {}", e.getMessage(), e);
                dataNodeProvisioningService.save(c.asError(e.getMessage()));
            }
        });
    }

    /**
     * Marks every given node as connecting and submits an asynchronous connectivity check for it.
     */
    private void startConnectivityChecks(final List<DataNodeProvisioningConfig> storedNodes) {
        storedNodes.forEach(c -> {
            dataNodeProvisioningService.save(c.asConnecting());
            executor.submit(() -> {
                try {
                    checkConnectivity(c);
                } catch (RuntimeException e) {
                    // Exceptions thrown inside a submitted task are otherwise silently kept in the unused Future.
                    LOG.error("Unexpected error during connectivity check with node {}", c.nodeId(), e);
                }
            });
        });
    }

    /**
     * Repeatedly tries to reach the given data node via HTTP until it answers successfully or
     * {@link #CONNECTION_ATTEMPTS} attempts have been made, waiting {@link #WAIT_BETWEEN_CONNECTION_ATTEMPTS}
     * seconds between attempts. The node is saved as connected on success and as error otherwise.
     *
     * <p>Warnings about failed attempts are only logged after {@link #DELAY_BEFORE_SHOWING_EXCEPTIONS} has
     * passed since the first attempt.
     *
     * @param config the provisioning entry of the node to check
     */
    private void checkConnectivity(final DataNodeProvisioningConfig config) {
        final var nodeId = config.nodeId();
        LOG.info("Starting connectivity check with node {}, silencing error messages for {} seconds.", nodeId, DELAY_BEFORE_SHOWING_EXCEPTIONS.getSeconds());
        final var retryer = RetryerBuilder.<Response>newBuilder()
                .withWaitStrategy(WaitStrategies.fixedWait(WAIT_BETWEEN_CONNECTION_ATTEMPTS, TimeUnit.SECONDS))
                .withStopStrategy(StopStrategies.stopAfterAttempt(CONNECTION_ATTEMPTS))
                .withRetryListener(new RetryListener() {
                    @Override
                    public <V> void onRetry(Attempt<V> attempt) {
                        // The listener is invoked for every attempt, including the one that finally succeeds.
                        if (attempt.hasResult()) {
                            final Object result = attempt.getResult();
                            if (result instanceof Response) {
                                final Response response = (Response) result;
                                if (response.isSuccessful()) {
                                    // Nothing will be retried; the caller closes this response.
                                    return;
                                }
                                // The response is going to be rejected; close it to release its connection.
                                // Status code and message stay readable after closing.
                                response.close();
                            }
                        }
                        if (attempt.getDelaySinceFirstAttempt() > DELAY_BEFORE_SHOWING_EXCEPTIONS.toMillis()) {
                            if (attempt.hasException()) {
                                var e = attempt.getExceptionCause();
                                LOG.warn(ERROR_MESSAGE_PREFIX + " {}: {}, retrying (attempt #{})", nodeId, e.getMessage(), attempt.getAttemptNumber());
                            } else {
                                LOG.warn(ERROR_MESSAGE_PREFIX + " {}, retrying (attempt #{})", nodeId, attempt.getAttemptNumber());
                            }
                        }
                    }
                })
                .retryIfResult(response -> !response.isSuccessful())
                .retryIfException()
                .build();
        final Callable<Response> callable = () -> {
            // The node is looked up on every attempt, so a node that registers late is still found.
            final var node = nodeService.byNodeId(nodeId);
            final var request = new Request.Builder().url(node.getTransportAddress()).build();
            final var client = okHttpClient.get().newBuilder()
                    .authenticator((route, response) -> response.request()
                            .newBuilder()
                            .header("Authorization", indexerJwtAuthTokenProvider.get())
                            .build())
                    .build();
            return client.newCall(request).execute();
        };
        try (Response response = retryer.call(callable)) {
            if (response.isSuccessful()) {
                dataNodeProvisioningService.save(config.asConnected());
                LOG.info("Connectivity check successful with node {}", nodeId);
            } else {
                dataNodeProvisioningService.save(config.asError(NOT_REACHABLE_PREFIX + response.message()));
            }
        } catch (ExecutionException e) {
            LOG.error(ERROR_MESSAGE_PREFIX + " {}: {}", nodeId, e.getMessage());
            dataNodeProvisioningService.save(config.asError(e.getMessage()));
        } catch (RetryException e) {
            LOG.error(ERROR_MESSAGE_PREFIX + " {}: {}", nodeId, e.getMessage());
            dataNodeProvisioningService.save(config.asError(describeFailure(e)));
        }
    }

    /**
     * Derives an error message from the last failed attempt of an exhausted retry.
     *
     * <p>An attempt holds either an exception or a result; asking a result attempt for its exception cause
     * throws, so both cases have to be told apart explicitly.
     *
     * @param e the exception signalling that all attempts failed
     * @return the message of the causing exception, a "not reachable" message built from the last HTTP
     * response, or the message of {@code e} if neither is available
     */
    private static String describeFailure(final RetryException e) {
        final Attempt<?> lastAttempt = e.getLastFailedAttempt();
        if (lastAttempt.hasException()) {
            final Throwable cause = lastAttempt.getExceptionCause();
            if (cause != null) {
                final String message = cause.getMessage();
                return message != null ? message : cause.toString();
            }
        } else if (lastAttempt.hasResult() && lastAttempt.getResult() instanceof Response) {
            return NOT_REACHABLE_PREFIX + ((Response) lastAttempt.getResult()).message();
        }
        return e.getMessage();
    }

    @NotNull
    @Override
    protected Logger getLogger() {
        return LOG;
    }

    @Override
    public boolean runsForever() {
        return false;
    }

    @Override
    public boolean stopOnGracefulShutdown() {
        return true;
    }

    @Override
    public boolean leaderOnly() {
        return true;
    }

    @Override
    public boolean startOnThisNode() {
        return true;
    }

    @Override
    public boolean isDaemon() {
        return true;
    }

    @Override
    public int getInitialDelaySeconds() {
        return 2;
    }

    @Override
    public int getPeriodSeconds() {
        return 2;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy