All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.pulsar.client.impl.AutoClusterFailover Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.pulsar.client.impl;

import static org.apache.pulsar.common.util.Runnables.catchingAndLoggingThrowables;
import org.apache.pulsar.shade.com.google.common.base.Strings;
import java.io.IOException;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import lombok.Data;
import lombok.NonNull;
import lombok.extern.slf4j.Slf4j;
import org.apache.pulsar.client.api.Authentication;
import org.apache.pulsar.client.api.AutoClusterFailoverBuilder;
import org.apache.pulsar.client.api.PulsarClient;
import org.apache.pulsar.client.api.ServiceUrlProvider;
import org.apache.pulsar.client.impl.conf.ClientConfigurationData;
import org.apache.pulsar.client.util.ExecutorProvider;

@Slf4j
@Data
public class AutoClusterFailover implements ServiceUrlProvider {
    private PulsarClientImpl pulsarClient;
    private volatile String currentPulsarServiceUrl;
    private final String primary;
    private final List secondary;
    private final AutoClusterFailoverBuilder.FailoverPolicy failoverPolicy;
    private Authentication primaryAuthentication;
    private final Map secondaryAuthentications;
    private String primaryTlsTrustCertsFilePath;
    private final Map secondaryTlsTrustCertsFilePaths;
    private String primaryTlsTrustStorePath;
    private Map secondaryTlsTrustStorePaths;
    private String primaryTlsTrustStorePassword;
    private Map secondaryTlsTrustStorePasswords;
    private final long failoverDelayNs;
    private final long switchBackDelayNs;
    private final ScheduledExecutorService executor;
    private volatile long recoverTimestamp;
    private volatile long failedTimestamp;
    private final long intervalMs;
    private static final int TIMEOUT = 30_000;
    private final PulsarServiceNameResolver resolver;

    private AutoClusterFailover(AutoClusterFailoverBuilderImpl builder) {
        this.primary = builder.primary;
        this.secondary = builder.secondary;
        this.failoverPolicy = builder.failoverPolicy;
        this.secondaryAuthentications = builder.secondaryAuthentications;
        this.secondaryTlsTrustCertsFilePaths = builder.secondaryTlsTrustCertsFilePaths;
        this.secondaryTlsTrustStorePaths = builder.secondaryTlsTrustStorePaths;
        this.secondaryTlsTrustStorePasswords = builder.secondaryTlsTrustStorePasswords;
        this.failoverDelayNs = builder.failoverDelayNs;
        this.switchBackDelayNs = builder.switchBackDelayNs;
        this.currentPulsarServiceUrl = builder.primary;
        this.recoverTimestamp = -1;
        this.failedTimestamp = -1;
        this.intervalMs = builder.checkIntervalMs;
        this.resolver = new PulsarServiceNameResolver();
        this.executor = Executors.newSingleThreadScheduledExecutor(
                new ExecutorProvider.ExtendedThreadFactory("pulsar-service-provider"));
    }

    @Override
    public void initialize(PulsarClient client) {
        this.pulsarClient = (PulsarClientImpl) client;
        ClientConfigurationData config = pulsarClient.getConfiguration();
        if (config != null) {
            this.primaryAuthentication = config.getAuthentication();
            this.primaryTlsTrustCertsFilePath = config.getTlsTrustCertsFilePath();
            this.primaryTlsTrustStorePath = config.getTlsTrustStorePath();
            this.primaryTlsTrustStorePassword = config.getTlsTrustStorePassword();
        }

        // start to probe primary cluster active or not
        this.executor.scheduleAtFixedRate(catchingAndLoggingThrowables(() -> {
            if (currentPulsarServiceUrl.equals(primary)) {
                // current service url is primary, probe whether it is down
                probeAndUpdateServiceUrl(secondary, secondaryAuthentications, secondaryTlsTrustCertsFilePaths,
                        secondaryTlsTrustStorePaths, secondaryTlsTrustStorePasswords);
            } else {
                // current service url is secondary, probe whether it is down
                probeAndUpdateServiceUrl(primary, primaryAuthentication, primaryTlsTrustCertsFilePath,
                        primaryTlsTrustStorePath, primaryTlsTrustStorePassword);
                // secondary cluster is up, check whether need to switch back to primary or not
                if (!currentPulsarServiceUrl.equals(primary)) {
                    probeAndCheckSwitchBack(primary, primaryAuthentication, primaryTlsTrustCertsFilePath,
                            primaryTlsTrustStorePath, primaryTlsTrustStorePassword);
                }
            }
        }), intervalMs, intervalMs, TimeUnit.MILLISECONDS);

    }

    @Override
    public String getServiceUrl() {
        return this.currentPulsarServiceUrl;
    }

    @Override
    public void close() {
        this.executor.shutdown();
    }

    boolean probeAvailable(String url) {
        try {
            resolver.updateServiceUrl(url);
            InetSocketAddress endpoint = resolver.resolveHost();
            Socket socket = new Socket();
            socket.connect(new InetSocketAddress(endpoint.getHostName(), endpoint.getPort()), TIMEOUT);
            socket.close();
            return true;
        } catch (Exception e) {
            log.warn("Failed to probe available, url: {}", url, e);
            return false;
        }
    }

    private static long nanosToMillis(long nanos) {
        return Math.max(0L, Math.round(nanos / 1_000_000.0d));
    }

    private void updateServiceUrl(String target,
                                  Authentication authentication,
                                  String tlsTrustCertsFilePath,
                                  String tlsTrustStorePath,
                                  String tlsTrustStorePassword) {
        try {
            if (!Strings.isNullOrEmpty(tlsTrustCertsFilePath)) {
                pulsarClient.updateTlsTrustCertsFilePath(tlsTrustCertsFilePath);
            }

            if (authentication != null) {
                pulsarClient.updateAuthentication(authentication);
            }

            if (!Strings.isNullOrEmpty(tlsTrustStorePath)) {
                pulsarClient.updateTlsTrustStorePathAndPassword(tlsTrustStorePath, tlsTrustStorePassword);
            }

            pulsarClient.updateServiceUrl(target);
            pulsarClient.reloadLookUp();
            currentPulsarServiceUrl = target;
        } catch (IOException e) {
            log.error("Current Pulsar service is {}, "
                    + "failed to switch back to {} ", currentPulsarServiceUrl, target, e);
        }
    }

    private void probeAndUpdateServiceUrl(List targetServiceUrls,
                                          Map authentications,
                                          Map tlsTrustCertsFilePaths,
                                          Map tlsTrustStorePaths,
                                          Map tlsTrustStorePasswords) {
        if (probeAvailable(currentPulsarServiceUrl)) {
            failedTimestamp = -1;
            return;
        }

        long currentTimestamp = System.nanoTime();
        if (failedTimestamp == -1) {
            failedTimestamp = currentTimestamp;
        } else if (currentTimestamp - failedTimestamp >= failoverDelayNs) {
            for (String targetServiceUrl : targetServiceUrls) {
                if (probeAvailable(targetServiceUrl)) {
                    log.info("Current Pulsar service is {}, it has been down for {} ms, "
                                    + "switch to the service {}. The current service down at {}",
                            currentPulsarServiceUrl, nanosToMillis(currentTimestamp - failedTimestamp),
                            targetServiceUrl, failedTimestamp);
                    updateServiceUrl(targetServiceUrl,
                            authentications != null ? authentications.get(targetServiceUrl) : null,
                            tlsTrustCertsFilePaths != null ? tlsTrustCertsFilePaths.get(targetServiceUrl) : null,
                            tlsTrustStorePaths != null ? tlsTrustStorePaths.get(targetServiceUrl) : null,
                            tlsTrustStorePasswords != null ? tlsTrustStorePasswords.get(targetServiceUrl) : null);
                    failedTimestamp = -1;
                    break;
                } else {
                    log.warn("Current Pulsar service is {}, it has been down for {} ms. "
                                    + "Failed to switch to service {}, "
                                    + "because it is not available, continue to probe next pulsar service.",
                        currentPulsarServiceUrl, nanosToMillis(currentTimestamp - failedTimestamp), targetServiceUrl);
                }
            }
        }
    }

    private void probeAndUpdateServiceUrl(String targetServiceUrl,
                                          Authentication authentication,
                                          String tlsTrustCertsFilePath,
                                          String tlsTrustStorePath,
                                          String tlsTrustStorePassword) {
        if (probeAvailable(currentPulsarServiceUrl)) {
            failedTimestamp = -1;
            return;
        }

        long currentTimestamp = System.nanoTime();
        if (failedTimestamp == -1) {
            failedTimestamp = currentTimestamp;
        } else if (currentTimestamp - failedTimestamp >= failoverDelayNs) {
            if (probeAvailable(targetServiceUrl)) {
                log.info("Current Pulsar service is {}, it has been down for {} ms, "
                                + "switch to the service {}. The current service down at {}",
                        currentPulsarServiceUrl, nanosToMillis(currentTimestamp - failedTimestamp),
                        targetServiceUrl, failedTimestamp);
                updateServiceUrl(targetServiceUrl, authentication, tlsTrustCertsFilePath,
                        tlsTrustStorePath, tlsTrustStorePassword);
                failedTimestamp = -1;
            } else {
                log.error("Current Pulsar service is {}, it has been down for {} ms. "
                                + "Failed to switch to service {}, "
                                + "because it is not available",
                        currentPulsarServiceUrl, nanosToMillis(currentTimestamp - failedTimestamp),
                        targetServiceUrl);
            }
        }
    }

    private void probeAndCheckSwitchBack(String target,
                                         Authentication authentication,
                                         String tlsTrustCertsFilePath,
                                         String tlsTrustStorePath,
                                         String tlsTrustStorePassword) {
        long currentTimestamp = System.nanoTime();
        if (!probeAvailable(target)) {
            recoverTimestamp = -1;
            return;
        }

        if (recoverTimestamp == -1) {
            recoverTimestamp = currentTimestamp;
        } else if (currentTimestamp - recoverTimestamp >= switchBackDelayNs) {
            log.info("Current Pulsar service is secondary: {}, "
                            + "the primary service: {} has been recover for {} ms, "
                            + "switch back to the primary service",
                    currentPulsarServiceUrl, target, nanosToMillis(currentTimestamp - recoverTimestamp));
            updateServiceUrl(target, authentication, tlsTrustCertsFilePath, tlsTrustStorePath, tlsTrustStorePassword);
            recoverTimestamp = -1;
        }
    }

    public static class AutoClusterFailoverBuilderImpl implements AutoClusterFailoverBuilder {
        private String primary;
        private List secondary;
        private Map secondaryAuthentications = null;
        private Map secondaryTlsTrustCertsFilePaths = null;
        private Map secondaryTlsTrustStorePaths = null;
        private Map secondaryTlsTrustStorePasswords = null;
        private FailoverPolicy failoverPolicy = FailoverPolicy.ORDER;
        private long failoverDelayNs;
        private long switchBackDelayNs;
        private long checkIntervalMs = 30_000;

        @Override
        public AutoClusterFailoverBuilder primary(@NonNull String primary) {
            this.primary = primary;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder secondary(@NonNull List secondary) {
            this.secondary = secondary;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder failoverPolicy(@NonNull FailoverPolicy policy) {
            this.failoverPolicy = policy;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder secondaryAuthentication(Map authentication) {
            this.secondaryAuthentications = authentication;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder secondaryTlsTrustCertsFilePath(Map tlsTrustCertsFilePath) {
            this.secondaryTlsTrustCertsFilePaths = tlsTrustCertsFilePath;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder secondaryTlsTrustStorePath(Map tlsTrustStorePath) {
            this.secondaryTlsTrustStorePaths = tlsTrustStorePath;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder secondaryTlsTrustStorePassword(Map tlsTrustStorePassword) {
            this.secondaryTlsTrustStorePasswords = tlsTrustStorePassword;
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder failoverDelay(long failoverDelay, TimeUnit timeUnit) {
            this.failoverDelayNs = timeUnit.toNanos(failoverDelay);
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder switchBackDelay(long switchBackDelay, TimeUnit timeUnit) {
            this.switchBackDelayNs = timeUnit.toNanos(switchBackDelay);
            return this;
        }

        @Override
        public AutoClusterFailoverBuilder checkInterval(long interval, TimeUnit timeUnit) {
            this.checkIntervalMs = timeUnit.toMillis(interval);
            return this;
        }

        @Override
        public ServiceUrlProvider build() {
            Objects.requireNonNull(primary, "primary service url shouldn't be null");
            checkArgument(secondary != null && secondary.size() > 0,
                    "secondary cluster service url shouldn't be null and should set at least one");
            checkArgument(failoverDelayNs > 0, "failoverDelay should > 0");
            checkArgument(switchBackDelayNs > 0, "switchBackDelay should > 0");
            checkArgument(checkIntervalMs > 0, "checkInterval should > 0");
            int secondarySize = secondary.size();

            checkArgument(secondaryAuthentications == null
                    || secondaryAuthentications.size() == secondarySize,
                    "secondaryAuthentication should be null or size equal with secondary url size");
            checkArgument(secondaryTlsTrustCertsFilePaths == null
                            || secondaryTlsTrustCertsFilePaths.size() == secondarySize,
                    "secondaryTlsTrustCertsFilePath should be null or size equal with secondary url size");
            checkArgument(secondaryTlsTrustStorePaths == null
                            || secondaryTlsTrustStorePaths.size() == secondarySize,
                    "secondaryTlsTrustStorePath should be null or size equal with secondary url size");
            checkArgument(secondaryTlsTrustStorePasswords == null
                            || secondaryTlsTrustStorePasswords.size() == secondarySize,
                    "secondaryTlsTrustStorePassword should be null or size equal with secondary url size");

            return new AutoClusterFailover(this);
        }

        public static void checkArgument(boolean expression, @NonNull Object errorMessage) {
            if (!expression) {
                throw new IllegalArgumentException(String.valueOf(errorMessage));
            }
        }
    }

    public static AutoClusterFailoverBuilder builder() {
        return new AutoClusterFailoverBuilderImpl();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy