// Source listing of org.apache.qpid.jms.provider.failover.FailoverProvider,
// retrieved from a Maven repository artifact browser (which noted that a newer
// version, 2.6.1, exists). The lines below are the original source file.
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.qpid.jms.provider.failover;

import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.ConcurrentModificationException;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.ScheduledThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantReadWriteLock;

import org.apache.qpid.jms.message.JmsInboundMessageDispatch;
import org.apache.qpid.jms.message.JmsMessageFactory;
import org.apache.qpid.jms.message.JmsOutboundMessageDispatch;
import org.apache.qpid.jms.meta.JmsConnectionInfo;
import org.apache.qpid.jms.meta.JmsConsumerId;
import org.apache.qpid.jms.meta.JmsResource;
import org.apache.qpid.jms.meta.JmsSessionId;
import org.apache.qpid.jms.meta.JmsTransactionInfo;
import org.apache.qpid.jms.provider.AsyncResult;
import org.apache.qpid.jms.provider.DefaultProviderListener;
import org.apache.qpid.jms.provider.Provider;
import org.apache.qpid.jms.provider.ProviderConstants.ACK_TYPE;
import org.apache.qpid.jms.provider.ProviderException;
import org.apache.qpid.jms.provider.ProviderFactory;
import org.apache.qpid.jms.provider.ProviderFuture;
import org.apache.qpid.jms.provider.ProviderFutureFactory;
import org.apache.qpid.jms.provider.ProviderListener;
import org.apache.qpid.jms.provider.ProviderSynchronization;
import org.apache.qpid.jms.provider.WrappedAsyncResult;
import org.apache.qpid.jms.provider.exceptions.ProviderClosedException;
import org.apache.qpid.jms.provider.exceptions.ProviderConnectionRedirectedException;
import org.apache.qpid.jms.provider.exceptions.ProviderConnectionSecurityException;
import org.apache.qpid.jms.provider.exceptions.ProviderConnectionSecuritySaslException;
import org.apache.qpid.jms.provider.exceptions.ProviderExceptionSupport;
import org.apache.qpid.jms.provider.exceptions.ProviderFailedException;
import org.apache.qpid.jms.provider.exceptions.ProviderIOException;
import org.apache.qpid.jms.provider.exceptions.ProviderOperationTimedOutException;
import org.apache.qpid.jms.provider.exceptions.ProviderSendTimedOutException;
import org.apache.qpid.jms.provider.exceptions.ProviderTransactionRolledBackException;
import org.apache.qpid.jms.util.QpidJMSThreadFactory;
import org.apache.qpid.jms.util.ThreadPoolUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A Provider Facade that provides services for detection dropped Provider connections
 * and attempting to reconnect to a different remote peer.  Upon establishment of a new
 * connection the FailoverProvider will initiate state recovery of the active JMS
 * framework resources.
 */
public class FailoverProvider extends DefaultProviderListener implements Provider {

    private static final Logger LOG = LoggerFactory.getLogger(FailoverProvider.class);

    // Action applied to the failover URI pool when the remote peer supplies an
    // updated server list in its AMQP Open frame (see processAlternates).
    private static enum FailoverServerListAction {
        ADD, REPLACE, IGNORE
    }

    public static final int UNLIMITED = -1;

    private static final int UNDEFINED = -1;
    private static final int DISABLED = 0;
    private static final int MINIMUM_TIMEOUT = 1000;

    public static final int DEFAULT_MAX_RECONNECT_ATTEMPTS = UNLIMITED;
    public static final int DEFAULT_STARTUP_MAX_RECONNECT_ATTEMPTS = UNDEFINED;
    public static final long DEFAULT_INITIAL_RECONNECT_DELAY = 0;
    public static final long DEFAULT_RECONNECT_DELAY = 10;
    public static final long DEFAULT_MAX_RECONNECT_DELAY = TimeUnit.SECONDS.toMillis(30);
    public static final boolean DEFAULT_USE_RECONNECT_BACKOFF = true;
    public static final double DEFAULT_RECONNECT_BACKOFF_MULTIPLIER = 2.0d;
    public static final int DEFAULT_WARN_AFTER_RECONNECT_ATTEMPTS = 10;

    private ProviderListener listener;
    private Provider provider;
    private final FailoverUriPool uris;
    // Periodic sweeper that times out pending requests while offline; null when not scheduled.
    private ScheduledFuture<?> requestTimeoutTask;

    // Read side is held during request processing and close; the write side is held
    // while a newly connected provider is being installed (initializeNewConnection).
    private final ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
    // Single thread that serializes all connection attempts, failure handling and maintenance.
    private final ScheduledThreadPoolExecutor serializer;
    private final AtomicBoolean closed = new AtomicBoolean();
    private final AtomicBoolean failed = new AtomicBoolean();
    private final AtomicBoolean closingConnection = new AtomicBoolean(false);
    private final AtomicLong requestId = new AtomicLong();
    // In-flight requests awaiting completion or replay after reconnect; insertion order is
    // preserved. Keys are presumably the ids allocated from requestId — confirm against
    // FailoverRequest. (Generic parameters restored; they were stripped by HTML extraction.)
    private final Map<Long, FailoverRequest> requests = Collections.synchronizedMap(new LinkedHashMap<Long, FailoverRequest>());
    // Listener installed on a failed provider so late events from it are discarded.
    private final DefaultProviderListener closedListener = new DefaultProviderListener();
    // Message factory taken from the currently connected provider after recovery.
    private final AtomicReference<JmsMessageFactory> messageFactory = new AtomicReference<JmsMessageFactory>();
    private final ProviderFutureFactory futureFactory;

    // Current state of connection / reconnection
    private final ReconnectControls reconnectControl = new ReconnectControls();
    private ProviderException failureCause;
    private volatile URI connectedURI;
    private volatile JmsConnectionInfo connectionInfo;

    // Timeout values configured via JmsConnectionInfo
    private long closeTimeout = JmsConnectionInfo.DEFAULT_CLOSE_TIMEOUT;
    private long sendTimeout = JmsConnectionInfo.DEFAULT_SEND_TIMEOUT;
    private long requestTimeout = JmsConnectionInfo.DEFAULT_REQUEST_TIMEOUT;

    // Configuration values.
    private long initialReconnectDelay = DEFAULT_INITIAL_RECONNECT_DELAY;
    private long reconnectDelay = DEFAULT_RECONNECT_DELAY;
    private long maxReconnectDelay = DEFAULT_MAX_RECONNECT_DELAY;
    private boolean useReconnectBackOff = DEFAULT_USE_RECONNECT_BACKOFF;
    private double reconnectBackOffMultiplier = DEFAULT_RECONNECT_BACKOFF_MULTIPLIER;
    private int maxReconnectAttempts = DEFAULT_MAX_RECONNECT_ATTEMPTS;
    private int startupMaxReconnectAttempts = DEFAULT_STARTUP_MAX_RECONNECT_ATTEMPTS;
    private int warnAfterReconnectAttempts = DEFAULT_WARN_AFTER_RECONNECT_ATTEMPTS;

    private FailoverServerListAction amqpOpenServerListAction = FailoverServerListAction.REPLACE;

    /**
     * Create a new FailoverProvider that manages connections to the given remote peers.
     * (Generic parameter types restored; they were stripped by HTML extraction.)
     *
     * @param uris
     *        the remote peer URIs used for connect and reconnect attempts.
     * @param nestedOptions
     *        transport options applied to each URI taken from the pool.
     * @param futureFactory
     *        factory used to create the futures that blocking operations wait on.
     */
    public FailoverProvider(List<URI> uris, Map<String, String> nestedOptions, ProviderFutureFactory futureFactory) {
        this.uris = new FailoverUriPool(uris, nestedOptions);
        this.futureFactory = futureFactory;

        // All Connection attempts happen in this executor thread as well as handling
        // failed connection events and other maintenance work.
        serializer = new ScheduledThreadPoolExecutor(1, new QpidJMSThreadFactory("FailoverProvider: async work thread", true));
        serializer.setExecuteExistingDelayedTasksAfterShutdownPolicy(false);
        serializer.setContinueExistingPeriodicTasksAfterShutdownPolicy(false);
    }

    /**
     * Stores the connection configuration and kicks off the initial connection attempt
     * on the serialization executor; returns without waiting for the attempt to finish.
     *
     * @param connectionInfo the configuration used for every connection attempt.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void connect(JmsConnectionInfo connectionInfo) throws ProviderException {
        checkClosed();
        this.connectionInfo = connectionInfo;
        LOG.debug("Initiating initial connection attempt task");
        triggerReconnectionAttempt();
    }

    /**
     * Validates that this provider is ready for use: it must not be closed and a
     * ProviderListener must already be registered to receive events.
     *
     * @throws ProviderException if this provider has already been closed.
     * @throws IllegalStateException if no ProviderListener has been registered.
     */
    @Override
    public void start() throws ProviderException, IllegalStateException {
        checkClosed();

        if (listener == null) {
            throw new IllegalStateException("No ProviderListener registered.");
        }
    }

    /**
     * Close this provider: fail any still-pending requests, cancel the request timeout
     * sweeper, close the active provider and finally shut down the serialization
     * executor. Only the first call performs the shutdown; later calls are no-ops.
     */
    @Override
    public void close() {
        if (closed.compareAndSet(false, true)) {
            final ProviderFuture request = futureFactory.createFuture();

            serializer.execute(() -> {
                // At this point the closed flag is set and any threads running through this
                // provider will see it as being closed and react accordingly.  Any events
                // that fire from the active provider that is being closed will read a closed
                // state and not trigger new events into the executor. Any event that might be
                // sitting behind this one would also read the closed state and perform no
                // further work while we do a graceful shutdown of the executor service.
                lock.readLock().lock();
                try {
                    ProviderException error = failureCause != null ? failureCause : new ProviderClosedException("Connection closed");
                    final List<FailoverRequest> pending = new ArrayList<FailoverRequest>(requests.values());
                    for (FailoverRequest pendingRequest : pending) {
                        if (!pendingRequest.isComplete()) {
                            pendingRequest.onFailure(error);
                        }
                    }

                    if (requestTimeoutTask != null) {
                        requestTimeoutTask.cancel(false);
                    }

                    if (provider != null) {
                        provider.close();
                    }
                } catch (Exception e) {
                    // Fixed: the original format string had no '{}' placeholder so the
                    // failure message was silently dropped by SLF4J.
                    LOG.warn("Error caught while closing Provider: {}", e.getMessage());
                } finally {
                    lock.readLock().unlock();
                    request.onSuccess();
                }
            });

            try {
                // A negative close timeout means wait indefinitely for shutdown to finish.
                if (getCloseTimeout() < 0) {
                    request.sync();
                } else {
                    request.sync(getCloseTimeout(), TimeUnit.MILLISECONDS);
                }
            } catch (ProviderException e) {
                LOG.warn("Error caught while closing Provider: {}", e.getMessage() != null ? e.getMessage() : "");
            } finally {
                ThreadPoolUtils.shutdownGraceful(serializer);
            }
        }
    }

    /**
     * Creates the given resource on the active provider, wrapped in a failover request
     * so the operation can be replayed after a reconnect. A connection create also
     * captures the configured close, send and request timeouts for use by this provider.
     * A transaction create that completes while offline is tagged as in-doubt so that
     * recovery on reconnect can sort out its outcome.
     *
     * @param resource the resource to create on the remote peer.
     * @param request the request to signal when the create completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void create(final JmsResource resource, AsyncResult request) throws ProviderException {
        checkClosed();
        final FailoverRequest pending;
        if (resource instanceof JmsConnectionInfo) {
            pending = new CreateConnectionRequest(request) {
                @Override
                public void doTask(Provider provider) throws ProviderException {
                    JmsConnectionInfo connectionInfo = (JmsConnectionInfo) resource;

                    // Collect the timeouts we will handle in this provider.
                    closeTimeout = connectionInfo.getCloseTimeout();
                    sendTimeout = connectionInfo.getSendTimeout();
                    requestTimeout = connectionInfo.getRequestTimeout();

                    provider.create(resource, this);
                }

                @Override
                public String toString() {
                    return "create -> " + resource;
                }
            };
        } else {
            pending = new FailoverRequest(request, requestTimeout) {
                @Override
                public void doTask(Provider provider) throws ProviderException {
                    provider.create(resource, this);
                }

                @Override
                public boolean succeedsWhenOffline() {
                    if (resource instanceof JmsTransactionInfo) {
                        // Tag as in-doubt and let recovery on reconnect sort it out.
                        JmsTransactionInfo transactionInfo = (JmsTransactionInfo) resource;
                        transactionInfo.setInDoubt(true);

                        return true;
                    } else {
                        return false;
                    }
                }

                @Override
                public String toString() {
                    return "create -> " + resource;
                }
            };
        }

        pending.run();
    }

    /**
     * Starts the given resource on the active provider. The call is wrapped in a
     * failover request so it is tracked and replayed if the connection drops.
     *
     * @param resource the resource to start on the remote peer.
     * @param request the request to signal when the start completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void start(final JmsResource resource, final AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.start(resource, this);
            }

            @Override
            public String toString() {
                return "start -> " + resource;
            }
        }.run();
    }

    /**
     * Stops the given resource on the active provider. The call is wrapped in a
     * failover request so it is tracked and replayed if the connection drops.
     *
     * @param resource the resource to stop on the remote peer.
     * @param request the request to signal when the stop completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void stop(final JmsResource resource, final AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.stop(resource, this);
            }

            @Override
            public String toString() {
                return "stop -> " + resource;
            }
        }.run();
    }

    /**
     * Destroys the given resource on the active provider. A destroy is allowed to
     * succeed while offline since the resource simply won't be recreated on reconnect.
     *
     * @param resourceId the resource to destroy on the remote peer.
     * @param request the request to signal when the destroy completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void destroy(final JmsResource resourceId, AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                // Destroying the connection marks this provider as intentionally closing
                // so subsequent failure events are not treated as reconnect triggers.
                if (resourceId instanceof JmsConnectionInfo) {
                    closingConnection.set(true);
                }
                activeProvider.destroy(resourceId, this);
            }

            @Override
            public boolean succeedsWhenOffline() {
                // Allow this to succeed, resource won't get recreated on reconnect.
                return true;
            }

            @Override
            public String toString() {
                return "destroy -> " + resourceId;
            }
        }.run();
    }

    /**
     * Sends the given message envelope via the active provider. The operation uses the
     * configured send timeout (not the request timeout) and reports a send-specific
     * timeout exception that carries the undelivered message.
     *
     * @param envelope the outbound message dispatch to send.
     * @param request the request to signal when the send completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void send(final JmsOutboundMessageDispatch envelope, AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, sendTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.send(envelope, this);
            }

            @Override
            public ProviderException createTimedOutException() {
                return new ProviderSendTimedOutException("Timed out waiting on " + this, envelope.getMessage());
            }

            @Override
            public String toString() {
                return "send -> " + envelope;
            }
        }.run();
    }

    /**
     * Acknowledges all messages delivered in the given session. Succeeds silently when
     * offline because the acknowledgement would be stale after a reconnect anyway.
     *
     * @param sessionId the session whose delivered messages are acknowledged.
     * @param ackType the type of acknowledgement to perform.
     * @param request the request to signal when the operation completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void acknowledge(final JmsSessionId sessionId, final ACK_TYPE ackType, AsyncResult request) throws ProviderException {
        checkClosed();

        final FailoverRequest ackRequest = new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.acknowledge(sessionId, ackType, this);
            }

            @Override
            public boolean succeedsWhenOffline() {
                // Allow this to succeed, acknowledgement would be stale after reconnect.
                return true;
            }

            @Override
            public String toString() {
                return "session acknowledge -> " + sessionId;
            }
        };

        ackRequest.run();
    }

    /**
     * Acknowledges a single delivered message. Succeeds silently when offline because
     * the acknowledgement would be stale after a reconnect anyway.
     *
     * @param envelope the inbound message dispatch being acknowledged.
     * @param ackType the type of acknowledgement to perform.
     * @param request the request to signal when the operation completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void acknowledge(final JmsInboundMessageDispatch envelope, final ACK_TYPE ackType, AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.acknowledge(envelope, ackType, this);
            }

            @Override
            public boolean succeedsWhenOffline() {
                // Allow this to succeed, acknowledgement would be stale after reconnect.
                return true;
            }

            @Override
            public String toString() {
                return "message acknowledge -> " + envelope + " ackType: " + ackType;
            }
        }.run();
    }

    /**
     * Commits the given transaction on the active provider. Unlike most operations a
     * commit is failed rather than silently retried when the connection is offline,
     * since the outcome of an interrupted commit cannot be known; the offline failure
     * is surfaced to the caller as a transaction rolled back exception.
     *
     * @param transactionInfo the transaction being committed.
     * @param nextTransactionInfo the next transaction to begin, if any.
     * @param request the request to signal when the commit completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void commit(final JmsTransactionInfo transactionInfo, JmsTransactionInfo nextTransactionInfo, AsyncResult request) throws ProviderException {
        checkClosed();
        final FailoverRequest pending = new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider provider) throws ProviderException {
                provider.commit(transactionInfo, nextTransactionInfo, this);
            }

            @Override
            public boolean failureWhenOffline() {
                // Commit must not be replayed after failover; report it as failed.
                return true;
            }

            @Override
            public String toString() {
                return "TX commit -> " + transactionInfo.getId();
            }

            @Override
            protected ProviderException createOfflineFailureException(ProviderException error) {
                // Present the offline failure as a rollback so the client treats the TX as lost.
                return new ProviderTransactionRolledBackException("Commit failed, connection offline: " + error.getMessage(), error);
            }
        };

        pending.run();
    }

    /**
     * Rolls back the given transaction on the active provider. Treated as successful
     * when offline (see succeedsWhenOffline).
     *
     * @param transactionInfo the transaction being rolled back.
     * @param nextTransactionInfo the next transaction to begin, if any.
     * @param request the request to signal when the rollback completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void rollback(final JmsTransactionInfo transactionInfo, JmsTransactionInfo nextTransactionInfo, AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.rollback(transactionInfo, nextTransactionInfo, this);
            }

            @Override
            public boolean succeedsWhenOffline() {
                return true;
            }

            @Override
            public String toString() {
                return "TX rollback -> " + transactionInfo.getId();
            }
        }.run();
    }

    /**
     * Requests redelivery of unacknowledged messages for the given session. Treated
     * as successful when offline (see succeedsWhenOffline).
     *
     * @param sessionId the session whose deliveries should be recovered.
     * @param request the request to signal when the recover completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void recover(final JmsSessionId sessionId, final AsyncResult request) throws ProviderException {
        checkClosed();

        final FailoverRequest recoverRequest = new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.recover(sessionId, this);
            }

            @Override
            public boolean succeedsWhenOffline() {
                return true;
            }

            @Override
            public String toString() {
                return "recover -> " + sessionId;
            }
        };

        recoverRequest.run();
    }

    /**
     * Removes the named durable subscription on the remote peer, wrapped in a failover
     * request so it is tracked and replayed if the connection drops.
     *
     * @param subscription the name of the subscription to remove.
     * @param request the request to signal when the unsubscribe completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void unsubscribe(final String subscription, AsyncResult request) throws ProviderException {
        checkClosed();

        new FailoverRequest(request, requestTimeout) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.unsubscribe(subscription, this);
            }

            @Override
            public String toString() {
                return "unsubscribe -> " + subscription;
            }
        }.run();
    }

    /**
     * Issues a message pull for the given consumer. Note the single-argument
     * FailoverRequest constructor is used here, so no send/request timeout value is
     * supplied for this operation.
     *
     * @param consumerId the consumer that is pulling a message.
     * @param timeout the pull timeout passed through to the provider.
     * @param request the request to signal when the pull completes or fails.
     *
     * @throws ProviderException if this provider has already been closed.
     */
    @Override
    public void pull(final JmsConsumerId consumerId, final long timeout, final AsyncResult request) throws ProviderException {
        checkClosed();

        final FailoverRequest pullRequest = new FailoverRequest(request) {
            @Override
            public void doTask(Provider activeProvider) throws ProviderException {
                activeProvider.pull(consumerId, timeout, this);
            }

            @Override
            public String toString() {
                return "message pull -> " + consumerId;
            }
        };

        pullRequest.run();
    }

    /**
     * @return the message factory captured from the currently connected provider, or
     *         null if no provider has completed connection recovery yet.
     */
    @Override
    public JmsMessageFactory getMessageFactory() {
        return messageFactory.get();
    }

    //--------------- Connection Error and Recovery methods ------------------//

    /**
     * This method is always called from within the FailoverProvider's state lock.
     * 

* When a failure is encountered either from an outgoing request or from an error fired * from the underlying Provider instance this method is called to determine if a reconnect * is allowed and if so a new reconnect cycle is triggered on the serialization executor. * * @param provider * the Provider that was in use when the failure occurred. * @param cause * the error that triggered the failure of the provider. */ private void handleProviderFailure(final Provider provider, final ProviderException cause) { if (closingConnection.get() || closed.get() || failed.get()) { return; } serializer.execute(() -> { if (closingConnection.get() || closed.get() || failed.get()) { return; } lock.readLock().lock(); try { // It is possible that another failed request signaled an error for the same provider // and we already cleaned up the old failed provider and scheduled a reconnect that // has already succeeded, so we need to ensure that we don't kill a valid provider. if (provider == FailoverProvider.this.provider) { LOG.debug("handling Provider failure: {}", cause.getMessage()); LOG.trace("stack", cause); FailoverProvider.this.provider = null; provider.setProviderListener(closedListener); URI failedURI = provider.getRemoteURI(); try { provider.close(); } catch (Throwable error) { LOG.trace("Caught exception while closing failed provider: {}", error.getMessage()); } if (reconnectControl.isReconnectAllowed(cause)) { if (cause instanceof ProviderConnectionRedirectedException) { ProviderConnectionRedirectedException redirect = (ProviderConnectionRedirectedException) cause; try { uris.addFirst(redirect.getRedirectionURI()); } catch (Exception error) { LOG.warn("Could not construct redirection URI from remote provided information"); } } ProviderListener listener = this.listener; if (listener != null) { listener.onConnectionInterrupted(failedURI); } final List pending = new ArrayList(requests.values()); for (FailoverRequest request : pending) { request.whenOffline(cause); } // Start 
watching for request timeouts while we are offline, unless we already are. if (requestTimeoutTask == null) { long sweeperInterval = getRequestSweeperInterval(); if (sweeperInterval > 0) { LOG.trace("Request timeout monitoring enabled: interval = {}ms", sweeperInterval); requestTimeoutTask = serializer.scheduleWithFixedDelay( new FailoverRequestSweeper(), sweeperInterval, sweeperInterval, TimeUnit.MILLISECONDS); } } triggerReconnectionAttempt(); } else { failed.set(true); failureCause = cause; ProviderListener listener = this.listener; if (listener != null) { listener.onConnectionFailure(cause); } } } else { LOG.trace("Ignoring duplicate provider failed event for provider: {}", provider); } } finally { lock.readLock().unlock(); } }); } /** * Called from the serialization executor after a successful connection. *

* This method enqueues a new task that will attempt to recover connection state. * Once successfully recovered, normal operations will resume. If an error occurs * while attempting to recover the JMS framework state then a reconnect cycle is again * triggered on the connection thread. * * @param provider * The newly connect Provider instance that will become active. */ private void initializeNewConnection(final Provider provider) { serializer.execute(() -> { // Disallow other processing in the provider while we attempt to establish this // provider as the new one for recovery, any incoming work stops until we finish // and either recover or go back into a failed state. lock.writeLock().lock(); try { // In case a close is in play as we are reconnecting we close out the connected // provider instance and return here to allow any pending close operations to // finish now. if (closingConnection.get() || closed.get() || failed.get()) { try { provider.close(); } catch(Throwable ignore) { LOG.trace("Ingoring failure to close failed provider: {}", provider, ignore); } return; } FailoverProvider.this.provider = provider; provider.setProviderListener(FailoverProvider.this); connectedURI = provider.getRemoteURI(); if (reconnectControl.isRecoveryRequired()) { LOG.debug("Signalling connection recovery: {}", provider); // Stage 1: Allow listener to recover its resources try { listener.onConnectionRecovery(provider); } finally { // Stage 2: If the provider knows of others lets add them to the URI pool // even if something failed here we can learn of new hosts so we // always process the potential Open frame failover URI results. processAlternates(provider.getAlternateURIs()); } // Stage 3: Connection state recovered, get newly configured message factory. FailoverProvider.this.messageFactory.set(provider.getMessageFactory()); // Stage 4: Restart consumers, send pull commands, etc. 
listener.onConnectionRecovered(provider); // Stage 5: Let the client know that connection has restored. listener.onConnectionRestored(provider.getRemoteURI()); // Last step: Send pending actions. final List pending = new ArrayList(requests.values()); for (FailoverRequest request : pending) { if (!request.isComplete()) { request.run(); } } reconnectControl.connectionEstablished(); } else { processAlternates(provider.getAlternateURIs()); // Last step: Send pending actions. final List pending = new ArrayList(requests.values()); for (FailoverRequest request : pending) { if (!request.isComplete()) { request.run(); } } } // Cancel timeout processing since we are connected again. We waited until // now for the case where we are continually getting bounced from otherwise // live servers, we want the timeout to remain scheduled in that case so that // it doesn't keep getting rescheduled and never actually time anything out. if (requestTimeoutTask != null) { requestTimeoutTask.cancel(false); requestTimeoutTask = null; } } catch (Throwable error) { LOG.trace("Connection attempt:[{}] to: {} failed", reconnectControl.reconnectAttempts, provider.getRemoteURI()); handleProviderFailure(provider, ProviderExceptionSupport.createOrPassthroughFatal(error)); } finally { lock.writeLock().unlock(); } }); } /** * Called when the Provider was either first created or when a connection failure has * been reported. A reconnection attempt is executed by the serialization executor either * immediately or after a delay based on configuration and number of attempts that have * elapsed. If a new Provider is able to be created and connected then a recovery task * is scheduled with the serialization executor. If the connect attempt fails another * attempt is scheduled based on the configured delay settings until a max attempts * limit is hit, if one is set. 
*/ private void triggerReconnectionAttempt() { if (closingConnection.get() || closed.get() || failed.get()) { return; } reconnectControl.scheduleReconnect(new Runnable() { @Override public void run() { if (provider != null || closingConnection.get() || closed.get() || failed.get()) { return; } ProviderException failure = null; Provider provider = null; long reconnectAttempts = reconnectControl.recordNextAttempt(); try { if (!uris.isEmpty()) { for (int i = 0; i < uris.size(); ++i) { URI target = uris.getNext(); if (target == null) { LOG.trace("Failover URI collection unexpectedly modified during connection attempt."); failure = ProviderExceptionSupport.createOrPassthroughFatal( new ConcurrentModificationException("Failover URIs changed unexpectedly")); continue; } try { LOG.debug("Connection attempt:[{}] to: {} in-progress", reconnectAttempts, target.getScheme() + "://" + target.getHost() + ":" + target.getPort()); provider = ProviderFactory.create(target, futureFactory); provider.connect(connectionInfo); initializeNewConnection(provider); return; } catch (Throwable e) { LOG.info("Connection attempt:[{}] to: {} failed", reconnectAttempts, target.getScheme() + "://" + target.getHost() + ":" + target.getPort()); failure = ProviderExceptionSupport.createOrPassthroughFatal(e); try { if (provider != null) { provider.close(); } } catch (Throwable ex) { } finally { provider = null; } if (reconnectControl.isStoppageCause(failure)) { LOG.trace("Stopping attempt due to type of failure"); break; } } } } else { LOG.debug("No remote URI available to connect to in failover list"); failure = new ProviderFailedException( "No remote URI available for reconnection during connection attempt: " + reconnectAttempts); } } catch (Throwable unknownFailure) { LOG.warn("Connection attempt:[{}] failed abnormally.", reconnectAttempts); failure = failure == null ? 
ProviderExceptionSupport.createOrPassthroughFatal(unknownFailure) : failure; } finally { if (provider == null) { LOG.trace("Connection attempt:[{}] failed error: {}", reconnectControl.reconnectAttempts, failure.getMessage()); if (!reconnectControl.isReconnectAllowed(failure)) { reportReconnectFailure(failure); } else { reconnectControl.scheduleReconnect(this); } } } } }); } /** * Called when the provider has tried to reconnect for the last time based on reconnection policy * configuration and we now consider this connection attempt to be failed. * * @param lastFailure the last failure encountered while trying to (re)connect. */ private void reportReconnectFailure(final ProviderException lastFailure) { serializer.execute(() -> { LOG.error("Failed to connect after: " + reconnectControl.reconnectAttempts + " attempt(s)"); if (failed.compareAndSet(false, true)) { if (lastFailure == null) { failureCause = new ProviderIOException( "Failed to connect after: " + reconnectControl.reconnectAttempts + " attempt(s)"); } else { failureCause = ProviderExceptionSupport.createOrPassthroughFatal(lastFailure); } if (listener != null) { listener.onConnectionFailure(failureCause); }; } }); } protected void checkClosed() throws ProviderException { if (closed.get()) { throw new ProviderClosedException("The Provider is already closed"); } } //--------------- DefaultProviderListener overrides ----------------------// @Override public void onInboundMessage(final JmsInboundMessageDispatch envelope) { if (closingConnection.get() || closed.get() || failed.get()) { return; } listener.onInboundMessage(envelope); } @Override public void onCompletedMessageSend(final JmsOutboundMessageDispatch envelope) { if (closingConnection.get() || closed.get() || failed.get()) { return; } listener.onCompletedMessageSend(envelope); } @Override public void onFailedMessageSend(final JmsOutboundMessageDispatch envelope, ProviderException cause) { if (closingConnection.get() || closed.get() || failed.get()) { 
return; } listener.onFailedMessageSend(envelope, cause); } @Override public void onConnectionFailure(final ProviderException ex) { if (closingConnection.get() || closed.get() || failed.get()) { return; } LOG.debug("Failover: the provider reports failure: {}", ex.getMessage()); handleProviderFailure(provider, ex); } @Override public void onResourceClosed(JmsResource resource, ProviderException cause) { if (closingConnection.get() || closed.get() || failed.get()) { return; } listener.onResourceClosed(resource, cause); } @Override public void onProviderException(final ProviderException ex) { if (closingConnection.get() || closed.get() || failed.get()) { return; } LOG.debug("Provider reports an async error: {}", ex.getMessage()); listener.onProviderException(ex); } //--------------- Processing for server-provided alternate URIs ---------// private void processAlternates(List alternates) { if (!alternates.isEmpty()) { List newRemotes = new ArrayList(alternates); LOG.debug("Processing alternates uris:{} with new set: {}", uris, newRemotes); switch (amqpOpenServerListAction) { case ADD: try { uris.addAll(alternates); } catch (Throwable err) { LOG.warn("Error while attempting to add discovered URIs: {}", alternates); } break; case REPLACE: // The current server is assumed not to be in the list of updated remote // as it is meant for the failover nodes. The pool will de-dup if it is. newRemotes.add(0, connectedURI); try { LOG.debug("Replacing uris:{} with new set: {}", uris, newRemotes); uris.replaceAll(newRemotes); } catch (Throwable err) { LOG.warn("Error while attempting to add discovered URIs: {}", alternates); } break; case IGNORE: // Do Nothing break; default: // Shouldn't get here, but do nothing if we do. 
break; } LOG.debug("Processing alternates done new uris:{}", uris); } } //--------------- URI update and rebalance methods -----------------------// public void add(final URI uri) { serializer.execute(() -> uris.add(uri)); } public void remove(final URI uri) { serializer.execute(() -> uris.remove(uri)); } //--------------- Property Getters and Setters ---------------------------// @Override public URI getRemoteURI() { Provider provider = this.provider; if (provider != null) { return provider.getRemoteURI(); } return null; } @Override public List getAlternateURIs() { Provider provider = this.provider; if (provider != null) { return provider.getAlternateURIs(); } else { return Collections.emptyList(); } }; @Override public void setProviderListener(ProviderListener listener) { this.listener = listener; } @Override public ProviderListener getProviderListener() { return listener; } public boolean isRandomize() { return uris.isRandomize(); } public void setRandomize(boolean value) { this.uris.setRandomize(value); } public long getInitialReconnectDelay() { return initialReconnectDelay; } public void setInitialReconnectDelay(long initialReconnectDelay) { this.initialReconnectDelay = initialReconnectDelay; } public long getReconnectDelay() { return reconnectDelay; } public void setReconnectDelay(long reconnectDealy) { this.reconnectDelay = reconnectDealy; } public long getMaxReconnectDelay() { return maxReconnectDelay; } public void setMaxReconnectDelay(long maxReconnectDelay) { this.maxReconnectDelay = maxReconnectDelay; } public int getMaxReconnectAttempts() { return maxReconnectAttempts; } public void setMaxReconnectAttempts(int maxReconnectAttempts) { this.maxReconnectAttempts = maxReconnectAttempts; } public int getStartupMaxReconnectAttempts() { return startupMaxReconnectAttempts; } public void setStartupMaxReconnectAttempts(int startupMaxReconnectAttempts) { this.startupMaxReconnectAttempts = startupMaxReconnectAttempts; } /** * Gets the current setting controlling 
how many Connect / Reconnect attempts must occur * before a warn message is logged. A value of {@literal <= 0} indicates that there will be * no warn message logged regardless of how many reconnect attempts occur. * * @return the current number of connection attempts before warn logging is triggered. */ public int getWarnAfterReconnectAttempts() { return warnAfterReconnectAttempts; } /** * Sets the number of Connect / Reconnect attempts that must occur before a warn message * is logged indicating that the transport is not connected. This can be useful when the * client is running inside some container or service as it gives an indication of some * problem with the client connection that might not otherwise be visible. To disable the * log messages this value should be set to a value {@literal <= 0} * * @param warnAfterReconnectAttempts * The number of failed connection attempts that must happen before a warning is logged. */ public void setWarnAfterReconnectAttempts(int warnAfterReconnectAttempts) { this.warnAfterReconnectAttempts = warnAfterReconnectAttempts; } public double getReconnectBackOffMultiplier() { return reconnectBackOffMultiplier; } public void setReconnectBackOffMultiplier(double reconnectBackOffMultiplier) { this.reconnectBackOffMultiplier = reconnectBackOffMultiplier; } public boolean isUseReconnectBackOff() { return useReconnectBackOff; } public void setUseReconnectBackOff(boolean useReconnectBackOff) { this.useReconnectBackOff = useReconnectBackOff; } public long getCloseTimeout() { return this.closeTimeout; } public long getSendTimeout() { return this.sendTimeout; } public long getRequestTimeout() { return this.requestTimeout; } public String getAmqpOpenServerListAction() { return amqpOpenServerListAction.toString(); } public void setAmqpOpenServerListAction(String amqpOpenServerListAction) { this.amqpOpenServerListAction = FailoverServerListAction.valueOf(amqpOpenServerListAction.toUpperCase(Locale.ENGLISH)); } public Map getNestedOptions() { 
return uris.getNestedOptions(); } @Override public ProviderFuture newProviderFuture() { return futureFactory.createFuture(); } @Override public ProviderFuture newProviderFuture(ProviderSynchronization synchronization) { return futureFactory.createFuture(synchronization); } @Override public String toString() { return "FailoverProvider: " + (connectedURI == null ? "unconnected" : connectedURI.toString()); } //--------------- FailoverProvider Request Timeout Support ---------------// protected final long getRequestSweeperInterval() { long[] timeouts = new long[] { requestTimeout, sendTimeout }; Arrays.sort(timeouts); for (long timeout : timeouts) { if (timeout != JmsConnectionInfo.INFINITE) { return Math.max(Math.max(1, timeout) / 3, MINIMUM_TIMEOUT); } } return DISABLED; } protected final class FailoverRequestSweeper implements Runnable { @Override public void run() { lock.readLock().lock(); try { final List pending = new ArrayList(requests.values()); for (FailoverRequest request : pending) { if (request.isExpired()) { LOG.trace("Task {} has timed out, sending failure notice.", request); request.onFailure(request.createTimedOutException()); } } } finally { lock.readLock().unlock(); } } } //--------------- FailoverProvider Asynchronous Request ------------------// /** * For all requests that are dispatched from the FailoverProvider to a connected * Provider instance an instance of FailoverRequest is used to handle errors that * occur during processing of that request and trigger a reconnect. 
     */
    protected abstract class FailoverRequest extends WrappedAsyncResult implements Runnable {

        // Unique id used to track this request in the pending requests map.
        protected final long id = requestId.incrementAndGet();

        // Creation timestamp (nanos) used for expiration checks.
        private final long requestStarted = System.nanoTime();
        private final long requestTimeout;

        // The provider instance that was active when this request ran; failures
        // are reported against this snapshot rather than the current provider.
        protected Provider activeProvider;

        public FailoverRequest(AsyncResult watcher) {
            this(watcher, JmsConnectionInfo.INFINITE);
        }

        public FailoverRequest(AsyncResult watcher, long requestTimeout) {
            super(watcher);
            this.requestTimeout = requestTimeout;
            LOG.trace("Created Failover Task: {} ({})", this, id);
        }

        @Override
        public void run() {
            lock.readLock().lock();
            try {
                // Snapshot the current provider as this action is scoped to that
                // instance and any failure we report should reflect the provider
                // that was in use when the failure happened.
                activeProvider = provider;
                requests.put(id, this);
                if (activeProvider == null) {
                    // No connection right now: apply this request's offline policy.
                    whenOffline(new ProviderIOException("Connection failed."));
                } else {
                    try {
                        LOG.debug("Executing Failover Task: {} ({})", this, id);
                        doTask(activeProvider);
                    } catch (Throwable e) {
                        LOG.debug("Caught exception while executing task: {} - {}", this, e.getMessage());
                        ProviderException providerEx = ProviderExceptionSupport.createNonFatalOrPassthrough(e);
                        // Park or fail the request per its offline policy, then
                        // trigger failover handling for the active provider.
                        whenOffline(providerEx);
                        handleProviderFailure(activeProvider, providerEx);
                    }
                }
            } finally {
                lock.readLock().unlock();
            }
        }

        @Override
        public void onFailure(final ProviderException error) {
            lock.readLock().lock();
            try {
                // Non-IO errors (or any failure once closing / closed / failed)
                // are final; IO errors while still active park the request so it
                // can be replayed against the next provider.
                if (!(error instanceof ProviderIOException) || closingConnection.get() || closed.get() || failed.get()) {
                    requests.remove(id);
                    super.onFailure(error);
                } else {
                    LOG.debug("Request received error: {}", error.getMessage());
                    ProviderException ioError = ProviderExceptionSupport.createOrPassthroughFatal(error);
                    whenOffline(ioError);
                    handleProviderFailure(activeProvider, ioError);
                }
            } finally {
                lock.readLock().unlock();
            }
        }

        @Override
        public void onSuccess() {
            lock.readLock().lock();
            try {
                requests.remove(id);
            } finally {
                lock.readLock().unlock();
            }
            super.onSuccess();
        }
        /**
         * Called to execute the specific task that was requested.
         *
         * @param provider
         *      The provider instance to use when performing this action.
         *
         * @throws ProviderException if an error occurs during task execution.
         */
        public abstract void doTask(Provider provider) throws ProviderException;

        /**
         * Should the request just succeed when the Provider is not connected.
         *
         * @return true if the request is marked as successful when not connected.
         */
        public boolean succeedsWhenOffline() {
            return false;
        }

        /**
         * When the transport is not connected should this request automatically fail.
         *
         * @return true if the task should fail when the Provider is not connected.
         */
        public boolean failureWhenOffline() {
            return false;
        }

        /**
         * @return true if the request has a configured timeout and that timeout has elapsed.
         */
        public boolean isExpired() {
            if (requestTimeout != JmsConnectionInfo.INFINITE) {
                return (System.nanoTime() - requestStarted) > TimeUnit.MILLISECONDS.toNanos(requestTimeout);
            } else {
                return false;
            }
        }

        protected ProviderException createTimedOutException() {
            return new ProviderOperationTimedOutException("Timed out waiting on " + this);
        }

        protected ProviderException createOfflineFailureException(ProviderException error) {
            return ProviderExceptionSupport.createNonFatalOrPassthrough(error);
        }

        // Applies this request's offline policy: fail it, succeed it, or hold it
        // until the connection is recovered and the request is replayed.
        private void whenOffline(ProviderException error) {
            if (failureWhenOffline()) {
                requests.remove(id);
                getWrappedRequest().onFailure(createOfflineFailureException(error));
            } else if (succeedsWhenOffline()) {
                onSuccess();
            } else {
                LOG.trace("Failover task held until connection recovered: {} ({})", this, id);
            }
        }
    }

    /**
     * Captures the initial request to create a JmsConnectionInfo based resources and ensures
     * that if the connection is successfully established that the connection established event
     * is triggered once before moving on to sending only connection interrupted and restored
     * events.
*/ protected abstract class CreateConnectionRequest extends FailoverRequest { public CreateConnectionRequest(AsyncResult watcher) { super(watcher); } @Override public void onSuccess() { lock.readLock().lock(); try { LOG.trace("First connection requst has completed:"); FailoverProvider.this.messageFactory.set(provider.getMessageFactory()); processAlternates(provider.getAlternateURIs()); listener.onConnectionEstablished(provider.getRemoteURI()); reconnectControl.connectionEstablished(); CreateConnectionRequest.this.signalConnected(); } finally { lock.readLock().unlock(); } } @Override public void onFailure(final ProviderException result) { lock.readLock().lock(); try { if (closingConnection.get() || closed.get() || failed.get()) { requests.remove(id); super.onFailure(result); } else { LOG.debug("Request received error: {}", result.getMessage()); // If we managed to receive an Open frame it might contain // a failover update so process it before handling the error. processAlternates(activeProvider.getAlternateURIs()); handleProviderFailure(activeProvider, ProviderExceptionSupport.createOrPassthroughFatal(result)); } } finally { lock.readLock().unlock(); } } public void signalConnected() { super.onSuccess(); } } //----- Reconnection Control State Management ----------------------------// private class ReconnectControls { // Reconnection state tracking private volatile boolean recoveryRequired; private volatile long reconnectAttempts; private volatile long nextReconnectDelay = -1; public void scheduleReconnect(Runnable runnable) { try { // Warn of ongoing connection attempts if configured. 
                int warnInterval = getWarnAfterReconnectAttempts();
                if (reconnectAttempts > 0 && warnInterval > 0 && (reconnectAttempts % warnInterval) == 0) {
                    LOG.warn("Failed to connect after: {} attempt(s) continuing to retry.", reconnectAttempts);
                }

                // If no connection recovery required then we have never fully connected to a remote
                // so we proceed down the connect with one immediate connection attempt and then follow
                // on delayed attempts based on configuration.
                if (!recoveryRequired) {
                    if (reconnectAttempts == 0) {
                        LOG.trace("Initial connect attempt will be performed immediately");
                        serializer.execute(runnable);
                    } else if (reconnectAttempts == 1 && initialReconnectDelay > 0) {
                        LOG.trace("Delayed initial reconnect attempt will be in {} milliseconds", initialReconnectDelay);
                        serializer.schedule(runnable, initialReconnectDelay, TimeUnit.MILLISECONDS);
                    } else {
                        long delay = reconnectControl.nextReconnectDelay();
                        LOG.trace("Next reconnect attempt will be in {} milliseconds", delay);
                        serializer.schedule(runnable, delay, TimeUnit.MILLISECONDS);
                    }
                } else if (reconnectAttempts == 0) {
                    // Recovering a previously established connection: honor the
                    // initial reconnect delay when one was configured.
                    if (initialReconnectDelay > 0) {
                        LOG.trace("Delayed initial reconnect attempt will be in {} milliseconds", initialReconnectDelay);
                        serializer.schedule(runnable, initialReconnectDelay, TimeUnit.MILLISECONDS);
                    } else {
                        LOG.trace("Initial Reconnect attempt will be performed immediately");
                        serializer.execute(runnable);
                    }
                } else {
                    long delay = reconnectControl.nextReconnectDelay();
                    LOG.trace("Next reconnect attempt will be in {} milliseconds", delay);
                    serializer.schedule(runnable, delay, TimeUnit.MILLISECONDS);
                }
            } catch (Throwable unrecoverable) {
                // Scheduling itself failed (e.g. the executor is shut down): treat as terminal.
                reportReconnectFailure(ProviderExceptionSupport.createOrPassthroughFatal(unrecoverable));
            }
        }

        // Resets reconnect tracking after a successful connection and marks that
        // any future attempts are recovery of an established connection.
        public void connectionEstablished() {
            recoveryRequired = true;
            nextReconnectDelay = -1;
            reconnectAttempts = 0;
            uris.connected();
        }

        public long recordNextAttempt() {
            return ++reconnectAttempts;
        }

        public boolean isRecoveryRequired() {
            return recoveryRequired;
        }

        public
 boolean isLimitExceeded() {
            int reconnectLimit = reconnectAttemptLimit();
            if (reconnectLimit != UNLIMITED && reconnectAttempts >= reconnectLimit) {
                return true;
            }

            return false;
        }

        public boolean isReconnectAllowed(ProviderException cause) {
            // If a connection attempt fails due to Security errors then we abort
            // reconnection as there is a configuration issue and we want to avoid
            // a spinning reconnect cycle that can never complete.
            if (isStoppageCause(cause)) {
                return false;
            }

            return !isLimitExceeded();
        }

        // Security failures are terminal unless the SASL layer flags the failure
        // as a temporary system error.
        private boolean isStoppageCause(ProviderException cause) {
            if (cause instanceof ProviderConnectionSecuritySaslException) {
                ProviderConnectionSecuritySaslException saslFailure = (ProviderConnectionSecuritySaslException) cause;
                return !saslFailure.isSysTempFailure();
            } else if (cause instanceof ProviderConnectionSecurityException) {
                return true;
            }

            return false;
        }

        private int reconnectAttemptLimit() {
            int maxReconnectValue = maxReconnectAttempts;
            if (!recoveryRequired && startupMaxReconnectAttempts != UNDEFINED) {
                // If this is the first connection attempt and a specific startup retry limit
                // is configured then use it, otherwise use the main reconnect limit
                maxReconnectValue = startupMaxReconnectAttempts;
            }
            return maxReconnectValue;
        }

        // Computes the next reconnect delay, applying exponential back-off
        // (capped at maxReconnectDelay) when enabled and past the first attempt.
        private long nextReconnectDelay() {
            if (nextReconnectDelay == -1) {
                nextReconnectDelay = reconnectDelay;
            }

            if (isUseReconnectBackOff() && reconnectAttempts > 1) {
                // Exponential increment of reconnect delay.
                nextReconnectDelay *= getReconnectBackOffMultiplier();
                if (nextReconnectDelay > maxReconnectDelay) {
                    nextReconnectDelay = maxReconnectDelay;
                }
            }

            return nextReconnectDelay;
        }
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy