All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.kernel.ha.cluster.modeswitch.HighAvailabilityModeSwitcher Maven / Gradle / Ivy

/*
 * Copyright (c) 2002-2016 "Neo Technology,"
 * Network Engine for Objects in Lund AB [http://neotechnology.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see .
 */
package org.neo4j.kernel.ha.cluster.modeswitch;

import java.net.URI;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.function.Supplier;

import org.neo4j.cluster.BindingListener;
import org.neo4j.cluster.InstanceId;
import org.neo4j.cluster.client.ClusterClient;
import org.neo4j.cluster.member.ClusterMemberAvailability;
import org.neo4j.cluster.protocol.election.Election;
import org.neo4j.helpers.CancellationRequest;
import org.neo4j.kernel.ha.cluster.HighAvailabilityMemberChangeEvent;
import org.neo4j.kernel.ha.cluster.HighAvailabilityMemberListener;
import org.neo4j.kernel.ha.cluster.HighAvailabilityMemberState;
import org.neo4j.kernel.ha.cluster.SwitchToMaster;
import org.neo4j.kernel.ha.cluster.SwitchToSlave;
import org.neo4j.kernel.ha.store.HighAvailabilityStoreFailureException;
import org.neo4j.kernel.ha.store.UnableToCopyStoreFromOldMasterException;
import org.neo4j.kernel.impl.logging.LogService;
import org.neo4j.kernel.impl.store.MismatchingStoreIdException;
import org.neo4j.kernel.impl.store.StoreId;
import org.neo4j.kernel.impl.transaction.state.DataSourceManager;
import org.neo4j.kernel.lifecycle.LifeSupport;
import org.neo4j.kernel.lifecycle.Lifecycle;
import org.neo4j.logging.Log;

import static org.neo4j.cluster.ClusterSettings.INSTANCE_ID;
import static org.neo4j.helpers.NamedThreadFactory.named;
import static org.neo4j.helpers.Uris.parameter;

/**
 * Performs the internal switches in various services from pending to slave/master, by listening for
 * {@link HighAvailabilityMemberChangeEvent}s. When finished it will invoke
 * {@link ClusterMemberAvailability#memberIsAvailable(String, URI, StoreId)} to announce it's new status to the
 * cluster.
 */
public class HighAvailabilityModeSwitcher implements HighAvailabilityMemberListener, BindingListener, Lifecycle
{
    public static final String MASTER = "master";
    public static final String SLAVE = "slave";
    public static final String UNKNOWN = "UNKNOWN";

    public static final String INADDR_ANY = "0.0.0.0";

    private final ComponentSwitcher componentSwitcher;

    private volatile URI masterHaURI;
    private volatile URI slaveHaURI;
    private CancellationHandle cancellationHandle; // guarded by synchronized in startModeSwitching()

    public static InstanceId getServerId( URI haUri )
    {
        // Get serverId parameter, default to -1 if it is missing, and parse to integer
        String serverIdParam = parameter( "serverId" ).apply( haUri );
        return INSTANCE_ID.apply( serverIdParam != null ? serverIdParam : "-1" );
    }

    private URI availableMasterId;

    private SwitchToSlave switchToSlave;
    private SwitchToMaster switchToMaster;
    private final Election election;
    private final ClusterMemberAvailability clusterMemberAvailability;
    private final ClusterClient clusterClient;
    private final Supplier storeIdSupplier;
    private final InstanceId instanceId;

    private final Log msgLog;
    private final Log userLog;

    private LifeSupport haCommunicationLife;

    private ScheduledExecutorService modeSwitcherExecutor;
    private volatile URI me;
    private volatile Future modeSwitcherFuture;
    private volatile HighAvailabilityMemberState currentTargetState;
    private final AtomicBoolean canAskForElections = new AtomicBoolean( true );
    private final DataSourceManager neoStoreDataSourceSupplier;

    public HighAvailabilityModeSwitcher( SwitchToSlave switchToSlave,
                                         SwitchToMaster switchToMaster,
                                         Election election,
                                         ClusterMemberAvailability clusterMemberAvailability,
                                         ClusterClient clusterClient,
                                         Supplier storeIdSupplier,
                                         InstanceId instanceId,
                                         ComponentSwitcher componentSwitcher,
                                         DataSourceManager neoStoreDataSourceSupplier,
                                         LogService logService )
    {
        this.switchToSlave = switchToSlave;
        this.switchToMaster = switchToMaster;
        this.election = election;
        this.clusterMemberAvailability = clusterMemberAvailability;
        this.clusterClient = clusterClient;
        this.storeIdSupplier = storeIdSupplier;
        this.instanceId = instanceId;
        this.componentSwitcher = componentSwitcher;
        this.msgLog = logService.getInternalLog( getClass() );
        this.userLog = logService.getUserLog( getClass() );
        this.neoStoreDataSourceSupplier = neoStoreDataSourceSupplier;
        this.haCommunicationLife = new LifeSupport();
    }

    @Override
    public void listeningAt( URI myUri )
    {
        me = myUri;
    }

    @Override
    public synchronized void init() throws Throwable
    {
        modeSwitcherExecutor = createExecutor();

        haCommunicationLife.init();
    }

    @Override
    public synchronized void start() throws Throwable
    {
        haCommunicationLife.start();
    }

    @Override
    public synchronized void stop() throws Throwable
    {
        haCommunicationLife.stop();
    }

    @Override
    public synchronized void shutdown() throws Throwable
    {
        modeSwitcherExecutor.shutdown();

        modeSwitcherExecutor.awaitTermination( 60, TimeUnit.SECONDS );

        haCommunicationLife.shutdown();

        switchToMaster.close();
        switchToMaster = null;
        switchToSlave = null;
    }

    @Override
    public void masterIsElected( HighAvailabilityMemberChangeEvent event )
    {
        if ( event.getNewState() == event.getOldState() && event.getOldState() == HighAvailabilityMemberState.MASTER )
        {
            clusterMemberAvailability.memberIsAvailable( MASTER, masterHaURI, storeIdSupplier.get() );
        }
        else
        {
            stateChanged( event );
        }
    }

    @Override
    public void masterIsAvailable( HighAvailabilityMemberChangeEvent event )
    {
        if ( event.getNewState() == event.getOldState() && event.getOldState() == HighAvailabilityMemberState.SLAVE )
        {
            clusterMemberAvailability.memberIsAvailable( SLAVE, slaveHaURI, storeIdSupplier.get() );
        }
        else
        {
            stateChanged( event );
        }
    }

    @Override
    public void slaveIsAvailable( HighAvailabilityMemberChangeEvent event )
    {
        // ignored, we don't do any mode switching in slave available events
    }

    @Override
    public void instanceStops( HighAvailabilityMemberChangeEvent event )
    {
        stateChanged( event );
    }

    @Override
    public void instanceDetached( HighAvailabilityMemberChangeEvent event )
    {
        switchToDetached();
    }

    public void forceElections()
    {
        if ( canAskForElections.compareAndSet( true, false ) )
        {
            clusterMemberAvailability.memberIsUnavailable( HighAvailabilityModeSwitcher.SLAVE );
            election.performRoleElections();
        }
    }

    private void stateChanged( HighAvailabilityMemberChangeEvent event )
    {
        if ( event.getNewState() == event.getOldState() )
        {
            /*
             * We get here if for example a new master becomes available while we are already switching. In that case
             * we don't change state but we must update with the new availableMasterId, but only if it is not null.
             */
            if ( event.getServerHaUri() != null )
            {
                availableMasterId = event.getServerHaUri();
            }
            return;
        }

        availableMasterId = event.getServerHaUri();

        currentTargetState = event.getNewState();
        switch ( event.getNewState() )
        {
            case TO_MASTER:

                if ( event.getOldState().equals( HighAvailabilityMemberState.SLAVE ) )
                {
                    clusterMemberAvailability.memberIsUnavailable( SLAVE );
                }

                switchToMaster();
                break;
            case TO_SLAVE:
                switchToSlave();
                break;
            case PENDING:

                switchToPending( event.getOldState() );
                break;
            default:
                // do nothing
        }
    }

    private void switchToMaster()
    {
        final CancellationHandle cancellationHandle = new CancellationHandle();
        startModeSwitching( () -> {
            if ( currentTargetState != HighAvailabilityMemberState.TO_MASTER )
            {
                return;
            }

            // We just got scheduled. Maybe we are already obsolete - test
            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to master cancelled on start." );
                return;
            }

            componentSwitcher.switchToMaster();

            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to master cancelled before ha communication started." );
                return;
            }

            haCommunicationLife.shutdown();
            haCommunicationLife = new LifeSupport();

            try
            {
                masterHaURI = switchToMaster.switchToMaster( haCommunicationLife, me );
                canAskForElections.set( true );
            }
            catch ( Throwable e )
            {
                msgLog.error( "Failed to switch to master", e );
                // Since this master switch failed, elect someone else
                election.demote( instanceId );
            }
        }, cancellationHandle );
    }

    private void switchToSlave()
    {
        // Do this with a scheduler, so that if it fails, it can retry later with an exponential backoff with max
        // wait time.
        /*
         * This is purely defensive and should never trigger. There was a race where the switch to slave task would
         * start after this instance was elected master and the task would constantly try to change as slave
         * for itself, never cancelling. This now should not be possible, since we cancel the task and wait for it
         * to complete, all in a single thread executor. However, this is a check worth doing because if this
         * condition slips through via some other code path it can cause trouble.
         */
        if ( getServerId( availableMasterId ).equals( instanceId ) )
        {
            msgLog.error( "I (" + me + ") tried to switch to slave for myself as master (" + availableMasterId + ")" );
            return;
        }
        final AtomicLong wait = new AtomicLong();
        final CancellationHandle cancellationHandle = new CancellationHandle();
        startModeSwitching( new Runnable()
        {
            @Override
            public void run()
            {
                if ( currentTargetState != HighAvailabilityMemberState.TO_SLAVE )
                {
                    return; // Already switched - this can happen if a second master becomes available while waiting
                }

                if ( cancellationHandle.cancellationRequested() )
                {
                    msgLog.info( "Switch to slave cancelled on start." );
                    return;
                }

                componentSwitcher.switchToSlave();

                try
                {
                    if ( cancellationHandle.cancellationRequested() )
                    {
                        msgLog.info( "Switch to slave cancelled before ha communication started." );
                        return;
                    }

                    haCommunicationLife.shutdown();
                    haCommunicationLife = new LifeSupport();

                    // it is important for availableMasterId to be re-read on every attempt so that
                    // slave switching would not result in an infinite loop with wrong/stale availableMasterId
                    URI resultingSlaveHaURI = switchToSlave.switchToSlave( haCommunicationLife, me, availableMasterId, cancellationHandle );
                    if ( resultingSlaveHaURI == null )
                    {
                        /*
                         * null slave uri means the task was cancelled. The task then must simply terminate and
                         * have no side effects.
                         */
                        msgLog.info( "Switch to slave is effectively cancelled" );
                    }
                    else
                    {
                        slaveHaURI = resultingSlaveHaURI;
                        canAskForElections.set( true );
                    }
                }
                catch ( HighAvailabilityStoreFailureException e )
                {
                    userLog.error( "UNABLE TO START UP AS SLAVE: %s", e.getMessage() );
                    msgLog.error( "Unable to start up as slave", e );

                    clusterMemberAvailability.memberIsUnavailable( SLAVE );
                    ClusterClient clusterClient = HighAvailabilityModeSwitcher.this.clusterClient;
                    try
                    {
                        // TODO I doubt this actually works
                        clusterClient.leave();
                        clusterClient.stop();
                        haCommunicationLife.shutdown();
                    }
                    catch ( Throwable t )
                    {
                        msgLog.error( "Unable to stop cluster client", t );
                    }

                    modeSwitcherExecutor.schedule( this, 5, TimeUnit.SECONDS );
                }
                catch ( MismatchingStoreIdException e )
                {
                    // Try again immediately, the place that threw it have already treated the db
                    // as branched and so a new attempt will have this slave copy a new store from master.
                    run();
                }
                catch ( Throwable t )
                {
                    msgLog.error( "Error while trying to switch to slave", t );

                    // Try again later
                    wait.set( (1 + wait.get() * 2) ); // Exponential backoff
                    wait.set( Math.min( wait.get(), 5 * 60 ) ); // Wait maximum 5 minutes

                    modeSwitcherFuture = modeSwitcherExecutor.schedule( this, wait.get(), TimeUnit.SECONDS );

                    msgLog.info( "Attempting to switch to slave in %ds", wait.get() );
                }
            }
        }, cancellationHandle );
    }

    private void switchToPending( final HighAvailabilityMemberState oldState )
    {
        msgLog.info( "I am %s, moving to pending", instanceId );

        startModeSwitching( () -> {
            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to pending cancelled on start." );
                return;
            }

            componentSwitcher.switchToPending();
            neoStoreDataSourceSupplier.getDataSource().beforeModeSwitch();

            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to pending cancelled before ha communication shutdown." );
                return;
            }

            haCommunicationLife.shutdown();
            haCommunicationLife = new LifeSupport();
        }, new CancellationHandle() );

        try
        {
            modeSwitcherFuture.get( 10, TimeUnit.SECONDS );
        }
        catch ( Exception ignored )
        {
        }
    }

    private void switchToDetached()
    {
        msgLog.info( "I am %s, moving to detached", instanceId );

        startModeSwitching( () -> {
            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to pending cancelled on start." );
                return;
            }

            componentSwitcher.switchToSlave();
            neoStoreDataSourceSupplier.getDataSource().beforeModeSwitch();

            if ( cancellationHandle.cancellationRequested() )
            {
                msgLog.info( "Switch to pending cancelled before ha communication shutdown." );
                return;
            }

            haCommunicationLife.shutdown();
            haCommunicationLife = new LifeSupport();
        }, new CancellationHandle() );

        try
        {
            modeSwitcherFuture.get( 10, TimeUnit.SECONDS );
        }
        catch ( Exception e )
        {
            msgLog.warn( "Exception received while waiting for switching to detached", e );
        }
    }

    private synchronized void startModeSwitching( Runnable switcher, CancellationHandle cancellationHandle )
    {
        if ( modeSwitcherFuture != null )
        {
            // Cancel any delayed previous switching
            this.cancellationHandle.cancel();
            // Wait for it to actually stop what it was doing
            try
            {
                modeSwitcherFuture.get();
            }
            catch ( UnableToCopyStoreFromOldMasterException e )
            {
                throw e;
            }
            catch ( Exception e )
            {
                msgLog.warn( "Got exception from cancelled task", e );
            }
        }

        this.cancellationHandle = cancellationHandle;
        modeSwitcherFuture = modeSwitcherExecutor.submit( switcher );
    }

    ScheduledExecutorService createExecutor()
    {
        return Executors.newSingleThreadScheduledExecutor( named( "HA Mode switcher" ) );
    }

    private static class CancellationHandle implements CancellationRequest
    {
        private volatile boolean cancelled = false;

        @Override
        public boolean cancellationRequested()
        {
            return cancelled;
        }

        public void cancel()
        {
            assert !cancelled : "Should not cancel on the same request twice";
            cancelled = true;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy