org.neo4j.cluster.protocol.atomicbroadcast.multipaxos.LearnerState (from the ongdb-cluster artifact)
Library implementing Paxos and Heartbeat components required for High Availability Neo4j
/*
* Copyright (c) 2018-2020 "Graph Foundation"
* Graph Foundation, Inc. [https://graphfoundation.org]
*
* Copyright (c) 2002-2018 "Neo4j,"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of ONgDB Enterprise Edition. The included source
* code can be redistributed and/or modified under the terms of the
* GNU AFFERO GENERAL PUBLIC LICENSE Version 3
* (http://www.fsf.org/licensing/licenses/agpl-3.0.html) as found
* in the associated LICENSE.txt file.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*/
package org.neo4j.cluster.protocol.atomicbroadcast.multipaxos;
import java.io.IOException;
import java.net.URI;
import java.net.URISyntaxException;
import org.neo4j.cluster.com.message.Message;
import org.neo4j.cluster.com.message.MessageHolder;
import org.neo4j.cluster.protocol.atomicbroadcast.AtomicBroadcastSerializer;
import org.neo4j.cluster.protocol.atomicbroadcast.Payload;
import org.neo4j.cluster.statemachine.State;
import org.neo4j.logging.Log;
import static org.neo4j.cluster.protocol.atomicbroadcast.multipaxos.LearnerContext.LEARN_GAP_THRESHOLD;
/**
 * State machine for the Paxos Learner role: it learns values as Paxos instances close,
 * delivers them in instance order, and requests any instances it has missed.
 */
public enum LearnerState
        implements State<LearnerContext, LearnerMessage>
{
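    /**
     * Initial state: waits for a join message before becoming an active learner.
     */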
start
{
@Override
        public LearnerState handle( LearnerContext context,
                                    Message<LearnerMessage> message,
                                    MessageHolder outgoing
        )
{
if ( message.getMessageType() == LearnerMessage.join )
{
return learner;
}
return this;
}
},
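    /**
     * Active state: learns closed values, delivers them in instance order, requests
     * instances we have missed, and answers learn requests from other members.
     */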
learner
{
@Override
        public LearnerState handle( LearnerContext context,
                                    Message<LearnerMessage> message,
                                    MessageHolder outgoing
        ) throws IOException, ClassNotFoundException, URISyntaxException
{
switch ( message.getMessageType() )
{
case learn:
{
LearnerMessage.LearnState learnState = message.getPayload();
final InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstance( instanceId );
Log log = context.getLog( getClass() );
// Skip if we already know about this
if ( instanceId.getId() <= context.getLastDeliveredInstanceId() )
{
break;
}
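                    // Record the learn and close the instance with the agreed value and the
                    // conversation it belongs to.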
context.learnedInstanceId( instanceId.getId() );
instance.closed( learnState.getValue(), message.getHeader( Message.HEADER_CONVERSATION_ID ) );
                    /*
                     * The conditional below ensures that the expensive deserialization happens only
                     * if debug logging is enabled, since otherwise nothing would be printed anyway.
                     */
if ( log.isDebugEnabled() )
{
String description;
if ( instance.value_2 instanceof Payload )
{
AtomicBroadcastSerializer atomicBroadcastSerializer = context.newSerializer();
description = atomicBroadcastSerializer.receive( (Payload) instance.value_2 ).toString();
}
else
{
description = instance.value_2.toString();
}
log.debug(
"Learned and closed instance " + instance.id +
" from conversation " +
instance.conversationIdHeader +
" and the content was " +
description );
}
/*
* Here we have to deal with a potential problem, in essence the fallout of a bug
* that happens elsewhere.
* The instance we just received should be delivered if it's the next one being waited on.
* But, some previous instance may not have closed yet and, more often than not, it may
* never close. That's the bug we're dealing with and the reasons that this happens are
* currently unknown. When this happens, all subsequent instances, including for example
* configuration updates of instances joining the cluster, will get stuck and not be
* applied, making everyone just hang around and not allow members to join or leave.
                     * If this situation arises, we have a way out. If the current master doesn't have
                     * the instance, then no one does, or at least it's relatively safe to assume so.
                     * This assumption allows us, when more than N instances are missing before this
                     * one, to just skip them and deliver, in order, everything that is currently
                     * pending. This is nothing more than a tradeoff between sticking with correctness
                     * at the risk of unavailability, and risking correctness to ensure availability.
                     * Given that most pending messages are cluster configuration changes and these are
                     * idempotent, the risk is acceptable.
*
* Technically, what is going to happen here is that if we are the coordinator and the
* instance we just closed is larger than a threshold compared to the last delivered one,
* we'll just deliver everything that we have pending, potentially skipping some instances.
* Slaves will not do that and instead ask the master for what they're missing. Eventually
* the master will become unstuck and everyone will progress.
*
                     * Nevertheless, the normal case is what the non-coordinators do, so we enter the
                     * pathological handling code only when we are the coordinator AND we don't follow
                     * the happy path.
*/
if ( context.isMe( context.getCoordinator() )
&& instanceId.getId() != context.getLastDeliveredInstanceId() + 1 )
{
                        context.getLog( LearnerState.class ).debug(
                                "Gap developed in delivered instances, latest received was %s " +
                                "but last delivered was %d.",
                                instanceId, context.getLastDeliveredInstanceId() );
                        /*
                         * We'll wait a bit, since eagerness should not cause out of order delivery. At
                         * the same time, we have to make sure that if we get here, then we sort of erase
                         * any issues accumulated so far, therefore we will try to deliver everything
                         * pending, in order, regardless of whether we have it or not. In essence, we
                         * assume this is the latest message and it is valid.
                         */
if ( instanceId.getId() > context.getLastDeliveredInstanceId() + LEARN_GAP_THRESHOLD )
{
context.getLog( LearnerState.class ).debug(
"Gap threshold reached (%d), proceeding to deliver everything pending " +
"up until now", LEARN_GAP_THRESHOLD );
boolean currentInstanceFound = false; // To assert we delivered this instance
long checkInstanceId = context.getLastDeliveredInstanceId() + 1;
final long startingInstanceId = checkInstanceId; // for debug message, later
while ( ( instance = context.getPaxosInstance( new InstanceId(
checkInstanceId ) ) ) != null ) // As long as it exists, deliver it
{
if ( checkInstanceId == instanceId.getId() )
{
currentInstanceFound = true;
}
instance.delivered();
context.setLastDeliveredInstanceId( checkInstanceId );
Message learnMessage = Message.internal(
AtomicBroadcastMessage.broadcastResponse, instance.value_2 )
.setHeader( InstanceId.INSTANCE, instance.id.toString() )
.setHeader( Message.HEADER_CONVERSATION_ID, instance.conversationIdHeader );
outgoing.offer( learnMessage );
checkInstanceId++;
}
context.getLog( LearnerMessage.LearnState.class ).
debug( "Delivered everything from %d up until %d. Triggering message was %s, delivered: %b",
startingInstanceId, checkInstanceId - 1, instanceId, currentInstanceFound );
}
}
                    /*
                     * Otherwise we are a follower, so we must wait for everything and deliver it in
                     * order, relying on the master to hand out what we're missing. If the master
                     * doesn't have it either, then a restart may be necessary, because we cannot risk
                     * having more than one place in the cluster where decisions about skipping Paxos
                     * instances are taken.
                     */
else
{
if ( instanceId.getId() == context.getLastDeliveredInstanceId() + 1 )
{
instance.delivered();
outgoing.offer( Message.internal( AtomicBroadcastMessage.broadcastResponse,
learnState.getValue() )
.setHeader( InstanceId.INSTANCE, instance.id.toString() )
.setHeader( Message.HEADER_CONVERSATION_ID, instance.conversationIdHeader ) );
context.setLastDeliveredInstanceId( instanceId.getId() );
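                            // Also deliver any directly following instances that have already
                            // closed while this one was pending.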
long checkInstanceId = instanceId.getId() + 1;
while ( (instance = context.getPaxosInstance( new InstanceId(
checkInstanceId ) )).isState( PaxosInstance.State.closed ) )
{
instance.delivered();
context.setLastDeliveredInstanceId( checkInstanceId );
Message learnMessage = Message.internal(
AtomicBroadcastMessage.broadcastResponse, instance.value_2 )
.setHeader( InstanceId.INSTANCE, instance.id.toString() )
.setHeader( Message.HEADER_CONVERSATION_ID, instance.conversationIdHeader );
outgoing.offer( learnMessage );
checkInstanceId++;
}
if ( checkInstanceId == context.getLastKnownLearnedInstanceInCluster() + 1 )
{
// No hole - all is ok
// Cancel potential timeout, if one is active
context.cancelTimeout( "learn" );
}
else
{
// Found hole - we're waiting for this to be filled, i.e. timeout already set
context.getLog( LearnerState.class ).debug( "*** HOLE! WAITING " +
"FOR " + (context.getLastDeliveredInstanceId() + 1) );
}
}
else
{
                        // Found hole - set a timeout and wait for the missing instances to arrive
context.getLog( LearnerState.class ).debug( "*** GOT " + instanceId
+ ", WAITING FOR " + (context.getLastDeliveredInstanceId() + 1) );
context.setTimeout( "learn", Message.timeout( LearnerMessage.learnTimedout,
message ) );
}
}
break;
}
case learnTimedout:
{
// Timed out waiting for learned values - send explicit request to everyone that is not failed
if ( !context.hasDeliveredAllKnownInstances() )
{
for ( long instanceId = context.getLastDeliveredInstanceId() + 1;
instanceId < context.getLastKnownLearnedInstanceInCluster();
instanceId++ )
{
InstanceId id = new InstanceId( instanceId );
PaxosInstance instance = context.getPaxosInstance( id );
if ( !instance.isState( PaxosInstance.State.closed ) && !instance.isState(
PaxosInstance.State.delivered ) )
{
                                for ( org.neo4j.cluster.InstanceId node : context.getAlive() )
                                {
                                    if ( !node.equals( context.getMyId() ) )
                                    {
                                        URI nodeUri = context.getUriForId( node );
                                        outgoing.offer( Message.to( LearnerMessage.learnRequest, nodeUri,
                                                new LearnerMessage.LearnRequestState() ).setHeader(
                                                InstanceId.INSTANCE,
                                                id.toString() ) );
                                    }
                                }
}
}
}
// Set another timeout
context.setTimeout( "learn", Message.timeout( LearnerMessage.learnTimedout,
message ) );
}
break;
}
case learnRequest:
{
// Someone wants to learn a value that we might have
InstanceId instanceId = new InstanceId( message );
PaxosInstance instance = context.getPaxosInstance( instanceId );
if ( instance.isState( PaxosInstance.State.closed ) ||
instance.isState( PaxosInstance.State.delivered ) )
{
outgoing.offer( Message.respond( LearnerMessage.learn, message,
new LearnerMessage.LearnState( instance.value_2 ) ).
setHeader( InstanceId.INSTANCE, instanceId.toString() ).
setHeader( Message.HEADER_CONVERSATION_ID, instance.conversationIdHeader ) );
}
else
{
                        outgoing.offer( message.copyHeadersTo( Message.respond( LearnerMessage.learnFailed,
                                message, new LearnerMessage.LearnFailedState() ),
                                InstanceId.INSTANCE ) );
}
break;
}
case learnFailed:
{
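                    // A peer could not supply the value for this instance; record the miss,
                    // presumably so that a later retry can target another member.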
InstanceId instanceId = new InstanceId( message );
context.notifyLearnMiss( instanceId );
break;
}
case catchUp:
{
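                    // Another member reports knowing of more instances than we have learned;
                    // request the first missing one and update our view of the cluster's progress.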
long catchUpTo = message.getPayload();
if ( context.getLastKnownLearnedInstanceInCluster() < catchUpTo )
{
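                        // Advance our next expected instance id past the reported high-water mark.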
context.setNextInstanceId( catchUpTo + 1 );
// Try to get up to date
for ( long instanceId = context.getLastLearnedInstanceId() + 1;
instanceId <= catchUpTo; instanceId++ )
{
InstanceId id = new InstanceId( instanceId );
PaxosInstance instance = context.getPaxosInstance( id );
if ( !instance.isState( PaxosInstance.State.closed ) &&
!instance.isState( PaxosInstance.State.delivered ) )
{
outgoing.offer( Message.to( LearnerMessage.learnRequest,
lastKnownAliveUriOrSenderUri( context, message ),
new LearnerMessage.LearnRequestState() ).setHeader(
InstanceId.INSTANCE,
id.toString() ) );
context.setTimeout( "learn",
Message.timeout( LearnerMessage.learnTimedout, message ) );
break;
}
}
org.neo4j.cluster.InstanceId instanceId =
message.hasHeader( Message.HEADER_INSTANCE_ID )
? new org.neo4j.cluster.InstanceId(
Integer.parseInt( message.getHeader( Message.HEADER_INSTANCE_ID ) ) )
: context.getMyId();
context.setLastKnownLearnedInstanceInCluster( catchUpTo, instanceId );
}
break;
}
case leave:
{
context.leave();
return start;
}
default:
break;
}
return this;
}
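        /**
         * Picks a target for learn requests: the last known alive, up-to-date member if one
         * is recorded, otherwise the sender of the message that triggered this handling.
         */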
        private URI lastKnownAliveUriOrSenderUri( LearnerContext context, Message<LearnerMessage> message )
                throws URISyntaxException
{
org.neo4j.cluster.InstanceId lastKnownAliveInstance = context.getLastKnownAliveUpToDateInstance();
if ( lastKnownAliveInstance != null )
{
return context.getUriForId( lastKnownAliveInstance );
}
else
{
return new URI( message.getHeader( Message.HEADER_FROM ) );
}
}
}
}