org.jgroups.protocols.BARRIER Maven / Gradle / Ivy
package org.jgroups.protocols;
import org.jgroups.Address;
import org.jgroups.Event;
import org.jgroups.Message;
import org.jgroups.annotations.MBean;
import org.jgroups.annotations.ManagedAttribute;
import org.jgroups.annotations.ManagedOperation;
import org.jgroups.annotations.Property;
import org.jgroups.conf.AttributeType;
import org.jgroups.stack.Protocol;
import org.jgroups.util.MessageBatch;
import org.jgroups.util.TimeScheduler;
import org.jgroups.util.Util;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.stream.Collectors;
/**
* All messages up the stack have to go through a barrier (read lock, RL). By default, the barrier is open.
* When a CLOSE_BARRIER event is received, we close the barrier by acquiring a write lock (WL). This succeeds when all
* previous messages have completed (by releasing their RLs). Thus, when we have acquired the WL, we know that there
* are no pending messages processed.
* When an OPEN_BARRIER event is received, we simply open the barrier again and let all messages pass in the up
* direction. This is done by releasing the WL.
* @author Bela Ban
*/
@MBean(description="Blocks all multicast threads when closed")
public class BARRIER extends Protocol {
@Property(description="Max time barrier can be closed. Default is 60000 ms",type=AttributeType.TIME)
protected long max_close_time=60000; // how long can the barrier stay closed (in ms) ? 0 means forever
@Property(description="Max time (in ms) to wait until the threads which passed the barrier before it was closed " +
"have completed. If this time elapses, an exception will be thrown and state transfer will fail. 0= wait forever",
type=AttributeType.TIME)
protected long flush_timeout=5000;
protected final Lock lock=new ReentrantLock();
protected final AtomicBoolean barrier_closed=new AtomicBoolean(false);
/** signals to waiting threads that the barrier is open again */
protected Condition no_pending_threads=lock.newCondition();
protected Map in_flight_threads=Util.createConcurrentMap();
protected volatile Future> barrier_opener_future;
protected TimeScheduler timer;
// mbrs from which unicasts should be accepted even if BARRIER is closed (PUNCH_HOLE adds, CLOSE_HOLE removes mbrs)
protected final Set holes=new HashSet<>();
// queues multicast messages or message batches (dest == null)
protected final Map mcast_queue=new ConcurrentHashMap<>();
// queues unicast messages or message batches (dest != null)
protected final Map ucast_queue=new ConcurrentHashMap<>();
protected TP transport;
protected static final Object NULL=new Object();
public long getMaxCloseTime() {return max_close_time;}
public BARRIER setMaxCloseTime(long m) {this.max_close_time=m; return this;}
public long getFlushTimeout() {return flush_timeout;}
public BARRIER setFlushTimeout(long f) {this.flush_timeout=f; return this;}
@ManagedAttribute(description="Shows whether the barrier closed")
public boolean isClosed() {
return barrier_closed.get();
}
@ManagedAttribute(description="Lists the members whose unicast messages are let through")
public String getHoles() {return holes.toString();}
public int getNumberOfInFlightThreads() {
return in_flight_threads.size();
}
@ManagedAttribute(description="Number of threads in flight",type=AttributeType.SCALAR)
public int getInFlightThreadsCount() {
return getNumberOfInFlightThreads();
}
@ManagedAttribute
public boolean isOpenerScheduled() {
return barrier_opener_future != null && !barrier_opener_future.isDone() && !barrier_opener_future.isCancelled();
}
public void init() throws Exception {
super.init();
transport=getTransport();
timer=transport.getTimer();
}
public void stop() {
super.stop();
openBarrier();
}
public void destroy() {
super.destroy();
openBarrier();
}
public Object down(Event evt) {
switch(evt.getType()) {
case Event.CLOSE_BARRIER:
try {
closeBarrier();
}
catch(TimeoutException e) {
throw new RuntimeException(e);
}
return null;
case Event.OPEN_BARRIER:
openBarrier();
return null;
case Event.PUNCH_HOLE:
Address mbr=evt.getArg();
holes.add(mbr);
return null;
case Event.CLOSE_HOLE:
mbr=evt.getArg();
holes.remove(mbr);
return null;
}
return down_prot.down(evt);
}
public Object up(Event evt) {
switch(evt.getType()) {
case Event.CLOSE_BARRIER:
try {
closeBarrier();
return null;
}
catch(TimeoutException e) {
throw new RuntimeException(e);
}
case Event.OPEN_BARRIER:
openBarrier();
return null;
}
return up_prot.up(evt);
}
public Object up(Message msg) {
// https://issues.redhat.com/browse/JGRP-1341: let unicast messages pass
if(msg.isFlagSet(Message.Flag.SKIP_BARRIER) || msg.getDest() != null
&& ((msg.isFlagSet(Message.Flag.OOB)) || holes.contains(msg.getSrc())))
return up_prot.up(msg);
if(barrier_closed.get()) {
final Map map=msg.getDest() == null? mcast_queue : ucast_queue;
map.put(msg.getSrc(), msg);
return null; // queue and drop the message
}
Thread current_thread=Thread.currentThread();
in_flight_threads.put(current_thread, NULL);
try {
return up_prot.up(msg);
}
finally {
unblock(current_thread);
}
}
public void up(MessageBatch batch) {
// let unicast message batches pass
if(batch.dest() != null
&& (batch.mode() == MessageBatch.Mode.OOB) || holes.contains(batch.sender())) {
up_prot.up(batch);
return;
}
if(barrier_closed.get()) {
final Map map=batch.dest() == null? mcast_queue : ucast_queue;
map.put(batch.sender(), batch.last().putHeader(transport.getId(),new TpHeader(batch.clusterName())));
return; // queue the last message of the batch and drop the batch
}
Thread current_thread=Thread.currentThread();
in_flight_threads.put(current_thread, NULL);
try {
up_prot.up(batch);
}
finally {
unblock(current_thread);
}
}
protected void unblock(final Thread current_thread) {
if(in_flight_threads.remove(current_thread) == NULL && in_flight_threads.isEmpty()) {
lock.lock();
try {
no_pending_threads.signalAll();
}
finally {
lock.unlock();
}
}
}
/** Close the barrier. Temporarily remove all threads which are waiting or blocked, re-insert them after the call */
public void closeBarrier() throws TimeoutException {
if(!barrier_closed.compareAndSet(false, true))
return; // barrier is already closed
long target_time=0, wait_time=0, start=System.currentTimeMillis();
in_flight_threads.remove(Thread.currentThread());
lock.lock();
try {
// wait until all pending threads have returned
while(barrier_closed.get() && !in_flight_threads.isEmpty()) {
if(target_time == 0 && flush_timeout > 0)
target_time=System.currentTimeMillis() + flush_timeout;
// should be the same
in_flight_threads.keySet().removeIf(thread -> !thread.isAlive() || thread.getState() == Thread.State.TERMINATED);
if(in_flight_threads.isEmpty())
break;
try {
if(flush_timeout <= 0)
no_pending_threads.await();
else {
if((wait_time=target_time - System.currentTimeMillis()) <= 0)
break;
no_pending_threads.await(wait_time, TimeUnit.MILLISECONDS);
}
}
catch(InterruptedException e) {
}
}
if(flush_timeout > 0 && !in_flight_threads.isEmpty()) {
long time=System.currentTimeMillis() - start;
throw new TimeoutException(local_addr + ": failed flushing pending threads in " + time +
" ms; threads:\n" + printInFlightThreads());
}
}
finally {
lock.unlock();
}
if(max_close_time > 0)
scheduleBarrierOpener();
}
@ManagedOperation(description="Opens the barrier. No-op if already open")
public void openBarrier() {
if(!barrier_closed.compareAndSet(true, false))
return; // barrier was already open
cancelBarrierOpener(); // cancels if running
synchronized(mcast_queue) {
flushQueue(mcast_queue);
}
synchronized(ucast_queue) {
flushQueue(ucast_queue);
}
}
@ManagedOperation(description="Lists the in-flight threads")
protected String printInFlightThreads() {
return in_flight_threads.keySet().stream().map(Object::toString).collect(Collectors.joining("\n"));
}
protected void flushQueue(final Map queue) {
if(queue.isEmpty())
return;
for(Message msg: queue.values()) {
boolean oob=msg.isFlagSet(Message.Flag.OOB);
transport.msg_processing_policy.process(msg, oob);
}
queue.clear();
}
protected void scheduleBarrierOpener() {
if(barrier_opener_future == null || barrier_opener_future.isDone()) {
barrier_opener_future=timer.schedule(this::openBarrier, max_close_time, TimeUnit.MILLISECONDS, false);
}
}
protected void cancelBarrierOpener() {
if(barrier_opener_future != null) {
barrier_opener_future.cancel(true);
barrier_opener_future=null;
}
}
}