All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jgroups.protocols.SEQUENCER Maven / Gradle / Ivy

There is a newer version: 9.1.7.Final
Show newest version

package org.jgroups.protocols;

import org.jgroups.*;
import org.jgroups.annotations.MBean;
import org.jgroups.annotations.ManagedAttribute;
import org.jgroups.annotations.ManagedOperation;
import org.jgroups.annotations.Property;
import org.jgroups.stack.Protocol;
import org.jgroups.util.*;

import java.io.DataInput;
import java.io.DataOutput;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.locks.Condition;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;


/**
 * Implementation of total order protocol using a sequencer.
 * Consult SEQUENCER.txt for details
 * @author Bela Ban
 */
@MBean(description="Implementation of total order protocol using a sequencer")
public class SEQUENCER extends Protocol {
    protected Address                           local_addr;
    protected volatile Address                  coord;
    protected volatile View                     view;
    @ManagedAttribute
    protected volatile boolean                  is_coord;
    protected final AtomicLong                  seqno=new AtomicLong(0);


    /** Maintains messages forwarded to the coord which which no ack has been received yet.
     *  Needs to be sorted so we resend them in the right order
     */
    protected final NavigableMap  forward_table=new ConcurrentSkipListMap<>();

    protected final Lock                        send_lock=new ReentrantLock();

    protected final Condition                   send_cond=send_lock.newCondition();

    /** When ack_mode is set, we need to wait for an ack for each forwarded message until we can send the next one */
    protected volatile boolean                  ack_mode=true;

    /** Set when we block all sending threads to resend all messages from forward_table */
    protected volatile boolean                  flushing=false;

    protected volatile boolean                  running=true;

    /** Keeps track of the threads sending messages */
    protected final AtomicInteger               in_flight_sends=new AtomicInteger(0);

    // Maintains received seqnos, so we can weed out dupes
    protected final ConcurrentMap> delivery_table=Util.createConcurrentMap();

    protected volatile Flusher                  flusher;

    /** Used for each resent message to wait until the message has been received */
    protected final Promise               ack_promise=new Promise<>();



    @Property(description="Size of the set to store received seqnos (for duplicate checking)")
    protected int  delivery_table_max_size=2000;

    @Property(description="Number of acks needed before going from ack-mode to normal mode. " +
      "0 disables this, which means that ack-mode is always on")
    protected int  threshold=10;

    @ManagedAttribute protected int  num_acks;
    @ManagedAttribute protected long forwarded_msgs;
    @ManagedAttribute protected long bcast_msgs;
    @ManagedAttribute protected long received_forwards;
    @ManagedAttribute protected long received_bcasts;
    @ManagedAttribute protected long delivered_bcasts;

    @ManagedAttribute
    public boolean isCoordinator() {return is_coord;}
    public Address getCoordinator() {return coord;}
    public Address getLocalAddress() {return local_addr;}

    @ManagedAttribute(description="Number of messages in the forward-table")
    public int getForwardTableSize() {return forward_table.size();}

    public void setThreshold(int new_threshold) {this.threshold=new_threshold;}

    public void setDeliveryTableMaxSize(int size) {delivery_table_max_size=size;}

    @ManagedOperation
    public void resetStats() {
        forwarded_msgs=bcast_msgs=received_forwards=received_bcasts=delivered_bcasts=0L;
    }


    public void start() throws Exception {
        super.start();
        running=true;
        ack_mode=true;
    }

    public void stop() {
        running=false;
        unblockAll();
        stopFlusher();
        super.stop();
    }

    public Object down(Event evt) {
        switch(evt.getType()) {
            case Event.VIEW_CHANGE:
                handleViewChange(evt.getArg());
                break;

            case Event.TMP_VIEW:
                handleTmpView(evt.getArg());
                break;

            case Event.SET_LOCAL_ADDRESS:
                local_addr=evt.getArg();
                break;
        }
        return down_prot.down(evt);
    }


    public Object down(Message msg) {
        if(msg.getDest() != null || msg.isFlagSet(Message.Flag.NO_TOTAL_ORDER) || msg.isFlagSet(Message.Flag.OOB))
            return down_prot.down(msg);

        if(msg.getSrc() == null)
            msg.setSrc(local_addr);

        if(flushing)
            block();

        // A seqno is not used to establish ordering, but only to weed out duplicates; next_seqno doesn't need
        // to increase monotonically, but only to be unique (https://issues.jboss.org/browse/JGRP-1461) !
        long next_seqno=seqno.incrementAndGet();
        in_flight_sends.incrementAndGet();
        try {
            SequencerHeader hdr=new SequencerHeader(is_coord? SequencerHeader.BCAST : SequencerHeader.WRAPPED_BCAST, next_seqno);
            msg.putHeader(this.id, hdr);
            if(log.isTraceEnabled())
                log.trace("[" + local_addr + "]: forwarding " + local_addr + "::" + seqno + " to coord " + coord);

            // We always forward messages to the coordinator, even if we're the coordinator. Having the coord
            // send its messages directly led to starvation of messages from other members. MPerf perf went up
            // from 20MB/sec/node to 50MB/sec/node with this change !
            forwardToCoord(next_seqno, msg);
        }
        catch(Exception ex) {
            log.error(Util.getMessage("FailedSendingMessage"), ex);
        }
        finally {
            in_flight_sends.decrementAndGet();
        }
        return null; // don't pass down
    }

    public Object up(Event evt) {
        switch(evt.getType()) {
            case Event.VIEW_CHANGE:
                Object retval=up_prot.up(evt);
                handleViewChange(evt.getArg());
                return retval;

            case Event.TMP_VIEW:
                handleTmpView(evt.getArg());
                break;
        }
        return up_prot.up(evt);
    }

    public Object up(Message msg) {
        SequencerHeader hdr;
        if(msg.isFlagSet(Message.Flag.NO_TOTAL_ORDER) || msg.isFlagSet(Message.Flag.OOB))
            return up_prot.up(msg);
        hdr=msg.getHeader(this.id);
        if(hdr == null)
            return up_prot.up(msg); // pass up

        switch(hdr.type) {
            case SequencerHeader.FORWARD:
            case SequencerHeader.FLUSH:
                if(!is_coord) {
                    if(log.isErrorEnabled())
                        log.error(local_addr + ": non-coord; dropping FORWARD request from " + msg.getSrc());
                    return null;
                }
                Address sender=msg.getSrc();
                if(view != null && !view.containsMember(sender)) {
                    if(log.isErrorEnabled())
                        log.error(local_addr + ": dropping FORWARD request from non-member " + sender +
                                    "; view=" + view);
                    return null;
                }

                broadcast(msg, true, msg.getSrc(), hdr.seqno, hdr.type == SequencerHeader.FLUSH); // do copy the message
                received_forwards++;
                break;

            case SequencerHeader.BCAST:
                deliver(msg, hdr);
                received_bcasts++;
                break;

            case SequencerHeader.WRAPPED_BCAST:
                unwrapAndDeliver(msg, hdr.flush_ack);  // unwrap the original message (in the payload) and deliver it
                received_bcasts++;
                break;
        }
        return null;
    }

    public void up(MessageBatch batch) {
        for(Message msg: batch) {
            if(msg.isFlagSet(Message.Flag.NO_TOTAL_ORDER) || msg.isFlagSet(Message.Flag.OOB) || msg.getHeader(id) == null)
                continue;
            batch.remove(msg);

            // simplistic implementation
            try {
                up(msg);
            }
            catch(Throwable t) {
                log.error(Util.getMessage("FailedPassingUpMessage"), t);
            }
        }

        if(!batch.isEmpty())
            up_prot.up(batch);
    }

    /* --------------------------------- Private Methods ----------------------------------- */

    protected void handleViewChange(View v) {
        List
mbrs=v.getMembers(); if(mbrs.isEmpty()) return; if(view == null || view.compareTo(v) < 0) view=v; else return; delivery_table.keySet().retainAll(mbrs); Address existing_coord=coord, new_coord=mbrs.get(0); boolean coord_changed=!Objects.equals(existing_coord, new_coord); if(coord_changed && new_coord != null) { stopFlusher(); startFlusher(new_coord); // needs to be done in the background, to prevent blocking if down() would block } } protected void flush(final Address new_coord) throws InterruptedException { // wait until all threads currently sending messages have returned (new threads after flushing=true) will block // flushing is set to true in startFlusher() while(flushing && running) { if(in_flight_sends.get() == 0) break; Thread.sleep(100); } send_lock.lockInterruptibly(); try { if(log.isTraceEnabled()) log.trace(local_addr + ": coord changed from " + coord + " to " + new_coord); coord=new_coord; is_coord=Objects.equals(local_addr, coord); flushMessagesInForwardTable(); } finally { if(log.isTraceEnabled()) log.trace(local_addr + ": flushing completed"); flushing=false; ack_mode=true; // go to ack-mode after flushing num_acks=0; send_cond.signalAll(); send_lock.unlock(); } } // If we're becoming coordinator, we need to handle TMP_VIEW as // an immediate change of view. See JGRP-1452. private void handleTmpView(View v) { Address new_coord=v.getCoord(); if(new_coord != null && !new_coord.equals(coord) && local_addr != null && local_addr.equals(new_coord)) handleViewChange(v); } /** * Sends all messages currently in forward_table to the new coordinator (changing the dest field). * This needs to be done, so the underlying reliable unicast protocol (e.g. UNICAST) adds these messages * to its retransmission mechanism
* Note that we need to resend the messages in order of their seqnos ! We also need to prevent other message * from being inserted until we're done, that's why there's synchronization.
* Access to the forward_table doesn't need to be synchronized as there won't be any insertions during flushing * (all down-threads are blocked) */ protected void flushMessagesInForwardTable() { if(is_coord) { for(Map.Entry entry: forward_table.entrySet()) { Long key=entry.getKey(); Message msg=entry.getValue(); Buffer buf; try { buf=Util.streamableToBuffer(msg); } catch(Exception e) { log.error(Util.getMessage("FlushingBroadcastingFailed"), e); continue; } SequencerHeader hdr=new SequencerHeader(SequencerHeader.WRAPPED_BCAST, key); Message forward_msg=new Message(null, buf).putHeader(this.id, hdr); if(log.isTraceEnabled()) log.trace(local_addr + ": flushing (broadcasting) " + local_addr + "::" + key); down_prot.down(forward_msg); } return; } // for forwarded messages, we need to receive the forwarded message from the coordinator, to prevent this case: // - V1={A,B,C} // - A crashes // - C installs V2={B,C} // - C forwards messages 3 and 4 to B (the new coord) // - B drops 3 because its view is still V1 // - B installs V2 // - B receives message 4 and broadcasts it // ==> C's message 4 is delivered *before* message 3 ! // ==> By resending 3 until it is received, then resending 4 until it is received, we make sure this won't happen // (see https://issues.jboss.org/browse/JGRP-1449) while(flushing && running && !forward_table.isEmpty()) { Map.Entry entry=forward_table.firstEntry(); final Long key=entry.getKey(); Message msg=entry.getValue(); Buffer buf; try { buf=Util.streamableToBuffer(msg); } catch(Exception e) { log.error(Util.getMessage("FlushingBroadcastingFailed"), e); continue; } while(flushing && running && !forward_table.isEmpty()) { SequencerHeader hdr=new SequencerHeader(SequencerHeader.FLUSH, key); Message forward_msg=new Message(coord, buf).putHeader(this.id,hdr).setFlag(Message.Flag.DONT_BUNDLE); if(log.isTraceEnabled()) log.trace(local_addr + ": flushing (forwarding) " + local_addr + "::" + key + " to coord " + coord); ack_promise.reset(); down_prot.down(forward_msg); Long ack=ack_promise.getResult(500); if((Objects.equals(ack, key)) || !forward_table.containsKey(key)) break; } } } protected void forwardToCoord(long seqno, Message msg) { if(is_coord) { forward(msg, seqno, false); return; } if(!running || flushing) { forward_table.put(seqno, msg); return; } if(!ack_mode) { forward_table.put(seqno, msg); forward(msg, seqno, false); return; } send_lock.lock(); try { forward_table.put(seqno, msg); while(running && !flushing) { ack_promise.reset(); forward(msg, seqno, true); if(!ack_mode || !running || flushing) break; Long ack=ack_promise.getResult(500); if((Objects.equals(ack, seqno)) || !forward_table.containsKey(seqno)) break; } } finally { send_lock.unlock(); } } protected void forward(final Message msg, long seqno, boolean flush) { Address target=coord; if(target == null) return; byte type=flush? SequencerHeader.FLUSH : SequencerHeader.FORWARD; try { SequencerHeader hdr=new SequencerHeader(type, seqno); Message forward_msg=new Message(target, Util.streamableToBuffer(msg)).putHeader(this.id,hdr); down_prot.down(forward_msg); forwarded_msgs++; } catch(Exception ex) { log.error(Util.getMessage("FailedForwardingMessageTo") + msg.getDest(), ex); } } protected void broadcast(final Message msg, boolean copy, Address original_sender, long seqno, boolean resend) { Message bcast_msg=null; if(!copy) { bcast_msg=msg; // no need to add a header, message already has one } else { SequencerHeader new_hdr=new SequencerHeader(SequencerHeader.WRAPPED_BCAST, seqno); bcast_msg=new Message(null, msg.getRawBuffer(), msg.getOffset(), msg.getLength()).putHeader(this.id, new_hdr); if(resend) { new_hdr.flush_ack=true; bcast_msg.setFlag(Message.Flag.DONT_BUNDLE); } } if(log.isTraceEnabled()) log.trace(local_addr + ": broadcasting " + original_sender + "::" + seqno); down_prot.down(bcast_msg); bcast_msgs++; } /** * Unmarshal the original message (in the payload) and then pass it up (unless already delivered) * @param msg */ protected void unwrapAndDeliver(final Message msg, boolean flush_ack) { try { Message msg_to_deliver=Util.streamableFromBuffer(Message.class, msg.getRawBuffer(), msg.getOffset(), msg.getLength()); SequencerHeader hdr=msg_to_deliver.getHeader(this.id); if(flush_ack) hdr.flush_ack=true; deliver(msg_to_deliver, hdr); } catch(Exception ex) { log.error(Util.getMessage("FailureUnmarshallingBuffer"), ex); } } protected void deliver(Message msg, SequencerHeader hdr) { Address sender=msg.getSrc(); if(sender == null) { if(log.isErrorEnabled()) log.error(local_addr + ": sender is null, cannot deliver " + "::" + hdr.getSeqno()); return; } long msg_seqno=hdr.getSeqno(); if(sender.equals(local_addr)) { forward_table.remove(msg_seqno); if(hdr.flush_ack) { ack_promise.setResult(msg_seqno); if(ack_mode && !flushing && threshold > 0 && ++num_acks >= threshold) { ack_mode=false; num_acks=0; } } } if(!canDeliver(sender, msg_seqno)) { if(log.isWarnEnabled()) log.warn(local_addr + ": dropped duplicate message " + sender + "::" + msg_seqno); return; } if(log.isTraceEnabled()) log.trace(local_addr + ": delivering " + sender + "::" + msg_seqno); up_prot.up(msg); delivered_bcasts++; } /** * Checks if seqno has already been received from sender. This weeds out duplicates. * Note that this method is never called concurrently for the same sender, as the sender in NAKACK will always be * the coordinator. */ protected boolean canDeliver(Address sender, long seqno) { BoundedHashMap seqno_set=delivery_table.get(sender); if(seqno_set == null) { seqno_set=new BoundedHashMap<>(delivery_table_max_size); BoundedHashMap existing=delivery_table.put(sender,seqno_set); if(existing != null) seqno_set=existing; } return seqno_set.add(seqno, seqno); } protected void block() { send_lock.lock(); try { while(flushing && running) { try { send_cond.await(); } catch(InterruptedException e) { } } } finally { send_lock.unlock(); } } protected void unblockAll() { flushing=false; send_lock.lock(); try { send_cond.signalAll(); ack_promise.setResult(null); } finally { send_lock.unlock(); } } protected synchronized void startFlusher(final Address new_coord) { if(flusher == null || !flusher.isAlive()) { if(log.isTraceEnabled()) log.trace(local_addr + ": flushing started"); // causes subsequent message sends (broadcasts and forwards) to block (https://issues.jboss.org/browse/JGRP-1495) flushing=true; flusher=new Flusher(new_coord); flusher.setName("Flusher"); flusher.start(); } } protected void stopFlusher() { flushing=false; Thread tmp=flusher; while(tmp != null && tmp.isAlive()) { tmp.interrupt(); ack_promise.setResult(null); try { tmp.join(); } catch(InterruptedException e) { } } } /* ----------------------------- End of Private Methods -------------------------------- */ protected class Flusher extends Thread { protected final Address new_coord; public Flusher(Address new_coord) { this.new_coord=new_coord; } public void run() { try { flush(new_coord); } catch (InterruptedException e) { } } } public static class SequencerHeader extends Header { protected static final byte FORWARD = 1; protected static final byte FLUSH = 2; protected static final byte BCAST = 3; protected static final byte WRAPPED_BCAST = 4; protected byte type=-1; protected long seqno=-1; protected boolean flush_ack; public SequencerHeader() { } public SequencerHeader(byte type) { this.type=type; } public SequencerHeader(byte type, long seqno) { this(type); this.seqno=seqno; } public short getMagicId() {return 61;} public long getSeqno() { return seqno; } public Supplier create() {return SequencerHeader::new;} public String toString() { StringBuilder sb=new StringBuilder(64); sb.append(printType()); if(seqno >= 0) sb.append(" seqno=" + seqno); if(flush_ack) sb.append(" (flush_ack)"); return sb.toString(); } protected final String printType() { switch(type) { case FORWARD: return "FORWARD"; case FLUSH: return "FLUSH"; case BCAST: return "BCAST"; case WRAPPED_BCAST: return "WRAPPED_BCAST"; default: return "n/a"; } } public void writeTo(DataOutput out) throws Exception { out.writeByte(type); Bits.writeLong(seqno,out); out.writeBoolean(flush_ack); } public void readFrom(DataInput in) throws Exception { type=in.readByte(); seqno=Bits.readLong(in); flush_ack=in.readBoolean(); } public int serializedSize() { return Global.BYTE_SIZE + Bits.size(seqno) + Global.BYTE_SIZE; // type + seqno + flush_ack } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy