org.jgroups.protocols.MERGE3 Maven / Gradle / Ivy
package org.jgroups.protocols;
import org.jgroups.*;
import org.jgroups.annotations.MBean;
import org.jgroups.annotations.ManagedAttribute;
import org.jgroups.annotations.ManagedOperation;
import org.jgroups.annotations.Property;
import org.jgroups.stack.Protocol;
import org.jgroups.util.*;
import org.jgroups.util.UUID;
import java.io.DataInput;
import java.io.DataOutput;
import java.util.*;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.Future;
import java.util.function.Supplier;
import java.util.stream.Collectors;
/**
* Protocol to discover subgroups; e.g., existing due to a network partition (that healed). Example: group
* {p,q,r,s,t,u,v,w} is split into 3 subgroups {p,q}, {r,s,t,u} and {v,w}. This protocol will eventually send
* a MERGE event with the views of each subgroup up the stack: {p,r,v}.
* Works as follows (https://issues.jboss.org/browse/JGRP-1387): every member periodically broadcasts its address (UUID),
* logical name, physical address and ViewID information. Other members collect this information and see if the ViewIds
* are different (indication of different subpartitions). If they are, the member with the lowest address (first in the
* sorted list of collected addresses) sends a MERGE event up the stack, which will be handled by GMS.
* The others do nothing.
* The advantage compared to MERGE2 is that there are no merge collisions caused by multiple merges going on.
* Also, the INFO traffic is spread out over max_interval, and every member sends its physical address with INFO, so
* we don't need to fetch the physical address first.
*
* @author Bela Ban, Nov 2011
* @since 3.1
*/
@MBean(description="Protocol to discover subgroups existing due to a network partition")
public class MERGE3 extends Protocol {
/* ----------------------------------------- Properties -------------------------------------------------- */
@Property(description="Minimum time in ms before sending an info message")
protected long min_interval=1000;
@Property(description="Interval (in milliseconds) when the next info " +
"message will be sent. A random value is picked from range [1..max_interval]")
protected long max_interval=10000;
@Property(description="The max number of merge participants to be involved in a merge. 0 sets this to unlimited.")
protected int max_participants_in_merge=100;
@Property(description="If true, only coordinators periodically check view consistency, otherwise everybody runs " +
"this task (https://issues.jboss.org/browse/JGRP-2092). Might get removed without notice.")
protected boolean only_coords_run_consistency_checker=false;
/* ---------------------------------------------- JMX -------------------------------------------------------- */
@Property(description="Interval (in ms) after which we check for view inconsistencies")
protected long check_interval;
@ManagedAttribute(description="Number of cached ViewIds")
public int getViews() {return views.size();}
/* --------------------------------------------- Fields ------------------------------------------------------ */
protected Address local_addr;
protected volatile View view;
protected TimeScheduler timer;
protected final InfoSender info_sender=new InfoSender();
protected Future> info_sender_future;
protected Future> view_consistency_checker;
// hashmap to keep track of view-id sent in INFO messages. Keys=senders, values = ViewId sent
protected final Map views=new HashMap<>();
protected final ResponseCollector view_rsps=new ResponseCollector<>();
protected boolean transport_supports_multicasting=true;
protected String cluster_name;
@ManagedAttribute(description="Whether or not the current member is the coordinator")
protected volatile boolean is_coord;
@ManagedAttribute(description="Number of times a MERGE event was sent up the stack")
protected int num_merge_events;
public int getNumMergeEvents() {return num_merge_events;}
@ManagedAttribute(description="Is the view consistency checker task running")
public synchronized boolean isViewConsistencyCheckerRunning() {
return view_consistency_checker != null && !view_consistency_checker.isDone();
}
@ManagedAttribute(description="Is the view consistency checker task running")
public boolean isMergeTaskRunning() {return isViewConsistencyCheckerRunning();}
@ManagedAttribute(description="Is the info sender task running")
public synchronized boolean isInfoSenderRunning() {
return info_sender_future != null && !info_sender_future.isDone();
}
@ManagedOperation(description="Lists the contents of the cached views")
public String dumpViews() {
StringBuilder sb=new StringBuilder();
for(Map.Entry> entry: convertViews().entrySet())
sb.append(entry.getKey()).append(": [")
.append(Util.printListWithDelimiter(entry.getValue(), ", ", Util.MAX_LIST_PRINT_SIZE)).append("]");
return sb.toString();
}
@ManagedOperation(description="Clears the views cache")
public void clearViews() {synchronized(views) {views.clear();}}
@ManagedOperation(description="Send INFO")
public void sendInfo() {
new InfoSender().run();
}
@ManagedOperation(description="Check views for inconsistencies")
public void checkInconsistencies() {new ViewConsistencyChecker().run();}
public void init() throws Exception {
timer=getTransport().getTimer();
if(timer == null)
throw new Exception("timer cannot be retrieved");
if(min_interval >= max_interval)
throw new IllegalArgumentException("min_interval (" + min_interval + ") has to be < max_interval (" + max_interval + ")");
if(check_interval == 0)
check_interval=computeCheckInterval();
else {
if(check_interval <= max_interval) {
log.warn("set check_interval=%d as it is <= max_interval", computeCheckInterval());
check_interval=computeCheckInterval();
}
}
if(max_interval <= 0)
throw new Exception("max_interval must be > 0");
transport_supports_multicasting=getTransport().supportsMulticasting();
}
public void stop() {
super.stop();
is_coord=false;
stopViewConsistencyChecker();
stopInfoSender();
}
public long getMinInterval() {
return min_interval;
}
public MERGE3 setMinInterval(long i) {
if(min_interval < 0 || min_interval >= max_interval)
throw new IllegalArgumentException("min_interval (" + min_interval + ") has to be < max_interval (" + max_interval + ")");
min_interval=i;
return this;
}
public long getMaxInterval() {
return max_interval;
}
public MERGE3 setMaxInterval(long val) {
if(val <= 0)
throw new IllegalArgumentException("max_interval must be > 0");
max_interval=val;
check_interval=computeCheckInterval();
return this;
}
protected long computeCheckInterval() {
return (long)(max_interval * 1.6);
}
protected boolean isMergeRunning() {
Object retval=up_prot.up(new Event(Event.IS_MERGE_IN_PROGRESS));
return retval instanceof Boolean && (Boolean)retval;
}
protected synchronized void startInfoSender() {
if(info_sender_future == null || info_sender_future.isDone())
info_sender_future=timer.scheduleWithDynamicInterval(info_sender, getTransport() instanceof TCP);
}
protected synchronized void stopInfoSender() {
if(info_sender_future != null) {
info_sender_future.cancel(true);
info_sender.stop();
info_sender_future=null;
}
}
protected synchronized void startViewConsistencyChecker() {
if(view_consistency_checker == null || view_consistency_checker.isDone())
view_consistency_checker=timer.scheduleWithDynamicInterval(new ViewConsistencyChecker());
}
protected synchronized void stopViewConsistencyChecker() {
if(view_consistency_checker != null) {
view_consistency_checker.cancel(true);
view_consistency_checker=null;
}
}
public Object down(Event evt) {
switch(evt.getType()) {
case Event.CONNECT:
case Event.CONNECT_USE_FLUSH:
case Event.CONNECT_WITH_STATE_TRANSFER:
case Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH:
cluster_name=evt.getArg();
break;
case Event.DISCONNECT:
stopViewConsistencyChecker();
stopInfoSender();
break;
case Event.TMP_VIEW:
stopViewConsistencyChecker();
stopInfoSender();
break;
case Event.VIEW_CHANGE:
stopViewConsistencyChecker(); // should already be stopped
stopInfoSender(); // should already be stopped
Object ret=down_prot.down(evt);
view=evt.getArg();
clearViews();
if(ergonomics && max_participants_in_merge > 0)
max_participants_in_merge=Math.max(100, view.size() / 3);
startInfoSender();
if(only_coords_run_consistency_checker == false)
startViewConsistencyChecker();
Address coord=view.getCoord();
if(Objects.equals(coord, local_addr)) {
is_coord=true;
if(only_coords_run_consistency_checker)
startViewConsistencyChecker(); // start task if we became coordinator (doesn't start if already running)
}
else {
// if we were coordinator, but are no longer, stop task. this happens e.g. when we merge and someone
// else becomes the new coordinator of the merged group
is_coord=false;
clearViews();
}
return ret;
case Event.SET_LOCAL_ADDRESS:
local_addr=evt.getArg();
break;
}
return down_prot.down(evt);
}
public Object up(Message msg) {
MergeHeader hdr=msg.getHeader(getId());
if(hdr == null)
return up_prot.up(msg);
Address sender=msg.getSrc();
switch(hdr.type) {
case INFO:
addInfo(sender, hdr.view_id, hdr.logical_name, hdr.physical_addr);
break;
case VIEW_REQ:
Message view_rsp=new Message(sender).setFlag(Message.Flag.INTERNAL)
.putHeader(getId(), MergeHeader.createViewResponse()).setBuffer(marshal(view));
down_prot.down(view_rsp);
break;
case VIEW_RSP:
View tmp_view=readView(msg.getRawBuffer(), msg.getOffset(), msg.getLength());
if(tmp_view != null)
view_rsps.add(sender, tmp_view);
break;
default:
log.error("Type %s not known", hdr.type);
}
return null;
}
public static List detectDifferentViews(Map map) {
final List ret=new ArrayList<>();
for(View view: map.values()) {
if(view == null)
continue;
ViewId vid=view.getViewId();
if(!Util.containsViewId(ret, vid))
ret.add(view);
}
return ret;
}
public static Buffer marshal(View view) {
return Util.streamableToBuffer(view);
}
protected View readView(byte[] buffer, int offset, int length) {
try {
return buffer != null? Util.streamableFromBuffer(View.class, buffer, offset, length) : null;
}
catch(Exception ex) {
log.error("%s: failed reading View from message: %s", local_addr, ex);
return null;
}
}
protected MergeHeader createInfo() {
PhysicalAddress physical_addr=local_addr != null?
(PhysicalAddress)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr)) : null;
return MergeHeader.createInfo(view.getViewId(), NameCache.get(local_addr), physical_addr);
}
/** Adds received INFO to views hashmap */
protected void addInfo(Address sender, ViewId view_id, String logical_name, PhysicalAddress physical_addr) {
if(logical_name != null && sender instanceof UUID)
NameCache.add(sender, logical_name);
if(physical_addr != null)
down(new Event(Event.ADD_PHYSICAL_ADDRESS, new Tuple<>(sender, physical_addr)));
synchronized(views) {
ViewId existing=views.get(sender);
if(existing == null || existing.compareTo(view_id) < 0)
views.put(sender, view_id);
}
}
protected Map> convertViews() {
Map> retval=new HashMap<>();
synchronized(views) {
for(Map.Entry entry : views.entrySet()) {
Address key=entry.getKey();
ViewId view_id=entry.getValue();
Set existing=retval.get(view_id);
if(existing == null)
retval.put(view_id, existing=new ConcurrentSkipListSet<>());
existing.add(key);
}
}
return retval;
}
protected boolean differentViewIds() {
ViewId first=null;
synchronized(views) {
for(ViewId view_id : views.values()) {
if(first == null)
first=view_id;
else if(!first.equals(view_id))
return true;
}
}
return false;
}
protected class InfoSender implements TimeScheduler.Task {
protected final long discovery_timeout=(max_interval + min_interval) /2;
protected Responses rsps;
public void run() {
if(view == null) {
log.warn("view is null, cannot send INFO message");
return;
}
MergeHeader hdr=createInfo();
// not needed; this is done below in ViewConsistencyChecker
// addInfo(local_addr, hdr.view_id, hdr.logical_name, hdr.physical_addr);
if(transport_supports_multicasting) { // mcast the discovery request to all but self
Message msg=new Message().setFlag(Message.Flag.INTERNAL).putHeader(getId(), hdr)
.setTransientFlag(Message.TransientFlag.DONT_LOOPBACK);
down_prot.down(msg);
return;
}
rsps=(Responses)down_prot.down(Event.FIND_MBRS_EVT);
rsps.waitFor(discovery_timeout); // return immediately if done
rsps.done();
if(rsps.isEmpty())
return;
log.trace("discovery protocol returned %d responses: %s", rsps.size(), rsps);
for(PingData rsp: rsps) {
Address target=rsp.getAddress();
if(local_addr.equals(target))
continue; // skip discovery request to self
Address dest=rsp.getPhysicalAddr();
if(dest == null) continue;
Message info=new Message(dest).setFlag(Message.Flag.INTERNAL).putHeader(getId(), hdr);
down_prot.down(info);
}
}
public void stop() {
if(rsps != null)
rsps.clear();
}
public long nextInterval() {
return Math.max(min_interval, Util.random(max_interval) + max_interval/2);
}
public String toString() {
return MERGE3.class.getSimpleName() + ": " + getClass().getSimpleName();
}
}
protected class ViewConsistencyChecker implements TimeScheduler.Task {
public void run() {
try {
MergeHeader hdr=createInfo();
addInfo(local_addr, hdr.view_id, hdr.logical_name, hdr.physical_addr);
if(!differentViewIds()) {
log.trace("%s: found no inconsistent views: %s", local_addr, dumpViews());
return;
}
_run();
}
finally {
clearViews();
}
}
protected void _run() {
SortedSet coords=new TreeSet<>();
// Only add view creators which *are* actually in the set as well, e.g.
// A|4: {A,B,C} and
// B|4: {D} would only add A to the coords list. A is a real coordinator
Map> converted_views=convertViews();
for(Map.Entry> entry: converted_views.entrySet()) {
Address coord=entry.getKey().getCreator();
Set members=entry.getValue();
if(only_coords_run_consistency_checker && members != null && members.contains(coord))
coords.add(coord);
else
coords.add(coord);
}
Address merge_leader=coords.isEmpty() ? null : coords.first();
if(merge_leader == null || local_addr == null || !merge_leader.equals(local_addr)) {
log.trace("I (%s) won't be the merge leader", local_addr);
return;
}
log.debug("I (%s) will be the merge leader", local_addr);
// add merge participants
coords.addAll(converted_views.values().stream().filter(set -> !set.isEmpty()).map(set -> set.iterator().next()).collect(Collectors.toList()));
if(coords.size() <= 1) {
log.trace("cancelling merge as we only have 1 coordinator: %s", coords);
return;
}
log.trace("merge participants are %s", coords);
if(max_participants_in_merge > 0 && coords.size() > max_participants_in_merge) {
int old_size=coords.size();
for(Iterator it=coords.iterator(); it.hasNext();) {
Address next=it.next();
if(next.equals(merge_leader))
continue;
if(coords.size() > max_participants_in_merge)
it.remove();
}
log.trace("%s: reduced %d coords to %d", local_addr, old_size, max_participants_in_merge);
}
// grab views from all members in coords
view_rsps.reset(coords);
for(Address target: coords) {
if(target.equals(local_addr)) {
if(view != null)
view_rsps.add(local_addr, view);
continue;
}
Message view_req=new Message(target).setFlag(Message.Flag.INTERNAL)
.putHeader(getId(), MergeHeader.createViewRequest());
down_prot.down(view_req);
}
view_rsps.waitForAllResponses(check_interval / 10);
Map results=view_rsps.getResults();
Map merge_views=new HashMap<>();
results.entrySet().stream().filter(entry -> entry.getValue() != null).forEach(entry -> merge_views.put(entry.getKey(), entry.getValue()));
view_rsps.reset();
if(merge_views.size() >= 2) {
Collection tmp_views=merge_views.values();
if(Util.allEqual(tmp_views)) {
log.trace("%s: all views are the same, suppressing sending MERGE up. Views: %s", local_addr, tmp_views);
return;
}
up_prot.up(new Event(Event.MERGE, merge_views));
num_merge_events++;
}
}
public long nextInterval() {
return check_interval;
}
public String toString() {
return MERGE3.class.getSimpleName() + ": " + getClass().getSimpleName();
}
}
public static class MergeHeader extends Header {
protected Type type=Type.INFO;
protected ViewId view_id;
protected String logical_name;
protected PhysicalAddress physical_addr;
public MergeHeader() {
}
public static MergeHeader createInfo(ViewId view_id, String logical_name, PhysicalAddress physical_addr) {
return new MergeHeader(Type.INFO, view_id, logical_name, physical_addr);
}
public static MergeHeader createViewRequest() {
return new MergeHeader(Type.VIEW_REQ, null, null, null);
}
public static MergeHeader createViewResponse() {
return new MergeHeader(Type.VIEW_RSP, null, null, null);
}
protected MergeHeader(Type type, ViewId view_id, String logical_name, PhysicalAddress physical_addr) {
this.type=type;
this.view_id=view_id;
this.logical_name=logical_name;
this.physical_addr=physical_addr;
}
public short getMagicId() {return 75;}
public Supplier extends Header> create() {return MergeHeader::new;}
public int serializedSize() {
int retval=Global.BYTE_SIZE; // for the type
retval+=Util.size(view_id);
retval+=Global.BYTE_SIZE; // presence byte for logical_name
if(logical_name != null)
retval+=logical_name.length() +2;
retval+=Util.size(physical_addr);
return retval;
}
public void writeTo(DataOutput outstream) throws Exception {
outstream.writeByte(type.ordinal()); // a byte if ok as we only have 3 types anyway
Util.writeViewId(view_id,outstream);
Bits.writeString(logical_name,outstream);
Util.writeAddress(physical_addr, outstream);
}
@SuppressWarnings("unchecked")
public void readFrom(DataInput instream) throws Exception {
type=Type.values()[instream.readByte()];
view_id=Util.readViewId(instream);
logical_name=Bits.readString(instream);
physical_addr=(PhysicalAddress)Util.readAddress(instream);
}
public String toString() {
StringBuilder sb=new StringBuilder();
sb.append(type + ": ");
if(view_id != null)
sb.append("view_id=" + view_id);
sb.append(", logical_name=" + logical_name + ", physical_addr=" + physical_addr);
return sb.toString();
}
protected enum Type {INFO, VIEW_REQ, VIEW_RSP}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy