org.jgroups.protocols.Discovery Maven / Gradle / Ivy
package org.jgroups.protocols;
import org.jgroups.*;
import org.jgroups.annotations.MBean;
import org.jgroups.annotations.ManagedAttribute;
import org.jgroups.annotations.ManagedOperation;
import org.jgroups.annotations.Property;
import org.jgroups.conf.ConfiguratorFactory;
import org.jgroups.stack.IpAddress;
import org.jgroups.stack.Protocol;
import org.jgroups.util.*;
import org.jgroups.util.UUID;
import java.io.FileOutputStream;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.*;
import java.util.concurrent.TimeUnit;
/**
* The Discovery protocol retrieves the initial membership (used by GMS and MERGE3) by sending discovery requests.
* We do this in subclasses of Discovery, e.g. by mcasting a discovery request ({@link PING}) or, if gossiping is enabled,
* by contacting the GossipRouter ({@link TCPGOSSIP}).
* The responses should allow us to determine the coordinator which we have to contact, e.g. in case we want to join
* the group, or to see if we have diverging views in case of MERGE2.
* When we are a server (after having received the BECOME_SERVER event), we'll respond to discovery requests with
* a discovery response.
*
* @author Bela Ban
*/
@MBean
public abstract class Discovery extends Protocol {
/* ----------------------------------------- Properties -------------------------------------------------- */
@Property(description="Return from the discovery phase as soon as we have 1 coordinator response")
protected boolean break_on_coord_rsp=true;
@Property(description="Whether or not to return the entire logical-physical address cache mappings on a " +
"discovery request, or not.")
protected boolean return_entire_cache;
@Property(description="If greater than 0, we'll wait a random number of milliseconds in range [0..stagger_timeout] " +
"before sending a discovery response. This prevents traffic spikes in large clusters when everyone sends their " +
"discovery response at the same time")
protected long stagger_timeout;
@Property(description="If a persistent disk cache (PDC) is present, combine the discovery results with the " +
"contents of the disk cache before returning the results")
protected boolean use_disk_cache;
@Property(description="Max size of the member list shipped with a discovery request. If we have more, the " +
"mbrs field in the discovery request header is nulled and members return the entire membership, " +
"not individual members")
protected int max_members_in_discovery_request=500;
@Property(description="Expiry time of discovery responses in ms")
protected long discovery_rsp_expiry_time=60000;
@Property(description="If true then the discovery is done on a separate timer thread. Should be set to true when " +
"discovery is blocking and/or takes more than a few milliseconds")
protected boolean async_discovery;
@Property(description="If enabled, use a separate thread for every discovery request. Can be used with or without " +
"async_discovery")
protected boolean async_discovery_use_separate_thread_per_request;
@Property(description="When a new node joins, and we have a static discovery protocol (TCPPING), then send the " +
"contents of the discovery cache to new and existing members if true (and we're the coord). Addresses JGRP-1903")
protected boolean send_cache_on_join;
@Property(description="The max rank of this member to respond to discovery requests, e.g. if " +
"max_rank_to_reply=2 in {A,B,C,D,E}, only A (rank 1) and B (rank 2) will reply. A value <= 0 means " +
"everybody will reply. This attribute is ignored if TP.use_ip_addrs is false.")
protected int max_rank_to_reply;
/* --------------------------------------------- JMX ------------------------------------------------------ */
@ManagedAttribute(description="Total number of discovery requests sent ")
protected int num_discovery_requests;
/* --------------------------------------------- Fields ------------------------------------------------------ */
protected volatile boolean is_server;
protected volatile boolean is_leaving;
protected TimeScheduler timer;
protected volatile View view;
@ManagedAttribute(description="Whether this member is the current coordinator")
protected volatile boolean is_coord;
protected volatile Address local_addr;
protected volatile Address current_coord;
protected String cluster_name;
protected final Map ping_responses=new HashMap<>();
@ManagedAttribute(description="Whether the transport supports multicasting")
protected boolean transport_supports_multicasting=true;
protected boolean use_ip_addrs; // caches TP.use_ip_addrs
@ManagedAttribute(description="True if sending a message can block at the transport level")
protected boolean sends_can_block=true;
protected static final byte[] WHITESPACE=" \t".getBytes();
public void init() throws Exception {
TP tp=getTransport();
timer=tp.getTimer();
if(timer == null)
throw new Exception("timer cannot be retrieved from protocol stack");
if(stagger_timeout < 0)
throw new IllegalArgumentException("stagger_timeout cannot be negative");
transport_supports_multicasting=tp.supportsMulticasting();
sends_can_block=getTransport() instanceof TCP; // UDP and TCP_NIO2 won't block
use_ip_addrs=tp.getUseIpAddresses();
}
public abstract boolean isDynamic();
public void handleDisconnect() {
}
public void handleConnect() {
}
public void discoveryRequestReceived(Address sender, String logical_name, PhysicalAddress physical_addr) {
}
public int getNumberOfDiscoveryRequestsSent() {return num_discovery_requests;}
public boolean breakOnCoordResponse() {return break_on_coord_rsp;}
public Discovery breakOnCoordResponse(boolean flag) {break_on_coord_rsp=flag; return this;}
public boolean returnEntireCache() {return return_entire_cache;}
public Discovery returnEntireCache(boolean flag) {return_entire_cache=flag; return this;}
public long staggerTimeout() {return stagger_timeout;}
public Discovery staggerTimeout(long timeout) {stagger_timeout=timeout; return this;}
public boolean useDiskCache() {return use_disk_cache;}
public Discovery useDiskCache(boolean flag) {use_disk_cache=flag; return this;}
public Discovery discoveryRspExpiryTime(long t) {this.discovery_rsp_expiry_time=t; return this;}
@ManagedAttribute
public String getView() {return view != null? view.getViewId().toString() : "null";}
public ViewId getViewId() {
return view != null? view.getViewId() : null;
}
@ManagedAttribute(description="The address of the current coordinator")
public String getCurrentCoord() {return current_coord != null? current_coord.toString() : "n/a";}
protected boolean isMergeRunning() {
Object retval=up_prot.up(new Event(Event.IS_MERGE_IN_PROGRESS));
return retval instanceof Boolean && (Boolean)retval;
}
@ManagedOperation(description="Sends information about my cache to everyone but myself")
public void sendCacheInformation() {
List current_members=new ArrayList<>(view.getMembers());
disseminateDiscoveryInformation(current_members, null, current_members);
}
public List providedUpServices() {
return Arrays.asList(Event.FIND_INITIAL_MBRS, Event.GET_PHYSICAL_ADDRESS, Event.FIND_MBRS);
}
public void resetStats() {
super.resetStats();
num_discovery_requests=0;
}
public void start() throws Exception {
super.start();
}
public void stop() {
is_server=false;
}
/**
* Fetches information (e.g. physical address, logical name) for the given member addresses. Needs to add responses
* to the {@link org.jgroups.util.Responses} object. If {@link #async_discovery} is true, this method will be called
* in a separate thread, otherwise the caller's thread will be used.
* @param members A list of logical addresses (typically {@link org.jgroups.util.UUID}s). If null, then information
* for all members is fetched
* @param initial_discovery Set to true if this is for the initial membership discovery. Some protocols (e.g.
* file based ones) may return only the information for the coordinator(s).
* @param responses The list to which responses should be added
*/
protected abstract void findMembers(List members, boolean initial_discovery, Responses responses);
public Responses findMembers(final List members, final boolean initial_discovery, boolean async) {
num_discovery_requests++;
int num_expected=members != null? members.size() : 0;
int capacity=members != null? members.size() : 16;
final Responses rsps=new Responses(num_expected, initial_discovery && break_on_coord_rsp, capacity);
synchronized(ping_responses) {
ping_responses.put(System.nanoTime(), rsps);
}
if(async || async_discovery)
timer.execute(() -> findMembers(members, initial_discovery, rsps));
else
findMembers(members, initial_discovery, rsps);
weedOutCompletedDiscoveryResponses();
return rsps;
}
@ManagedOperation(description="Runs the discovery protocol to find initial members")
public String findInitialMembersAsString() {
Responses rsps=findMembers(null, false, false);
if(!rsps.isDone())
rsps.waitFor(300);
if(rsps.isEmpty()) return "";
StringBuilder sb=new StringBuilder();
for(PingData rsp: rsps)
sb.append(rsp).append("\n");
return sb.toString();
}
@ManagedOperation(description="Reads logical-physical address mappings and logical name mappings from a " +
"file (or URL) and adds them to the local caches")
public void addToCache(String filename) throws Exception {
InputStream in=ConfiguratorFactory.getConfigStream(filename);
List list=read(in);
if(list != null)
for(PingData data: list)
addDiscoveryResponseToCaches(data.getAddress(), data.getLogicalName(), data.getPhysicalAddr());
}
@ManagedOperation(description="Reads data from local caches and dumps them to a file")
public void dumpCache(String output_filename) throws Exception {
Map cache_contents=
(Map)down_prot.down(new Event(Event.GET_LOGICAL_PHYSICAL_MAPPINGS, false));
List list=new ArrayList<>(cache_contents.size());
for(Map.Entry entry: cache_contents.entrySet()) {
Address addr=entry.getKey();
PhysicalAddress phys_addr=entry.getValue();
PingData data=new PingData(addr, true, NameCache.get(addr), phys_addr).coord(addr.equals(local_addr));
list.add(data);
}
OutputStream out=new FileOutputStream(output_filename);
write(list, out);
}
public Object up(Event evt) {
switch(evt.getType()) {
case Event.FIND_MBRS:
return findMembers(evt.getArg(), false, true); // this is done asynchronously
}
return up_prot.up(evt);
}
public Object up(Message msg) {
PingHeader hdr=msg.getHeader(this.id);
if(hdr == null)
return up_prot.up(msg);
if(is_leaving)
return null; // prevents merging back a leaving member (https://issues.jboss.org/browse/JGRP-1336)
PingData data=readPingData(msg.getRawBuffer(), msg.getOffset(), msg.getLength());
Address logical_addr=data != null? data.getAddress() : msg.src();
switch(hdr.type) {
case PingHeader.GET_MBRS_REQ: // return Rsp(local_addr, coord)
if(cluster_name == null || hdr.cluster_name == null) {
log.warn("cluster_name (%s) or cluster_name of header (%s) is null; passing up discovery " +
"request from %s, but this should not be the case", cluster_name, hdr.cluster_name, msg.src());
}
else {
if(!cluster_name.equals(hdr.cluster_name)) {
log.warn("%s: discarding discovery request for cluster '%s' from %s; " +
"our cluster name is '%s'. Please separate your clusters properly",
logical_addr, hdr.cluster_name, msg.src(), cluster_name);
return null;
}
}
// add physical address and logical name of the discovery sender (if available) to the cache
if(data != null) { // null if use_ip_addrs is true
addDiscoveryResponseToCaches(logical_addr, data.getLogicalName(), data.getPhysicalAddr());
discoveryRequestReceived(msg.getSrc(), data.getLogicalName(), data.getPhysicalAddr());
addResponse(data, false);
}
if(return_entire_cache) {
Map cache=(Map)down(new Event(Event.GET_LOGICAL_PHYSICAL_MAPPINGS));
if(cache != null) {
for(Map.Entry entry: cache.entrySet()) {
Address addr=entry.getKey();
// JGRP-1492: only return our own address, and addresses in view.
if(addr.equals(local_addr) || view.containsMember(addr)) {
PhysicalAddress physical_addr=entry.getValue();
sendDiscoveryResponse(addr, physical_addr, NameCache.get(addr), msg.getSrc(), is_coord);
}
}
}
return null;
}
// Only send a response if hdr.mbrs is not empty and contains myself. Otherwise always send my info
Collection extends Address> mbrs=data != null? data.mbrs() : null;
boolean drop_because_of_rank=use_ip_addrs && max_rank_to_reply > 0 && hdr.initialDiscovery() && Util.getRank(view, local_addr) > max_rank_to_reply;
if(drop_because_of_rank || (mbrs != null && !mbrs.contains(local_addr)))
return null;
PhysicalAddress physical_addr=(PhysicalAddress)down(new Event(Event.GET_PHYSICAL_ADDRESS, local_addr));
sendDiscoveryResponse(local_addr, physical_addr, NameCache.get(local_addr), msg.getSrc(), is_coord);
return null;
case PingHeader.GET_MBRS_RSP:
// add physical address (if available) to transport's cache
if(data != null) {
log.trace("%s: received GET_MBRS_RSP from %s: %s", local_addr, msg.src(), data);
handleDiscoveryResponse(data, msg.src());
}
return null;
default:
log.warn("got PING header with unknown type %d", hdr.type);
return null;
}
}
protected void handleDiscoveryResponse(PingData data, Address sender) {
// add physical address (if available) to transport's cache
Address logical_addr=data.getAddress() != null? data.getAddress() : sender;
addDiscoveryResponseToCaches(logical_addr, data.getLogicalName(), data.getPhysicalAddr());
boolean overwrite=Objects.equals(logical_addr, sender);
addResponse(data, overwrite);
}
@SuppressWarnings("unchecked")
public Object down(Event evt) {
switch(evt.getType()) {
case Event.FIND_INITIAL_MBRS: // sent by GMS layer
return findMembers(null, true, false); // triggered by JOIN process (ClientGmsImpl)
case Event.FIND_MBRS:
return findMembers(evt.getArg(), false, false); // triggered by MERGE3
case Event.VIEW_CHANGE:
View old_view=view;
view=evt.getArg();
current_coord=view.getCoord();
is_coord=current_coord != null && local_addr != null && current_coord.equals(local_addr);
Object retval=down_prot.down(evt);
if(send_cache_on_join && !isDynamic() && is_coord) {
List curr_mbrs, left_mbrs, new_mbrs;
curr_mbrs=new ArrayList<>(view.getMembers());
left_mbrs=View.leftMembers(old_view, view);
new_mbrs=View.newMembers(old_view, view);
startCacheDissemination(curr_mbrs, left_mbrs, new_mbrs); // separate task
}
return retval;
case Event.BECOME_SERVER: // called after client has joined and is fully working group member
down_prot.down(evt);
is_server=true;
return null;
case Event.SET_LOCAL_ADDRESS:
local_addr=evt.getArg();
return down_prot.down(evt);
case Event.CONNECT:
case Event.CONNECT_WITH_STATE_TRANSFER:
case Event.CONNECT_USE_FLUSH:
case Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH:
is_leaving=false;
cluster_name=evt.getArg();
Object ret=down_prot.down(evt);
handleConnect();
return ret;
case Event.DISCONNECT:
is_leaving=true;
handleDisconnect();
return down_prot.down(evt);
default:
return down_prot.down(evt); // Pass on to the layer below us
}
}
/* -------------------------- Private methods ---------------------------- */
protected List read(InputStream in) {
List retval=null;
try {
while(true) {
try {
String name_str=Util.readToken(in);
String uuid_str=Util.readToken(in);
String addr_str=Util.readToken(in);
String coord_str=Util.readToken(in);
if(name_str == null || uuid_str == null || addr_str == null || coord_str == null)
break;
UUID uuid=null;
try {
long tmp=Long.valueOf(uuid_str);
uuid=new UUID(0, tmp);
}
catch(Throwable t) {
uuid=UUID.fromString(uuid_str);
}
PhysicalAddress phys_addr=new IpAddress(addr_str);
boolean is_coordinator=coord_str.trim().equals("T") || coord_str.trim().equals("t");
if(retval == null)
retval=new ArrayList<>();
retval.add(new PingData(uuid, true, name_str, phys_addr).coord(is_coordinator));
}
catch(Throwable t) {
log.error(Util.getMessage("FailedReadingLineOfInputStream"), t);
}
}
return retval;
}
finally {
Util.close(in);
}
}
protected void write(List list, OutputStream out) throws Exception {
try {
for(PingData data: list) {
String logical_name=data.getLogicalName();
Address addr=data.getAddress();
PhysicalAddress phys_addr=data.getPhysicalAddr();
if(logical_name == null || addr == null || phys_addr == null)
continue;
out.write(logical_name.getBytes());
out.write(WHITESPACE);
out.write(addressAsString(addr).getBytes());
out.write(WHITESPACE);
out.write(phys_addr.toString().getBytes());
out.write(WHITESPACE);
out.write(data.isCoord()? String.format("T%n").getBytes() : String.format("F%n").getBytes());
}
}
finally {
Util.close(out);
}
}
protected void addResponse(PingData rsp, boolean overwrite) {
synchronized(ping_responses) {
for(Iterator> it=ping_responses.entrySet().iterator(); it.hasNext();) {
Map.Entry entry=it.next();
long timestamp=entry.getKey();
Responses rsps=entry.getValue();
rsps.addResponse(rsp, overwrite);
if(rsps.isDone() || TimeUnit.MILLISECONDS.convert(System.nanoTime() - timestamp, TimeUnit.NANOSECONDS) > discovery_rsp_expiry_time) {
it.remove();
rsps.done();
}
}
}
}
/** Removes responses which are done or whose timeout has expired (in the latter case, an expired response is marked as done) */
@ManagedOperation(description="Removes expired or completed responses")
public void weedOutCompletedDiscoveryResponses() {
synchronized(ping_responses) {
for(Iterator> it=ping_responses.entrySet().iterator(); it.hasNext();) {
Map.Entry entry=it.next();
long timestamp=entry.getKey();
Responses rsps=entry.getValue();
if(rsps.isDone() || TimeUnit.MILLISECONDS.convert(System.nanoTime() - timestamp, TimeUnit.NANOSECONDS) > discovery_rsp_expiry_time) {
it.remove();
rsps.done();
}
}
}
}
protected boolean addDiscoveryResponseToCaches(Address mbr, String logical_name, PhysicalAddress physical_addr) {
if(mbr == null)
return false;
if(logical_name != null)
NameCache.add(mbr, logical_name);
if(physical_addr != null)
return (Boolean)down(new Event(Event.ADD_PHYSICAL_ADDRESS, new Tuple<>(mbr, physical_addr)));
return false;
}
protected synchronized void startCacheDissemination(List curr_mbrs, List left_mbrs, List new_mbrs) {
timer.execute(new DiscoveryCacheDisseminationTask(curr_mbrs,left_mbrs,new_mbrs), sends_can_block);
}
/**
* Creates a byte[] representation of the PingData, but DISCARDING the view it contains.
* @param data the PingData instance to serialize.
* @return
*/
protected byte[] serializeWithoutView(PingData data) {
final PingData clone = new PingData(data.getAddress(), data.isServer(), data.getLogicalName(), data.getPhysicalAddr()).coord(data.isCoord());
try {
return Util.streamableToByteBuffer(clone);
}
catch(Exception e) {
log.error(Util.getMessage("ErrorSerializingPingData"), e);
return null;
}
}
protected static PingData deserialize(final byte[] data) throws Exception {
return (PingData)Util.streamableFromByteBuffer(PingData.class, data);
}
public static Buffer marshal(PingData data) {
return Util.streamableToBuffer(data);
}
protected PingData readPingData(byte[] buffer, int offset, int length) {
try {
return buffer != null? Util.streamableFromBuffer(PingData.class, buffer, offset, length) : null;
}
catch(Exception ex) {
log.error("%s: failed reading PingData from message: %s", local_addr, ex);
return null;
}
}
protected void sendDiscoveryResponse(Address logical_addr, PhysicalAddress physical_addr,
String logical_name, final Address sender, boolean coord) {
final PingData data=new PingData(logical_addr, is_server, logical_name, physical_addr).coord(coord);
final Message rsp_msg=new Message(sender).setFlag(Message.Flag.INTERNAL, Message.Flag.OOB, Message.Flag.DONT_BUNDLE)
.putHeader(this.id, new PingHeader(PingHeader.GET_MBRS_RSP)).setBuffer(marshal(data));
if(stagger_timeout > 0) {
int view_size=view != null? view.size() : 10;
int rank=Util.getRank(view, local_addr); // returns 0 if view or local_addr are null
long sleep_time=rank == 0? Util.random(stagger_timeout)
: stagger_timeout * rank / view_size - (stagger_timeout / view_size);
timer.schedule(() -> {
log.trace("%s: received GET_MBRS_REQ from %s, sending staggered response %s", local_addr, sender, data);
down_prot.down(rsp_msg);
}, sleep_time, TimeUnit.MILLISECONDS, sends_can_block);
return;
}
log.trace("%s: received GET_MBRS_REQ from %s, sending response %s", local_addr, sender, data);
down_prot.down(rsp_msg);
}
protected static String addressAsString(Address address) {
if(address == null)
return "";
if(address instanceof UUID)
return ((UUID) address).toStringLong();
return address.toString();
}
protected boolean isCoord(Address member) {return member.equals(current_coord);}
/** Disseminates cache information (UUID/IP adddress/port/name) to the given members
* @param current_mbrs The current members. Guaranteed to be non-null. This is a copy and can be modified.
* @param left_mbrs The members which left. These are excluded from dissemination. Can be null if no members left
* @param new_mbrs The new members that we need to disseminate the information to. Will be all members if null.
*/
protected void disseminateDiscoveryInformation(List current_mbrs, List left_mbrs, List new_mbrs) {
if(new_mbrs == null || new_mbrs.isEmpty())
return;
if(local_addr != null)
current_mbrs.remove(local_addr);
if(left_mbrs != null)
current_mbrs.removeAll(left_mbrs);
// 1. Send information about to new_mbrs
Set info=new HashSet<>(current_mbrs);
for(Address addr : info) {
PhysicalAddress phys_addr=(PhysicalAddress)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESS,addr));
if(phys_addr == null)
continue;
boolean is_coordinator=isCoord(addr);
for(Address target : new_mbrs)
sendDiscoveryResponse(addr,phys_addr,NameCache.get(addr),target,is_coordinator);
}
// 2. Send information about new_mbrs to
Set targets=new HashSet<>(current_mbrs);
targets.removeAll(new_mbrs);
if(!targets.isEmpty()) {
for(Address addr : new_mbrs) {
PhysicalAddress phys_addr=(PhysicalAddress)down_prot.down(new Event(Event.GET_PHYSICAL_ADDRESS,addr));
if(phys_addr == null)
continue;
boolean is_coordinator=isCoord(addr);
for(Address target : targets)
sendDiscoveryResponse(addr,phys_addr,NameCache.get(addr),target,is_coordinator);
}
}
}
protected class DiscoveryCacheDisseminationTask implements Runnable {
protected final List curr_mbrs, left_mbrs, new_mbrs;
public DiscoveryCacheDisseminationTask(List curr_mbrs,List left_mbrs,List new_mbrs) {
this.curr_mbrs=curr_mbrs;
this.left_mbrs=left_mbrs;
this.new_mbrs=new_mbrs;
}
public void run() {
disseminateDiscoveryInformation(curr_mbrs, left_mbrs, new_mbrs);
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy