package org.jgroups.protocols;

import org.jgroups.*;
import org.jgroups.annotations.*;
import org.jgroups.blocks.LazyRemovalCache;
import org.jgroups.conf.AttributeType;
import org.jgroups.stack.IpAddress;
import org.jgroups.stack.Protocol;
import org.jgroups.util.*;

import java.io.*;
import java.net.*;
import java.util.*;
import java.util.concurrent.ConcurrentSkipListSet;
import java.util.concurrent.Future;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import java.util.function.Supplier;


/**
 * Failure detection protocol based on sockets. Failure detection is ring-based. Each member creates a
 * server socket and announces its address together with the server socket's address in a multicast.
 * <p>
 * A pinger thread will be started when the membership goes above 1 and will be stopped when it drops below 2.
 * The pinger thread connects to its neighbor on the right and waits until the socket is closed. When the
 * socket is closed by the monitored peer in an abnormal fashion (IOException), the neighbor will be suspected.
 * <p>
 * The main feature of this protocol is that no ping messages need to be exchanged between any 2 peers, as failure
 * detection relies entirely on TCP sockets. The advantage is that no activity will take place between 2 peers as long
 * as they are alive (i.e. have their server sockets open). The disadvantage is that hung servers or crashed routers
 * will not cause sockets to be closed, therefore they won't be detected.
 * <p>
 * The costs involved are 2 additional threads: one that monitors the client side of the socket connection
 * (to monitor a peer) and another one that manages the server socket. However, those threads will be idle as long as
 * both peers are running.
 * @author Bela Ban May 29 2001
 */
@MBean(description="Failure detection protocol based on sockets connecting members")
public class FD_SOCK extends Protocol implements Runnable {
    protected static final int NORMAL_TERMINATION=9;
    protected static final int ABNORMAL_TERMINATION=-1;

    /* ----------------------------------------- Properties -------------------------------------------------- */

    @LocalAddress
    @Property(description="The NIC on which the ServerSocket should listen on. " +
      "The following special values are also recognized: GLOBAL, SITE_LOCAL, LINK_LOCAL and NON_LOOPBACK",
      systemProperty={Global.BIND_ADDR},writable=false)
    protected InetAddress bind_addr;

    @Property(description="Use \"external_addr\" if you have hosts on different networks, behind " +
      "firewalls. On each firewall, set up a port forwarding rule (sometimes called \"virtual server\") to " +
      "the local IP (e.g. 192.168.1.100) of the host, then on each host, set the \"external_addr\" TCP transport " +
      "parameter to the external (public IP) address of the firewall.",
      systemProperty=Global.EXTERNAL_ADDR,writable=false)
    protected InetAddress external_addr;

    @Property(description="Used to map the internal port (bind_port) to an external port. Only used if > 0",
      systemProperty=Global.EXTERNAL_PORT,writable=false)
    protected int external_port;

    @Property(description="Timeout for getting socket cache from coordinator",type=AttributeType.TIME)
    protected long get_cache_timeout=1000;

    @Property(description="Max number of elements in the cache until deleted elements are removed")
    protected int cache_max_elements=200;

    @Property(description="Max age (in ms) an element marked as removed has to have until it is removed",
      type=AttributeType.TIME)
    protected long cache_max_age=10000;

    @Property(description="Interval for broadcasting suspect messages",type=AttributeType.TIME)
    protected long suspect_msg_interval=5000;

    @Property(description="Number of attempts the coordinator is solicited for the socket cache until we give up")
    protected int num_tries=3;

    @Property(description="Start port for server socket. Default value of 0 picks a random port")
    protected int start_port;

    @Property(description="Start port for client socket. Default value of 0 picks a random port")
    protected int client_bind_port;

    @Property(description="Number of ports to probe for start_port and client_bind_port")
    protected int port_range=50;

    @Property(description="Whether to use KEEP_ALIVE on the ping socket or not. Default is true")
    protected boolean keep_alive=true;

    @Property(description="Max time in millis to wait for ping Socket.connect() to return",type=AttributeType.TIME)
    protected int sock_conn_timeout=1000;
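
    // Illustrative note (not in the original source): the properties above map to attributes of the
    // FD_SOCK element in a JGroups stack configuration. The attribute values below are example
    // assumptions, not the defaults from this file:
    //   <FD_SOCK start_port="9777" client_bind_port="9500" port_range="5"
    //            keep_alive="true" sock_conn_timeout="2000"/>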

    /* --------------------------------------------- JMX ------------------------------------------------------ */

    protected int num_suspect_events;

    protected final BoundedList<String> suspect_history=new BoundedList<>(20);

    /* --------------------------------------------- Fields ------------------------------------------------------ */

    protected volatile List<Address>   members=new ArrayList<>(11); // volatile eliminates the lock

    protected final Set<Address>       suspected_mbrs=new ConcurrentSkipListSet<>();

    protected final List<Address>      pingable_mbrs=new ArrayList<>();

    protected volatile boolean         srv_sock_sent; // has own socket been broadcast yet ?

    /** Used to rendezvous on GET_CACHE and GET_CACHE_RSP */
    protected final Promise<Map<Address,IpAddress>> get_cache_promise=new Promise<>();

    protected volatile boolean         got_cache_from_coord; // was cache already fetched ?

    protected ServerSocket             srv_sock;         // server socket to which another member connects to monitor me

    protected ServerSocketHandler      srv_sock_handler; // accepts new connections on srv_sock

    protected IpAddress                srv_sock_addr;    // pair of server_socket:port

    protected Address                  ping_dest;        // address of the member we monitor

    protected Socket                   ping_sock;        // socket to the member we monitor

    protected InputStream              ping_input;       // input stream of the socket to the member we monitor

    @GuardedBy("this")
    protected volatile Thread          pinger_thread;    // listens on ping_sock, suspects member if socket is closed

    /** Cache of member addresses and their ServerSocket addresses */
    protected LazyRemovalCache<Address,IpAddress> cache;

    protected final Promise<IpAddress> ping_addr_promise=new Promise<>(); // to fetch the ping_addr for ping_dest

    protected final Lock               lock=new ReentrantLock(); // for access to ping_sock, ping_input

    protected TimeScheduler            timer;

    protected final BroadcastTask      bcast_task=new BroadcastTask(); // to transmit SUSPECT message (until view change)

    protected volatile boolean         regular_sock_close; // used by interruptPingerThread() when new ping_dest is computed

    protected volatile boolean         shutting_down;

    protected boolean                  log_suspected_msgs=true;


    public FD_SOCK() {
    }

    @ManagedAttribute(description="List of cluster members")
    public String getMembers() {return Util.printListWithDelimiter(members, ",");}

    @ManagedAttribute(description="List of pingable members of a cluster")
    public String getPingableMembers() {return printPingableMembers();}

    @ManagedAttribute(description="List of currently suspected members")
    public String getSuspectedMembers() {return suspected_mbrs.toString();}

    @ManagedAttribute(description="The number of currently suspected members")
    public int getNumSuspectedMembers() {return suspected_mbrs.size();}

    @ManagedAttribute(description="Ping destination")
    public String getPingDest() {return ping_dest != null? ping_dest.toString() : "null";}

    @ManagedAttribute(description="Number of suspect events generated")
    public int getNumSuspectEventsGenerated() {return num_suspect_events;}

    @ManagedAttribute(description="Whether the node crash detection monitor is running")
    public boolean isNodeCrashMonitorRunning() {return isPingerThreadRunning();}

    @ManagedAttribute(description="Whether or not to log suspect messages")
    public boolean isLogSuspectedMessages() {return log_suspected_msgs;}

    public FD_SOCK setLogSuspectedMessages(boolean log_suspected_msgs) {
        this.log_suspected_msgs=log_suspected_msgs;
        return this;
    }

    @ManagedAttribute(description="The actual client_bind_port")
    public int getClientBindPortActual() {return ping_sock != null? ping_sock.getLocalPort() : 0;}

    public InetAddress getBindAddress()                  {return bind_addr;}
    public FD_SOCK     setBindAddress(InetAddress b)     {this.bind_addr=b; return this;}
    public InetAddress getExternalAddress()              {return external_addr;}
    public FD_SOCK     setExternalAddress(InetAddress e) {this.external_addr=e; return this;}
    public int         getExternalPort()                 {return external_port;}
    public FD_SOCK     setExternalPort(int e)            {this.external_port=e; return this;}
    public long        getGetCacheTimeout()              {return get_cache_timeout;}
    public FD_SOCK     setGetCacheTimeout(long g)        {this.get_cache_timeout=g; return this;}
    public int         getCacheMaxElements()             {return cache_max_elements;}
    public FD_SOCK     setCacheMaxElements(int c)        {this.cache_max_elements=c; return this;}
    public long        getCacheMaxAge()                  {return cache_max_age;}
    public FD_SOCK     setCacheMaxAge(long c)            {this.cache_max_age=c; return this;}
    public long        getSuspectMsgInterval()           {return suspect_msg_interval;}
    public FD_SOCK     setSuspectMsgInterval(long s)     {this.suspect_msg_interval=s; return this;}
    public int         getNumTries()                     {return num_tries;}
    public FD_SOCK     setNumTries(int n)                {this.num_tries=n; return this;}
    public int         getStartPort()                    {return start_port;}
    public FD_SOCK     setStartPort(int s)               {this.start_port=s; return this;}
    public int         getClientBindPort()               {return client_bind_port;}
    public FD_SOCK     setClientBindPort(int c)          {this.client_bind_port=c; return this;}
    public int         getPortRange()                    {return port_range;}
    public FD_SOCK     setPortRange(int p)               {this.port_range=p; return this;}
    public boolean     keepAlive()                       {return keep_alive;}
    public FD_SOCK     keepAlive(boolean k)              {this.keep_alive=k; return this;}
    public int         getSockConnTimeout()              {return sock_conn_timeout;}
    public FD_SOCK     setSockConnTimeout(int s)         {this.sock_conn_timeout=s; return this;}

    @ManagedOperation(description="Print suspect history")
    public String printSuspectHistory() {
        StringBuilder sb=new StringBuilder();
        for(String suspect: suspect_history)
            sb.append(suspect).append("\n");
        return sb.toString();
    }

    @ManagedOperation
    public String printCache() {
        return cache.printCache();
    }

    @ManagedOperation(description="Starts node crash monitor if member count > 1 and monitor is not running")
    public boolean startNodeCrashMonitor() {
        if(members.size() > 1) {
            if(startPingerThread()) {
                log.warn("Node crash detection manually started, was not running for some reason.");
                return true;
            }
            log.debug("Node crash detection is already running.");
        }
        else
            log.debug("Single node cluster, no need for node crash detection.");
        return false;
    }

    public void init() throws Exception {
        cache=new LazyRemovalCache<>(cache_max_elements, cache_max_age);
        shutting_down=false;
        srv_sock_handler=new ServerSocketHandler();
    }

    public void start() throws Exception {
        if(bind_addr == null)
            bind_addr=getTransport().getBindAddr();
        shutting_down=false;
        super.start();
        timer=getTransport().getTimer();
        if(timer == null)
            throw new Exception("timer is null");
    }

    public void stop() {
        shutting_down=true;
        resetPingableMembers(null);
        stopPingerThread();
        stopServerSocket(true); // graceful close
        bcast_task.removeAll();
        suspected_mbrs.clear();
    }

    public void resetStats() {
        super.resetStats();
        num_suspect_events=0;
        suspect_history.clear();
    }

    public Object up(Event evt) {
        switch(evt.getType()) {
            case Event.CONFIG:
                Map<String,Object> config=evt.getArg();
                if(bind_addr == null)
                    bind_addr=(InetAddress)config.get("bind_addr");
                if(external_addr == null)
                    external_addr=(InetAddress)config.get("external_addr");
                if(external_port <= 0) {
                    Object val=config.get("external_port");
                    if(val != null)
                        external_port=(Integer)val;
                }
                break;
        }
        return up_prot.up(evt);
    }

    public Object up(Message msg) {
        FdHeader hdr=msg.getHeader(this.id);
        if(hdr == null)
            return up_prot.up(msg); // message did not originate from FD_SOCK layer, just pass up

        switch(hdr.type) {

            case FdHeader.SUSPECT:
                if(hdr.mbrs != null) {
                    log.trace("%s: received SUSPECT message from %s: suspects=%s", local_addr, msg.getSrc(), hdr.mbrs);
                    suspect(hdr.mbrs);
                }
                break;

            case FdHeader.UNSUSPECT:
                if(hdr.mbrs != null) {
                    log.trace("%s: received UNSUSPECT message from %s: mbrs=%s", local_addr, msg.getSrc(), hdr.mbrs);
                    hdr.mbrs.forEach(this::unsuspect);
                }
                break;

            // If I have the sock for 'hdr.mbr', return it. Otherwise look it up in my cache and return it
            case FdHeader.WHO_HAS_SOCK:
                if(Objects.equals(local_addr, msg.getSrc()))
                    return null; // don't reply to WHO_HAS bcasts sent by me !
                if(hdr.mbr == null)
                    return null;
                log.trace("%s: who-has-sock %s", local_addr, hdr.mbr);

                // 1. Try my own address, maybe it's me whose socket is wanted
                if(local_addr != null && local_addr.equals(hdr.mbr) && srv_sock_addr != null) {
                    sendIHaveSockMessage(msg.getSrc(), local_addr, srv_sock_addr); // unicast message to msg.getSrc()
                    return null;
                }

                // 2. If I don't have it, maybe it is in the cache
                IpAddress addr=cache.get(hdr.mbr);
                if(addr != null)
                    sendIHaveSockMessage(msg.getSrc(), hdr.mbr, addr); // ucast msg
                break;

            // Update the cache with the addr:sock_addr entry (if on the same host)
            case FdHeader.I_HAVE_SOCK:
                if(hdr.mbr == null || hdr.sock_addr == null)
                    return null;
                cache.add(hdr.mbr, hdr.sock_addr); // update the cache
                log.trace("%s: i-have-sock: %s --> %s (cache is %s)", local_addr, hdr.mbr, hdr.sock_addr, cache);
                if(hdr.mbr.equals(ping_dest))
                    ping_addr_promise.setResult(hdr.sock_addr);
                break;

            // Return the cache to the sender of this message
            case FdHeader.GET_CACHE:
                msg=new BytesMessage(msg.getSrc())
                  .putHeader(this.id, new FdHeader(FdHeader.GET_CACHE_RSP)).setArray(marshal(cache));
                down_prot.down(msg);
                break;

            case FdHeader.GET_CACHE_RSP:
                Map<Address,IpAddress> cachedAddrs=unmarshal(msg.getArray(), msg.getOffset(), msg.getLength());
                if(cachedAddrs != null)
                    get_cache_promise.setResult(cachedAddrs);
                break;
        }
        return null;
    }

    public Object down(Event evt) {
        switch(evt.getType()) {

            case Event.UNSUSPECT:
                broadcastUnsuspectMessage(evt.getArg());
                break;

            case Event.CONNECT:
            case Event.CONNECT_WITH_STATE_TRANSFER:
            case Event.CONNECT_USE_FLUSH:
            case Event.CONNECT_WITH_STATE_TRANSFER_USE_FLUSH:
                shutting_down=false;
                Object ret=down_prot.down(evt);
                try {
                    startServerSocket();
                }
                catch(Exception e) {
                    throw new IllegalArgumentException("failed to start server socket", e);
                }
                return ret;

            case Event.DISCONNECT:
                shutting_down=true;
                stopServerSocket(true); // graceful close
                break;

            case Event.VIEW_CHANGE:
                View v=evt.getArg();
                final List<Address> new_mbrs=v.getMembers();
                members=new_mbrs; // volatile write will ensure all reads after this see the new membership
                suspected_mbrs.retainAll(new_mbrs);
                cache.keySet().retainAll(new_mbrs); // remove all entries in 'cache' which are not in the new membership
                bcast_task.adjustSuspectedMembers(new_mbrs);
                resetPingableMembers(new_mbrs);
                if(new_mbrs.size() > 1) {
                    Address tmp_ping_dest=determinePingDest();
                    boolean hasNewPingDest=tmp_ping_dest != null && !tmp_ping_dest.equals(ping_dest);
                    if(hasNewPingDest) {
                        interruptPingerThread(false); // allows the thread to use the new socket
                        startPingerThread();          // in case it wasn't running; only starts if not yet running
                    }
                }
                else {
                    ping_dest=null;
                    stopPingerThread();
                }
                break;

            default:
                return down_prot.down(evt);
        }
        return down_prot.down(evt);
    }
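
    // Illustrative note (not in the original source): with a view [A,B,C,D], determinePingDest() picks the
    // neighbor to the right of the local address, so the monitoring ring is A->B->C->D->A. If B crashes,
    // A's ping socket gets an IOException, A broadcasts SUSPECT(B), and once the new view [A,C,D] is
    // installed, A starts monitoring C.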

    /**
     * Runs as long as there are 2 or more members. Determines the member to be monitored and fetches its
     * server socket address (if n/a, sends a message to obtain it). Then creates a client socket and listens on
     * it until the connection breaks. If it breaks, emits a SUSPECT message. If the connection is closed regularly,
     * nothing happens. In both cases, a new member to be monitored will be chosen and monitoring continues (unless
     * there are fewer than 2 members).
     */
    public void run() {

        // 1. Broadcast my own addr:sock to all members so they can update their cache
        if(!srv_sock_sent && srv_sock_addr != null) {
            sendIHaveSockMessage(null, local_addr, srv_sock_addr);
            srv_sock_sent=true;
        }

        // 2. Get the addr:pid cache from the coordinator (only if not already fetched)
        if(!got_cache_from_coord) {
            getCacheFromCoordinator();
            got_cache_from_coord=true;
        }

        log.trace("%s: pinger_thread started", local_addr);
        while(hasPingableMembers()) {
            regular_sock_close=false;
            ping_dest=determinePingDest(); // gets the neighbor to our right
            if(ping_dest == null || !isPingerThreadRunning())
                break;
            log.debug("%s: pingable_mbrs=%s, ping_dest=%s", local_addr, printPingableMembers(), ping_dest);

            IpAddress ping_addr=fetchPingAddress(ping_dest);
            if(ping_addr == null) {
                log.trace("%s: socket address for %s could not be fetched, retrying", local_addr, ping_dest);
                Util.sleep(1000);
                continue;
            }

            if(!setupPingSocket(ping_addr) && isPingerThreadRunning()) {
                // log.debug("%s: failed connecting to %s", local_addr, ping_dest);
                broadcastSuspectMessage(ping_dest);
                removeFromPingableMembers(ping_dest);
                continue;
            }

            log.trace("%s: ping_dest=%s, ping_sock=%s, cache=%s", local_addr, ping_dest, ping_sock, cache);

            // at this point ping_input must be non-null, otherwise setupPingSocket() would have thrown an exception
            try {
                if(ping_input != null) {
                    int c=ping_input.read();
                    switch(c) {
                        case NORMAL_TERMINATION:
                            log.debug("%s: %s closed socket gracefully", local_addr, ping_dest);
                            removeFromPingableMembers(ping_dest);
                            break;
                        case ABNORMAL_TERMINATION: // -1 means EOF
                            handleSocketClose(null);
                            break;
                        default:
                            break;
                    }
                }
            }
            catch(IOException ex) { // we get here when the peer closed the socket --> suspect peer, then continue
                handleSocketClose(ex);
            }
            catch(Throwable catch_all_the_rest) {
                log.error("exception", catch_all_the_rest);
            }
        }
        log.trace("%s: pinger thread terminated", local_addr);
    }

    protected synchronized boolean isPingerThreadRunning() {
        return pinger_thread != null;
    }

    protected void resetPingableMembers(Collection<Address> new_mbrs) {
        synchronized(pingable_mbrs) {
            pingable_mbrs.clear();
            if(new_mbrs != null)
                pingable_mbrs.addAll(new_mbrs);
        }
    }
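
    // Illustrative note (not in the original source): the single byte read in run() encodes why the
    // monitored socket ended. NORMAL_TERMINATION (9) is written by the peer's connection handler on a
    // graceful shutdown; read() returning -1 (ABNORMAL_TERMINATION, i.e. EOF) or an IOException means
    // the peer died, which triggers a SUSPECT broadcast.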

    protected boolean hasPingableMembers() {
        synchronized(pingable_mbrs) {
            return !pingable_mbrs.isEmpty();
        }
    }

    protected boolean removeFromPingableMembers(Address mbr) {
        if(mbr == null)
            return false;
        synchronized(pingable_mbrs) {
            return pingable_mbrs.remove(mbr);
        }
    }

    protected String printPingableMembers() {
        synchronized(pingable_mbrs) {
            return pingable_mbrs.toString();
        }
    }

    protected void suspect(Set<Address> suspects) {
        if(suspects == null)
            return;

        suspects.remove(local_addr);
        suspects.forEach(suspect -> suspect_history.add(String.format("%s: %s", new Date(), suspect)));
        suspected_mbrs.addAll(suspects);
        List<Address> eligible_mbrs=new ArrayList<>(this.members);
        eligible_mbrs.removeAll(suspected_mbrs);
        Collection<Address> suspects_copy=new ArrayList<>(suspected_mbrs);

        // Check if we're coord, then send up the stack, make a copy (https://issues.redhat.com/browse/JGRP-2552)
        if(!suspects_copy.isEmpty() && local_addr != null && !eligible_mbrs.isEmpty()
          && local_addr.equals(eligible_mbrs.get(0))) {
            log.debug("%s: suspecting %s", local_addr, suspects_copy);
            up_prot.up(new Event(Event.SUSPECT, suspects_copy));
            down_prot.down(new Event(Event.SUSPECT, suspects_copy));
        }
    }

    protected void unsuspect(Address mbr) {
        if(mbr == null)
            return;
        suspected_mbrs.remove(mbr);
        bcast_task.removeSuspectedMember(mbr);
    }

    protected void handleSocketClose(Exception ex) {
        teardownPingSocket(); // make sure we have no leftovers
        if(!regular_sock_close) { // only suspect if socket was not closed regularly (by interruptPingerThread())
            log.debug("%s: %s closed socket (%s)", local_addr, ping_dest, (ex != null? ex.toString() : "eof"));
            broadcastSuspectMessage(ping_dest);
            removeFromPingableMembers(ping_dest);
        }
        else {
            log.debug("%s: socket to %s was closed gracefully", local_addr, ping_dest);
            regular_sock_close=false;
        }
    }

    /**
     * Does *not* need to be synchronized on pinger_mutex because the caller (down()) already has the mutex acquired
     */
    protected synchronized boolean startPingerThread() {
        if(!isPingerThreadRunning()) {
            ThreadFactory factory=getThreadFactory();
            pinger_thread=factory.newThread(this, "FD_SOCK pinger");
            pinger_thread.setDaemon(true);
            pinger_thread.start();
            return true;
        }
        return false;
    }

    /**
     * Interrupts the pinger thread. The Thread.interrupt() method doesn't seem to work under Linux with JDK 1.3.1
     * (JDK 1.2.2 had no problems here), therefore we close the socket (setSoLinger has to be set!) if we are
     * running under Linux. This should be tested under Windows. (Solaris 8 and JDK 1.3.1 definitely work.)
     * <p>
     * Oct 29 2001 (bela): completely removed Thread.interrupt(), and use socket close on all OSs. This makes the
     * code portable and we don't have to check for OSs.
     */
    protected synchronized void interruptPingerThread(boolean sendTerminationSignal) {
        if(isPingerThreadRunning()) {
            regular_sock_close=true;
            if(sendTerminationSignal)
                sendPingTermination(); // PATCH by Bruce Schuchardt (https://issues.redhat.com/browse/JGRP-246)
            teardownPingSocket(); // will wake up the pinger thread. less elegant than Thread.interrupt(), but does the job
        }
    }

    protected synchronized void stopPingerThread() {
        ping_addr_promise.setResult(null);
        get_cache_promise.setResult(null);

        interruptPingerThread(true);

        if(pinger_thread != null) {
            try {
                pinger_thread.join(Global.THREAD_SHUTDOWN_WAIT_TIME);
            }
            catch(InterruptedException ignored) {
                Thread.currentThread().interrupt();
            }
            pinger_thread=null;
        }
    }

    // PATCH: send something so the connection handler can exit
    protected void sendPingTermination() {
        sendPingSignal(NORMAL_TERMINATION);
    }

    protected void sendPingSignal(int signal) {
        lock.lock();
        try {
            if(ping_sock != null) {
                OutputStream out=ping_sock.getOutputStream();
                out.write(signal);
                out.flush();
            }
        }
        catch(Throwable t) {
            log.trace("%s: problem sending signal %s: %s", local_addr, signalToString(signal), t);
        }
        finally {
            lock.unlock();
        }
    }

    protected void startServerSocket() throws Exception {
        srv_sock=Util.createServerSocket(getSocketFactory(), "jgroups.fd_sock.srv_sock", bind_addr,
                                         start_port, start_port+port_range, 0); // grab a random unused port above 10000
        srv_sock_addr=new IpAddress(external_addr != null? external_addr : bind_addr,
                                    external_port > 0? external_port : srv_sock.getLocalPort());
        if(local_addr != null)
            cache.add(local_addr, srv_sock_addr);
        if(srv_sock_handler != null)
            srv_sock_handler.start(); // won't start if already running
    }

    public void stopServerSocket(boolean graceful) {
        if(srv_sock_handler != null)
            srv_sock_handler.stop(graceful);
    }

    /**
     * Creates a socket to {@code dest}, and assigns it to ping_sock. Also assigns ping_input
     */
    protected boolean setupPingSocket(IpAddress dest) {
        lock.lock();
        try {
            SocketAddress destAddr=new InetSocketAddress(dest.getIpAddress(), dest.getPort());
            ping_sock=getSocketFactory().createSocket("jgroups.fd.ping_sock");
            Util.bind(ping_sock, bind_addr, client_bind_port, client_bind_port+port_range);
            ping_sock.setSoLinger(true, 1);
            ping_sock.setKeepAlive(keep_alive);
            Util.connect(ping_sock, destAddr, sock_conn_timeout);
            ping_input=ping_sock.getInputStream();
            return true;
        }
        catch(Throwable ex) {
            if(!shutting_down)
                log.debug("%s: failed connecting to %s: %s",
                          local_addr, ping_dest != null? ping_dest : dest, ex.getMessage());
            return false;
        }
        finally {
            lock.unlock();
        }
    }

    protected void teardownPingSocket() {
        lock.lock();
        try {
            if(ping_sock != null) {
                try {
                    ping_sock.shutdownInput();
                    ping_sock.close();
                }
                catch(Exception ignored) {
                }
            }
            Util.close(ping_input);
        }
        finally {
            ping_sock=null;
            ping_input=null;
            lock.unlock();
        }
    }
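
    // Illustrative note (not in the original source): the graceful-close handshake between a monitor A
    // and its monitored neighbor B looks like this:
    //   A: setupPingSocket() connects to B's srv_sock    B: srv_sock.accept() -> ClientConnectionHandler
    //   B leaves gracefully: its handler writes NORMAL_TERMINATION (9) and closes the socket
    //   A: ping_input.read() returns 9 -> no suspicion; A picks the next neighbor in the ring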

    /**
     * Determines the coordinator C. If C is null and we are the first member, return. Else loop: send a GET_CACHE
     * message to the coordinator and wait for a GET_CACHE_RSP response. Loop until a valid response has been received.
     */
    protected void getCacheFromCoordinator() {
        Address coord;
        int attempts=num_tries;

        get_cache_promise.reset();
        while(attempts > 0 && isPingerThreadRunning()) {
            if((coord=determineCoordinator()) != null) {
                if(coord.equals(local_addr)) // we are the first member --> empty cache
                    return;
                // always sent to coord != self, so we don't need the DONT_LOOPBACK flag here:
                Message msg=new EmptyMessage(coord).putHeader(this.id, new FdHeader(FdHeader.GET_CACHE));
                down_prot.down(msg);
                Map<Address,IpAddress> result=get_cache_promise.getResult(get_cache_timeout);
                if(result != null) {
                    cache.addAll(result);
                    log.trace("%s: got cache from %s: cache is %s", local_addr, coord, cache);
                    return;
                }
            }
            --attempts;
        }
    }

    /**
     * Sends a SUSPECT message to all group members. Only the coordinator (or the next member in line if the coord
     * itself is suspected) will react to this message by installing a new view. To overcome the unreliability
     * of the SUSPECT message (it may be lost because we are not above any retransmission layer), the following scheme
     * is used: after sending the SUSPECT message, it is also added to the broadcast task, which will periodically
     * re-send the SUSPECT until a view is received in which the suspected process is not a member anymore. The reason
     * is that - at one point - either the coordinator or another participant taking over for a crashed coordinator
     * will react to the SUSPECT message and issue a new view, at which point the broadcast task stops.
     */
    protected void broadcastSuspectMessage(Address suspected_mbr) {
        if(suspected_mbr == null)
            return;

        log.debug("%s: broadcasting suspect(%s)", local_addr, suspected_mbr);

        // 1. Send a SUSPECT message right away; the broadcast task will take some time to send it (sleeps first)
        FdHeader hdr=new FdHeader(FdHeader.SUSPECT).mbrs(Collections.singleton(suspected_mbr));
        Message suspect_msg=new EmptyMessage().putHeader(this.id, hdr);
        down_prot.down(suspect_msg);

        // 2. Add to broadcast task and start the latter (if not yet running). The task will end when
        //    suspected members are removed from the membership
        bcast_task.addSuspectedMember(suspected_mbr);
        if(stats) {
            num_suspect_events++;
            suspect_history.add(String.format("%s: %s", new Date(), suspected_mbr));
        }
    }

    protected void broadcastUnsuspectMessage(Address mbr) {
        if(mbr == null)
            return;

        log.debug("%s: broadcasting unsuspect(%s)", local_addr, mbr);

        // Send an UNSUSPECT message right away
        FdHeader hdr=new FdHeader(FdHeader.UNSUSPECT).mbrs(Collections.singleton(mbr));
        Message suspect_msg=new EmptyMessage().putHeader(this.id, hdr);
        down_prot.down(suspect_msg);
    }

    /**
     * Sends or broadcasts an I_HAVE_SOCK response. If 'dst' is null, the response will be broadcast, otherwise
     * it will be unicast back to the requester
     */
    protected void sendIHaveSockMessage(Address dst, Address mbr, IpAddress addr) {
        Message msg=new EmptyMessage(dst).setFlag(Message.TransientFlag.DONT_LOOPBACK);
        FdHeader hdr=new FdHeader(FdHeader.I_HAVE_SOCK, mbr).sockAddress(addr);
        msg.putHeader(this.id, hdr);
        down_prot.down(msg);
    }

    /**
     * Attempts to obtain the ping_addr first from the cache, then by unicasting a request to {@code mbr},
     * then by multicasting a request to all members.
     */
    protected IpAddress fetchPingAddress(final Address mbr) {
        IpAddress ret;

        if(mbr == null)
            return null;

        // 1. Try to get the server socket address from the cache
        if((ret=cache.get(mbr)) != null)
            return ret;

        if(!isPingerThreadRunning())
            return null;

        // 2. Try to get the server socket address from mbr (or all members, as fallback)
        ping_addr_promise.reset();
        for(Address dest: Arrays.asList(mbr, null)) {
            Message msg=new EmptyMessage(dest).setFlag(Message.TransientFlag.DONT_LOOPBACK)
              .putHeader(this.id, new FdHeader(FdHeader.WHO_HAS_SOCK, mbr));
            down_prot.down(msg);
            if((ret=ping_addr_promise.getResult(500)) != null)
                return ret;
            if(!isPingerThreadRunning())
                return null;
        }
        return null;
    }
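
    // Illustrative note (not in the original source): address resolution in fetchPingAddress(mbr) proceeds
    //   1) local cache hit, 2) unicast WHO_HAS_SOCK to mbr itself, 3) multicast WHO_HAS_SOCK to all members,
    // waiting up to 500 ms on ping_addr_promise after each request before falling through to the next step.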

    protected Address determinePingDest() {
        if(local_addr == null)
            return null;
        Address next;
        synchronized(pingable_mbrs) {
            next=Util.pickNext(pingable_mbrs, local_addr);
        }
        return Objects.equals(local_addr, next)? null : next;
    }

    public static ByteArray marshal(LazyRemovalCache<Address,IpAddress> addrs) {
        final ByteArrayDataOutputStream out=new ByteArrayDataOutputStream(512);
        try {
            int size=addrs != null? addrs.size() : 0;
            out.writeInt(size);
            if(size > 0) {
                for(Map.Entry<Address,LazyRemovalCache.Entry<IpAddress>> entry: addrs.entrySet()) {
                    Address key=entry.getKey();
                    IpAddress val=entry.getValue().getVal();
                    Util.writeAddress(key, out);
                    Util.writeStreamable(val, out);
                }
            }
            return out.getBuffer();
        }
        catch(Exception ex) {
            return null;
        }
    }

    protected Map<Address,IpAddress> unmarshal(byte[] buffer, int offset, int length) {
        if(buffer == null)
            return null;
        DataInput in=new ByteArrayDataInputStream(buffer, offset, length);
        HashMap<Address,IpAddress> addrs=null;
        try {
            int size=in.readInt();
            if(size > 0) {
                addrs=new HashMap<>(size);
                for(int i=0; i < size; i++) {
                    Address key=Util.readAddress(in);
                    IpAddress val=Util.readStreamable(IpAddress::new, in);
                    addrs.put(key, val);
                }
            }
            return addrs;
        }
        catch(Exception ex) {
            log.error("%s: failed reading addresses from message: %s", local_addr, ex);
            return null;
        }
    }

    protected Address determineCoordinator() {
        List<Address> tmp=members;
        return !tmp.isEmpty()? tmp.get(0) : null;
    }

    protected static String signalToString(int signal) {
        switch(signal) {
            case NORMAL_TERMINATION:   return "NORMAL_TERMINATION";
            case ABNORMAL_TERMINATION: return "ABNORMAL_TERMINATION";
            default:                   return "n/a";
        }
    }


    public static class FdHeader extends Header {
        public static final byte SUSPECT       = 10;
        public static final byte UNSUSPECT     = 11;
        public static final byte WHO_HAS_SOCK  = 12;
        public static final byte I_HAVE_SOCK   = 13;
        public static final byte GET_CACHE     = 14; // sent by joining member to coordinator
        public static final byte GET_CACHE_RSP = 15; // sent by coordinator to joining member in response to GET_CACHE

        protected byte         type=SUSPECT;
        protected Address      mbr;       // set on WHO_HAS_SOCK (requested mbr), I_HAVE_SOCK
        protected IpAddress    sock_addr; // set on I_HAVE_SOCK
        protected Set<Address> mbrs;      // set on SUSPECT (list of suspected members)
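
        // Illustrative note (not in the original source): writeTo() below produces the following layout:
        //   [type: 1 byte][mbr: address][sock_addr: streamable IpAddress][mbrs size: 4-byte int][mbrs: addresses]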

        public FdHeader() {
        }

        public FdHeader(byte type) {
            this.type=type;
        }

        public FdHeader(byte type, Address mbr) {
            this.type=type;
            this.mbr=mbr;
        }

        public FdHeader(byte type, Address mbr, IpAddress sock_addr) {
            this.type=type;
            this.mbr=mbr;
            this.sock_addr=sock_addr;
        }

        public short getMagicId() {
            return 51;
        }

        public Supplier<? extends Header> create() {
            return FdHeader::new;
        }

        public FdHeader mbrs(Set<Address> members) {
            this.mbrs=members;
            return this;
        }

        public FdHeader sockAddress(IpAddress a) {
            this.sock_addr=a;
            return this;
        }

        public String toString() {
            StringBuilder sb=new StringBuilder(type2String(type));
            if(mbr != null)
                sb.append(", mbr=").append(mbr);
            if(sock_addr != null)
                sb.append(", sock_addr=").append(sock_addr);
            if(mbrs != null)
                sb.append(", mbrs=").append(mbrs);
            return sb.toString();
        }

        public static String type2String(byte type) {
            switch(type) {
                case SUSPECT:       return "SUSPECT";
                case UNSUSPECT:     return "UNSUSPECT";
                case WHO_HAS_SOCK:  return "WHO_HAS_SOCK";
                case I_HAVE_SOCK:   return "I_HAVE_SOCK";
                case GET_CACHE:     return "GET_CACHE";
                case GET_CACHE_RSP: return "GET_CACHE_RSP";
                default:            return "unknown type (" + type + ')';
            }
        }

        @Override
        public int serializedSize() {
            int retval=Global.BYTE_SIZE; // type
            retval+=Util.size(mbr);

            // use of Util.size(Address) with IpAddress overestimates size by one byte.
            // replace: retval+=Util.size(sock_addr); with the following:
            int ipaddr_size=0;
            ipaddr_size+=Global.BYTE_SIZE; // presence byte
            if(sock_addr != null)
                ipaddr_size+=sock_addr.serializedSize(); // IpAddress size
            retval+=ipaddr_size;

            retval+=Global.INT_SIZE; // mbrs size
            if(mbrs != null)
                for(Address m: mbrs)
                    retval+=Util.size(m);
            return retval;
        }

        @Override
        public void writeTo(DataOutput out) throws IOException {
            out.writeByte(type);
            Util.writeAddress(mbr, out);
            Util.writeStreamable(sock_addr, out);
            int size=mbrs != null? mbrs.size() : 0;
            out.writeInt(size);
            if(size > 0)
                for(Address address: mbrs)
                    Util.writeAddress(address, out);
        }

        @Override
        public void readFrom(DataInput in) throws IOException, ClassNotFoundException {
            type=in.readByte();
            mbr=Util.readAddress(in);
            sock_addr=Util.readStreamable(IpAddress::new, in);
            int size=in.readInt();
            if(size > 0) {
                mbrs=new HashSet<>();
                for(int i=0; i < size; i++)
                    mbrs.add(Util.readAddress(in));
            }
        }
    }


    /**
     * Handles the server-side of a client-server socket connection. Waits until a client connects, and then loops
     * until that client closes the connection. Note that there is no new thread spawned for listening on the
     * client socket, therefore there can only be 1 client connection at the same time. Subsequent clients attempting
     * to create a connection will be blocked until the first client closes its connection. This should not be a
     * problem as the ring nature of the FD_SOCK protocol always has only 1 client connect to its right-hand-side
     * neighbor.
     */
    protected class ServerSocketHandler implements Runnable {
        protected Thread acceptor;
        protected final List<ClientConnectionHandler> clients=new LinkedList<>();

        protected String getName() {
            return acceptor != null? acceptor.getName() : null;
        }

        protected ServerSocketHandler() {
            start();
        }

        protected void start() {
            if(acceptor == null) {
                acceptor=getThreadFactory().newThread(this, "FD_SOCK acceptor");
                acceptor.setDaemon(true);
                acceptor.start();
            }
        }

        protected void stop(boolean graceful) {
            if(acceptor != null && acceptor.isAlive())
                Util.close(srv_sock); // this will terminate the thread, peer will receive SocketException (socket close)
            synchronized(clients) {
                clients.forEach(client -> client.stopThread(graceful));
                clients.clear();
            }
            acceptor=null;
        }

        /** Only accepts 1 client connection at a time (saving threads) */
        public void run() {
            Socket client_sock;
            while(acceptor != null && srv_sock != null) {
                try {
                    client_sock=srv_sock.accept();
                    log.trace("%s: accepted connection from %s:%s",
                              local_addr, client_sock.getInetAddress(), client_sock.getPort());
                    client_sock.setKeepAlive(keep_alive);
                    ClientConnectionHandler client_conn_handler=new ClientConnectionHandler(client_sock, clients);
                    ThreadFactory factory=getThreadFactory();
                    Thread t=factory != null?
                      factory.newThread(client_conn_handler, "FD_SOCK conn-handler")
                      : new Thread(client_conn_handler, "FD_SOCK conn-handler");
                    t.setDaemon(true);

                    synchronized(clients) {
                        clients.add(client_conn_handler);
                    }
                    t.start();
                }
                catch(IOException io_ex2) {
                    break;
                }
            }
            acceptor=null;
        }
    }

    /** Handles a client connection; multiple clients can connect at the same time */
    protected static class ClientConnectionHandler implements Runnable {
        Socket      client_sock;
        InputStream in;
        final List<ClientConnectionHandler> clients;

        protected ClientConnectionHandler(Socket client_sock, List<ClientConnectionHandler> clients) {
            this.client_sock=client_sock;
            this.clients=clients;
        }

        protected synchronized void stopThread(boolean graceful) {
            if(client_sock != null) {
                try {
                    if(graceful) {
                        OutputStream out=client_sock.getOutputStream();
                        out.write(NORMAL_TERMINATION);
                        out.flush();
                    }
                    Util.close(client_sock);
                    client_sock=null;
                }
                catch(Throwable ignored) {
                }
            }
        }

        public void run() {
            try {
                synchronized(this) {
                    if(client_sock == null)
                        return;
                    in=client_sock.getInputStream();
                }
                int b;
                do {
                    b=in.read();
                }
                while(b != ABNORMAL_TERMINATION && b != NORMAL_TERMINATION);
            }
            catch(IOException ignored) {
            }
            finally {
                Socket sock=client_sock; // PATCH: avoid race condition causing NPE
                if(sock != null && !sock.isClosed()) {
                    Util.close(sock);
                    client_sock=null;
                }
                synchronized(clients) {
                    clients.remove(this);
                }
            }
        }
    }


    /**
     * Task that periodically broadcasts a list of suspected members to the group. The goal is not to lose
     * a SUSPECT message: since these are bcast unreliably, they might get dropped. The BroadcastTask makes
     * sure they are retransmitted until a view has been received which doesn't contain the suspected members
     * any longer. Then the task terminates.
     */
    protected class BroadcastTask implements Runnable {
        protected final Set<Address> suspects=new HashSet<>();
        protected Future<?> future;

        /** Adds a suspected member. Starts the task if not yet running */
        protected void addSuspectedMember(Address mbr) {
            if(mbr == null)
                return;
            if(!members.contains(mbr))
                return;
            synchronized(suspects) {
                if(suspects.add(mbr))
                    startTask();
            }
        }

        protected void removeSuspectedMember(Address suspected_mbr) {
            if(suspected_mbr == null)
                return;
            synchronized(suspects) {
                if(suspects.remove(suspected_mbr) && suspects.isEmpty())
                    stopTask();
            }
        }

        protected void removeAll() {
            synchronized(suspects) {
                suspects.clear();
                stopTask();
            }
        }

        protected void startTask() {
            if(future == null || future.isDone()) {
                try {
                    future=timer.scheduleWithFixedDelay(this, suspect_msg_interval, suspect_msg_interval,
                                                        TimeUnit.MILLISECONDS, getTransport() instanceof TCP);
                }
                catch(RejectedExecutionException e) {
                    log.warn("%s: task %s was rejected as timer thread pool is shutting down", local_addr, this);
                }
            }
        }

        protected void stopTask() {
            if(future != null) {
                future.cancel(false);
                future=null;
            }
        }

        /**
         * Removes all elements from suspects that are not in the new membership
         */
        protected void adjustSuspectedMembers(List<Address> new_mbrship) {
            if(new_mbrship == null || new_mbrship.isEmpty())
                return;
            synchronized(suspects) {
                boolean modified=suspects.retainAll(new_mbrship);
                if(modified)
                    log.trace("%s: adjusted suspected_mbrs: %s", local_addr, suspects);
                if(suspects.isEmpty())
                    stopTask();
            }
        }

        public void run() {
            log.trace("%s: broadcasting SUSPECT message (suspected_mbrs=%s)", local_addr, suspects);
            FdHeader hdr;
            synchronized(suspects) {
                if(suspects.isEmpty()) {
                    stopTask();
                    return;
                }
                hdr=new FdHeader(FdHeader.SUSPECT).mbrs(new HashSet<>(suspects));
            }
            Message suspect_msg=new EmptyMessage().putHeader(id, hdr); // mcast SUSPECT to all members
            down_prot.down(suspect_msg);
        }

        public String toString() {
            return FD_SOCK.class.getSimpleName() + ": " + getClass().getSimpleName();
        }
    }
}
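
For context, a minimal sketch of using FD_SOCK from application code follows. This example is not part of
the source file above: the class name FdSockExample, the stack composition, and the property values are
assumptions for illustration (a production stack is normally defined in XML and tuned per deployment). It
relies only on the fluent setters defined in FD_SOCK itself and on JChannel's Protocol... constructor.

import org.jgroups.JChannel;
import org.jgroups.protocols.*;
import org.jgroups.protocols.pbcast.GMS;
import org.jgroups.protocols.pbcast.NAKACK2;
import org.jgroups.protocols.pbcast.STABLE;

public class FdSockExample {
    public static void main(String[] args) throws Exception {
        // FD_SOCK configured via the fluent setters shown above; values are illustrative
        FD_SOCK fd=new FD_SOCK()
          .setStartPort(9777)        // server-socket probing starts at this port
          .setSockConnTimeout(2000); // max ms to wait for the ping Socket.connect()

        // JChannel(Protocol...) builds the stack bottom-up; FD_SOCK sits above discovery
        try(JChannel ch=new JChannel(new UDP(), new PING(), new NAKACK2(), new UNICAST3(),
                                     fd, new VERIFY_SUSPECT(), new STABLE(), new GMS())) {
            ch.connect("fd-sock-demo");
            // While connected, FD_SOCK monitors the right-hand neighbor's server socket;
            // a crashed neighbor surfaces as a SUSPECT event that GMS turns into a new view.
            Thread.sleep(10_000);
        }
    }
}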



