All downloads are free. Search and download functionality uses the official Maven repository.

bboss.org.jgroups.protocols.FD_ALL Maven / Gradle / Ivy

The newest version!
package bboss.org.jgroups.protocols;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;
import java.util.Map.Entry;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ScheduledFuture;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import bboss.org.jgroups.Address;
import bboss.org.jgroups.Event;
import bboss.org.jgroups.Global;
import bboss.org.jgroups.Message;
import bboss.org.jgroups.View;
import bboss.org.jgroups.annotations.DeprecatedProperty;
import bboss.org.jgroups.annotations.GuardedBy;
import bboss.org.jgroups.annotations.MBean;
import bboss.org.jgroups.annotations.ManagedAttribute;
import bboss.org.jgroups.annotations.ManagedOperation;
import bboss.org.jgroups.annotations.Property;
import bboss.org.jgroups.stack.Protocol;
import bboss.org.jgroups.util.BoundedList;
import bboss.org.jgroups.util.TimeScheduler;
import bboss.org.jgroups.util.Util;

/**
 * Failure detection based on simple heartbeat protocol. Every member
 * periodically multicasts a heartbeat. Every member also maintains a table of
 * all members (minus itself). When data or a heartbeat from P are received, we
 * reset the timestamp for P to the current time. Periodically, we check for
 * expired members, and suspect those.
 * 
 * @author Bela Ban
 * @version $Id: FD_ALL.java,v 1.34 2010/06/15 06:44:35 belaban Exp $
 */
@MBean(description="Failure detection based on simple heartbeat protocol")
@DeprecatedProperty(names={"shun"})
public class FD_ALL extends Protocol {
    
    /* -----------------------------------------    Properties     -------------------------------------------------- */

    @Property(description="Interval in which a HEARTBEAT is sent to the cluster")
    long interval=3000;

    @Property(description="Timeout after which a node P is suspected if neither a heartbeat nor data were received from P")
    long timeout=5000;
    
    @Property(description="Treat messages received from members as heartbeats. Note that this means we're updating " +
            "a value in a hashmap every time a message is passing up the stack through FD_ALL, which is costly. Default is false")
    boolean msg_counts_as_heartbeat=false;
    /* ---------------------------------------------   JMX      ------------------------------------------------------ */

    @ManagedAttribute(description="Number of heartbeats sent")
    protected int num_heartbeats_sent;

    @ManagedAttribute(description="Number of heartbeats received")
    protected int num_heartbeats_received=0;

    @ManagedAttribute(description="Number of suspected events received")
    protected int num_suspect_events=0;

    
    /* --------------------------------------------- Fields ------------------------------------------------------ */

    
    /** Map of addresses and timestamps of last updates */
    private final Map timestamps=new ConcurrentHashMap();

    private Address local_addr=null;
    
    private final List
members=Collections.synchronizedList(new ArrayList
()); private TimeScheduler timer=null; // task which multicasts HEARTBEAT message after 'interval' ms @GuardedBy("lock") private ScheduledFuture heartbeat_sender_future=null; // task which checks for members exceeding timeout and suspects them @GuardedBy("lock") private ScheduledFuture timeout_checker_future=null; private final BoundedList
suspect_history=new BoundedList
(20); private final Lock lock=new ReentrantLock(); public FD_ALL() {} @ManagedAttribute(description="Member address") public String getLocalAddress() {return local_addr != null? local_addr.toString() : "null";} @ManagedAttribute(description="Lists members of a cluster") public String getMembers() {return members.toString();} public int getHeartbeatsSent() {return num_heartbeats_sent;} public int getHeartbeatsReceived() {return num_heartbeats_received;} public int getSuspectEventsSent() {return num_suspect_events;} public long getTimeout() {return timeout;} public void setTimeout(long timeout) {this.timeout=timeout;} public long getInterval() {return interval;} public void setInterval(long interval) {this.interval=interval;} @Deprecated public static boolean isShun() {return false;} @Deprecated public void setShun(boolean flag) {} @ManagedAttribute(description="Are heartbeat tasks running") public boolean isRunning() { lock.lock(); try{ return isTimeoutCheckerRunning() && isHeartbeatSenderRunning(); } finally{ lock.unlock(); } } @ManagedOperation(description="Prints suspect history") public String printSuspectHistory() { StringBuilder sb=new StringBuilder(); for(Address tmp: suspect_history) { sb.append(new Date()).append(": ").append(tmp).append("\n"); } return sb.toString(); } @ManagedOperation(description="Prints timestamps") public String printTimestamps() { return printTimeStamps(); } public void resetStats() { num_heartbeats_sent=num_heartbeats_received=num_suspect_events=0; suspect_history.clear(); } public void init() throws Exception { timer=getTransport().getTimer(); if(timer == null) throw new Exception("timer not set"); } public void stop() { stopHeartbeatSender(); stopTimeoutChecker(); } public Object up(Event evt) { Message msg; Header hdr; switch(evt.getType()) { case Event.MSG: msg=(Message)evt.getArg(); hdr=(Header)msg.getHeader(this.id); if(msg_counts_as_heartbeat) update(msg.getSrc()); // update when data is received too ? 
maybe a bit costly if(hdr == null) break; // message did not originate from FD_ALL layer, just pass up switch(hdr.type) { case Header.HEARTBEAT: Address sender=msg.getSrc(); if(sender.equals(local_addr)) break; update(sender); // updates the heartbeat entry for 'sender' num_heartbeats_received++; break; // don't pass up ! case Header.SUSPECT: if(log.isTraceEnabled()) log.trace("[SUSPECT] suspect hdr is " + hdr); down_prot.down(new Event(Event.SUSPECT, hdr.suspected_mbr)); up_prot.up(new Event(Event.SUSPECT, hdr.suspected_mbr)); break; } return null; } return up_prot.up(evt); // pass up to the layer above us } public Object down(Event evt) { switch(evt.getType()) { case Event.VIEW_CHANGE: down_prot.down(evt); View v=(View)evt.getArg(); handleViewChange(v); return null; case Event.SET_LOCAL_ADDRESS: local_addr=(Address)evt.getArg(); break; } return down_prot.down(evt); } private void startTimeoutChecker() { lock.lock(); try { if(!isTimeoutCheckerRunning()) { timeout_checker_future=timer.scheduleWithFixedDelay(new TimeoutChecker(), interval, interval, TimeUnit.MILLISECONDS); } } finally { lock.unlock(); } } private void stopTimeoutChecker() { lock.lock(); try { if(timeout_checker_future != null) { timeout_checker_future.cancel(true); timeout_checker_future=null; } } finally { lock.unlock(); } } private void startHeartbeatSender() { lock.lock(); try { if(!isHeartbeatSenderRunning()) { heartbeat_sender_future=timer.scheduleWithFixedDelay(new HeartbeatSender(), interval, interval, TimeUnit.MILLISECONDS); } } finally { lock.unlock(); } } private void stopHeartbeatSender() { lock.lock(); try { if(heartbeat_sender_future != null) { heartbeat_sender_future.cancel(true); heartbeat_sender_future=null; } } finally { lock.unlock(); } } private boolean isTimeoutCheckerRunning() { return timeout_checker_future != null && !timeout_checker_future.isDone(); } private boolean isHeartbeatSenderRunning() { return heartbeat_sender_future != null && !heartbeat_sender_future.isDone(); } 
private void update(Address sender) { if(sender != null && !sender.equals(local_addr)) timestamps.put(sender, System.currentTimeMillis()); } private void handleViewChange(View v) { Vector
mbrs=v.getMembers(); boolean has_at_least_two=mbrs.size() > 1; members.clear(); members.addAll(mbrs); Set
keys=timestamps.keySet(); keys.retainAll(mbrs); // remove all nodes which have left the cluster for(Address member:mbrs) update(member); if(has_at_least_two) { startHeartbeatSender(); startTimeoutChecker(); } else { stopHeartbeatSender(); stopTimeoutChecker(); } } private String printTimeStamps() { StringBuilder sb=new StringBuilder(); long current_time=System.currentTimeMillis(); for(Iterator> it=timestamps.entrySet().iterator(); it.hasNext();) { Entry entry=it.next(); sb.append(entry.getKey()).append(": "); sb.append(current_time - entry.getValue().longValue()).append(" ms old\n"); } return sb.toString(); } void suspect(Address mbr) { Message suspect_msg=new Message(); suspect_msg.setFlag(Message.OOB); Header hdr=new Header(Header.SUSPECT, mbr); suspect_msg.putHeader(this.id, hdr); down_prot.down(new Event(Event.MSG, suspect_msg)); num_suspect_events++; suspect_history.add(mbr); } public static class Header extends bboss.org.jgroups.Header { public static final byte HEARTBEAT = 0; public static final byte SUSPECT = 1; byte type=Header.HEARTBEAT; Address suspected_mbr=null; public Header() { } public Header(byte type) { this.type=type; } public Header(byte type, Address suspect) { this(type); this.suspected_mbr=suspect; } public String toString() { switch(type) { case FD_ALL.Header.HEARTBEAT: return "heartbeat"; case FD_ALL.Header.SUSPECT: return "SUSPECT (suspected_mbr=" + suspected_mbr + ")"; default: return "unknown type (" + type + ")"; } } public int size() { int retval=Global.BYTE_SIZE; // type retval+=Util.size(suspected_mbr); return retval; } public void writeTo(DataOutputStream out) throws IOException { out.writeByte(type); Util.writeAddress(suspected_mbr, out); } public void readFrom(DataInputStream in) throws IOException, IllegalAccessException, InstantiationException { type=in.readByte(); suspected_mbr=Util.readAddress(in); } } /** * Class which periodically multicasts a HEARTBEAT message to the cluster */ class HeartbeatSender implements Runnable { 
public void run() { Message heartbeat=new Message(); // send to all heartbeat.setFlag(Message.OOB); Header hdr=new Header(Header.HEARTBEAT); heartbeat.putHeader(id, hdr); down_prot.down(new Event(Event.MSG, heartbeat)); if(log.isTraceEnabled()) log.trace(local_addr + " sent heartbeat to cluster"); num_heartbeats_sent++; } } class TimeoutChecker implements Runnable { public void run() { if(log.isTraceEnabled()) log.trace("checking for expired senders, table is:\n" + printTimeStamps()); long current_time=System.currentTimeMillis(), diff; for(Iterator> it=timestamps.entrySet().iterator(); it.hasNext();) { Entry entry=it.next(); Address key=entry.getKey(); Long val=entry.getValue(); if(val == null) { it.remove(); continue; } diff=current_time - val.longValue(); if(diff > timeout) { if(log.isTraceEnabled()) log.trace("haven't received a heartbeat from " + key + " for " + diff + " ms, suspecting it"); suspect(key); } } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy