All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.master.ClusterStatusPublisher Maven / Gradle / Ivy

The newest version!
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */


package org.apache.hadoop.hbase.master;


import io.netty.bootstrap.Bootstrap;
import io.netty.bootstrap.ChannelFactory;
import io.netty.buffer.Unpooled;
import io.netty.channel.Channel;
import io.netty.channel.ChannelException;
import io.netty.channel.ChannelHandlerContext;
import io.netty.channel.ChannelOption;
import io.netty.channel.EventLoopGroup;
import io.netty.channel.nio.NioEventLoopGroup;
import io.netty.channel.socket.DatagramChannel;
import io.netty.channel.socket.DatagramPacket;
import io.netty.channel.socket.InternetProtocolFamily;
import io.netty.channel.socket.nio.NioDatagramChannel;
import io.netty.handler.codec.MessageToMessageEncoder;
import io.netty.util.internal.StringUtil;

import java.io.Closeable;
import java.io.IOException;
import java.net.Inet6Address;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.NetworkInterface;
import java.net.UnknownHostException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterStatus;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.ScheduledChore;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.protobuf.generated.ClusterStatusProtos;
import org.apache.hadoop.hbase.util.Addressing;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ExceptionUtil;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.util.VersionInfo;


/**
 * Class to publish the cluster status to the client. This allows them to know immediately
 *  the dead region servers, hence to cut the connection they have with them, eventually stop
 *  waiting on the socket. This improves the mean time to recover, and as well allows to increase
 *  on the client the different timeouts, as the dead servers will be detected separately.
 */
@InterfaceAudience.Private
public class ClusterStatusPublisher extends ScheduledChore {
  /**
   * The implementation class used to publish the status. Default is null (no publish).
   * Use org.apache.hadoop.hbase.master.ClusterStatusPublisher.MulticastPublisher to multicast the
   * status.
   */
  public static final String STATUS_PUBLISHER_CLASS = "hbase.status.publisher.class";
  public static final Class
      DEFAULT_STATUS_PUBLISHER_CLASS =
      org.apache.hadoop.hbase.master.ClusterStatusPublisher.MulticastPublisher.class;

  /**
   * The minimum time between two status messages, in milliseconds.
   */
  public static final String STATUS_PUBLISH_PERIOD = "hbase.status.publish.period";
  public static final int DEFAULT_STATUS_PUBLISH_PERIOD = 10000;

  private long lastMessageTime = 0;
  private final HMaster master;
  private final int messagePeriod; // time between two message
  private final ConcurrentMap lastSent =
      new ConcurrentHashMap();
  private Publisher publisher;
  private boolean connected = false;

  /**
   * We want to limit the size of the protobuf message sent, do fit into a single packet.
   * a reasonable size for ip / ethernet is less than 1Kb.
   */
  public final static int MAX_SERVER_PER_MESSAGE = 10;

  /**
   * If a server dies, we're sending the information multiple times in case a receiver misses the
   * message.
   */
  public final static int NB_SEND = 5;

  public ClusterStatusPublisher(HMaster master, Configuration conf,
                                Class publisherClass)
      throws IOException {
    super("HBase clusterStatusPublisher for " + master.getName(), master, conf.getInt(
      STATUS_PUBLISH_PERIOD, DEFAULT_STATUS_PUBLISH_PERIOD));
    this.master = master;
    this.messagePeriod = conf.getInt(STATUS_PUBLISH_PERIOD, DEFAULT_STATUS_PUBLISH_PERIOD);
    try {
      this.publisher = publisherClass.newInstance();
    } catch (InstantiationException e) {
      throw new IOException("Can't create publisher " + publisherClass.getName(), e);
    } catch (IllegalAccessException e) {
      throw new IOException("Can't create publisher " + publisherClass.getName(), e);
    }
    this.publisher.connect(conf);
    connected = true;
  }

  // For tests only
  protected ClusterStatusPublisher() {
    master = null;
    messagePeriod = 0;
  }

  @Override
  protected void chore() {
    if (!connected) {
      return;
    }

    List sns = generateDeadServersListToSend();
    if (sns.isEmpty()) {
      // Nothing to send. Done.
      return;
    }

    final long curTime = EnvironmentEdgeManager.currentTime();
    if (lastMessageTime > curTime - messagePeriod) {
      // We already sent something less than 10 second ago. Done.
      return;
    }

    // Ok, we're going to send something then.
    lastMessageTime = curTime;

    // We're reusing an existing protobuf message, but we don't send everything.
    // This could be extended in the future, for example if we want to send stuff like the
    //  hbase:meta server name.
    ClusterStatus cs = new ClusterStatus(VersionInfo.getVersion(),
        master.getMasterFileSystem().getClusterId().toString(),
        null,
        sns,
        master.getServerName(),
        null,
        null,
        null,
        null);


    publisher.publish(cs);
  }

  protected void cleanup() {
    connected = false;
    publisher.close();
  }

  /**
   * Create the dead server to send. A dead server is sent NB_SEND times. We send at max
   * MAX_SERVER_PER_MESSAGE at a time. if there are too many dead servers, we send the newly
   * dead first.
   */
  protected List generateDeadServersListToSend() {
    // We're getting the message sent since last time, and add them to the list
    long since = EnvironmentEdgeManager.currentTime() - messagePeriod * 2;
    for (Pair dead : getDeadServers(since)) {
      lastSent.putIfAbsent(dead.getFirst(), 0);
    }

    // We're sending the new deads first.
    List> entries = new ArrayList>();
    entries.addAll(lastSent.entrySet());
    Collections.sort(entries, new Comparator>() {
      @Override
      public int compare(Map.Entry o1, Map.Entry o2) {
        return o1.getValue().compareTo(o2.getValue());
      }
    });

    // With a limit of MAX_SERVER_PER_MESSAGE
    int max = entries.size() > MAX_SERVER_PER_MESSAGE ? MAX_SERVER_PER_MESSAGE : entries.size();
    List res = new ArrayList(max);

    for (int i = 0; i < max; i++) {
      Map.Entry toSend = entries.get(i);
      if (toSend.getValue() >= (NB_SEND - 1)) {
        lastSent.remove(toSend.getKey());
      } else {
        lastSent.replace(toSend.getKey(), toSend.getValue(), toSend.getValue() + 1);
      }

      res.add(toSend.getKey());
    }

    return res;
  }

  /**
   * Get the servers which died since a given timestamp.
   * protected because it can be subclassed by the tests.
   */
  protected List> getDeadServers(long since) {
    if (master.getServerManager() == null) {
      return Collections.emptyList();
    }

    return master.getServerManager().getDeadServers().copyDeadServersSince(since);
  }


  public interface Publisher extends Closeable {

    void connect(Configuration conf) throws IOException;

    void publish(ClusterStatus cs);

    @Override
    void close();
  }

  @InterfaceAudience.LimitedPrivate(HBaseInterfaceAudience.CONFIG)
  public static class MulticastPublisher implements Publisher {
    private DatagramChannel channel;
    private final EventLoopGroup group = new NioEventLoopGroup(
        1, Threads.newDaemonThreadFactory("hbase-master-clusterStatusPublisher"));

    public MulticastPublisher() {
    }

    @Override
    public void connect(Configuration conf) throws IOException {
      String mcAddress = conf.get(HConstants.STATUS_MULTICAST_ADDRESS,
          HConstants.DEFAULT_STATUS_MULTICAST_ADDRESS);
      int port = conf.getInt(HConstants.STATUS_MULTICAST_PORT,
          HConstants.DEFAULT_STATUS_MULTICAST_PORT);

      final InetAddress ina;
      try {
        ina = InetAddress.getByName(mcAddress);
      } catch (UnknownHostException e) {
        close();
        throw new IOException("Can't connect to " + mcAddress, e);
      }

      final InetSocketAddress isa = new InetSocketAddress(mcAddress, port);

      InternetProtocolFamily family;
      InetAddress localAddress;
      if (ina instanceof Inet6Address) {
        localAddress = Addressing.getIp6Address();
        family = InternetProtocolFamily.IPv6;
      }else{
        localAddress = Addressing.getIp4Address();
        family = InternetProtocolFamily.IPv4;
      }
      NetworkInterface ni = NetworkInterface.getByInetAddress(localAddress);

      Bootstrap b = new Bootstrap();
      b.group(group)
      .channelFactory(new HBaseDatagramChannelFactory(NioDatagramChannel.class, family))
      .option(ChannelOption.SO_REUSEADDR, true)
      .handler(new ClusterStatusEncoder(isa));

      try {
        channel = (DatagramChannel) b.bind(new InetSocketAddress(0)).sync().channel();
        channel.joinGroup(ina, ni, null, channel.newPromise()).sync();
        channel.connect(isa).sync();
      } catch (InterruptedException e) {
        close();
        throw ExceptionUtil.asInterrupt(e);
      }
    }

    private static final class HBaseDatagramChannelFactory implements ChannelFactory {
      private final Class clazz;
      private InternetProtocolFamily family;

      HBaseDatagramChannelFactory(Class clazz, InternetProtocolFamily family) {
          this.clazz = clazz;
          this.family = family;
      }

      @Override
      public T newChannel() {
          try {
            return ReflectionUtils.instantiateWithCustomCtor(clazz.getName(),
              new Class[] { InternetProtocolFamily.class }, new Object[] { family });

          } catch (Throwable t) {
              throw new ChannelException("Unable to create Channel from class " + clazz, t);
          }
      }

      @Override
      public String toString() {
          return StringUtil.simpleClassName(clazz) + ".class";
      }
  }

    private static class ClusterStatusEncoder extends MessageToMessageEncoder {
      final private InetSocketAddress isa;

      private ClusterStatusEncoder(InetSocketAddress isa) {
        this.isa = isa;
      }

      @Override
      protected void encode(ChannelHandlerContext channelHandlerContext,
                            ClusterStatus clusterStatus, List objects) {
        ClusterStatusProtos.ClusterStatus csp = clusterStatus.convert();
        objects.add(new DatagramPacket(Unpooled.wrappedBuffer(csp.toByteArray()), isa));
      }
    }

    @Override
    public void publish(ClusterStatus cs) {
      channel.writeAndFlush(cs).syncUninterruptibly();
    }

    @Override
    public void close() {
      if (channel != null) {
        channel.close();
      }
      group.shutdownGracefully();
    }
  }
}