All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.master.assignment.RegionStates Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.master.assignment;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;

/**
 * RegionStates contains a set of Maps that describes the in-memory state of the AM, with
 * the regions available in the system, the region in transition, the offline regions and
 * the servers holding regions.
 */
@InterfaceAudience.Private
public class RegionStates {
  private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);

  /**
   * States listed as expected on open. Each group is annotated with the scenario that leads to it.
   */
  protected static final State[] STATES_EXPECTED_ON_OPEN = {
    // State may already be OPEN if we died after receiving the OPEN from regionserver
    // but before complete finish of AssignProcedure. HBASE-20100.
    State.OPEN,
    // disable/offline
    State.OFFLINE, State.CLOSED,
    // ServerCrashProcedure
    State.SPLITTING, State.SPLIT,
    // already in-progress (retrying)
    State.OPENING, State.FAILED_OPEN,
  };

  /**
   * States listed as expected on close. Each group is annotated with the scenario that leads to
   * it.
   */
  protected static final State[] STATES_EXPECTED_ON_CLOSE = {
    // ServerCrashProcedure
    State.SPLITTING, State.SPLIT, State.MERGING,
    // enabled/open
    State.OPEN,
    // already in-progress (retrying)
    State.CLOSING
  };

  /**
   * {@link ProcedureEvent} constructed from the {@link RegionInfo} of the region it belongs to.
   * One instance is created per {@link RegionStateNode}.
   */
  private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
    /**
     * @param regionInfo the region this event is associated with; handed to the superclass as-is
     */
    public AssignmentProcedureEvent(final RegionInfo regionInfo) {
      super(regionInfo);
    }
  }

  /**
   * {@link ProcedureEvent} constructed from the {@link ServerName} of the server it belongs to.
   * One instance is created per {@link ServerStateNode}.
   */
  private static class ServerReportEvent extends ProcedureEvent<ServerName> {
    /**
     * @param serverName the server this event is associated with; handed to the superclass as-is
     */
    public ServerReportEvent(final ServerName serverName) {
      super(serverName);
    }
  }

  /**
   * Current Region State.
   * In-memory only. Not persisted.
   */
  // Mutable/Immutable? Changes have to be synchronized or not?
  // Data members are volatile which seems to say multi-threaded access is fine.
  // In the below we do check and set but the check state could change before
  // we do the set because no synchronization....which seems dodgy. Clear up
  // understanding here... how many threads accessing? Do locks make it so one
  // thread at a time working on a single Region's RegionStateNode? Lets presume
  // so for now. Odd is that elsewhere in this RegionStates, we synchronize on
  // the RegionStateNode instance. TODO.
  public static class RegionStateNode implements Comparable<RegionStateNode> {
    private final RegionInfo regionInfo;
    private final ProcedureEvent<?> event;

    private volatile RegionTransitionProcedure procedure = null;
    private volatile ServerName regionLocation = null;
    // notice that, the lastHost will only be updated when a region is successfully CLOSED through
    // UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
    // with the data in meta.
    private volatile ServerName lastHost = null;
    /**
     * A Region-in-Transition (RIT) moves through states.
     * See {@link State} for complete list. A Region that
     * is opened moves from OFFLINE => OPENING => OPEN.
     */
    private volatile State state = State.OFFLINE;

    /**
     * Updated whenever a call to {@link #setRegionLocation(ServerName)}
     * or {@link #setState(State, State...)}.
     */
    private volatile long lastUpdate = 0;

    private volatile long openSeqNum = HConstants.NO_SEQNUM;

    /**
     * Creates a node in {@link State#OFFLINE} with no location, no procedure and no open sequence
     * number.
     * @param regionInfo the region this node describes
     */
    public RegionStateNode(final RegionInfo regionInfo) {
      this.regionInfo = regionInfo;
      this.event = new AssignmentProcedureEvent(regionInfo);
    }

    /**
     * Moves this node to {@code update} if the current state is acceptable, and stamps the
     * last-update time.
     * @param update new region state this node should be assigned.
     * @param expected current state should be in this given list of expected states; an empty or
     *          null list accepts any current state
     * @return true, if current state is in expected list; otherwise false.
     */
    public boolean setState(final State update, final State... expected) {
      if (!isInState(expected)) {
        return false;
      }
      this.state = update;
      this.lastUpdate = EnvironmentEdgeManager.currentTime();
      return true;
    }

    /**
     * Put region into OFFLINE mode (set state and clear location).
     * @return Last recorded server deploy
     */
    public ServerName offline() {
      // No expected states are passed, so this transition is unconditional.
      setState(State.OFFLINE);
      return setRegionLocation(null);
    }

    /**
     * Set new {@link State} but only if currently in expected State
     * (if not, throw {@link UnexpectedStateException}).
     * @param update the state to move to
     * @param expected the states the node is allowed to be in before the move
     * @throws UnexpectedStateException if the current state is not one of {@code expected}
     */
    public void transitionState(final State update, final State... expected)
        throws UnexpectedStateException {
      if (!setState(update, expected)) {
        throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
          " so could move to " + update + " but current state=" + getState());
      }
    }

    /**
     * @param expected candidate states; null or empty means "any state"
     * @return true if the current state equals one of {@code expected}, or if no candidates were
     *         given
     */
    public boolean isInState(final State... expected) {
      if (expected == null || expected.length == 0) {
        return true;
      }
      // Read the volatile field once so that every comparison is made against the same value.
      final State current = getState();
      for (State candidate : expected) {
        if (current == candidate) {
          return true;
        }
      }
      return false;
    }

    /**
     * @return true if the node is in {@link State#FAILED_OPEN} and still has a procedure attached
     */
    public boolean isStuck() {
      return isInState(State.FAILED_OPEN) && getProcedure() != null;
    }

    /**
     * @return true if a procedure is currently attached to this node
     */
    public boolean isInTransition() {
      return getProcedure() != null;
    }

    /**
     * @return the attached procedure's last-update time if there is one, otherwise the time this
     *         node's state or location was last set
     */
    public long getLastUpdate() {
      // Copy the volatile reference: it may be unset by another thread between the null check and
      // the dereference.
      final RegionTransitionProcedure proc = this.procedure;
      return proc != null ? proc.getLastUpdate() : lastUpdate;
    }

    public void setLastHost(final ServerName serverName) {
      this.lastHost = serverName;
    }

    public void setOpenSeqNum(final long seqId) {
      this.openSeqNum = seqId;
    }

    /**
     * Records a new location and stamps the last-update time.
     * @param serverName the new location, may be null to clear it
     * @return the location that was recorded before this call, possibly null
     */
    public ServerName setRegionLocation(final ServerName serverName) {
      ServerName lastRegionLocation = this.regionLocation;
      if (LOG.isTraceEnabled() && serverName == null) {
        LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
      }
      this.regionLocation = serverName;
      this.lastUpdate = EnvironmentEdgeManager.currentTime();
      return lastRegionLocation;
    }

    /**
     * Attaches {@code proc} unless a different procedure is already attached.
     * @return false if another procedure owns this node, true otherwise
     */
    public boolean setProcedure(final RegionTransitionProcedure proc) {
      final RegionTransitionProcedure current = this.procedure;
      if (current != null && current != proc) {
        return false;
      }
      this.procedure = proc;
      return true;
    }

    /**
     * Detaches the procedure unless a different procedure than {@code proc} is attached.
     * @return false if another procedure owns this node, true otherwise
     */
    public boolean unsetProcedure(final RegionTransitionProcedure proc) {
      final RegionTransitionProcedure current = this.procedure;
      if (current != null && current != proc) {
        return false;
      }
      this.procedure = null;
      return true;
    }

    public RegionTransitionProcedure getProcedure() {
      return procedure;
    }

    public ProcedureEvent<?> getProcedureEvent() {
      return event;
    }

    public RegionInfo getRegionInfo() {
      return regionInfo;
    }

    public TableName getTable() {
      return getRegionInfo().getTable();
    }

    public boolean isSystemTable() {
      return getTable().isSystemTable();
    }

    public ServerName getLastHost() {
      return lastHost;
    }

    public ServerName getRegionLocation() {
      return regionLocation;
    }

    public State getState() {
      return state;
    }

    public long getOpenSeqNum() {
      return openSeqNum;
    }

    public int getFormatVersion() {
      // we don't have any format for now
      // it should probably be in regionInfo.getFormatVersion()
      return 0;
    }

    /**
     * @return a new {@link RegionState} built from this node's region, state, last-update time
     *         and location at the time of the call
     */
    public RegionState toRegionState() {
      return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
    }

    @Override
    public int compareTo(final RegionStateNode other) {
      // NOTE: RegionInfo sort by table first, so we are relying on that.
      // we have a TestRegionState#testOrderedByTable() that check for that.
      return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
    }

    @Override
    public int hashCode() {
      return getRegionInfo().hashCode();
    }

    @Override
    public boolean equals(final Object other) {
      if (this == other) {
        return true;
      }
      if (!(other instanceof RegionStateNode)) {
        return false;
      }
      return compareTo((RegionStateNode) other) == 0;
    }

    @Override
    public String toString() {
      return toDescriptiveString();
    }

    public String toShortString() {
      // rit= is the current Region-In-Transition State -- see State enum.
      return String.format("rit=%s, location=%s", getState(), getRegionLocation());
    }

    public String toDescriptiveString() {
      return String.format("%s, table=%s, region=%s",
        toShortString(), getTable(), getRegionInfo().getEncodedName());
    }
  }

  // This comparator sorts the RegionStates by time stamp then Region name.
  // Comparing by timestamp alone can lead us to discard different RegionStates that happen
  // to share a timestamp.
  private static class RegionStateStampComparator implements Comparator<RegionState> {
    /**
     * Orders by {@code getStamp()} first and falls back to {@link RegionInfo#COMPARATOR} on the
     * regions when the stamps are equal.
     */
    @Override
    public int compare(final RegionState l, final RegionState r) {
      final int stampCmp = Long.compare(l.getStamp(), r.getStamp());
      if (stampCmp != 0) {
        return stampCmp;
      }
      return RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
    }
  }

  /**
   * Server State.
   */
  public enum ServerState {
    /** Initial state. Available. */
    ONLINE,

    /**
     * Only a server which carries meta can have this state. The wal for meta is split and meta is
     * assigned first, before the other wals are split.
     */
    SPLITTING_META,

    /**
     * Indicates that the meta splitting is done. This state is needed so that the
     * UnassignProcedure for meta can safely quit. See the comments in
     * UnassignProcedure.remoteCallFailed for more details.
     */
    SPLITTING_META_DONE,

    /** Server expired/crashed. Currently undergoing WAL splitting. */
    SPLITTING,

    /**
     * WAL splitting done. This state is used to tell the UnassignProcedure that it can safely
     * quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
     */
    OFFLINE
  }

  /**
   * State of Server; list of hosted regions, etc.
   */
  public static class ServerStateNode implements Comparable<ServerStateNode> {
    private final ServerReportEvent reportEvent;

    private final Set<RegionStateNode> regions;
    private final ServerName serverName;

    private volatile ServerState state = ServerState.ONLINE;

    /**
     * Creates a node in {@link ServerState#ONLINE} with an empty, concurrent region set.
     * @param serverName the server this node describes
     */
    public ServerStateNode(final ServerName serverName) {
      this.serverName = serverName;
      this.regions = ConcurrentHashMap.newKeySet();
      this.reportEvent = new ServerReportEvent(serverName);
    }

    public ServerName getServerName() {
      return serverName;
    }

    public ServerState getState() {
      return state;
    }

    public ProcedureEvent<?> getReportEvent() {
      return reportEvent;
    }

    /**
     * @param expected candidate states
     * @return true if the current state equals one of {@code expected}; false when
     *         {@code expected} is null or empty
     */
    public boolean isInState(final ServerState... expected) {
      if (expected == null) {
        return false;
      }
      // Read the volatile field once so that every comparison is made against the same value.
      final ServerState current = state;
      for (ServerState candidate : expected) {
        if (current == candidate) {
          return true;
        }
      }
      return false;
    }

    private void setState(final ServerState state) {
      this.state = state;
    }

    /**
     * @return the live, mutable set of region nodes referenced by this server (not a copy)
     */
    public Set<RegionStateNode> getRegions() {
      return regions;
    }

    public int getRegionCount() {
      return regions.size();
    }

    /**
     * @return a new list holding the {@link RegionInfo} of every region node currently in the set
     */
    public ArrayList<RegionInfo> getRegionInfoList() {
      ArrayList<RegionInfo> hris = new ArrayList<>(regions.size());
      for (RegionStateNode region : regions) {
        hris.add(region.getRegionInfo());
      }
      return hris;
    }

    public void addRegion(final RegionStateNode regionNode) {
      this.regions.add(regionNode);
    }

    public void removeRegion(final RegionStateNode regionNode) {
      this.regions.remove(regionNode);
    }

    @Override
    public int compareTo(final ServerStateNode other) {
      return getServerName().compareTo(other.getServerName());
    }

    @Override
    public int hashCode() {
      return getServerName().hashCode();
    }

    @Override
    public boolean equals(final Object other) {
      if (this == other) {
        return true;
      }
      if (!(other instanceof ServerStateNode)) {
        return false;
      }
      return compareTo((ServerStateNode) other) == 0;
    }

    @Override
    public String toString() {
      return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(),
          getRegionCount());
    }
  }

  /** Shared comparator instance: orders {@link RegionState}s by stamp, then by region. */
  public static final RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
    new RegionStateStampComparator();

  // TODO: Replace the ConcurrentSkipListMaps
  /**
   * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}
   */
  private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
      new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);

  /**
   * Regions that currently have a transition registered, keyed by {@link RegionInfo}.
   */
  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
    new ConcurrentSkipListMap<>(RegionInfo.COMPARATOR);

  /**
   * Regions marked as offline on a read of hbase:meta. Unused or at least, once
   * offlined, regions have no means of coming on line again. TODO.
   */
  private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
    new ConcurrentSkipListMap<>();

  /**
   * RegionName as bytes to the failed-open bookkeeping record of that region.
   */
  private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
    new ConcurrentSkipListMap<>(Bytes.BYTES_COMPARATOR);

  /**
   * ServerName to the {@link ServerStateNode} tracking that server.
   */
  private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
      new ConcurrentHashMap<>();

  /** Creates an instance; all tracking maps start out empty via their field initializers. */
  public RegionStates() {
  }

  /**
   * Drops all in-memory region and server state held by this instance.
   * The failed-open records are dropped as well, so that they cannot outlive the
   * {@link RegionStateNode}s they point at.
   */
  public void clear() {
    regionsMap.clear();
    regionInTransition.clear();
    regionOffline.clear();
    regionFailedOpen.clear();
    serverMap.clear();
  }

  /**
   * @param hri the region to look for
   * @return true if the region is present in the regions map, the in-transition map or the
   *         offline map
   */
  @VisibleForTesting
  public boolean isRegionInRegionStates(final RegionInfo hri) {
    if (regionsMap.containsKey(hri.getRegionName())) {
      return true;
    }
    if (regionInTransition.containsKey(hri)) {
      return true;
    }
    return regionOffline.containsKey(hri);
  }

  // ==========================================================================
  //  RegionStateNode helpers
  // ==========================================================================
  /**
   * Registers a fresh node for the region unless one is already registered under the same region
   * name.
   * @return the node that ends up registered: the existing one if present, else the new one
   */
  protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
    final RegionStateNode candidate = new RegionStateNode(regionInfo);
    final RegionStateNode existing = regionsMap.putIfAbsent(regionInfo.getRegionName(), candidate);
    if (existing != null) {
      return existing;
    }
    return candidate;
  }

  /**
   * @return the node registered for the region, creating and registering one if there is none
   */
  protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
    final RegionStateNode existing = regionsMap.get(regionInfo.getRegionName());
    if (existing != null) {
      return existing;
    }
    return createRegionStateNode(regionInfo);
  }

  /**
   * @param regionName the region name bytes used as key of the regions map
   * @return the node registered under that name, or null if there is none
   */
  RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
    final RegionStateNode found = regionsMap.get(regionName);
    return found;
  }

  /**
   * @return the node registered for the region's name, or null if there is none
   */
  public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
    final byte[] regionName = regionInfo.getRegionName();
    return getRegionStateNodeFromName(regionName);
  }

  /**
   * Removes the region from the regions map, the in-transition map and the offline map.
   * Removing a region that is absent from any of them is a no-op for that map.
   */
  public void deleteRegion(final RegionInfo regionInfo) {
    regionsMap.remove(regionInfo.getRegionName());
    // See HBASE-20860
    // After master restarts, merged regions' RIT state may not be cleaned,
    // making sure they are cleaned here
    regionInTransition.remove(regionInfo);
    // Remove from the offline regions map too if there. remove() already reports whether an entry
    // existed, so no separate containsKey() probe is needed.
    if (this.regionOffline.remove(regionInfo) != null && LOG.isTraceEnabled()) {
      LOG.trace("Removing from regionOffline Map: " + regionInfo);
    }
  }

  /**
   * Calls {@link #deleteRegion(RegionInfo)} for every region in the list, in list order.
   */
  public void deleteRegions(final List<RegionInfo> regionInfos) {
    regionInfos.forEach(this::deleteRegion);
  }

  /**
   * Collects the nodes of the given table by walking the regions map from the table name onwards
   * and stopping at the first node that belongs to a different table.
   * @return a new list of the table's nodes, empty if there are none
   */
  ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) {
    final ArrayList<RegionStateNode> regions = new ArrayList<>();
    for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
      if (!node.getTable().equals(tableName)) {
        break;
      }
      regions.add(node);
    }
    return regions;
  }

  /**
   * Collects a {@link RegionState} for each node of the given table by walking the regions map
   * from the table name onwards and stopping at the first node that belongs to a different table.
   * @return a new list of region states, empty if there are none
   */
  ArrayList<RegionState> getTableRegionStates(final TableName tableName) {
    final ArrayList<RegionState> regions = new ArrayList<>();
    for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
      if (!node.getTable().equals(tableName)) {
        break;
      }
      regions.add(node.toRegionState());
    }
    return regions;
  }

  /**
   * Collects the {@link RegionInfo} of each node of the given table by walking the regions map
   * from the table name onwards and stopping at the first node that belongs to a different table.
   * @return a new list of region infos, empty if there are none
   */
  ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) {
    final ArrayList<RegionInfo> regions = new ArrayList<>();
    for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
      if (!node.getTable().equals(tableName)) {
        break;
      }
      regions.add(node.getRegionInfo());
    }
    return regions;
  }

  /**
   * @return the values view of the regions map (not a copy)
   */
  Collection<RegionStateNode> getRegionStateNodes() {
    return regionsMap.values();
  }

  /**
   * @return a new list with a {@link RegionState} for every node in the regions map
   */
  public ArrayList<RegionState> getRegionStates() {
    final ArrayList<RegionState> regions = new ArrayList<>(regionsMap.size());
    for (RegionStateNode node : regionsMap.values()) {
      regions.add(node.toRegionState());
    }
    return regions;
  }

  // ==========================================================================
  //  RegionState helpers
  // ==========================================================================
  /**
   * @return a {@link RegionState} built from the region's node, or null if no node is registered
   */
  public RegionState getRegionState(final RegionInfo regionInfo) {
    final RegionStateNode node = getRegionStateNode(regionInfo);
    if (node == null) {
      return null;
    }
    return node.toRegionState();
  }

  /**
   * Linear scan over all registered nodes for one whose encoded region name matches.
   * @return a {@link RegionState} for the first match, or null if nothing matches
   */
  public RegionState getRegionState(final String encodedRegionName) {
    // TODO: Need a map  but it is just dispatch merge...
    for (RegionStateNode candidate: regionsMap.values()) {
      final String candidateName = candidate.getRegionInfo().getEncodedName();
      if (!candidateName.equals(encodedRegionName)) {
        continue;
      }
      return candidate.toRegionState();
    }
    return null;
  }

  // ============================================================================================
  //  TODO: helpers
  // ============================================================================================
  /**
   * @return true if at least one region node of the given table is registered
   */
  public boolean hasTableRegionStates(final TableName tableName) {
    // The table has regions exactly when the first entry at or after the table name belongs to
    // it; no need to materialize a RegionState for every region just to test for emptiness.
    final Map.Entry<byte[], RegionStateNode> first = regionsMap.ceilingEntry(tableName.getName());
    return first != null && first.getValue().getTable().equals(tableName);
  }

  /**
   * Same as {@code getRegionsOfTable(table, false)}.
   * @return Return online regions of table; does not include OFFLINE or SPLITTING regions.
   */
  public List<RegionInfo> getRegionsOfTable(final TableName table) {
    return getRegionsOfTable(table, false);
  }

  /**
   * Builds the reopen descriptor for one node while holding the node's monitor.
   * @return a location carrying the node's open sequence number when it is OPEN, a location
   *         carrying -1 when it is OPENING, and null when the node is filtered out by
   *         {@code include(node, false)} or is in any other state
   */
  private HRegionLocation createRegionForReopen(RegionStateNode node) {
    synchronized (node) {
      if (!include(node, false)) {
        return null;
      }
      if (node.isInState(State.OPEN)) {
        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
          node.getOpenSeqNum());
      }
      if (node.isInState(State.OPENING)) {
        return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), -1);
      }
      return null;
    }
  }

  /**
   * Get the regions to be reopened when modifying a table.
   * 

* Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the * state of this region, positive means the region is in {@link State#OPEN}, -1 means * {@link State#OPENING}. And for regions in other states we do not need reopen them. */ public List getRegionsOfTableForReopen(TableName tableName) { return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen) .filter(r -> r != null).collect(Collectors.toList()); } /** * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the * same with {@link #getRegionsOfTableForReopen(TableName)}. *

* For a region which is in {@link State#OPEN} before, if the region state is changed or the open * seq num is changed, we can confirm that it has been reopened. *

* For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN} * now and we will schedule a MRP to reopen it. But there are several exceptions: *

    *
  • The region is in state other than {@link State#OPEN} or {@link State#OPENING}.
  • *
  • The location of the region has been changed
  • *
* Of course the region could still be in {@link State#OPENING} state and still on the same * server, then here we will still return a {@link HRegionLocation} for it, just like * {@link #getRegionsOfTableForReopen(TableName)}. * @param oldLoc the previous state/location of this region * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which * means we still need to reopen the region. * @see #getRegionsOfTableForReopen(TableName) */ public HRegionLocation checkReopened(HRegionLocation oldLoc) { RegionStateNode node = getRegionStateNode(oldLoc.getRegion()); // HBASE-20921 // if the oldLoc's state node does not exist, that means the region is // merged or split, no need to check it if (node == null) { return null; } synchronized (node) { if (oldLoc.getSeqNum() >= 0) { // in OPEN state before if (node.isInState(State.OPEN)) { if (node.getOpenSeqNum() > oldLoc.getSeqNum()) { // normal case, the region has been reopened return null; } else { // the open seq num does not change, need to reopen again return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), node.getOpenSeqNum()); } } else { // the state has been changed so we can make sure that the region has been reopened(not // finished maybe, but not a problem). return null; } } else { // in OPENING state before if (!node.isInState(State.OPEN, State.OPENING)) { // not in OPEN or OPENING state, then we can make sure that the region has been // reopened(not finished maybe, but not a problem) return null; } else { if (!node.getRegionLocation().equals(oldLoc.getServerName())) { // the region has been moved, so we can make sure that the region has been reopened. return null; } // normal case, we are still in OPENING state, or the reopen has been opened and the state // is changed to OPEN. long openSeqNum = node.isInState(State.OPEN) ? 
node.getOpenSeqNum() : -1; return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum); } } } } /** * @return Return online regions of table; does not include OFFLINE or SPLITTING regions. */ public List getRegionsOfTable(TableName table, boolean offline) { return getRegionsOfTable(table, state -> include(state, offline)); } /** * @return Return the regions of the table; does not include OFFLINE unless you set * offline to true. Does not include regions that are in the * {@link State#SPLIT} state. */ private List getRegionsOfTable(TableName table, Predicate filter) { return getTableRegionStateNodes(table).stream().filter(filter).map(n -> n.getRegionInfo()) .collect(Collectors.toList()); } /** * Utility. Whether to include region in list of regions. Default is to * weed out split and offline regions. * @return True if we should include the node (do not include * if split or offline unless offline is set to true. */ boolean include(final RegionStateNode node, final boolean offline) { if (LOG.isTraceEnabled()) { LOG.trace("WORKING ON " + node + " " + node.getRegionInfo()); } if (node.isInState(State.SPLIT)) return false; if (node.isInState(State.OFFLINE) && !offline) return false; final RegionInfo hri = node.getRegionInfo(); return (!hri.isOffline() && !hri.isSplit()) || ((hri.isOffline() || hri.isSplit()) && offline); } /** * Returns the set of regions hosted by the specified server * @param serverName the server we are interested in * @return set of RegionInfo hosted by the specified server */ public List getServerRegionInfoSet(final ServerName serverName) { final ServerStateNode serverInfo = getServerNode(serverName); if (serverInfo == null) return Collections.emptyList(); synchronized (serverInfo) { return serverInfo.getRegionInfoList(); } } // ============================================================================================ // Split helpers // These methods will only be called in ServerCrashProcedure, and at the end of SCP we 
will remove // the ServerStateNode by calling removeServer. // ============================================================================================ private void setServerState(ServerName serverName, ServerState state) { ServerStateNode serverNode = getOrCreateServer(serverName); synchronized (serverNode) { serverNode.setState(state); } } /** * Call this when we start meta log splitting a crashed Server. * @see #metaLogSplit(ServerName) */ public void metaLogSplitting(ServerName serverName) { setServerState(serverName, ServerState.SPLITTING_META); } /** * Called after we've split the meta logs on a crashed Server. * @see #metaLogSplitting(ServerName) */ public void metaLogSplit(ServerName serverName) { setServerState(serverName, ServerState.SPLITTING_META_DONE); } /** * Call this when we start log splitting for a crashed Server. * @see #logSplit(ServerName) */ public void logSplitting(final ServerName serverName) { setServerState(serverName, ServerState.SPLITTING); } /** * Called after we've split all logs on a crashed Server. * @see #logSplitting(ServerName) */ public void logSplit(final ServerName serverName) { setServerState(serverName, ServerState.OFFLINE); } public void updateRegionState(final RegionInfo regionInfo, final State state) { final RegionStateNode regionNode = getOrCreateRegionStateNode(regionInfo); synchronized (regionNode) { regionNode.setState(state); } } // ============================================================================================ // TODO: // ============================================================================================ public List getAssignedRegions() { final List result = new ArrayList(); for (RegionStateNode node: regionsMap.values()) { if (!node.isInTransition()) { result.add(node.getRegionInfo()); } } return result; } public boolean isRegionInState(final RegionInfo regionInfo, final State... 
state) { final RegionStateNode region = getRegionStateNode(regionInfo); if (region != null) { synchronized (region) { return region.isInState(state); } } return false; } public boolean isRegionOnline(final RegionInfo regionInfo) { return isRegionInState(regionInfo, State.OPEN); } /** * @return True if region is offline (In OFFLINE or CLOSED state). */ public boolean isRegionOffline(final RegionInfo regionInfo) { return isRegionInState(regionInfo, State.OFFLINE, State.CLOSED); } public Map> getSnapShotOfAssignment( final Collection regions) { final Map> result = new HashMap>(); if (regions != null) { for (RegionInfo hri : regions) { final RegionStateNode node = getRegionStateNode(hri); if (node == null) { continue; } createSnapshot(node, result); } } else { for (RegionStateNode node : regionsMap.values()) { if (node == null) { continue; } createSnapshot(node, result); } } return result; } private void createSnapshot(RegionStateNode node, Map> result) { final ServerName serverName = node.getRegionLocation(); if (serverName == null) { return; } List serverRegions = result.get(serverName); if (serverRegions == null) { serverRegions = new ArrayList(); result.put(serverName, serverRegions); } serverRegions.add(node.getRegionInfo()); } public Map getRegionAssignments() { final HashMap assignments = new HashMap(); for (RegionStateNode node: regionsMap.values()) { assignments.put(node.getRegionInfo(), node.getRegionLocation()); } return assignments; } public Map> getRegionByStateOfTable(TableName tableName) { final State[] states = State.values(); final Map> tableRegions = new HashMap>(states.length); for (int i = 0; i < states.length; ++i) { tableRegions.put(states[i], new ArrayList()); } for (RegionStateNode node: regionsMap.values()) { if (node.getTable().equals(tableName)) { tableRegions.get(node.getState()).add(node.getRegionInfo()); } } return tableRegions; } public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) { final RegionStateNode region = 
getRegionStateNode(regionInfo); if (region != null) { synchronized (region) { ServerName server = region.getRegionLocation(); return server != null ? server : region.getLastHost(); } } return null; } /** * This is an EXPENSIVE clone. Cloning though is the safest thing to do. * Can't let out original since it can change and at least the load balancer * wants to iterate this exported list. We need to synchronize on regions * since all access to this.servers is under a lock on this.regions. * * @param isByTable If true, return the assignments by table. If false, * return the assignments which aggregate the server-load to the cluster level. * @return A clone of current assignments. */ public Map>> getAssignmentsForBalancer( boolean isByTable) { final Map>> result = new HashMap<>(); if (isByTable) { for (RegionStateNode node : regionsMap.values()) { Map> tableResult = result.computeIfAbsent(node.getTable(), t -> new HashMap<>()); final ServerName serverName = node.getRegionLocation(); if (serverName == null) { LOG.info("Skipping, no server for " + node); continue; } List serverResult = tableResult.computeIfAbsent(serverName, s -> new ArrayList<>()); serverResult.add(node.getRegionInfo()); } // Add online servers with no assignment for the table. 
for (Map> table : result.values()) { for (ServerName serverName : serverMap.keySet()) { table.putIfAbsent(serverName, new ArrayList<>()); } } } else { final HashMap> ensemble = new HashMap<>(serverMap.size()); for (ServerStateNode serverNode : serverMap.values()) { ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList()); } // Use a fake table name to represent the whole cluster's assignments result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble); } return result; } // ========================================================================== // Region in transition helpers // ========================================================================== protected boolean addRegionInTransition(final RegionStateNode regionNode, final RegionTransitionProcedure procedure) { if (procedure != null && !regionNode.setProcedure(procedure)) return false; regionInTransition.put(regionNode.getRegionInfo(), regionNode); return true; } protected void removeRegionInTransition(final RegionStateNode regionNode, final RegionTransitionProcedure procedure) { regionInTransition.remove(regionNode.getRegionInfo()); regionNode.unsetProcedure(procedure); } public boolean hasRegionsInTransition() { return !regionInTransition.isEmpty(); } public boolean isRegionInTransition(final RegionInfo regionInfo) { final RegionStateNode node = regionInTransition.get(regionInfo); return node != null ? node.isInTransition() : false; } /** * @return If a procedure-in-transition for hri, return it else null. */ public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) { RegionStateNode node = regionInTransition.get(hri); if (node == null) return null; return node.getProcedure(); } public RegionState getRegionTransitionState(final RegionInfo hri) { RegionStateNode node = regionInTransition.get(hri); if (node == null) return null; synchronized (node) { return node.isInTransition() ? 
node.toRegionState() : null; } } public List getRegionsInTransition() { return new ArrayList(regionInTransition.values()); } /** * Get the number of regions in transition. */ public int getRegionsInTransitionCount() { return regionInTransition.size(); } public List getRegionsStateInTransition() { final List rit = new ArrayList(regionInTransition.size()); for (RegionStateNode node: regionInTransition.values()) { rit.add(node.toRegionState()); } return rit; } public SortedSet getRegionsInTransitionOrderedByTimestamp() { final SortedSet rit = new TreeSet(REGION_STATE_STAMP_COMPARATOR); for (RegionStateNode node: regionInTransition.values()) { rit.add(node.toRegionState()); } return rit; } // ========================================================================== // Region offline helpers // ========================================================================== // TODO: Populated when we read meta but regions never make it out of here. public void addToOfflineRegions(final RegionStateNode regionNode) { LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode); regionOffline.put(regionNode.getRegionInfo(), regionNode); } // TODO: Unused. 
public void removeFromOfflineRegions(final RegionInfo regionInfo) { regionOffline.remove(regionInfo); } // ========================================================================== // Region FAIL_OPEN helpers // ========================================================================== public static final class RegionFailedOpen { private final RegionStateNode regionNode; private volatile Exception exception = null; private AtomicInteger retries = new AtomicInteger(); public RegionFailedOpen(final RegionStateNode regionNode) { this.regionNode = regionNode; } public RegionStateNode getRegionStateNode() { return regionNode; } public RegionInfo getRegionInfo() { return regionNode.getRegionInfo(); } public int incrementAndGetRetries() { return this.retries.incrementAndGet(); } public int getRetries() { return retries.get(); } public void setException(final Exception exception) { this.exception = exception; } public Exception getException() { return this.exception; } } public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) { final byte[] key = regionNode.getRegionInfo().getRegionName(); RegionFailedOpen node = regionFailedOpen.get(key); if (node == null) { RegionFailedOpen newNode = new RegionFailedOpen(regionNode); RegionFailedOpen oldNode = regionFailedOpen.putIfAbsent(key, newNode); node = oldNode != null ? 
oldNode : newNode; } return node; } public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) { return regionFailedOpen.get(regionInfo.getRegionName()); } public void removeFromFailedOpen(final RegionInfo regionInfo) { regionFailedOpen.remove(regionInfo.getRegionName()); } public List getRegionFailedOpen() { if (regionFailedOpen.isEmpty()) return Collections.emptyList(); ArrayList regions = new ArrayList(regionFailedOpen.size()); for (RegionFailedOpen r: regionFailedOpen.values()) { regions.add(r.getRegionStateNode().toRegionState()); } return regions; } // ========================================================================== // Servers // ========================================================================== /** * Be judicious calling this method. Do it on server register ONLY otherwise * you could mess up online server accounting. TOOD: Review usage and convert * to {@link #getServerNode(ServerName)} where we can. */ ServerStateNode getOrCreateServer(final ServerName serverName) { ServerStateNode node = serverMap.get(serverName); if (node == null) { LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?")); node = new ServerStateNode(serverName); ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node); node = oldNode != null ? oldNode : node; } return node; } public void removeServer(final ServerName serverName) { serverMap.remove(serverName); } public ServerStateNode getServerNode(final ServerName serverName) { return serverMap.get(serverName); } public double getAverageLoad() { int numServers = 0; int totalLoad = 0; for (ServerStateNode node: serverMap.values()) { totalLoad += node.getRegionCount(); numServers++; } return numServers == 0 ? 0.0: (double)totalLoad / (double)numServers; } /** * Add reference to region to serverstatenode. * DOES NOT AUTO-CREATE ServerStateNode instance. * @return Return serverstatenode or null if none. 
*/ ServerStateNode addRegionToServer(final RegionStateNode regionNode) { ServerStateNode ssn = getServerNode(regionNode.getRegionLocation()); if (ssn == null) { return ssn; } ssn.addRegion(regionNode); return ssn; } public boolean isReplicaAvailableForRegion(final RegionInfo info) { // if the region info itself is a replica return true. if (!RegionReplicaUtil.isDefaultReplica(info)) { return true; } // iterate the regionsMap for the given region name. If there are replicas it should // list them in order. for (RegionStateNode node : regionsMap.tailMap(info.getRegionName()).values()) { if (!node.getTable().equals(info.getTable()) || !ServerRegionReplicaUtil.isReplicasForSameRegion(info, node.getRegionInfo())) { break; } else if (!RegionReplicaUtil.isDefaultReplica(node.getRegionInfo())) { // we have replicas return true; } } // we don have replicas return false; } public ServerStateNode removeRegionFromServer(final ServerName serverName, final RegionStateNode regionNode) { ServerStateNode serverNode = getServerNode(serverName); if (serverNode != null) { serverNode.removeRegion(regionNode); } return serverNode; } // ========================================================================== // ToString helpers // ========================================================================== public static String regionNamesToString(final Collection regions) { final StringBuilder sb = new StringBuilder(); final Iterator it = regions.iterator(); sb.append("["); if (it.hasNext()) { sb.append(Bytes.toStringBinary(it.next())); while (it.hasNext()) { sb.append(", "); sb.append(Bytes.toStringBinary(it.next())); } } sb.append("]"); return sb.toString(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy