org.apache.hadoop.hbase.master.assignment.RegionStates Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of hbase-server Show documentation
Show all versions of hbase-server Show documentation
Server functionality for HBase
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.master.assignment;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentSkipListMap;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.RegionInfo;
import org.apache.hadoop.hbase.client.RegionReplicaUtil;
import org.apache.hadoop.hbase.exceptions.UnexpectedStateException;
import org.apache.hadoop.hbase.master.RegionState;
import org.apache.hadoop.hbase.master.RegionState.State;
import org.apache.hadoop.hbase.procedure2.ProcedureEvent;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hbase.thirdparty.com.google.common.annotations.VisibleForTesting;
/**
* RegionStates contains a set of Maps that describe the in-memory state of the AM
* (AssignmentManager): the regions available in the system, the regions in transition,
* the offline regions and the servers holding regions.
*/
@InterfaceAudience.Private
public class RegionStates {
private static final Logger LOG = LoggerFactory.getLogger(RegionStates.class);

/**
 * States listed as acceptable starting points for moving a region to OPEN.
 * The consumers of this array are outside this file section.
 */
protected static final State[] STATES_EXPECTED_ON_OPEN = {
  // State may already be OPEN if we died after receiving the OPEN from regionserver
  // but before complete finish of AssignProcedure. HBASE-20100.
  State.OPEN,
  // disable/offline
  State.OFFLINE,
  State.CLOSED,
  // ServerCrashProcedure
  State.SPLITTING,
  State.SPLIT,
  // already in-progress (retrying)
  State.OPENING,
  State.FAILED_OPEN,
};
/**
 * States listed as acceptable starting points for closing a region.
 * The consumers of this array are outside this file section.
 */
protected static final State[] STATES_EXPECTED_ON_CLOSE = {
  // ServerCrashProcedure
  State.SPLITTING,
  State.SPLIT,
  State.MERGING,
  // enabled/open
  State.OPEN,
  // already in-progress (retrying)
  State.CLOSING
};
/**
 * Procedure event whose payload is the {@link RegionInfo} of the region it belongs to.
 * One instance is created per {@code RegionStateNode}.
 */
private static class AssignmentProcedureEvent extends ProcedureEvent<RegionInfo> {
  /**
   * @param regionInfo the region this event is attached to
   */
  public AssignmentProcedureEvent(final RegionInfo regionInfo) {
    super(regionInfo);
  }
}
/**
 * Procedure event whose payload is the {@link ServerName} of the server it belongs to.
 * One instance is created per {@code ServerStateNode}.
 */
private static class ServerReportEvent extends ProcedureEvent<ServerName> {
  /**
   * @param serverName the server this event is attached to
   */
  public ServerReportEvent(final ServerName serverName) {
    super(serverName);
  }
}
/**
* Current Region State.
* In-memory only. Not persisted.
*/
// Mutable/Immutable? Changes have to be synchronized or not?
// Data members are volatile which seems to say multi-threaded access is fine.
// In the below we do check and set but the check state could change before
// we do the set because no synchronization....which seems dodgy. Clear up
// understanding here... how many threads accessing? Do locks make it so one
// thread at a time working on a single Region's RegionStateNode? Lets presume
// so for now. Odd is that elsewhere in this RegionStates, we synchronize on
// the RegionStateNode instance. TODO.
public static class RegionStateNode implements Comparable {
private final RegionInfo regionInfo;
private final ProcedureEvent> event;
private volatile RegionTransitionProcedure procedure = null;
private volatile ServerName regionLocation = null;
// notice that, the lastHost will only be updated when a region is successfully CLOSED through
// UnassignProcedure, so do not use it for critical condition as the data maybe stale and unsync
// with the data in meta.
private volatile ServerName lastHost = null;
/**
* A Region-in-Transition (RIT) moves through states.
* See {@link State} for complete list. A Region that
* is opened moves from OFFLINE => OPENING => OPENED.
*/
private volatile State state = State.OFFLINE;
/**
* Updated whenever a call to {@link #setRegionLocation(ServerName)}
* or {@link #setState(State, State...)}.
*/
private volatile long lastUpdate = 0;
private volatile long openSeqNum = HConstants.NO_SEQNUM;
public RegionStateNode(final RegionInfo regionInfo) {
this.regionInfo = regionInfo;
this.event = new AssignmentProcedureEvent(regionInfo);
}
/**
* @param update new region state this node should be assigned.
* @param expected current state should be in this given list of expected states
* @return true, if current state is in expected list; otherwise false.
*/
public boolean setState(final State update, final State... expected) {
if (!isInState(expected)) {
return false;
}
this.state = update;
this.lastUpdate = EnvironmentEdgeManager.currentTime();
return true;
}
/**
* Put region into OFFLINE mode (set state and clear location).
* @return Last recorded server deploy
*/
public ServerName offline() {
setState(State.OFFLINE);
return setRegionLocation(null);
}
/**
* Set new {@link State} but only if currently in expected
State
* (if not, throw {@link UnexpectedStateException}.
*/
public void transitionState(final State update, final State... expected)
throws UnexpectedStateException {
if (!setState(update, expected)) {
throw new UnexpectedStateException("Expected " + Arrays.toString(expected) +
" so could move to " + update + " but current state=" + getState());
}
}
public boolean isInState(final State... expected) {
if (expected != null && expected.length > 0) {
boolean expectedState = false;
for (int i = 0; i < expected.length; ++i) {
expectedState |= (getState() == expected[i]);
}
return expectedState;
}
return true;
}
public boolean isStuck() {
return isInState(State.FAILED_OPEN) && getProcedure() != null;
}
public boolean isInTransition() {
return getProcedure() != null;
}
public long getLastUpdate() {
return procedure != null ? procedure.getLastUpdate() : lastUpdate;
}
public void setLastHost(final ServerName serverName) {
this.lastHost = serverName;
}
public void setOpenSeqNum(final long seqId) {
this.openSeqNum = seqId;
}
public ServerName setRegionLocation(final ServerName serverName) {
ServerName lastRegionLocation = this.regionLocation;
if (LOG.isTraceEnabled() && serverName == null) {
LOG.trace("Tracking when we are set to null " + this, new Throwable("TRACE"));
}
this.regionLocation = serverName;
this.lastUpdate = EnvironmentEdgeManager.currentTime();
return lastRegionLocation;
}
public boolean setProcedure(final RegionTransitionProcedure proc) {
if (this.procedure != null && this.procedure != proc) {
return false;
}
this.procedure = proc;
return true;
}
public boolean unsetProcedure(final RegionTransitionProcedure proc) {
if (this.procedure != null && this.procedure != proc) {
return false;
}
this.procedure = null;
return true;
}
public RegionTransitionProcedure getProcedure() {
return procedure;
}
public ProcedureEvent> getProcedureEvent() {
return event;
}
public RegionInfo getRegionInfo() {
return regionInfo;
}
public TableName getTable() {
return getRegionInfo().getTable();
}
public boolean isSystemTable() {
return getTable().isSystemTable();
}
public ServerName getLastHost() {
return lastHost;
}
public ServerName getRegionLocation() {
return regionLocation;
}
public State getState() {
return state;
}
public long getOpenSeqNum() {
return openSeqNum;
}
public int getFormatVersion() {
// we don't have any format for now
// it should probably be in regionInfo.getFormatVersion()
return 0;
}
public RegionState toRegionState() {
return new RegionState(getRegionInfo(), getState(), getLastUpdate(), getRegionLocation());
}
@Override
public int compareTo(final RegionStateNode other) {
// NOTE: RegionInfo sort by table first, so we are relying on that.
// we have a TestRegionState#testOrderedByTable() that check for that.
return RegionInfo.COMPARATOR.compare(getRegionInfo(), other.getRegionInfo());
}
@Override
public int hashCode() {
return getRegionInfo().hashCode();
}
@Override
public boolean equals(final Object other) {
if (this == other) return true;
if (!(other instanceof RegionStateNode)) return false;
return compareTo((RegionStateNode)other) == 0;
}
@Override
public String toString() {
return toDescriptiveString();
}
public String toShortString() {
// rit= is the current Region-In-Transition State -- see State enum.
return String.format("rit=%s, location=%s", getState(), getRegionLocation());
}
public String toDescriptiveString() {
return String.format("%s, table=%s, region=%s",
toShortString(), getTable(), getRegionInfo().getEncodedName());
}
}
// This comparator sorts the RegionStates by time stamp then Region name.
// Comparing by timestamp alone can lead us to discard different RegionStates that happen
// to share a timestamp.
private static class RegionStateStampComparator implements Comparator<RegionState> {
  /**
   * Orders by {@link RegionState#getStamp()} first and breaks ties with
   * {@link RegionInfo#COMPARATOR} on the regions.
   */
  @Override
  public int compare(final RegionState l, final RegionState r) {
    int stampCmp = Long.compare(l.getStamp(), r.getStamp());
    return stampCmp != 0 ? stampCmp : RegionInfo.COMPARATOR.compare(l.getRegion(), r.getRegion());
  }
}
/**
* Server State.
* New ServerStateNode instances start in ONLINE; the other values are assigned through the
* metaLogSplitting / metaLogSplit / logSplitting / logSplit helpers of RegionStates.
*/
public enum ServerState {
/**
* Initial state. Available.
*/
ONLINE,
/**
* Only a server which carries meta can have this state. We will split the WAL for meta and
* then assign meta first before splitting other WALs.
*/
SPLITTING_META,
/**
* Indicates that the meta splitting is done. We need this state so that the UnassignProcedure
* for meta can safely quit. See the comments in UnassignProcedure.remoteCallFailed for more
* details.
*/
SPLITTING_META_DONE,
/**
* Server expired/crashed. Currently undergoing WAL splitting.
*/
SPLITTING,
/**
* WAL splitting done. This state will be used to tell the UnassignProcedure that it can safely
* quit. See the comments in UnassignProcedure.remoteCallFailed for more details.
*/
OFFLINE
}
/**
* State of Server; list of hosted regions, etc.
*/
public static class ServerStateNode implements Comparable {
private final ServerReportEvent reportEvent;
private final Set regions;
private final ServerName serverName;
private volatile ServerState state = ServerState.ONLINE;
public ServerStateNode(final ServerName serverName) {
this.serverName = serverName;
this.regions = ConcurrentHashMap.newKeySet();
this.reportEvent = new ServerReportEvent(serverName);
}
public ServerName getServerName() {
return serverName;
}
public ServerState getState() {
return state;
}
public ProcedureEvent> getReportEvent() {
return reportEvent;
}
public boolean isInState(final ServerState... expected) {
boolean expectedState = false;
if (expected != null) {
for (int i = 0; i < expected.length; ++i) {
expectedState |= (state == expected[i]);
}
}
return expectedState;
}
private void setState(final ServerState state) {
this.state = state;
}
public Set getRegions() {
return regions;
}
public int getRegionCount() {
return regions.size();
}
public ArrayList getRegionInfoList() {
ArrayList hris = new ArrayList(regions.size());
for (RegionStateNode region: regions) {
hris.add(region.getRegionInfo());
}
return hris;
}
public void addRegion(final RegionStateNode regionNode) {
this.regions.add(regionNode);
}
public void removeRegion(final RegionStateNode regionNode) {
this.regions.remove(regionNode);
}
@Override
public int compareTo(final ServerStateNode other) {
return getServerName().compareTo(other.getServerName());
}
@Override
public int hashCode() {
return getServerName().hashCode();
}
@Override
public boolean equals(final Object other) {
if (this == other) return true;
if (!(other instanceof ServerStateNode)) return false;
return compareTo((ServerStateNode)other) == 0;
}
@Override
public String toString() {
return String.format("name=%s, state=%s, regionCount=%d", getServerName(), getState(),
getRegionCount());
}
}
/** Shared comparator that orders region states by stamp, then by region. */
public static final RegionStateStampComparator REGION_STATE_STAMP_COMPARATOR =
  new RegionStateStampComparator();
// TODO: Replace the ConcurrentSkipListMaps
/**
 * RegionName -- i.e. RegionInfo.getRegionName() -- as bytes to {@link RegionStateNode}
 */
private final ConcurrentSkipListMap<byte[], RegionStateNode> regionsMap =
  new ConcurrentSkipListMap<byte[], RegionStateNode>(Bytes.BYTES_COMPARATOR);

/** Nodes of regions that currently have a transition registered, keyed by region. */
private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionInTransition =
  new ConcurrentSkipListMap<RegionInfo, RegionStateNode>(RegionInfo.COMPARATOR);

/**
 * Regions marked as offline on a read of hbase:meta. Unused or at least, once
 * offlined, regions have no means of coming on line again. TODO.
 */
private final ConcurrentSkipListMap<RegionInfo, RegionStateNode> regionOffline =
  new ConcurrentSkipListMap<RegionInfo, RegionStateNode>();

/** Failed-open bookkeeping keyed by region name bytes. */
private final ConcurrentSkipListMap<byte[], RegionFailedOpen> regionFailedOpen =
  new ConcurrentSkipListMap<byte[], RegionFailedOpen>(Bytes.BYTES_COMPARATOR);

/** Per-server state keyed by server name. */
private final ConcurrentHashMap<ServerName, ServerStateNode> serverMap =
  new ConcurrentHashMap<ServerName, ServerStateNode>();
/** Creates an empty instance; all maps start out empty. */
public RegionStates() {
}
/**
 * Drops all in-memory region and server state held by this instance.
 */
public void clear() {
  regionsMap.clear();
  regionInTransition.clear();
  regionOffline.clear();
  // Failed-open entries hold references to RegionStateNodes; leaving them behind would let
  // getRegionFailedOpen() report regions that are no longer tracked.
  regionFailedOpen.clear();
  serverMap.clear();
}
/**
 * @return true if {@code hri} is present in the main region map, the in-transition map or
 *   the offline map
 */
@VisibleForTesting
public boolean isRegionInRegionStates(final RegionInfo hri) {
  if (regionsMap.containsKey(hri.getRegionName())) {
    return true;
  }
  if (regionInTransition.containsKey(hri)) {
    return true;
  }
  return regionOffline.containsKey(hri);
}
// ==========================================================================
// RegionStateNode helpers
// ==========================================================================
/**
 * Registers a fresh node for {@code regionInfo} unless one is already mapped.
 * @return the node that ends up in the map: the existing one if present, else the new one
 */
protected RegionStateNode createRegionStateNode(final RegionInfo regionInfo) {
  final RegionStateNode candidate = new RegionStateNode(regionInfo);
  final RegionStateNode existing = regionsMap.putIfAbsent(regionInfo.getRegionName(), candidate);
  if (existing != null) {
    return existing;
  }
  return candidate;
}
/**
 * @return the node mapped for {@code regionInfo}, creating and registering one if missing
 */
protected RegionStateNode getOrCreateRegionStateNode(final RegionInfo regionInfo) {
  final RegionStateNode existing = regionsMap.get(regionInfo.getRegionName());
  if (existing == null) {
    return createRegionStateNode(regionInfo);
  }
  return existing;
}
/**
 * @param regionName full region name bytes, the key of the region map
 * @return the mapped node, or {@code null} if none
 */
RegionStateNode getRegionStateNodeFromName(final byte[] regionName) {
  final RegionStateNode found = regionsMap.get(regionName);
  return found;
}
/**
 * @return the node mapped for {@code regionInfo}'s region name, or {@code null} if none
 */
public RegionStateNode getRegionStateNode(final RegionInfo regionInfo) {
  final byte[] regionName = regionInfo.getRegionName();
  return getRegionStateNodeFromName(regionName);
}
/**
 * Forgets {@code regionInfo}: removes it from the region map, the in-transition map and the
 * offline map.
 */
public void deleteRegion(final RegionInfo regionInfo) {
  regionsMap.remove(regionInfo.getRegionName());

  // See HBASE-20860
  // After master restarts, merged regions' RIT state may not be cleaned,
  // making sure they are cleaned here
  // (removing an absent key is a no-op, so no prior containsKey check is needed)
  regionInTransition.remove(regionInfo);

  // Remove from the offline regions map too if there.
  final boolean wasOffline = this.regionOffline.containsKey(regionInfo);
  if (!wasOffline) {
    return;
  }
  if (LOG.isTraceEnabled()) {
    LOG.trace("Removing from regionOffline Map: " + regionInfo);
  }
  this.regionOffline.remove(regionInfo);
}
/**
 * Calls {@link #deleteRegion(RegionInfo)} for every element of {@code regionInfos}.
 */
public void deleteRegions(final List<RegionInfo> regionInfos) {
  regionInfos.forEach(this::deleteRegion);
}
/**
 * Collects the nodes of {@code tableName}. Scans the region map from the first key that is
 * not smaller than the table name and stops at the first node of a different table.
 */
ArrayList<RegionStateNode> getTableRegionStateNodes(final TableName tableName) {
  final ArrayList<RegionStateNode> regions = new ArrayList<>();
  for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
    if (!node.getTable().equals(tableName)) {
      break;
    }
    regions.add(node);
  }
  return regions;
}
/**
 * Same scan as {@code getTableRegionStateNodes}, but returns a {@link RegionState} snapshot
 * of each node instead of the node itself.
 */
ArrayList<RegionState> getTableRegionStates(final TableName tableName) {
  final ArrayList<RegionState> regions = new ArrayList<>();
  for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
    if (!node.getTable().equals(tableName)) {
      break;
    }
    regions.add(node.toRegionState());
  }
  return regions;
}
/**
 * Same scan as {@code getTableRegionStateNodes}, but returns the {@link RegionInfo} of each
 * node.
 */
ArrayList<RegionInfo> getTableRegionsInfo(final TableName tableName) {
  final ArrayList<RegionInfo> regions = new ArrayList<>();
  for (RegionStateNode node : regionsMap.tailMap(tableName.getName()).values()) {
    if (!node.getTable().equals(tableName)) {
      break;
    }
    regions.add(node.getRegionInfo());
  }
  return regions;
}
/**
 * @return the live values view of the region map (not a copy)
 */
Collection<RegionStateNode> getRegionStateNodes() {
  return regionsMap.values();
}
/**
 * @return a new list with a {@link RegionState} snapshot of every node in the region map
 */
public ArrayList<RegionState> getRegionStates() {
  final ArrayList<RegionState> regions = new ArrayList<>(regionsMap.size());
  for (RegionStateNode node : regionsMap.values()) {
    regions.add(node.toRegionState());
  }
  return regions;
}
// ==========================================================================
// RegionState helpers
// ==========================================================================
/**
 * @return a {@link RegionState} snapshot for {@code regionInfo}, or {@code null} if the
 *   region has no node
 */
public RegionState getRegionState(final RegionInfo regionInfo) {
  final RegionStateNode node = getRegionStateNode(regionInfo);
  if (node == null) {
    return null;
  }
  return node.toRegionState();
}
/**
 * Linear search of the region map for a region with the given encoded name.
 * @return a {@link RegionState} snapshot of the first match, or {@code null} if none
 */
public RegionState getRegionState(final String encodedRegionName) {
  // TODO: Need a map but it is just dispatch merge...
  return regionsMap.values().stream()
    .filter(node -> node.getRegionInfo().getEncodedName().equals(encodedRegionName))
    .findFirst()
    .map(RegionStateNode::toRegionState)
    .orElse(null);
}
// ============================================================================================
// TODO: helpers
// ============================================================================================
/**
 * @return true if at least one region of {@code tableName} is tracked
 */
public boolean hasTableRegionStates(final TableName tableName) {
  // The table scan used elsewhere starts at the first key >= the table name and stops at the
  // first node of another table, so the table has regions iff that first node belongs to it.
  // Looking at that single entry avoids building a list of RegionState just to test emptiness.
  final Map.Entry<byte[], RegionStateNode> first = regionsMap.ceilingEntry(tableName.getName());
  return first != null && first.getValue().getTable().equals(tableName);
}
/**
 * @return Return online regions of table; does not include OFFLINE or SPLIT regions.
 */
public List<RegionInfo> getRegionsOfTable(final TableName table) {
  return getRegionsOfTable(table, false);
}
/**
 * Builds the reopen descriptor for one node while holding the node's monitor.
 * @return a location carrying the open sequence number for an OPEN region, {@code -1} for an
 *   OPENING region, or {@code null} when the node is filtered out by
 *   {@code include(node, false)} or is in any other state
 */
private HRegionLocation createRegionForReopen(RegionStateNode node) {
  synchronized (node) {
    if (!include(node, false)) {
      return null;
    }
    final long seqNum;
    if (node.isInState(State.OPEN)) {
      seqNum = node.getOpenSeqNum();
    } else if (node.isInState(State.OPENING)) {
      seqNum = -1;
    } else {
      return null;
    }
    return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), seqNum);
  }
}
/**
 * Get the regions to be reopened when modifying a table.
 * <p>
 * Notice that the {@code openSeqNum} in the returned HRegionLocation is also used to indicate the
 * state of this region, positive means the region is in {@link State#OPEN}, -1 means
 * {@link State#OPENING}. And for regions in other states we do not need reopen them.
 */
public List<HRegionLocation> getRegionsOfTableForReopen(TableName tableName) {
  return getTableRegionStateNodes(tableName).stream().map(this::createRegionForReopen)
    .filter(r -> r != null).collect(Collectors.toList());
}
/**
 * Check whether the region has been reopened. The meaning of the {@link HRegionLocation} is the
 * same with {@link #getRegionsOfTableForReopen(TableName)}.
 * <p>
 * For a region which is in {@link State#OPEN} before, if the region state is changed or the open
 * seq num is changed, we can confirm that it has been reopened.
 * <p>
 * For a region which is in {@link State#OPENING} before, usually it will be in {@link State#OPEN}
 * now and we will schedule a MRP to reopen it. But there are several exceptions:
 * <ul>
 * <li>The region is in state other than {@link State#OPEN} or {@link State#OPENING}.</li>
 * <li>The location of the region has been changed</li>
 * </ul>
 * Of course the region could still be in {@link State#OPENING} state and still on the same
 * server, then here we will still return a {@link HRegionLocation} for it, just like
 * {@link #getRegionsOfTableForReopen(TableName)}.
 * @param oldLoc the previous state/location of this region
 * @return null if the region has been reopened, otherwise a new {@link HRegionLocation} which
 *         means we still need to reopen the region.
 * @see #getRegionsOfTableForReopen(TableName)
 */
public HRegionLocation checkReopened(HRegionLocation oldLoc) {
  final RegionStateNode node = getRegionStateNode(oldLoc.getRegion());
  // HBASE-20921
  // if the oldLoc's state node does not exist, that means the region is
  // merged or split, no need to check it
  if (node == null) {
    return null;
  }
  synchronized (node) {
    final boolean wasOpen = oldLoc.getSeqNum() >= 0;
    if (wasOpen) {
      // in OPEN state before
      if (!node.isInState(State.OPEN)) {
        // the state has been changed so we can make sure that the region has been reopened(not
        // finished maybe, but not a problem).
        return null;
      }
      if (node.getOpenSeqNum() > oldLoc.getSeqNum()) {
        // normal case, the region has been reopened
        return null;
      }
      // the open seq num does not change, need to reopen again
      return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(),
        node.getOpenSeqNum());
    }

    // in OPENING state before
    if (!node.isInState(State.OPEN, State.OPENING)) {
      // not in OPEN or OPENING state, then we can make sure that the region has been
      // reopened(not finished maybe, but not a problem)
      return null;
    }
    if (!node.getRegionLocation().equals(oldLoc.getServerName())) {
      // the region has been moved, so we can make sure that the region has been reopened.
      return null;
    }
    // normal case, we are still in OPENING state, or the reopen has been opened and the state
    // is changed to OPEN.
    final long openSeqNum;
    if (node.isInState(State.OPEN)) {
      openSeqNum = node.getOpenSeqNum();
    } else {
      openSeqNum = -1;
    }
    return new HRegionLocation(node.getRegionInfo(), node.getRegionLocation(), openSeqNum);
  }
}
/**
 * @param table the table whose regions are wanted
 * @param offline whether offline (and split-parent) regions pass the {@code include} filter
 * @return Return the regions of the table; does not include OFFLINE unless {@code offline} is
 *   true. Never includes regions in the {@link State#SPLIT} state.
 */
public List<RegionInfo> getRegionsOfTable(TableName table, boolean offline) {
  return getRegionsOfTable(table, node -> include(node, offline));
}
/**
 * @param table the table whose regions are wanted
 * @param filter decides which of the table's region nodes are kept
 * @return the {@link RegionInfo} of every node of {@code table} accepted by {@code filter}
 */
private List<RegionInfo> getRegionsOfTable(TableName table, Predicate<RegionStateNode> filter) {
  return getTableRegionStateNodes(table).stream().filter(filter)
    .map(RegionStateNode::getRegionInfo).collect(Collectors.toList());
}
/**
 * Utility. Whether to include region in list of regions. Default is to
 * weed out split and offline regions.
 * @param node the region node under consideration
 * @param offline whether offline/split regions are acceptable
 * @return True if we should include the {@code node} (do not include
 *   if split or offline unless {@code offline} is set to true).
 */
boolean include(final RegionStateNode node, final boolean offline) {
  if (LOG.isTraceEnabled()) {
    LOG.trace("WORKING ON " + node + " " + node.getRegionInfo());
  }
  // A node in SPLIT state is never included, whatever the flag says.
  if (node.isInState(State.SPLIT)) {
    return false;
  }
  if (!offline && node.isInState(State.OFFLINE)) {
    return false;
  }
  final RegionInfo hri = node.getRegionInfo();
  final boolean offlineOrSplit = hri.isOffline() || hri.isSplit();
  return !offlineOrSplit || offline;
}
/**
 * Returns the regions hosted by the specified server.
 * @param serverName the server we are interested in
 * @return a new list of the RegionInfo hosted by the specified server; an empty list if the
 *   server has no node
 */
public List<RegionInfo> getServerRegionInfoSet(final ServerName serverName) {
  final ServerStateNode serverInfo = getServerNode(serverName);
  if (serverInfo == null) {
    return Collections.emptyList();
  }
  synchronized (serverInfo) {
    return serverInfo.getRegionInfoList();
  }
}
// ============================================================================================
// Split helpers
// These methods will only be called in ServerCrashProcedure, and at the end of SCP we will remove
// the ServerStateNode by calling removeServer.
// ============================================================================================
/**
 * Sets {@code state} on the node of {@code serverName}, creating the node if needed. The
 * assignment happens while holding the node's monitor.
 */
private void setServerState(ServerName serverName, ServerState state) {
  final ServerStateNode target = getOrCreateServer(serverName);
  synchronized (target) {
    target.setState(state);
  }
}
/**
 * Call this when we start meta log splitting a crashed Server; moves the server to
 * {@link ServerState#SPLITTING_META}.
 * @see #metaLogSplit(ServerName)
 */
public void metaLogSplitting(ServerName serverName) {
  final ServerState metaSplitStarted = ServerState.SPLITTING_META;
  setServerState(serverName, metaSplitStarted);
}
/**
 * Called after we've split the meta logs on a crashed Server; moves the server to
 * {@link ServerState#SPLITTING_META_DONE}.
 * @see #metaLogSplitting(ServerName)
 */
public void metaLogSplit(ServerName serverName) {
  final ServerState metaSplitDone = ServerState.SPLITTING_META_DONE;
  setServerState(serverName, metaSplitDone);
}
/**
 * Call this when we start log splitting for a crashed Server; moves the server to
 * {@link ServerState#SPLITTING}.
 * @see #logSplit(ServerName)
 */
public void logSplitting(final ServerName serverName) {
  final ServerState splitStarted = ServerState.SPLITTING;
  setServerState(serverName, splitStarted);
}
/**
 * Called after we've split all logs on a crashed Server; moves the server to
 * {@link ServerState#OFFLINE}.
 * @see #logSplitting(ServerName)
 */
public void logSplit(final ServerName serverName) {
  final ServerState splitDone = ServerState.OFFLINE;
  setServerState(serverName, splitDone);
}
/**
 * Unconditionally sets {@code state} on the node of {@code regionInfo}, creating the node if
 * needed. The assignment happens while holding the node's monitor.
 */
public void updateRegionState(final RegionInfo regionInfo, final State state) {
  final RegionStateNode target = getOrCreateRegionStateNode(regionInfo);
  synchronized (target) {
    target.setState(state);
  }
}
// ============================================================================================
// TODO:
// ============================================================================================
/**
 * @return the {@link RegionInfo} of every tracked region whose node is not in transition
 *   (i.e. has no procedure attached)
 */
public List<RegionInfo> getAssignedRegions() {
  final List<RegionInfo> result = new ArrayList<>();
  for (RegionStateNode node : regionsMap.values()) {
    if (!node.isInTransition()) {
      result.add(node.getRegionInfo());
    }
  }
  return result;
}
/**
 * @return true if {@code regionInfo} has a node and that node is in one of {@code state};
 *   false if the region has no node
 */
public boolean isRegionInState(final RegionInfo regionInfo, final State... state) {
  final RegionStateNode node = getRegionStateNode(regionInfo);
  if (node == null) {
    return false;
  }
  synchronized (node) {
    return node.isInState(state);
  }
}
/**
 * @return True if region is tracked and in {@link State#OPEN}.
 */
public boolean isRegionOnline(final RegionInfo regionInfo) {
  final boolean open = isRegionInState(regionInfo, State.OPEN);
  return open;
}
/**
 * @return True if region is offline (In OFFLINE or CLOSED state).
 */
public boolean isRegionOffline(final RegionInfo regionInfo) {
  final boolean offlineOrClosed = isRegionInState(regionInfo, State.OFFLINE, State.CLOSED);
  return offlineOrClosed;
}
public Map> getSnapShotOfAssignment(
final Collection regions) {
final Map> result = new HashMap>();
if (regions != null) {
for (RegionInfo hri : regions) {
final RegionStateNode node = getRegionStateNode(hri);
if (node == null) {
continue;
}
createSnapshot(node, result);
}
} else {
for (RegionStateNode node : regionsMap.values()) {
if (node == null) {
continue;
}
createSnapshot(node, result);
}
}
return result;
}
private void createSnapshot(RegionStateNode node, Map> result) {
final ServerName serverName = node.getRegionLocation();
if (serverName == null) {
return;
}
List serverRegions = result.get(serverName);
if (serverRegions == null) {
serverRegions = new ArrayList();
result.put(serverName, serverRegions);
}
serverRegions.add(node.getRegionInfo());
}
/**
 * @return a new map from every tracked region to its current location; the location is
 *   {@code null} for regions that have none
 */
public Map<RegionInfo, ServerName> getRegionAssignments() {
  final HashMap<RegionInfo, ServerName> assignments = new HashMap<>();
  for (RegionStateNode node : regionsMap.values()) {
    assignments.put(node.getRegionInfo(), node.getRegionLocation());
  }
  return assignments;
}
public Map> getRegionByStateOfTable(TableName tableName) {
final State[] states = State.values();
final Map> tableRegions =
new HashMap>(states.length);
for (int i = 0; i < states.length; ++i) {
tableRegions.put(states[i], new ArrayList());
}
for (RegionStateNode node: regionsMap.values()) {
if (node.getTable().equals(tableName)) {
tableRegions.get(node.getState()).add(node.getRegionInfo());
}
}
return tableRegions;
}
/**
 * @return the region's current location if set, otherwise its last host; {@code null} if
 *   the region has no node
 */
public ServerName getRegionServerOfRegion(final RegionInfo regionInfo) {
  final RegionStateNode node = getRegionStateNode(regionInfo);
  if (node == null) {
    return null;
  }
  synchronized (node) {
    final ServerName current = node.getRegionLocation();
    if (current != null) {
      return current;
    }
    return node.getLastHost();
  }
}
/**
* This is an EXPENSIVE clone. Cloning though is the safest thing to do.
* Can't let out original since it can change and at least the load balancer
* wants to iterate this exported list. We need to synchronize on regions
* since all access to this.servers is under a lock on this.regions.
*
* @param isByTable If true
, return the assignments by table. If false
,
* return the assignments which aggregate the server-load to the cluster level.
* @return A clone of current assignments.
*/
public Map>> getAssignmentsForBalancer(
boolean isByTable) {
final Map>> result = new HashMap<>();
if (isByTable) {
for (RegionStateNode node : regionsMap.values()) {
Map> tableResult =
result.computeIfAbsent(node.getTable(), t -> new HashMap<>());
final ServerName serverName = node.getRegionLocation();
if (serverName == null) {
LOG.info("Skipping, no server for " + node);
continue;
}
List serverResult =
tableResult.computeIfAbsent(serverName, s -> new ArrayList<>());
serverResult.add(node.getRegionInfo());
}
// Add online servers with no assignment for the table.
for (Map> table : result.values()) {
for (ServerName serverName : serverMap.keySet()) {
table.putIfAbsent(serverName, new ArrayList<>());
}
}
} else {
final HashMap> ensemble = new HashMap<>(serverMap.size());
for (ServerStateNode serverNode : serverMap.values()) {
ensemble.put(serverNode.getServerName(), serverNode.getRegionInfoList());
}
// Use a fake table name to represent the whole cluster's assignments
result.put(HConstants.ENSEMBLE_TABLE_NAME, ensemble);
}
return result;
}
// ==========================================================================
// Region in transition helpers
// ==========================================================================
/**
 * Registers {@code regionNode} as in transition. When {@code procedure} is non-null it is
 * attached to the node first.
 * @return false (and nothing is registered) if the node already has a different procedure
 */
protected boolean addRegionInTransition(final RegionStateNode regionNode,
    final RegionTransitionProcedure procedure) {
  if (procedure != null) {
    final boolean attached = regionNode.setProcedure(procedure);
    if (!attached) {
      return false;
    }
  }
  regionInTransition.put(regionNode.getRegionInfo(), regionNode);
  return true;
}
/**
 * Removes {@code regionNode} from the in-transition map, then asks the node to detach
 * {@code procedure}. The result of the detach is not checked.
 */
protected void removeRegionInTransition(final RegionStateNode regionNode,
    final RegionTransitionProcedure procedure) {
  final RegionInfo key = regionNode.getRegionInfo();
  regionInTransition.remove(key);
  regionNode.unsetProcedure(procedure);
}
/**
 * @return true if the in-transition map holds at least one region
 */
public boolean hasRegionsInTransition() {
  final boolean none = regionInTransition.isEmpty();
  return !none;
}
/**
 * @return true if {@code regionInfo} is in the in-transition map and its node still has a
 *   procedure attached
 */
public boolean isRegionInTransition(final RegionInfo regionInfo) {
  final RegionStateNode node = regionInTransition.get(regionInfo);
  return node != null && node.isInTransition();
}
/**
 * @return If a procedure-in-transition for {@code hri}, return it else null.
 */
public RegionTransitionProcedure getRegionTransitionProcedure(final RegionInfo hri) {
  final RegionStateNode node = regionInTransition.get(hri);
  return node == null ? null : node.getProcedure();
}
/**
 * @return a {@link RegionState} snapshot if {@code hri} is in the in-transition map and its
 *   node still has a procedure attached, otherwise {@code null}
 */
public RegionState getRegionTransitionState(final RegionInfo hri) {
  final RegionStateNode node = regionInTransition.get(hri);
  if (node == null) {
    return null;
  }
  synchronized (node) {
    if (!node.isInTransition()) {
      return null;
    }
    return node.toRegionState();
  }
}
/**
 * @return a new list copy of the nodes currently in the in-transition map
 */
public List<RegionStateNode> getRegionsInTransition() {
  return new ArrayList<RegionStateNode>(regionInTransition.values());
}
/**
 * Get the number of regions in transition.
 * @return the current size of the in-transition map
 */
public int getRegionsInTransitionCount() {
  final int count = regionInTransition.size();
  return count;
}
/**
 * @return a new list with a {@link RegionState} snapshot of every node in the in-transition
 *   map
 */
public List<RegionState> getRegionsStateInTransition() {
  final List<RegionState> rit = new ArrayList<>(regionInTransition.size());
  for (RegionStateNode node : regionInTransition.values()) {
    rit.add(node.toRegionState());
  }
  return rit;
}
/**
 * @return {@link RegionState} snapshots of every node in the in-transition map, sorted with
 *   {@code REGION_STATE_STAMP_COMPARATOR} (stamp first, then region)
 */
public SortedSet<RegionState> getRegionsInTransitionOrderedByTimestamp() {
  final SortedSet<RegionState> rit = new TreeSet<RegionState>(REGION_STATE_STAMP_COMPARATOR);
  for (RegionStateNode node : regionInTransition.values()) {
    rit.add(node.toRegionState());
  }
  return rit;
}
// ==========================================================================
// Region offline helpers
// ==========================================================================
// TODO: Populated when we read meta but regions never make it out of here.
/**
 * Records {@code regionNode} in the offline map, keyed by its region, and logs the addition.
 */
public void addToOfflineRegions(final RegionStateNode regionNode) {
  LOG.info("Added to offline, CURRENTLY NEVER CLEARED!!! " + regionNode);
  final RegionInfo key = regionNode.getRegionInfo();
  regionOffline.put(key, regionNode);
}
// TODO: Unused.
/**
 * Removes {@code regionInfo} from the offline map; a no-op if it is not there.
 */
public void removeFromOfflineRegions(final RegionInfo regionInfo) {
  this.regionOffline.remove(regionInfo);
}
// ==========================================================================
// Region FAIL_OPEN helpers
// ==========================================================================
/**
 * Failed-open bookkeeping for one region: the node, a retry counter and the last exception
 * recorded via {@link #setException(Exception)}.
 */
public static final class RegionFailedOpen {
  private final RegionStateNode regionNode;

  private volatile Exception exception = null;
  // final: the counter object is never replaced, only incremented.
  private final AtomicInteger retries = new AtomicInteger();

  public RegionFailedOpen(final RegionStateNode regionNode) {
    this.regionNode = regionNode;
  }

  public RegionStateNode getRegionStateNode() {
    return regionNode;
  }

  public RegionInfo getRegionInfo() {
    return regionNode.getRegionInfo();
  }

  /**
   * @return the retry count after adding one
   */
  public int incrementAndGetRetries() {
    return this.retries.incrementAndGet();
  }

  public int getRetries() {
    return retries.get();
  }

  public void setException(final Exception exception) {
    this.exception = exception;
  }

  /**
   * @return the exception last set, or {@code null} if none was recorded
   */
  public Exception getException() {
    return this.exception;
  }
}
/**
 * Returns the {@link RegionFailedOpen} record registered under the region name of
 * {@code regionNode}, registering a new one first when none exists.
 * @param regionNode node of the region that failed to open
 * @return the record that is in {@code regionFailedOpen} for that region name
 */
public RegionFailedOpen addToFailedOpen(final RegionStateNode regionNode) {
  final byte[] regionName = regionNode.getRegionInfo().getRegionName();
  final RegionFailedOpen existing = regionFailedOpen.get(regionName);
  if (existing != null) {
    return existing;
  }
  final RegionFailedOpen created = new RegionFailedOpen(regionNode);
  // putIfAbsent returns a non-null value when some other record got in first; prefer that one.
  final RegionFailedOpen raced = regionFailedOpen.putIfAbsent(regionName, created);
  if (raced == null) {
    return created;
  }
  return raced;
}
/**
 * Looks up the failed-open record stored under the region name of {@code regionInfo}.
 * @param regionInfo region to look up
 * @return whatever {@code regionFailedOpen} holds for that region name
 */
public RegionFailedOpen getFailedOpen(final RegionInfo regionInfo) {
  final byte[] regionName = regionInfo.getRegionName();
  return regionFailedOpen.get(regionName);
}
/**
 * Removes the failed-open record stored under the region name of {@code regionInfo}.
 * @param regionInfo region whose record should be dropped
 */
public void removeFromFailedOpen(final RegionInfo regionInfo) {
  final byte[] regionName = regionInfo.getRegionName();
  regionFailedOpen.remove(regionName);
}
/**
 * Returns the {@link RegionState} of every region that has a failed-open record.
 * @return {@link Collections#emptyList()} (immutable) when there are no records, otherwise a
 *   new list with one state per record
 */
public List<RegionState> getRegionFailedOpen() {
  if (regionFailedOpen.isEmpty()) {
    return Collections.emptyList();
  }
  final List<RegionState> regions = new ArrayList<>(regionFailedOpen.size());
  for (RegionFailedOpen failedOpen : regionFailedOpen.values()) {
    regions.add(failedOpen.getRegionStateNode().toRegionState());
  }
  return regions;
}
// ==========================================================================
// Servers
// ==========================================================================
/**
 * Returns the {@link ServerStateNode} registered for {@code serverName}, creating and
 * registering one first when none exists.
 * <p>Be judicious calling this method. Do it on server register ONLY otherwise
 * you could mess up online server accounting. TODO: Review usage and convert
 * to {@link #getServerNode(ServerName)} where we can.
 * @param serverName server to look up or register
 * @return the node held in the server map for {@code serverName}; never null
 */
ServerStateNode getOrCreateServer(final ServerName serverName) {
  ServerStateNode node = serverMap.get(serverName);
  if (node == null) {
    // The throwable is built only so the trace log shows who asked for the node. Skip the
    // stack capture entirely unless trace logging is on.
    if (LOG.isTraceEnabled()) {
      LOG.trace("CREATING! {}", serverName, new RuntimeException("WHERE AM I?"));
    }
    node = new ServerStateNode(serverName);
    // If an entry appeared since the get() above, keep the one already in the map.
    ServerStateNode oldNode = serverMap.putIfAbsent(serverName, node);
    node = oldNode != null ? oldNode : node;
  }
  return node;
}
/**
 * Drops the entry for {@code serverName} from the server map, if there is one.
 * @param serverName server whose node should be removed
 */
public void removeServer(final ServerName serverName) {
  serverMap.remove(serverName);
}
/**
 * Looks up the node for {@code serverName} without creating one.
 * @param serverName server to look up
 * @return whatever the server map holds for {@code serverName}
 */
public ServerStateNode getServerNode(final ServerName serverName) {
  final ServerStateNode serverNode = serverMap.get(serverName);
  return serverNode;
}
/**
 * Computes the mean region count over every node in the server map.
 * @return total region count divided by the number of server nodes, or 0.0 when the map has
 *   no nodes
 */
public double getAverageLoad() {
  int regionTotal = 0;
  int serverTotal = 0;
  for (ServerStateNode serverNode : serverMap.values()) {
    serverTotal++;
    regionTotal += serverNode.getRegionCount();
  }
  // Guard the division: no servers means no load.
  if (serverTotal == 0) {
    return 0.0;
  }
  return (double) regionTotal / (double) serverTotal;
}
/**
 * Registers {@code regionNode} with the server node of its current region location.
 * Never creates a ServerStateNode: when the location has no node, nothing is added.
 * @param regionNode region to attach to its server
 * @return the server node the region was added to, or null if there is none
 */
ServerStateNode addRegionToServer(final RegionStateNode regionNode) {
  final ServerStateNode serverNode = getServerNode(regionNode.getRegionLocation());
  if (serverNode != null) {
    serverNode.addRegion(regionNode);
  }
  return serverNode;
}
/**
 * Tells whether a non-default replica is known for the region {@code info} belongs to.
 * @param info region to check
 * @return true when {@code info} is itself a non-default replica, or when the scan of
 *   {@code regionsMap} starting at its region name finds a non-default replica of the same
 *   region; false otherwise
 */
public boolean isReplicaAvailableForRegion(final RegionInfo info) {
  // A non-default replica is its own proof that replicas exist.
  if (!RegionReplicaUtil.isDefaultReplica(info)) {
    return true;
  }
  // Walk regionsMap from this region's name onward; replicas of the region, if any, are
  // expected to come next in order.
  for (RegionStateNode candidate : regionsMap.tailMap(info.getRegionName()).values()) {
    final boolean sameRegion = candidate.getTable().equals(info.getTable())
      && ServerRegionReplicaUtil.isReplicasForSameRegion(info, candidate.getRegionInfo());
    if (!sameRegion) {
      // Moved past this region's entries; stop scanning.
      break;
    }
    if (!RegionReplicaUtil.isDefaultReplica(candidate.getRegionInfo())) {
      // Found a replica.
      return true;
    }
  }
  // We don't have replicas.
  return false;
}
/**
 * Detaches {@code regionNode} from the node of {@code serverName}, when that server has a node.
 * @param serverName server to remove the region from
 * @param regionNode region to detach
 * @return the server node that was looked up, or null if there is none
 */
public ServerStateNode removeRegionFromServer(final ServerName serverName,
    final RegionStateNode regionNode) {
  final ServerStateNode owner = getServerNode(serverName);
  if (owner == null) {
    return null;
  }
  owner.removeRegion(regionNode);
  return owner;
}
// ==========================================================================
// ToString helpers
// ==========================================================================
/**
 * Renders region names as {@code [name1, name2, ...]}, converting each name with
 * {@code Bytes.toStringBinary}.
 * @param regions region names to render; must not be null
 * @return the bracketed, comma-plus-space separated names; {@code []} when {@code regions}
 *   is empty
 */
public static String regionNamesToString(final Collection<byte[]> regions) {
  return regions.stream()
    .map(regionName -> Bytes.toStringBinary(regionName))
    .collect(Collectors.joining(", ", "[", "]"));
}
}