All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.rsgroup.RSGroupInfoManagerImpl Maven / Gradle / Ivy

There is a newer version: 2.6.0-hadoop3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.rsgroup;

import com.google.protobuf.ServiceException;
import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NavigableSet;
import java.util.OptionalLong;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Coprocessor;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.NamespaceDescriptor;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.CoprocessorDescriptorBuilder;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.client.TableDescriptor;
import org.apache.hadoop.hbase.client.TableDescriptorBuilder;
import org.apache.hadoop.hbase.constraint.ConstraintException;
import org.apache.hadoop.hbase.coprocessor.MultiRowMutationEndpoint;
import org.apache.hadoop.hbase.exceptions.DeserializationException;
import org.apache.hadoop.hbase.ipc.CoprocessorRpcChannel;
import org.apache.hadoop.hbase.master.ClusterSchema;
import org.apache.hadoop.hbase.master.MasterServices;
import org.apache.hadoop.hbase.master.ServerListener;
import org.apache.hadoop.hbase.master.TableStateManager;
import org.apache.hadoop.hbase.master.procedure.CreateTableProcedure;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureUtil;
import org.apache.hadoop.hbase.net.Address;
import org.apache.hadoop.hbase.procedure2.Procedure;
import org.apache.hadoop.hbase.protobuf.ProtobufMagic;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.MultiRowMutationProtos;
import org.apache.hadoop.hbase.protobuf.generated.RSGroupProtos;
import org.apache.hadoop.hbase.regionserver.DisabledRegionSplitPolicy;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZKWatcher;
import org.apache.hadoop.hbase.zookeeper.ZNodePaths;
import org.apache.hadoop.util.Shell;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.zookeeper.KeeperException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;
import org.apache.hbase.thirdparty.com.google.common.collect.Maps;
import org.apache.hbase.thirdparty.com.google.common.collect.Sets;

/**
 * This is an implementation of {@link RSGroupInfoManager} which makes use of an HBase table as the
 * persistence store for the group information. It also makes use of zookeeper to store group
 * information needed for bootstrapping during offline mode.
 * 

Concurrency

RSGroup state is kept locally in Maps. There is a rsgroup name to cached * RSGroupInfo Map at {@link #rsGroupMap} and a Map of tables to the name of the rsgroup they belong * too (in {@link #tableMap}). These Maps are persisted to the hbase:rsgroup table (and cached in * zk) on each modification. *

* Mutations on state are synchronized but reads can continue without having to wait on an instance * monitor, mutations do wholesale replace of the Maps on update -- Copy-On-Write; the local Maps of * state are read-only, just-in-case (see flushConfig). *

* Reads must not block else there is a danger we'll deadlock. *

* Clients of this class, the {@link RSGroupAdminEndpoint} for example, want to query and then act * on the results of the query modifying cache in zookeeper without another thread making * intermediate modifications. These clients synchronize on the 'this' instance so no other has * access concurrently. Reads must be able to continue concurrently. */ @InterfaceAudience.Private final class RSGroupInfoManagerImpl implements RSGroupInfoManager { private static final Logger LOG = LoggerFactory.getLogger(RSGroupInfoManagerImpl.class); /** Table descriptor for hbase:rsgroup catalog table */ private static final TableDescriptor RSGROUP_TABLE_DESC; static { TableDescriptorBuilder builder = TableDescriptorBuilder.newBuilder(RSGROUP_TABLE_NAME) .setColumnFamily(ColumnFamilyDescriptorBuilder.of(META_FAMILY_BYTES)) .setRegionSplitPolicyClassName(DisabledRegionSplitPolicy.class.getName()); try { builder.setCoprocessor( CoprocessorDescriptorBuilder.newBuilder(MultiRowMutationEndpoint.class.getName()) .setPriority(Coprocessor.PRIORITY_SYSTEM).build()); } catch (IOException ex) { throw new Error(ex); } RSGROUP_TABLE_DESC = builder.build(); } // There two Maps are immutable and wholesale replaced on each modification // so are safe to access concurrently. See class comment. private volatile Map rsGroupMap = Collections.emptyMap(); private volatile Map tableMap = Collections.emptyMap(); private final MasterServices masterServices; private final Connection conn; private final ZKWatcher watcher; private final RSGroupStartupWorker rsGroupStartupWorker; // contains list of groups that were last flushed to persistent store private Set prevRSGroups = new HashSet<>(); private final ServerEventsListenerThread serverEventsListenerThread = new ServerEventsListenerThread(); /** Get rsgroup table mapping script */ RSGroupMappingScript script; // Package visibility for testing static class RSGroupMappingScript { static final String RS_GROUP_MAPPING_SCRIPT = "hbase.rsgroup.table.mapping.script"; static final String RS_GROUP_MAPPING_SCRIPT_TIMEOUT = "hbase.rsgroup.table.mapping.script.timeout"; private final String script; private final long scriptTimeout; RSGroupMappingScript(Configuration conf) { script = conf.get(RS_GROUP_MAPPING_SCRIPT); scriptTimeout = conf.getLong(RS_GROUP_MAPPING_SCRIPT_TIMEOUT, 5000); // 5 seconds } String getRSGroup(String namespace, String tablename) { if (script == null || script.isEmpty()) { return null; } Shell.ShellCommandExecutor rsgroupMappingScript = new Shell.ShellCommandExecutor(new String[] { script, "", "" }, null, null, scriptTimeout); String[] exec = rsgroupMappingScript.getExecString(); exec[1] = namespace; exec[2] = tablename; try { rsgroupMappingScript.execute(); } catch (IOException e) { LOG.error("Failed to get RSGroup from script for table {}:{}", namespace, tablename, e); return null; } return rsgroupMappingScript.getOutput().trim(); } } private RSGroupInfoManagerImpl(MasterServices masterServices) throws IOException { this.masterServices = masterServices; this.watcher = masterServices.getZooKeeper(); this.conn = masterServices.getConnection(); this.rsGroupStartupWorker = new RSGroupStartupWorker(); script = new RSGroupMappingScript(masterServices.getConfiguration()); } private synchronized void init() throws IOException { refresh(); serverEventsListenerThread.start(); masterServices.getServerManager().registerListener(serverEventsListenerThread); } static RSGroupInfoManager getInstance(MasterServices master) throws IOException { RSGroupInfoManagerImpl instance = new RSGroupInfoManagerImpl(master); instance.init(); return instance; } public void start() { // create system table of rsgroup rsGroupStartupWorker.start(); } @Override public synchronized void addRSGroup(RSGroupInfo rsGroupInfo) throws IOException { checkGroupName(rsGroupInfo.getName()); if ( rsGroupMap.get(rsGroupInfo.getName()) != null || rsGroupInfo.getName().equals(RSGroupInfo.DEFAULT_GROUP) ) { throw new DoNotRetryIOException("Group already exists: " + rsGroupInfo.getName()); } Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.put(rsGroupInfo.getName(), rsGroupInfo); flushConfig(newGroupMap); } private RSGroupInfo getRSGroupInfo(final String groupName) throws DoNotRetryIOException { RSGroupInfo rsGroupInfo = getRSGroup(groupName); if (rsGroupInfo == null) { throw new DoNotRetryIOException("RSGroup " + groupName + " does not exist"); } return rsGroupInfo; } /** * @param master the master to get online servers for * @return Set of online Servers named for their hostname and port (not ServerName). */ private static Set

getOnlineServers(final MasterServices master) { Set
onlineServers = new HashSet
(); if (master == null) { return onlineServers; } for (ServerName server : master.getServerManager().getOnlineServers().keySet()) { onlineServers.add(server.getAddress()); } return onlineServers; } @Override public synchronized Set
moveServers(Set
servers, String srcGroup, String dstGroup) throws IOException { RSGroupInfo src = getRSGroupInfo(srcGroup); RSGroupInfo dst = getRSGroupInfo(dstGroup); Set
movedServers = new HashSet<>(); // If destination is 'default' rsgroup, only add servers that are online. If not online, drop // it. If not 'default' group, add server to 'dst' rsgroup EVEN IF IT IS NOT online (could be a // rsgroup of dead servers that are to come back later). Set
onlineServers = dst.getName().equals(RSGroupInfo.DEFAULT_GROUP) ? getOnlineServers(this.masterServices) : null; for (Address el : servers) { src.removeServer(el); if (onlineServers != null) { if (!onlineServers.contains(el)) { if (LOG.isDebugEnabled()) { LOG.debug("Dropping " + el + " during move-to-default rsgroup because not online"); } continue; } } dst.addServer(el); movedServers.add(el); } Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.put(src.getName(), src); newGroupMap.put(dst.getName(), dst); flushConfig(newGroupMap); return movedServers; } @Override public RSGroupInfo getRSGroupOfServer(Address serverHostPort) throws IOException { for (RSGroupInfo info : rsGroupMap.values()) { if (info.containsServer(serverHostPort)) { return info; } } return null; } @Override public RSGroupInfo getRSGroup(String groupName) { return rsGroupMap.get(groupName); } @Override public String getRSGroupOfTable(TableName tableName) { return tableMap.get(tableName); } @Override public synchronized void moveTables(Set tableNames, String groupName) throws IOException { // Check if rsGroup contains the destination rsgroup if (groupName != null && !rsGroupMap.containsKey(groupName)) { throw new DoNotRetryIOException("Group " + groupName + " does not exist"); } // Make a copy of rsGroupMap to update Map newGroupMap = Maps.newHashMap(rsGroupMap); // Remove tables from their original rsgroups // and update the copy of rsGroupMap for (TableName tableName : tableNames) { if (tableMap.containsKey(tableName)) { RSGroupInfo src = new RSGroupInfo(newGroupMap.get(tableMap.get(tableName))); src.removeTable(tableName); newGroupMap.put(src.getName(), src); } } // Add tables to the destination rsgroup // and update the copy of rsGroupMap if (groupName != null) { RSGroupInfo dstGroup = new RSGroupInfo(newGroupMap.get(groupName)); dstGroup.addAllTables(tableNames); newGroupMap.put(dstGroup.getName(), dstGroup); } // Flush according to the updated copy of rsGroupMap flushConfig(newGroupMap); } @Override public synchronized void removeRSGroup(String groupName) throws IOException { if (!rsGroupMap.containsKey(groupName) || groupName.equals(RSGroupInfo.DEFAULT_GROUP)) { throw new DoNotRetryIOException( "Group " + groupName + " does not exist or is a reserved " + "group"); } Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.remove(groupName); flushConfig(newGroupMap); } @Override public List listRSGroups() { return Lists.newLinkedList(rsGroupMap.values()); } @Override public boolean isOnline() { return rsGroupStartupWorker.isOnline(); } @Override public void moveServersAndTables(Set
servers, Set tables, String srcGroup, String dstGroup) throws IOException { // get server's group RSGroupInfo srcGroupInfo = getRSGroupInfo(srcGroup); RSGroupInfo dstGroupInfo = getRSGroupInfo(dstGroup); // move servers for (Address el : servers) { srcGroupInfo.removeServer(el); dstGroupInfo.addServer(el); } // move tables for (TableName tableName : tables) { srcGroupInfo.removeTable(tableName); dstGroupInfo.addTable(tableName); } // flush changed groupinfo Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.put(srcGroupInfo.getName(), srcGroupInfo); newGroupMap.put(dstGroupInfo.getName(), dstGroupInfo); flushConfig(newGroupMap); } @Override public synchronized void removeServers(Set
servers) throws IOException { Map rsGroupInfos = new HashMap(); for (Address el : servers) { RSGroupInfo rsGroupInfo = getRSGroupOfServer(el); if (rsGroupInfo != null) { RSGroupInfo newRsGroupInfo = rsGroupInfos.get(rsGroupInfo.getName()); if (newRsGroupInfo == null) { rsGroupInfo.removeServer(el); rsGroupInfos.put(rsGroupInfo.getName(), rsGroupInfo); } else { newRsGroupInfo.removeServer(el); rsGroupInfos.put(newRsGroupInfo.getName(), newRsGroupInfo); } } else { LOG.warn("Server " + el + " does not belong to any rsgroup."); } } if (rsGroupInfos.size() > 0) { Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.putAll(rsGroupInfos); flushConfig(newGroupMap); } } @Override public void renameRSGroup(String oldName, String newName) throws IOException { checkGroupName(oldName); checkGroupName(newName); if (oldName.equals(RSGroupInfo.DEFAULT_GROUP)) { throw new ConstraintException("Can't rename default rsgroup"); } RSGroupInfo oldGroup = getRSGroup(oldName); if (oldGroup == null) { throw new ConstraintException("RSGroup " + oldName + " does not exist"); } if (rsGroupMap.containsKey(newName)) { throw new ConstraintException("Group already exists: " + newName); } Map newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.remove(oldName); RSGroupInfo newGroup = new RSGroupInfo(newName, (SortedSet
) oldGroup.getServers(), oldGroup.getTables()); newGroupMap.put(newName, newGroup); flushConfig(newGroupMap); } /** * Will try to get the rsgroup from {@code tableMap} first then try to get the rsgroup from * {@code script} try to get the rsgroup from the {@link NamespaceDescriptor} lastly. If still not * present, return default group. */ @Override public RSGroupInfo determineRSGroupInfoForTable(TableName tableName) throws IOException { RSGroupInfo groupFromOldRSGroupInfo = getRSGroup(getRSGroupOfTable(tableName)); if (groupFromOldRSGroupInfo != null) { return groupFromOldRSGroupInfo; } // RSGroup information determined by administrator. RSGroupInfo groupDeterminedByAdmin = getRSGroup( script.getRSGroup(tableName.getNamespaceAsString(), tableName.getQualifierAsString())); if (groupDeterminedByAdmin != null) { return groupDeterminedByAdmin; } // Finally, we will try to fall back to namespace as rsgroup if exists ClusterSchema clusterSchema = masterServices.getClusterSchema(); if (clusterSchema == null) { if (TableName.isMetaTableName(tableName)) { LOG.info("Can not get the namespace rs group config for meta table, since the" + " meta table is not online yet, will use default group to assign meta first"); } else { LOG.warn("ClusterSchema is null, can only use default rsgroup, should not happen?"); } } else { NamespaceDescriptor nd = clusterSchema.getNamespace(tableName.getNamespaceAsString()); RSGroupInfo groupNameOfNs = getRSGroup(nd.getConfigurationValue(RSGroupInfo.NAMESPACE_DESC_PROP_GROUP)); if (groupNameOfNs != null) { return groupNameOfNs; } } return getRSGroup(RSGroupInfo.DEFAULT_GROUP); } @Override public void updateRSGroupConfig(String groupName, Map configuration) throws IOException { if (RSGroupInfo.DEFAULT_GROUP.equals(groupName)) { // We do not persist anything of default group, therefore, it is not supported to update // default group's configuration which lost once master down. throw new ConstraintException( "configuration of " + RSGroupInfo.DEFAULT_GROUP + " can't be stored persistently"); } RSGroupInfo rsGroupInfo = getRSGroupInfo(groupName); new HashSet<>(rsGroupInfo.getConfiguration().keySet()) .forEach(rsGroupInfo::removeConfiguration); configuration.forEach(rsGroupInfo::setConfiguration); flushConfig(); } List retrieveGroupListFromGroupTable() throws IOException { List rsGroupInfoList = Lists.newArrayList(); try (Table table = conn.getTable(RSGROUP_TABLE_NAME); ResultScanner scanner = table.getScanner(new Scan())) { for (Result result;;) { result = scanner.next(); if (result == null) { break; } RSGroupProtos.RSGroupInfo proto = RSGroupProtos.RSGroupInfo .parseFrom(result.getValue(META_FAMILY_BYTES, META_QUALIFIER_BYTES)); rsGroupInfoList.add(RSGroupProtobufUtil.toGroupInfo(proto)); } } return rsGroupInfoList; } List retrieveGroupListFromZookeeper() throws IOException { String groupBasePath = ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, rsGroupZNode); List RSGroupInfoList = Lists.newArrayList(); // Overwrite any info stored by table, this takes precedence try { if (ZKUtil.checkExists(watcher, groupBasePath) != -1) { List children = ZKUtil.listChildrenAndWatchForNewChildren(watcher, groupBasePath); if (children == null) { return RSGroupInfoList; } for (String znode : children) { byte[] data = ZKUtil.getData(watcher, ZNodePaths.joinZNode(groupBasePath, znode)); if (data != null && data.length > 0) { ProtobufUtil.expectPBMagicPrefix(data); ByteArrayInputStream bis = new ByteArrayInputStream(data, ProtobufUtil.lengthOfPBMagic(), data.length); RSGroupInfoList .add(RSGroupProtobufUtil.toGroupInfo(RSGroupProtos.RSGroupInfo.parseFrom(bis))); } } LOG.debug("Read ZK GroupInfo count:" + RSGroupInfoList.size()); } } catch (KeeperException | DeserializationException | InterruptedException e) { throw new IOException("Failed to read rsGroupZNode", e); } return RSGroupInfoList; } @Override public void refresh() throws IOException { refresh(false); } /** * Read rsgroup info from the source of truth, the hbase:rsgroup table. Update zk cache. Called on * startup of the manager. */ private synchronized void refresh(boolean forceOnline) throws IOException { List groupList = new LinkedList<>(); // Overwrite anything read from zk, group table is source of truth // if online read from GROUP table if (forceOnline || isOnline()) { LOG.debug("Refreshing in Online mode."); groupList.addAll(retrieveGroupListFromGroupTable()); } else { LOG.debug("Refreshing in Offline mode."); groupList.addAll(retrieveGroupListFromZookeeper()); } // refresh default group, prune NavigableSet orphanTables = new TreeSet<>(); for (String entry : masterServices.getTableDescriptors().getAll().keySet()) { orphanTables.add(TableName.valueOf(entry)); } for (RSGroupInfo group : groupList) { if (!group.getName().equals(RSGroupInfo.DEFAULT_GROUP)) { orphanTables.removeAll(group.getTables()); } } // This is added to the last of the list so it overwrites the 'default' rsgroup loaded // from region group table or zk groupList .add(new RSGroupInfo(RSGroupInfo.DEFAULT_GROUP, getDefaultServers(groupList), orphanTables)); // populate the data HashMap newGroupMap = Maps.newHashMap(); HashMap newTableMap = Maps.newHashMap(); for (RSGroupInfo group : groupList) { newGroupMap.put(group.getName(), group); for (TableName table : group.getTables()) { newTableMap.put(table, group.getName()); } } resetRSGroupAndTableMaps(newGroupMap, newTableMap); updateCacheOfRSGroups(rsGroupMap.keySet()); } private synchronized Map flushConfigTable(Map groupMap) throws IOException { Map newTableMap = Maps.newHashMap(); List mutations = Lists.newArrayList(); // populate deletes for (String groupName : prevRSGroups) { if (!groupMap.containsKey(groupName)) { Delete d = new Delete(Bytes.toBytes(groupName)); mutations.add(d); } } // populate puts for (RSGroupInfo RSGroupInfo : groupMap.values()) { RSGroupProtos.RSGroupInfo proto = RSGroupProtobufUtil.toProtoGroupInfo(RSGroupInfo); Put p = new Put(Bytes.toBytes(RSGroupInfo.getName())); p.addColumn(META_FAMILY_BYTES, META_QUALIFIER_BYTES, proto.toByteArray()); mutations.add(p); for (TableName entry : RSGroupInfo.getTables()) { newTableMap.put(entry, RSGroupInfo.getName()); } } if (mutations.size() > 0) { multiMutate(mutations); } return newTableMap; } private synchronized void flushConfig() throws IOException { flushConfig(this.rsGroupMap); } private synchronized void flushConfig(Map newGroupMap) throws IOException { Map newTableMap; // For offline mode persistence is still unavailable // We're refreshing in-memory state but only for servers in default group if (!isOnline()) { if (newGroupMap == this.rsGroupMap) { // When newGroupMap is this.rsGroupMap itself, // do not need to check default group and other groups as followed return; } Map oldGroupMap = Maps.newHashMap(rsGroupMap); RSGroupInfo oldDefaultGroup = oldGroupMap.remove(RSGroupInfo.DEFAULT_GROUP); RSGroupInfo newDefaultGroup = newGroupMap.remove(RSGroupInfo.DEFAULT_GROUP); if ( !oldGroupMap.equals(newGroupMap) /* compare both tables and servers in other groups */ || !oldDefaultGroup.getTables() .equals(newDefaultGroup.getTables()) /* compare tables in default group */ ) { throw new IOException("Only servers in default group can be updated during offline mode"); } // Restore newGroupMap by putting its default group back newGroupMap.put(RSGroupInfo.DEFAULT_GROUP, newDefaultGroup); // Refresh rsGroupMap // according to the inputted newGroupMap (an updated copy of rsGroupMap) rsGroupMap = newGroupMap; // Do not need to update tableMap // because only the update on servers in default group is allowed above, // or IOException will be thrown return; } /* For online mode, persist to Zookeeper */ newTableMap = flushConfigTable(newGroupMap); // Make changes visible after having been persisted to the source of truth resetRSGroupAndTableMaps(newGroupMap, newTableMap); try { String groupBasePath = ZNodePaths.joinZNode(watcher.getZNodePaths().baseZNode, rsGroupZNode); ZKUtil.createAndFailSilent(watcher, groupBasePath, ProtobufMagic.PB_MAGIC); List zkOps = new ArrayList<>(newGroupMap.size()); for (String groupName : prevRSGroups) { if (!newGroupMap.containsKey(groupName)) { String znode = ZNodePaths.joinZNode(groupBasePath, groupName); zkOps.add(ZKUtil.ZKUtilOp.deleteNodeFailSilent(znode)); } } for (RSGroupInfo RSGroupInfo : newGroupMap.values()) { String znode = ZNodePaths.joinZNode(groupBasePath, RSGroupInfo.getName()); RSGroupProtos.RSGroupInfo proto = RSGroupProtobufUtil.toProtoGroupInfo(RSGroupInfo); LOG.debug("Updating znode: " + znode); ZKUtil.createAndFailSilent(watcher, znode); zkOps.add(ZKUtil.ZKUtilOp.deleteNodeFailSilent(znode)); zkOps.add(ZKUtil.ZKUtilOp.createAndFailSilent(znode, ProtobufUtil.prependPBMagic(proto.toByteArray()))); } LOG.debug("Writing ZK GroupInfo count: " + zkOps.size()); ZKUtil.multiOrSequential(watcher, zkOps, false); } catch (KeeperException e) { LOG.error("Failed to write to rsGroupZNode", e); masterServices.abort("Failed to write to rsGroupZNode", e); throw new IOException("Failed to write to rsGroupZNode", e); } updateCacheOfRSGroups(newGroupMap.keySet()); } /** * Make changes visible. Caller must be synchronized on 'this'. */ private void resetRSGroupAndTableMaps(Map newRSGroupMap, Map newTableMap) { // Make maps Immutable. this.rsGroupMap = Collections.unmodifiableMap(newRSGroupMap); this.tableMap = Collections.unmodifiableMap(newTableMap); } /** * Update cache of rsgroups. Caller must be synchronized on 'this'. * @param currentGroups Current list of Groups. */ private void updateCacheOfRSGroups(final Set currentGroups) { this.prevRSGroups.clear(); this.prevRSGroups.addAll(currentGroups); } // Called by getDefaultServers. Presume it has lock in place. private List getOnlineRS() throws IOException { if (masterServices != null) { return masterServices.getServerManager().getOnlineServersList(); } LOG.debug("Reading online RS from zookeeper"); List servers = new LinkedList<>(); try { for (String el : ZKUtil.listChildrenNoWatch(watcher, watcher.getZNodePaths().rsZNode)) { servers.add(ServerName.parseServerName(el)); } } catch (KeeperException e) { throw new IOException("Failed to retrieve server list from zookeeper", e); } return servers; } // Called by ServerEventsListenerThread. Presume it has lock on this manager when it runs. private SortedSet
getDefaultServers() throws IOException { return getDefaultServers(listRSGroups()); } // Called by ServerEventsListenerThread. Presume it has lock on this manager when it runs. private SortedSet
getDefaultServers(List rsGroupInfoList) throws IOException { // Build a list of servers in other groups than default group, from rsGroupMap Set
serversInOtherGroup = new HashSet<>(); for (RSGroupInfo group : rsGroupInfoList) { if (!RSGroupInfo.DEFAULT_GROUP.equals(group.getName())) { // not default group serversInOtherGroup.addAll(group.getServers()); } } // Get all online servers from Zookeeper and find out servers in default group SortedSet
defaultServers = Sets.newTreeSet(); for (ServerName serverName : getOnlineRS()) { Address server = Address.fromParts(serverName.getHostname(), serverName.getPort()); if (!serversInOtherGroup.contains(server)) { // not in other groups defaultServers.add(server); } } return defaultServers; } // Called by ServerEventsListenerThread. Synchronize on this because redoing // the rsGroupMap then writing it out. private synchronized void updateDefaultServers(SortedSet
servers) throws IOException { RSGroupInfo info = rsGroupMap.get(RSGroupInfo.DEFAULT_GROUP); RSGroupInfo newInfo = new RSGroupInfo(info.getName(), servers, info.getTables()); HashMap newGroupMap = Maps.newHashMap(rsGroupMap); newGroupMap.put(newInfo.getName(), newInfo); flushConfig(newGroupMap); } /** * Calls {@link RSGroupInfoManagerImpl#updateDefaultServers(SortedSet)} to update list of known * servers. Notifications about server changes are received by registering {@link ServerListener}. * As a listener, we need to return immediately, so the real work of updating the servers is done * asynchronously in this thread. */ private class ServerEventsListenerThread extends Thread implements ServerListener { private final Logger LOG = LoggerFactory.getLogger(ServerEventsListenerThread.class); private boolean changed = false; ServerEventsListenerThread() { setDaemon(true); } @Override public void serverAdded(ServerName serverName) { serverChanged(); } @Override public void serverRemoved(ServerName serverName) { serverChanged(); } private synchronized void serverChanged() { changed = true; this.notify(); } @Override public void run() { setName(ServerEventsListenerThread.class.getName() + "-" + masterServices.getServerName()); SortedSet
prevDefaultServers = new TreeSet<>(); while (isMasterRunning(masterServices)) { try { LOG.info("Updating default servers."); SortedSet
servers = RSGroupInfoManagerImpl.this.getDefaultServers(); if (!servers.equals(prevDefaultServers)) { RSGroupInfoManagerImpl.this.updateDefaultServers(servers); prevDefaultServers = servers; LOG.info("Updated with servers: " + servers.size()); } try { synchronized (this) { while (!changed) { wait(); } changed = false; } } catch (InterruptedException e) { LOG.warn("Interrupted", e); } } catch (IOException e) { LOG.warn("Failed to update default servers", e); } } } } private class RSGroupStartupWorker extends Thread { private final Logger LOG = LoggerFactory.getLogger(RSGroupStartupWorker.class); private volatile boolean online = false; RSGroupStartupWorker() { super(RSGroupStartupWorker.class.getName() + "-" + masterServices.getServerName()); setDaemon(true); } @Override public void run() { if (waitForGroupTableOnline()) { LOG.info("GroupBasedLoadBalancer is now online"); } else { LOG.warn("Quit without making region group table online"); } } private boolean waitForGroupTableOnline() { while (isMasterRunning(masterServices)) { try { TableStateManager tsm = masterServices.getTableStateManager(); if (!tsm.isTablePresent(RSGROUP_TABLE_NAME)) { createRSGroupTable(); } // try reading from the table try (Table table = conn.getTable(RSGROUP_TABLE_NAME)) { table.get(new Get(ROW_KEY)); } LOG.info( "RSGroup table=" + RSGROUP_TABLE_NAME + " is online, refreshing cached information"); RSGroupInfoManagerImpl.this.refresh(true); online = true; // flush any inconsistencies between ZK and HTable RSGroupInfoManagerImpl.this.flushConfig(); return true; } catch (Exception e) { LOG.warn("Failed to perform check", e); // 100ms is short so let's just ignore the interrupt Threads.sleepWithoutInterrupt(100); } } return false; } private void createRSGroupTable() throws IOException { OptionalLong optProcId = masterServices.getProcedures().stream() .filter(p -> p instanceof CreateTableProcedure).map(p -> (CreateTableProcedure) p) .filter(p -> p.getTableName().equals(RSGROUP_TABLE_NAME)).mapToLong(Procedure::getProcId) .findFirst(); long procId; if (optProcId.isPresent()) { procId = optProcId.getAsLong(); } else { procId = masterServices.createSystemTable(RSGROUP_TABLE_DESC); } // wait for region to be online int tries = 600; while ( !(masterServices.getMasterProcedureExecutor().isFinished(procId)) && masterServices.getMasterProcedureExecutor().isRunning() && tries > 0 ) { try { Thread.sleep(100); } catch (InterruptedException e) { throw new IOException("Wait interrupted ", e); } tries--; } if (tries <= 0) { throw new IOException("Failed to create group table in a given time."); } else { Procedure result = masterServices.getMasterProcedureExecutor().getResult(procId); if (result != null && result.isFailed()) { throw new IOException( "Failed to create group table. " + MasterProcedureUtil.unwrapRemoteIOException(result)); } } } public boolean isOnline() { return online; } } private static boolean isMasterRunning(MasterServices masterServices) { return !masterServices.isAborted() && !masterServices.isStopped(); } private void multiMutate(List mutations) throws IOException { try (Table table = conn.getTable(RSGROUP_TABLE_NAME)) { CoprocessorRpcChannel channel = table.coprocessorService(ROW_KEY); MultiRowMutationProtos.MutateRowsRequest.Builder mmrBuilder = MultiRowMutationProtos.MutateRowsRequest.newBuilder(); for (Mutation mutation : mutations) { if (mutation instanceof Put) { mmrBuilder.addMutationRequest(org.apache.hadoop.hbase.protobuf.ProtobufUtil.toMutation( org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType.PUT, mutation)); } else if (mutation instanceof Delete) { mmrBuilder.addMutationRequest(org.apache.hadoop.hbase.protobuf.ProtobufUtil.toMutation( org.apache.hadoop.hbase.protobuf.generated.ClientProtos.MutationProto.MutationType.DELETE, mutation)); } else { throw new DoNotRetryIOException( "multiMutate doesn't support " + mutation.getClass().getName()); } } MultiRowMutationProtos.MultiRowMutationService.BlockingInterface service = MultiRowMutationProtos.MultiRowMutationService.newBlockingStub(channel); try { service.mutateRows(null, mmrBuilder.build()); } catch (ServiceException ex) { ProtobufUtil.toIOException(ex); } } } private void checkGroupName(String groupName) throws ConstraintException { if (!groupName.matches("[a-zA-Z0-9_]+")) { throw new ConstraintException("RSGroup name should only contain alphanumeric characters"); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy