All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.client.HConnectionManager Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.client;

import java.io.Closeable;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.UndeclaredThrowableException;
import java.net.SocketException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Set;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArraySet;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.Chore;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HRegionLocation;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.Stoppable;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitor;
import org.apache.hadoop.hbase.client.MetaScanner.MetaScannerVisitorBase;
import org.apache.hadoop.hbase.client.coprocessor.Batch;
import org.apache.hadoop.hbase.exceptions.DoNotRetryIOException;
import org.apache.hadoop.hbase.exceptions.MasterNotRunningException;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
import org.apache.hadoop.hbase.exceptions.RegionOpeningException;
import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException;
import org.apache.hadoop.hbase.exceptions.TableNotFoundException;
import org.apache.hadoop.hbase.exceptions.ZooKeeperConnectionException;
import org.apache.hadoop.hbase.ipc.RpcClient;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.RequestConverter;
import org.apache.hadoop.hbase.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ClientService;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceRequest;
import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.CoprocessorServiceResponse;
import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.TableSchema;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AddColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.AssignRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.BalanceResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CatalogScanResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.CreateTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DeleteTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DisableTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.DispatchMergingRegionsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableCatalogJanitorResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.EnableTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsCatalogJanitorEnabledResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsRestoreSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.IsSnapshotDoneResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ListSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MasterAdminService;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyColumnResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ModifyTableResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.MoveRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.OfflineRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.RestoreSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.SetBalancerRunningResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.ShutdownResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.StopMasterResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.TakeSnapshotResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterAdminProtos.UnassignRegionResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetClusterStatusResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetSchemaAlterStatusResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.GetTableDescriptorsResponse;
import org.apache.hadoop.hbase.protobuf.generated.MasterMonitorProtos.MasterMonitorService;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningRequest;
import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.IsMasterRunningResponse;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.SoftValueSortedMap;
import org.apache.hadoop.hbase.util.Triple;
import org.apache.hadoop.hbase.zookeeper.MasterAddressTracker;
import org.apache.hadoop.hbase.zookeeper.ZKUtil;
import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher;
import org.apache.hadoop.ipc.RemoteException;
import org.apache.zookeeper.KeeperException;

import com.google.common.annotations.VisibleForTesting;
import com.google.protobuf.BlockingRpcChannel;
import com.google.protobuf.RpcController;
import com.google.protobuf.ServiceException;

/**
 * A non-instantiable class that manages {@link HConnection}s.
 * This class has a static Map of {@link HConnection} instances keyed by
 * {@link Configuration}; all invocations of {@link #getConnection(Configuration)}
 * that pass the same {@link Configuration} instance will be returned the same
 * {@link  HConnection} instance (Adding properties to a Configuration
 * instance does not change its object identity; for more on how this is done see
 * {@link HConnectionKey}).  Sharing {@link HConnection}
 * instances is usually what you want; all clients of the {@link HConnection}
 * instances share the HConnections' cache of Region locations rather than each
 * having to discover for itself the location of meta, etc.  It makes
 * sense for the likes of the pool of HTables class {@link HTablePool}, for
 * instance (If concerned that a single {@link HConnection} is insufficient
 * for sharing amongst clients in say an heavily-multithreaded environment,
 * in practise its not proven to be an issue.  Besides, {@link HConnection} is
 * implemented atop Hadoop RPC and as of this writing, Hadoop RPC does a
 * connection per cluster-member, exclusively).
 *
 * 

But sharing connections makes clean up of {@link HConnection} instances a little awkward. * Currently, clients cleanup by calling {@link #deleteConnection(Configuration)}. This will * shutdown the zookeeper connection the HConnection was using and clean up all * HConnection resources as well as stopping proxies to servers out on the * cluster. Not running the cleanup will not end the world; it'll * just stall the closeup some and spew some zookeeper connection failed * messages into the log. Running the cleanup on a {@link HConnection} that is * subsequently used by another will cause breakage so be careful running * cleanup. *

To create a {@link HConnection} that is not shared by others, you can * create a new {@link Configuration} instance, pass this new instance to * {@link #getConnection(Configuration)}, and then when done, close it up by * doing something like the following: *

 * {@code
 * Configuration newConfig = new Configuration(originalConf);
 * HConnection connection = HConnectionManager.getConnection(newConfig);
 * // Use the connection to your hearts' delight and then when done...
 * HConnectionManager.deleteConnection(newConfig, true);
 * }
 * 
*

Cleanup used to be done inside in a shutdown hook. On startup we'd * register a shutdown hook that called {@link #deleteAllConnections()} * on its way out but the order in which shutdown hooks run is not defined so * were problematic for clients of HConnection that wanted to register their * own shutdown hooks so we removed ours though this shifts the onus for * cleanup to the client. */ @SuppressWarnings("serial") @InterfaceAudience.Public @InterfaceStability.Evolving public class HConnectionManager { static final Log LOG = LogFactory.getLog(HConnectionManager.class); public static final String RETRIES_BY_SERVER_KEY = "hbase.client.retries.by.server"; // An LRU Map of HConnectionKey -> HConnection (TableServer). All // access must be synchronized. This map is not private because tests // need to be able to tinker with it. static final Map CONNECTION_INSTANCES; public static final int MAX_CACHED_CONNECTION_INSTANCES; static { // We set instances to one more than the value specified for {@link // HConstants#ZOOKEEPER_MAX_CLIENT_CNXNS}. By default, the zk default max // connections to the ensemble from the one client is 30, so in that case we // should run into zk issues before the LRU hit this value of 31. MAX_CACHED_CONNECTION_INSTANCES = HBaseConfiguration.create().getInt( HConstants.ZOOKEEPER_MAX_CLIENT_CNXNS, HConstants.DEFAULT_ZOOKEPER_MAX_CLIENT_CNXNS) + 1; CONNECTION_INSTANCES = new LinkedHashMap( (int) (MAX_CACHED_CONNECTION_INSTANCES / 0.75F) + 1, 0.75F, true) { @Override protected boolean removeEldestEntry( Map.Entry eldest) { return size() > MAX_CACHED_CONNECTION_INSTANCES; } }; } /* * Non-instantiable. */ private HConnectionManager() { super(); } /** * Get the connection that goes with the passed conf configuration instance. * If no current connection exists, method creates a new connection and keys it using * connection-specific properties from the passed {@link Configuration}; see * {@link HConnectionKey}. * @param conf configuration * @return HConnection object for conf * @throws ZooKeeperConnectionException */ @SuppressWarnings("resource") public static HConnection getConnection(final Configuration conf) throws IOException { HConnectionKey connectionKey = new HConnectionKey(conf); synchronized (CONNECTION_INSTANCES) { HConnectionImplementation connection = CONNECTION_INSTANCES.get(connectionKey); if (connection == null) { connection = (HConnectionImplementation)createConnection(conf, true); CONNECTION_INSTANCES.put(connectionKey, connection); } else if (connection.isClosed()) { HConnectionManager.deleteConnection(connectionKey, true); connection = (HConnectionImplementation)createConnection(conf, true); CONNECTION_INSTANCES.put(connectionKey, connection); } connection.incCount(); return connection; } } /** * Create a new HConnection instance using the passed conf instance. *

Note: This bypasses the usual HConnection life cycle management done by * {@link #getConnection(Configuration)}. Use this with caution, the caller is responsible for * calling {@link HConnection#close()} on the returned connection instance. * @param conf configuration * @return HConnection object for conf * @throws ZooKeeperConnectionException */ public static HConnection createConnection(Configuration conf) throws IOException { return createConnection(conf, false); } static HConnection createConnection(final Configuration conf, final boolean managed) throws IOException { String className = conf.get("hbase.client.connection.impl", HConnectionManager.HConnectionImplementation.class.getName()); Class clazz = null; try { clazz = Class.forName(className); } catch (ClassNotFoundException e) { throw new IOException(e); } try { // Default HCM#HCI is not accessible; make it so before invoking. Constructor constructor = clazz.getDeclaredConstructor(Configuration.class, boolean.class); constructor.setAccessible(true); return (HConnection) constructor.newInstance(conf, managed); } catch (Exception e) { throw new IOException(e); } } /** * Delete connection information for the instance specified by passed configuration. * If there are no more references to the designated connection connection, this method will * then close connection to the zookeeper ensemble and let go of all associated resources. * * @param conf configuration whose identity is used to find {@link HConnection} instance. */ public static void deleteConnection(Configuration conf) { deleteConnection(new HConnectionKey(conf), false); } /** * Cleanup a known stale connection. * This will then close connection to the zookeeper ensemble and let go of all resources. * * @param connection */ public static void deleteStaleConnection(HConnection connection) { deleteConnection(connection, true); } /** * Delete information for all connections. */ public static void deleteAllConnections() { synchronized (CONNECTION_INSTANCES) { Set connectionKeys = new HashSet(); connectionKeys.addAll(CONNECTION_INSTANCES.keySet()); for (HConnectionKey connectionKey : connectionKeys) { deleteConnection(connectionKey, false); } CONNECTION_INSTANCES.clear(); } } private static void deleteConnection(HConnection connection, boolean staleConnection) { synchronized (CONNECTION_INSTANCES) { for (Entry e: CONNECTION_INSTANCES.entrySet()) { if (e.getValue() == connection) { deleteConnection(e.getKey(), staleConnection); break; } } } } private static void deleteConnection(HConnectionKey connectionKey, boolean staleConnection) { synchronized (CONNECTION_INSTANCES) { HConnectionImplementation connection = CONNECTION_INSTANCES.get(connectionKey); if (connection != null) { connection.decCount(); if (connection.isZeroReference() || staleConnection) { CONNECTION_INSTANCES.remove(connectionKey); connection.internalClose(); } } else { LOG.error("Connection not found in the list, can't delete it "+ "(connection key=" + connectionKey + "). May be the key was modified?"); } } } /** * It is provided for unit test cases which verify the behavior of region * location cache prefetch. * @return Number of cached regions for the table. * @throws ZooKeeperConnectionException */ static int getCachedRegionCount(Configuration conf, final byte[] tableName) throws IOException { return execute(new HConnectable(conf) { @Override public Integer connect(HConnection connection) { return ((HConnectionImplementation)connection).getNumberOfCachedRegionLocations(tableName); } }); } /** * It's provided for unit test cases which verify the behavior of region * location cache prefetch. * @return true if the region where the table and row reside is cached. * @throws ZooKeeperConnectionException */ static boolean isRegionCached(Configuration conf, final byte[] tableName, final byte[] row) throws IOException { return execute(new HConnectable(conf) { @Override public Boolean connect(HConnection connection) { return ((HConnectionImplementation) connection).isRegionCached(tableName, row); } }); } /** * This convenience method invokes the given {@link HConnectable#connect} * implementation using a {@link HConnection} instance that lasts just for the * duration of the invocation. * * @param the return type of the connect method * @param connectable the {@link HConnectable} instance * @return the value returned by the connect method * @throws IOException */ public static T execute(HConnectable connectable) throws IOException { if (connectable == null || connectable.conf == null) { return null; } Configuration conf = connectable.conf; HConnection connection = HConnectionManager.getConnection(conf); boolean connectSucceeded = false; try { T returnValue = connectable.connect(connection); connectSucceeded = true; return returnValue; } finally { try { connection.close(); } catch (Exception e) { if (connectSucceeded) { throw new IOException("The connection to " + connection + " could not be deleted.", e); } } } } /** Encapsulates connection to zookeeper and regionservers.*/ @edu.umd.cs.findbugs.annotations.SuppressWarnings( value="AT_OPERATION_SEQUENCE_ON_CONCURRENT_ABSTRACTION", justification="Access to the conncurrent hash map is under a lock so should be fine.") static class HConnectionImplementation implements HConnection, Closeable { static final Log LOG = LogFactory.getLog(HConnectionImplementation.class); private final long pause; private final int numTries; final int rpcTimeout; private final int prefetchRegionLimit; private final boolean useServerTrackerForRetries; private final long serverTrackerTimeout; private volatile boolean closed; private volatile boolean aborted; // package protected for the tests ClusterStatusListener clusterStatusListener; private final Object userRegionLock = new Object(); // We have a single lock for master & zk to prevent deadlocks. Having // one lock for ZK and one lock for master is not possible: // When creating a connection to master, we need a connection to ZK to get // its address. But another thread could have taken the ZK lock, and could // be waiting for the master lock => deadlock. private final Object masterAndZKLock = new Object(); private long keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE; private final DelayedClosing delayedClosing = DelayedClosing.createAndStart(this); private final Configuration conf; // Client rpc instance. private RpcClient rpcClient; /** * Map of table to table {@link HRegionLocation}s. The table key is made * by doing a {@link Bytes#mapKey(byte[])} of the table's name. */ private final Map> cachedRegionLocations = new HashMap>(); // The presence of a server in the map implies it's likely that there is an // entry in cachedRegionLocations that map to this server; but the absence // of a server in this map guarentees that there is no entry in cache that // maps to the absent server. // The access to this attribute must be protected by a lock on cachedRegionLocations private final Set cachedServers = new HashSet(); // region cache prefetch is enabled by default. this set contains all // tables whose region cache prefetch are disabled. private final Set regionCachePrefetchDisabledTables = new CopyOnWriteArraySet(); private int refCount; // indicates whether this connection's life cycle is managed (by us) private final boolean managed; /** * Cluster registry of basic info such as clusterid and meta region location. */ final Registry registry; /** * constructor * @param conf Configuration object * @param managed If true, does not do full shutdown on close; i.e. cleanup of connection * to zk and shutdown of all services; we just close down the resources this connection was * responsible for and decrement usage counters. It is up to the caller to do the full * cleanup. It is set when we want have connection sharing going on -- reuse of zk connection, * and cached region locations, established regionserver connections, etc. When connections * are shared, we have reference counting going on and will only do full cleanup when no more * users of an HConnectionImplementation instance. */ HConnectionImplementation(Configuration conf, boolean managed) throws IOException { this.conf = conf; this.managed = managed; this.closed = false; this.pause = conf.getLong(HConstants.HBASE_CLIENT_PAUSE, HConstants.DEFAULT_HBASE_CLIENT_PAUSE); this.numTries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); this.rpcTimeout = conf.getInt( HConstants.HBASE_RPC_TIMEOUT_KEY, HConstants.DEFAULT_HBASE_RPC_TIMEOUT); this.prefetchRegionLimit = conf.getInt( HConstants.HBASE_CLIENT_PREFETCH_LIMIT, HConstants.DEFAULT_HBASE_CLIENT_PREFETCH_LIMIT); this.useServerTrackerForRetries = conf.getBoolean(RETRIES_BY_SERVER_KEY, true); long serverTrackerTimeout = 0; if (this.useServerTrackerForRetries) { // Server tracker allows us to do faster, and yet useful (hopefully), retries. // However, if we are too useful, we might fail very quickly due to retry count limit. // To avoid this, we are going to cheat for now (see HBASE-7659), and calculate maximum // retry time if normal retries were used. Then we will retry until this time runs out. // If we keep hitting one server, the net effect will be the incremental backoff, and // essentially the same number of retries as planned. If we have to do faster retries, // we will do more retries in aggregate, but the user will be none the wiser. for (int i = 0; i < this.numTries; ++i) { serverTrackerTimeout += ConnectionUtils.getPauseTime(this.pause, i); } } this.serverTrackerTimeout = serverTrackerTimeout; this.registry = setupRegistry(); retrieveClusterId(); this.rpcClient = new RpcClient(this.conf, this.clusterId); // Do we publish the status? Class listenerClass = conf.getClass(ClusterStatusListener.STATUS_LISTENER_CLASS, ClusterStatusListener.DEFAULT_STATUS_LISTENER_CLASS, ClusterStatusListener.Listener.class); if (listenerClass != null) { clusterStatusListener = new ClusterStatusListener( new ClusterStatusListener.DeadServerHandler() { @Override public void newDead(ServerName sn) { clearCaches(sn); rpcClient.cancelConnections(sn.getHostname(), sn.getPort(), new SocketException(sn.getServerName() + " is dead: closing its connection.")); } }, conf, listenerClass); } } /** * @return The cluster registry implementation to use. * @throws IOException */ private Registry setupRegistry() throws IOException { String registryClass = this.conf.get("hbase.client.registry.impl", ZooKeeperRegistry.class.getName()); Registry registry = null; try { registry = (Registry)Class.forName(registryClass).newInstance(); } catch (Throwable t) { throw new IOException(t); } registry.init(this); return registry; } /** * For tests only. * @param rpcClient Client we should use instead. * @return Previous rpcClient */ RpcClient setRpcClient(final RpcClient rpcClient) { RpcClient oldRpcClient = this.rpcClient; this.rpcClient = rpcClient; return oldRpcClient; } /** * An identifier that will remain the same for a given connection. * @return */ public String toString(){ return "hconnection-0x" + Integer.toHexString(hashCode()); } protected String clusterId = null; void retrieveClusterId() { if (clusterId != null) return; this.clusterId = this.registry.getClusterId(); if (clusterId == null) { clusterId = HConstants.CLUSTER_ID_DEFAULT; LOG.debug("clusterid came back null, using default " + clusterId); } } @Override public Configuration getConfiguration() { return this.conf; } private void checkIfBaseNodeAvailable(ZooKeeperWatcher zkw) throws MasterNotRunningException { String errorMsg; try { if (ZKUtil.checkExists(zkw, zkw.baseZNode) == -1) { errorMsg = "The node " + zkw.baseZNode+" is not in ZooKeeper. " + "It should have been written by the master. " + "Check the value configured in 'zookeeper.znode.parent'. " + "There could be a mismatch with the one configured in the master."; LOG.error(errorMsg); throw new MasterNotRunningException(errorMsg); } } catch (KeeperException e) { errorMsg = "Can't get connection to ZooKeeper: " + e.getMessage(); LOG.error(errorMsg); throw new MasterNotRunningException(errorMsg, e); } } /** * @return true if the master is running, throws an exception otherwise * @throws MasterNotRunningException - if the master is not running * @throws ZooKeeperConnectionException */ @Override public boolean isMasterRunning() throws MasterNotRunningException, ZooKeeperConnectionException { // When getting the master connection, we check it's running, // so if there is no exception, it means we've been able to get a // connection on a running master MasterMonitorKeepAliveConnection m = getKeepAliveMasterMonitorService(); try { m.close(); } catch (IOException e) { throw new MasterNotRunningException("Failed close", e); } return true; } @Override public HRegionLocation getRegionLocation(final byte [] name, final byte [] row, boolean reload) throws IOException { return reload? relocateRegion(name, row): locateRegion(name, row); } @Override public boolean isTableEnabled(byte[] tableName) throws IOException { return this.registry.isTableOnlineState(tableName, true); } @Override public boolean isTableDisabled(byte[] tableName) throws IOException { return this.registry.isTableOnlineState(tableName, false); } @Override public boolean isTableAvailable(final byte[] tableName) throws IOException { final AtomicBoolean available = new AtomicBoolean(true); final AtomicInteger regionCount = new AtomicInteger(0); MetaScannerVisitor visitor = new MetaScannerVisitorBase() { @Override public boolean processRow(Result row) throws IOException { HRegionInfo info = MetaScanner.getHRegionInfo(row); if (info != null) { if (Bytes.compareTo(tableName, info.getTableName()) == 0) { ServerName server = HRegionInfo.getServerName(row); if (server == null) { available.set(false); return false; } regionCount.incrementAndGet(); } else if (Bytes.compareTo(tableName, info.getTableName()) < 0) { // Return if we are done with the current table return false; } } return true; } }; MetaScanner.metaScan(conf, visitor, tableName); return available.get() && (regionCount.get() > 0); } @Override public boolean isTableAvailable(final byte[] tableName, final byte[][] splitKeys) throws IOException { final AtomicBoolean available = new AtomicBoolean(true); final AtomicInteger regionCount = new AtomicInteger(0); MetaScannerVisitor visitor = new MetaScannerVisitorBase() { @Override public boolean processRow(Result row) throws IOException { HRegionInfo info = MetaScanner.getHRegionInfo(row); if (info != null) { if (Bytes.compareTo(tableName, info.getTableName()) == 0) { ServerName server = HRegionInfo.getServerName(row); if (server == null) { available.set(false); return false; } if (!Bytes.equals(info.getStartKey(), HConstants.EMPTY_BYTE_ARRAY)) { for (byte[] splitKey : splitKeys) { // Just check if the splitkey is available if (Bytes.equals(info.getStartKey(), splitKey)) { regionCount.incrementAndGet(); break; } } } else { // Always empty start row should be counted regionCount.incrementAndGet(); } } else if (Bytes.compareTo(tableName, info.getTableName()) < 0) { // Return if we are done with the current table return false; } } return true; } }; MetaScanner.metaScan(conf, visitor, tableName); // +1 needs to be added so that the empty start row is also taken into account return available.get() && (regionCount.get() == splitKeys.length + 1); } @Override public HRegionLocation locateRegion(final byte[] regionName) throws IOException { return locateRegion(HRegionInfo.getTableName(regionName), HRegionInfo.getStartKey(regionName), false, true); } @Override public boolean isDeadServer(ServerName sn) { if (clusterStatusListener == null) { return false; } else { return clusterStatusListener.isDeadServer(sn); } } @Override public List locateRegions(final byte[] tableName) throws IOException { return locateRegions (tableName, false, true); } @Override public List locateRegions(final byte[] tableName, final boolean useCache, final boolean offlined) throws IOException { NavigableMap regions = MetaScanner.allTableRegions(conf, tableName, offlined); final List locations = new ArrayList(); for (HRegionInfo regionInfo : regions.keySet()) { locations.add(locateRegion(tableName, regionInfo.getStartKey(), useCache, true)); } return locations; } @Override public HRegionLocation locateRegion(final byte [] tableName, final byte [] row) throws IOException{ return locateRegion(tableName, row, true, true); } @Override public HRegionLocation relocateRegion(final byte [] tableName, final byte [] row) throws IOException{ // Since this is an explicit request not to use any caching, finding // disabled tables should not be desirable. This will ensure that an exception is thrown when // the first time a disabled table is interacted with. if (isTableDisabled(tableName)) { throw new DoNotRetryIOException(Bytes.toString(tableName) + " is disabled."); } return locateRegion(tableName, row, false, true); } private HRegionLocation locateRegion(final byte [] tableName, final byte [] row, boolean useCache, boolean retry) throws IOException { if (this.closed) throw new IOException(toString() + " closed"); if (tableName == null || tableName.length == 0) { throw new IllegalArgumentException( "table name cannot be null or zero length"); } if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) { return this.registry.getMetaRegionLocation(); } else { // Region not in the cache - have to go to the meta RS return locateRegionInMeta(HConstants.META_TABLE_NAME, tableName, row, useCache, userRegionLock, retry); } } /* * Search .META. for the HRegionLocation info that contains the table and * row we're seeking. It will prefetch certain number of regions info and * save them to the global region cache. */ private void prefetchRegionCache(final byte[] tableName, final byte[] row) { // Implement a new visitor for MetaScanner, and use it to walk through // the .META. MetaScannerVisitor visitor = new MetaScannerVisitorBase() { public boolean processRow(Result result) throws IOException { try { HRegionInfo regionInfo = MetaScanner.getHRegionInfo(result); if (regionInfo == null) { return true; } // possible we got a region of a different table... if (!Bytes.equals(regionInfo.getTableName(), tableName)) { return false; // stop scanning } if (regionInfo.isOffline()) { // don't cache offline regions return true; } ServerName serverName = HRegionInfo.getServerName(result); if (serverName == null) { return true; // don't cache it } // instantiate the location long seqNum = HRegionInfo.getSeqNumDuringOpen(result); HRegionLocation loc = new HRegionLocation(regionInfo, serverName, seqNum); // cache this meta entry cacheLocation(tableName, null, loc); return true; } catch (RuntimeException e) { throw new IOException(e); } } }; try { // pre-fetch certain number of regions info at region cache. MetaScanner.metaScan(conf, visitor, tableName, row, this.prefetchRegionLimit); } catch (IOException e) { LOG.warn("Encountered problems when prefetch META table: ", e); } } /* * Search the .META. table for the HRegionLocation * info that contains the table and row we're seeking. */ private HRegionLocation locateRegionInMeta(final byte [] parentTable, final byte [] tableName, final byte [] row, boolean useCache, Object regionLockObject, boolean retry) throws IOException { HRegionLocation location; // If we are supposed to be using the cache, look in the cache to see if // we already have the region. if (useCache) { location = getCachedLocation(tableName, row); if (location != null) { return location; } } int localNumRetries = retry ? numTries : 1; // build the key of the meta region we should be looking for. // the extra 9's on the end are necessary to allow "exact" matches // without knowing the precise region names. byte [] metaKey = HRegionInfo.createRegionName(tableName, row, HConstants.NINES, false); for (int tries = 0; true; tries++) { if (tries >= localNumRetries) { throw new NoServerForRegionException("Unable to find region for " + Bytes.toStringBinary(row) + " after " + numTries + " tries."); } HRegionLocation metaLocation = null; try { // locate the meta region metaLocation = locateRegion(parentTable, metaKey, true, false); // If null still, go around again. if (metaLocation == null) continue; ClientService.BlockingInterface service = getClient(metaLocation.getServerName()); Result regionInfoRow; // This block guards against two threads trying to load the meta // region at the same time. The first will load the meta region and // the second will use the value that the first one found. synchronized (regionLockObject) { // Check the cache again for a hit in case some other thread made the // same query while we were waiting on the lock. if (useCache) { location = getCachedLocation(tableName, row); if (location != null) { return location; } // If the parent table is META, we may want to pre-fetch some // region info into the global region cache for this table. if (Bytes.equals(parentTable, HConstants.META_TABLE_NAME) && (getRegionCachePrefetch(tableName))) { prefetchRegionCache(tableName, row); } location = getCachedLocation(tableName, row); if (location != null) { return location; } } else { // If we are not supposed to be using the cache, delete any existing cached location // so it won't interfere. forceDeleteCachedLocation(tableName, row); } // Query the meta region for the location of the meta region regionInfoRow = ProtobufUtil.getRowOrBefore(service, metaLocation.getRegionInfo().getRegionName(), metaKey, HConstants.CATALOG_FAMILY); } if (regionInfoRow == null) { throw new TableNotFoundException(Bytes.toString(tableName)); } // convert the row result into the HRegionLocation we need! HRegionInfo regionInfo = MetaScanner.getHRegionInfo(regionInfoRow); if (regionInfo == null) { throw new IOException("HRegionInfo was null or empty in " + Bytes.toString(parentTable) + ", row=" + regionInfoRow); } // possible we got a region of a different table... if (!Bytes.equals(regionInfo.getTableName(), tableName)) { throw new TableNotFoundException( "Table '" + Bytes.toString(tableName) + "' was not found, got: " + Bytes.toString(regionInfo.getTableName()) + "."); } if (regionInfo.isSplit()) { throw new RegionOfflineException("the only available region for" + " the required row is a split parent," + " the daughters should be online soon: " + regionInfo.getRegionNameAsString()); } if (regionInfo.isOffline()) { throw new RegionOfflineException("the region is offline, could" + " be caused by a disable table call: " + regionInfo.getRegionNameAsString()); } ServerName serverName = HRegionInfo.getServerName(regionInfoRow); if (serverName == null) { throw new NoServerForRegionException("No server address listed " + "in " + Bytes.toString(parentTable) + " for region " + regionInfo.getRegionNameAsString() + " containing row " + Bytes.toStringBinary(row)); } if (isDeadServer(serverName)){ throw new RegionServerStoppedException(".META. says the region "+ regionInfo.getRegionNameAsString()+" is managed by the server " + serverName + ", but it is dead."); } // Instantiate the location location = new HRegionLocation(regionInfo, serverName, HRegionInfo.getSeqNumDuringOpen(regionInfoRow)); cacheLocation(tableName, null, location); return location; } catch (TableNotFoundException e) { // if we got this error, probably means the table just plain doesn't // exist. rethrow the error immediately. this should always be coming // from the HTable constructor. throw e; } catch (IOException e) { if (e instanceof RemoteException) { e = ((RemoteException)e).unwrapRemoteException(); } if (tries < numTries - 1) { if (LOG.isDebugEnabled()) { LOG.debug("locateRegionInMeta parentTable=" + Bytes.toString(parentTable) + ", metaLocation=" + ((metaLocation == null)? "null": "{" + metaLocation + "}") + ", attempt=" + tries + " of " + this.numTries + " failed; retrying after sleep of " + ConnectionUtils.getPauseTime(this.pause, tries) + " because: " + e.getMessage()); } } else { throw e; } // Only relocate the parent region if necessary if(!(e instanceof RegionOfflineException || e instanceof NoServerForRegionException)) { relocateRegion(parentTable, metaKey); } } try{ Thread.sleep(ConnectionUtils.getPauseTime(this.pause, tries)); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new IOException("Giving up trying to location region in " + "meta: thread is interrupted."); } } } /* * Search the cache for a location that fits our table and row key. * Return null if no suitable region is located. TODO: synchronization note * *

TODO: This method during writing consumes 15% of CPU doing lookup * into the Soft Reference SortedMap. Improve. * * @param tableName * @param row * @return Null or region location found in cache. */ HRegionLocation getCachedLocation(final byte [] tableName, final byte [] row) { SoftValueSortedMap tableLocations = getTableLocations(tableName); // start to examine the cache. we can only do cache actions // if there's something in the cache for this table. if (tableLocations.isEmpty()) { return null; } HRegionLocation possibleRegion = tableLocations.get(row); if (possibleRegion != null) { return possibleRegion; } possibleRegion = tableLocations.lowerValueByKey(row); if (possibleRegion == null) { return null; } // make sure that the end key is greater than the row we're looking // for, otherwise the row actually belongs in the next region, not // this one. the exception case is when the endkey is // HConstants.EMPTY_END_ROW, signifying that the region we're // checking is actually the last region in the table. byte[] endKey = possibleRegion.getRegionInfo().getEndKey(); if (Bytes.equals(endKey, HConstants.EMPTY_END_ROW) || KeyValue.getRowComparator(tableName).compareRows( endKey, 0, endKey.length, row, 0, row.length) > 0) { return possibleRegion; } // Passed all the way through, so we got nothing - complete cache miss return null; } /** * Delete a cached location, no matter what it is. Called when we were told to not use cache. * @param tableName tableName * @param row */ void forceDeleteCachedLocation(final byte [] tableName, final byte [] row) { HRegionLocation rl = null; synchronized (this.cachedRegionLocations) { Map tableLocations = getTableLocations(tableName); // start to examine the cache. we can only do cache actions // if there's something in the cache for this table. if (!tableLocations.isEmpty()) { rl = getCachedLocation(tableName, row); if (rl != null) { tableLocations.remove(rl.getRegionInfo().getStartKey()); } } } if ((rl != null) && LOG.isDebugEnabled()) { LOG.debug("Removed " + rl.getHostname() + ":" + rl.getPort() + " as a location of " + rl.getRegionInfo().getRegionNameAsString() + " for tableName=" + Bytes.toString(tableName) + " from cache"); } } /* * Delete all cached entries of a table that maps to a specific location. */ @Override public void clearCaches(final ServerName serverName){ boolean deletedSomething = false; synchronized (this.cachedRegionLocations) { if (!cachedServers.contains(serverName)) { return; } for (Map tableLocations : cachedRegionLocations.values()) { for (Entry e : tableLocations.entrySet()) { if (serverName.equals(e.getValue().getServerName())) { tableLocations.remove(e.getKey()); deletedSomething = true; } } } cachedServers.remove(serverName); } if (deletedSomething && LOG.isDebugEnabled()) { LOG.debug("Removed all cached region locations that map to " + serverName); } } /* * @param tableName * @return Map of cached locations for passed tableName */ private SoftValueSortedMap getTableLocations( final byte [] tableName) { // find the map of cached locations for this table Integer key = Bytes.mapKey(tableName); SoftValueSortedMap result; synchronized (this.cachedRegionLocations) { result = this.cachedRegionLocations.get(key); // if tableLocations for this table isn't built yet, make one if (result == null) { result = new SoftValueSortedMap( Bytes.BYTES_COMPARATOR); this.cachedRegionLocations.put(key, result); } } return result; } @Override public void clearRegionCache() { synchronized(this.cachedRegionLocations) { this.cachedRegionLocations.clear(); this.cachedServers.clear(); } } @Override public void clearRegionCache(final byte [] tableName) { synchronized (this.cachedRegionLocations) { this.cachedRegionLocations.remove(Bytes.mapKey(tableName)); } } /** * Put a newly discovered HRegionLocation into the cache. * @param tableName The table name. * @param source the source of the new location, if it's not coming from meta * @param location the new location */ private void cacheLocation(final byte [] tableName, final HRegionLocation source, final HRegionLocation location) { boolean isFromMeta = (source == null); byte [] startKey = location.getRegionInfo().getStartKey(); Map tableLocations = getTableLocations(tableName); boolean isNewCacheEntry = false; boolean isStaleUpdate = false; HRegionLocation oldLocation = null; synchronized (this.cachedRegionLocations) { cachedServers.add(location.getServerName()); oldLocation = tableLocations.get(startKey); isNewCacheEntry = (oldLocation == null); // If the server in cache sends us a redirect, assume it's always valid. if (!isNewCacheEntry && !oldLocation.equals(source)) { long newLocationSeqNum = location.getSeqNum(); // Meta record is stale - some (probably the same) server has closed the region // with later seqNum and told us about the new location. boolean isStaleMetaRecord = isFromMeta && (oldLocation.getSeqNum() > newLocationSeqNum); // Same as above for redirect. However, in this case, if the number is equal to previous // record, the most common case is that first the region was closed with seqNum, and then // opened with the same seqNum; hence we will ignore the redirect. // There are so many corner cases with various combinations of opens and closes that // an additional counter on top of seqNum would be necessary to handle them all. boolean isStaleRedirect = !isFromMeta && (oldLocation.getSeqNum() >= newLocationSeqNum); isStaleUpdate = (isStaleMetaRecord || isStaleRedirect); } if (!isStaleUpdate) { tableLocations.put(startKey, location); } } if (isNewCacheEntry) { if (LOG.isTraceEnabled()) { LOG.trace("Cached location for " + location.getRegionInfo().getRegionNameAsString() + " is " + location.getHostnamePort()); } } else if (isStaleUpdate && !location.equals(oldLocation)) { if (LOG.isTraceEnabled()) { LOG.trace("Ignoring stale location update for " + location.getRegionInfo().getRegionNameAsString() + ": " + location.getHostnamePort() + " at " + location.getSeqNum() + "; local " + oldLocation.getHostnamePort() + " at " + oldLocation.getSeqNum()); } } } // Map keyed by service name + regionserver to service stub implementation private final ConcurrentHashMap stubs = new ConcurrentHashMap(); // Map of locks used creating service stubs per regionserver. private final ConcurrentHashMap connectionLock = new ConcurrentHashMap(); /** * Maintains current state of MasterService instance. */ static abstract class MasterServiceState { HConnection connection; int userCount; long keepAliveUntil = Long.MAX_VALUE; MasterServiceState (final HConnection connection) { super(); this.connection = connection; } abstract Object getStub(); abstract void clearStub(); abstract boolean isMasterRunning() throws ServiceException; } /** * State of the MasterAdminService connection/setup. */ static class MasterAdminServiceState extends MasterServiceState { MasterAdminService.BlockingInterface stub; MasterAdminServiceState(final HConnection connection) { super(connection); } @Override public String toString() { return "MasterAdminService"; } @Override Object getStub() { return this.stub; } @Override void clearStub() { this.stub = null; } @Override boolean isMasterRunning() throws ServiceException { MasterProtos.IsMasterRunningResponse response = this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest()); return response != null? response.getIsMasterRunning(): false; } } /** * State of the MasterMonitorService connection/setup. */ static class MasterMonitorServiceState extends MasterServiceState { MasterMonitorService.BlockingInterface stub; MasterMonitorServiceState(final HConnection connection) { super(connection); } @Override public String toString() { return "MasterMonitorService"; } @Override Object getStub() { return this.stub; } @Override void clearStub() { this.stub = null; } @Override boolean isMasterRunning() throws ServiceException { MasterProtos.IsMasterRunningResponse response = this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest()); return response != null? response.getIsMasterRunning(): false; } } /** * Makes a client-side stub for master services. Sub-class to specialize. * Depends on hosting class so not static. Exists so we avoid duplicating a bunch of code * when setting up the MasterMonitorService and MasterAdminService. */ abstract class StubMaker { /** * Returns the name of the service stub being created. */ protected abstract String getServiceName(); /** * Make stub and cache it internal so can be used later doing the isMasterRunning call. * @param channel */ protected abstract Object makeStub(final BlockingRpcChannel channel); /** * Once setup, check it works by doing isMasterRunning check. * @throws ServiceException */ protected abstract void isMasterRunning() throws ServiceException; /** * Create a stub. Try once only. It is not typed because there is no common type to * protobuf services nor their interfaces. Let the caller do appropriate casting. * @return A stub for master services. * @throws IOException * @throws KeeperException * @throws ServiceException */ private Object makeStubNoRetries() throws IOException, KeeperException, ServiceException { ZooKeeperKeepAliveConnection zkw; try { zkw = getKeepAliveZooKeeperWatcher(); } catch (IOException e) { throw new ZooKeeperConnectionException("Can't connect to ZooKeeper", e); } try { checkIfBaseNodeAvailable(zkw); ServerName sn = MasterAddressTracker.getMasterAddress(zkw); if (sn == null) { String msg = "ZooKeeper available but no active master location found"; LOG.info(msg); throw new MasterNotRunningException(msg); } if (isDeadServer(sn)) { throw new MasterNotRunningException(sn + " is dead."); } // Use the security info interface name as our stub key String key = getStubKey(getServiceName(), sn.getHostAndPort()); connectionLock.putIfAbsent(key, key); Object stub = null; synchronized (connectionLock.get(key)) { stub = stubs.get(key); if (stub == null) { BlockingRpcChannel channel = rpcClient.createBlockingRpcChannel(sn, User.getCurrent(), rpcTimeout); stub = makeStub(channel); isMasterRunning(); stubs.put(key, stub); } } return stub; } finally { zkw.close(); } } /** * Create a stub against the master. Retry if necessary. * @return A stub to do intf against the master * @throws MasterNotRunningException */ @edu.umd.cs.findbugs.annotations.SuppressWarnings (value="SWL_SLEEP_WITH_LOCK_HELD") Object makeStub() throws MasterNotRunningException { // The lock must be at the beginning to prevent multiple master creations // (and leaks) in a multithread context synchronized (masterAndZKLock) { Exception exceptionCaught = null; Object stub = null; int tries = 0; while (!closed && stub == null) { tries++; try { stub = makeStubNoRetries(); } catch (IOException e) { exceptionCaught = e; } catch (KeeperException e) { exceptionCaught = e; } catch (ServiceException e) { exceptionCaught = e; } if (exceptionCaught != null) // It failed. If it's not the last try, we're going to wait a little if (tries < numTries) { // tries at this point is 1 or more; decrement to start from 0. long pauseTime = ConnectionUtils.getPauseTime(pause, tries - 1); LOG.info("getMaster attempt " + tries + " of " + numTries + " failed; retrying after sleep of " + pauseTime + ", exception=" + exceptionCaught); try { Thread.sleep(pauseTime); } catch (InterruptedException e) { Thread.currentThread().interrupt(); throw new RuntimeException( "Thread was interrupted while trying to connect to master.", e); } } else { // Enough tries, we stop now LOG.info("getMaster attempt " + tries + " of " + numTries + " failed; no more retrying.", exceptionCaught); throw new MasterNotRunningException(exceptionCaught); } } if (stub == null) { // implies this.closed true throw new MasterNotRunningException("Connection was closed while trying to get master"); } return stub; } } } /** * Class to make a MasterMonitorService stub. */ class MasterMonitorServiceStubMaker extends StubMaker { private MasterMonitorService.BlockingInterface stub; @Override protected String getServiceName() { return MasterMonitorService.getDescriptor().getName(); } @Override @edu.umd.cs.findbugs.annotations.SuppressWarnings("SWL_SLEEP_WITH_LOCK_HELD") MasterMonitorService.BlockingInterface makeStub() throws MasterNotRunningException { return (MasterMonitorService.BlockingInterface)super.makeStub(); } @Override protected Object makeStub(BlockingRpcChannel channel) { this.stub = MasterMonitorService.newBlockingStub(channel); return this.stub; } @Override protected void isMasterRunning() throws ServiceException { this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest()); } } /** * Class to make a MasterAdminService stub. */ class MasterAdminServiceStubMaker extends StubMaker { private MasterAdminService.BlockingInterface stub; @Override protected String getServiceName() { return MasterAdminService.getDescriptor().getName(); } @Override @edu.umd.cs.findbugs.annotations.SuppressWarnings("SWL_SLEEP_WITH_LOCK_HELD") MasterAdminService.BlockingInterface makeStub() throws MasterNotRunningException { return (MasterAdminService.BlockingInterface)super.makeStub(); } @Override protected Object makeStub(BlockingRpcChannel channel) { this.stub = MasterAdminService.newBlockingStub(channel); return this.stub; } @Override protected void isMasterRunning() throws ServiceException { this.stub.isMasterRunning(null, RequestConverter.buildIsMasterRunningRequest()); } }; @Override public AdminService.BlockingInterface getAdmin(final ServerName serverName) throws IOException { return getAdmin(serverName, false); } @Override // Nothing is done w/ the 'master' parameter. It is ignored. public AdminService.BlockingInterface getAdmin(final ServerName serverName, final boolean master) throws IOException { if (isDeadServer(serverName)) { throw new RegionServerStoppedException(serverName + " is dead."); } String key = getStubKey(AdminService.BlockingInterface.class.getName(), serverName.getHostAndPort()); this.connectionLock.putIfAbsent(key, key); AdminService.BlockingInterface stub = null; synchronized (this.connectionLock.get(key)) { stub = (AdminService.BlockingInterface)this.stubs.get(key); if (stub == null) { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(serverName, User.getCurrent(), this.rpcTimeout); stub = AdminService.newBlockingStub(channel); this.stubs.put(key, stub); } } return stub; } @Override public ClientService.BlockingInterface getClient(final ServerName sn) throws IOException { if (isDeadServer(sn)) { throw new RegionServerStoppedException(sn + " is dead."); } String key = getStubKey(ClientService.BlockingInterface.class.getName(), sn.getHostAndPort()); this.connectionLock.putIfAbsent(key, key); ClientService.BlockingInterface stub = null; synchronized (this.connectionLock.get(key)) { stub = (ClientService.BlockingInterface)this.stubs.get(key); if (stub == null) { BlockingRpcChannel channel = this.rpcClient.createBlockingRpcChannel(sn, User.getCurrent(), this.rpcTimeout); stub = ClientService.newBlockingStub(channel); // In old days, after getting stub/proxy, we'd make a call. We are not doing that here. // Just fail on first actual call rather than in here on setup. this.stubs.put(key, stub); } } return stub; } static String getStubKey(final String serviceName, final String rsHostnamePort) { return serviceName + "@" + rsHostnamePort; } private ZooKeeperKeepAliveConnection keepAliveZookeeper; private int keepAliveZookeeperUserCount; private boolean canCloseZKW = true; // keepAlive time, in ms. No reason to make it configurable. private static final long keepAlive = 5 * 60 * 1000; /** * Retrieve a shared ZooKeeperWatcher. You must close it it once you've have finished with it. * @return The shared instance. Never returns null. */ ZooKeeperKeepAliveConnection getKeepAliveZooKeeperWatcher() throws IOException { synchronized (masterAndZKLock) { if (keepAliveZookeeper == null) { if (this.closed) { throw new IOException(toString() + " closed"); } // We don't check that our link to ZooKeeper is still valid // But there is a retry mechanism in the ZooKeeperWatcher itself keepAliveZookeeper = new ZooKeeperKeepAliveConnection(conf, this.toString(), this); } keepAliveZookeeperUserCount++; keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE; return keepAliveZookeeper; } } void releaseZooKeeperWatcher(final ZooKeeperWatcher zkw) { if (zkw == null){ return; } synchronized (masterAndZKLock) { --keepAliveZookeeperUserCount; if (keepAliveZookeeperUserCount <= 0 ){ keepZooKeeperWatcherAliveUntil = System.currentTimeMillis() + keepAlive; } } } /** * Creates a Chore thread to check the connections to master & zookeeper * and close them when they reach their closing time ( * {@link MasterServiceState#keepAliveUntil} and * {@link #keepZooKeeperWatcherAliveUntil}). Keep alive time is * managed by the release functions and the variable {@link #keepAlive} */ private static class DelayedClosing extends Chore implements Stoppable { private HConnectionImplementation hci; Stoppable stoppable; private DelayedClosing( HConnectionImplementation hci, Stoppable stoppable){ super( "ZooKeeperWatcher and Master delayed closing for connection "+hci, 60*1000, // We check every minutes stoppable); this.hci = hci; this.stoppable = stoppable; } static DelayedClosing createAndStart(HConnectionImplementation hci){ Stoppable stoppable = new Stoppable() { private volatile boolean isStopped = false; @Override public void stop(String why) { isStopped = true;} @Override public boolean isStopped() {return isStopped;} }; return new DelayedClosing(hci, stoppable); } protected void closeMasterProtocol(MasterServiceState protocolState) { if (System.currentTimeMillis() > protocolState.keepAliveUntil) { hci.closeMasterService(protocolState); protocolState.keepAliveUntil = Long.MAX_VALUE; } } @Override protected void chore() { synchronized (hci.masterAndZKLock) { if (hci.canCloseZKW) { if (System.currentTimeMillis() > hci.keepZooKeeperWatcherAliveUntil) { hci.closeZooKeeperWatcher(); hci.keepZooKeeperWatcherAliveUntil = Long.MAX_VALUE; } } closeMasterProtocol(hci.adminMasterServiceState); closeMasterProtocol(hci.monitorMasterServiceState); } } @Override public void stop(String why) { stoppable.stop(why); } @Override public boolean isStopped() { return stoppable.isStopped(); } } private void closeZooKeeperWatcher() { synchronized (masterAndZKLock) { if (keepAliveZookeeper != null) { LOG.info("Closing zookeeper sessionid=0x" + Long.toHexString( keepAliveZookeeper.getRecoverableZooKeeper().getSessionId())); keepAliveZookeeper.internalClose(); keepAliveZookeeper = null; } keepAliveZookeeperUserCount = 0; } } final MasterAdminServiceState adminMasterServiceState = new MasterAdminServiceState(this); final MasterMonitorServiceState monitorMasterServiceState = new MasterMonitorServiceState(this); @Override public MasterAdminService.BlockingInterface getMasterAdmin() throws MasterNotRunningException { return getKeepAliveMasterAdminService(); } @Override public MasterMonitorService.BlockingInterface getMasterMonitor() throws MasterNotRunningException { return getKeepAliveMasterMonitorService(); } private void resetMasterServiceState(final MasterServiceState mss) { mss.userCount++; mss.keepAliveUntil = Long.MAX_VALUE; } @Override public MasterAdminKeepAliveConnection getKeepAliveMasterAdminService() throws MasterNotRunningException { synchronized (masterAndZKLock) { if (!isKeepAliveMasterConnectedAndRunning(this.adminMasterServiceState)) { MasterAdminServiceStubMaker stubMaker = new MasterAdminServiceStubMaker(); this.adminMasterServiceState.stub = stubMaker.makeStub(); } resetMasterServiceState(this.adminMasterServiceState); } // Ugly delegation just so we can add in a Close method. final MasterAdminService.BlockingInterface stub = this.adminMasterServiceState.stub; return new MasterAdminKeepAliveConnection() { MasterAdminServiceState mss = adminMasterServiceState; @Override public AddColumnResponse addColumn(RpcController controller, AddColumnRequest request) throws ServiceException { return stub.addColumn(controller, request); } @Override public DeleteColumnResponse deleteColumn(RpcController controller, DeleteColumnRequest request) throws ServiceException { return stub.deleteColumn(controller, request); } @Override public ModifyColumnResponse modifyColumn(RpcController controller, ModifyColumnRequest request) throws ServiceException { return stub.modifyColumn(controller, request); } @Override public MoveRegionResponse moveRegion(RpcController controller, MoveRegionRequest request) throws ServiceException { return stub.moveRegion(controller, request); } @Override public DispatchMergingRegionsResponse dispatchMergingRegions( RpcController controller, DispatchMergingRegionsRequest request) throws ServiceException { return stub.dispatchMergingRegions(controller, request); } @Override public AssignRegionResponse assignRegion(RpcController controller, AssignRegionRequest request) throws ServiceException { return stub.assignRegion(controller, request); } @Override public UnassignRegionResponse unassignRegion(RpcController controller, UnassignRegionRequest request) throws ServiceException { return stub.unassignRegion(controller, request); } @Override public OfflineRegionResponse offlineRegion(RpcController controller, OfflineRegionRequest request) throws ServiceException { return stub.offlineRegion(controller, request); } @Override public DeleteTableResponse deleteTable(RpcController controller, DeleteTableRequest request) throws ServiceException { return stub.deleteTable(controller, request); } @Override public EnableTableResponse enableTable(RpcController controller, EnableTableRequest request) throws ServiceException { return stub.enableTable(controller, request); } @Override public DisableTableResponse disableTable(RpcController controller, DisableTableRequest request) throws ServiceException { return stub.disableTable(controller, request); } @Override public ModifyTableResponse modifyTable(RpcController controller, ModifyTableRequest request) throws ServiceException { return stub.modifyTable(controller, request); } @Override public CreateTableResponse createTable(RpcController controller, CreateTableRequest request) throws ServiceException { return stub.createTable(controller, request); } @Override public ShutdownResponse shutdown(RpcController controller, ShutdownRequest request) throws ServiceException { return stub.shutdown(controller, request); } @Override public StopMasterResponse stopMaster(RpcController controller, StopMasterRequest request) throws ServiceException { return stub.stopMaster(controller, request); } @Override public BalanceResponse balance(RpcController controller, BalanceRequest request) throws ServiceException { return stub.balance(controller, request); } @Override public SetBalancerRunningResponse setBalancerRunning( RpcController controller, SetBalancerRunningRequest request) throws ServiceException { return stub.setBalancerRunning(controller, request); } @Override public CatalogScanResponse runCatalogScan(RpcController controller, CatalogScanRequest request) throws ServiceException { return stub.runCatalogScan(controller, request); } @Override public EnableCatalogJanitorResponse enableCatalogJanitor( RpcController controller, EnableCatalogJanitorRequest request) throws ServiceException { return stub.enableCatalogJanitor(controller, request); } @Override public IsCatalogJanitorEnabledResponse isCatalogJanitorEnabled( RpcController controller, IsCatalogJanitorEnabledRequest request) throws ServiceException { return stub.isCatalogJanitorEnabled(controller, request); } @Override public CoprocessorServiceResponse execMasterService( RpcController controller, CoprocessorServiceRequest request) throws ServiceException { return stub.execMasterService(controller, request); } @Override public TakeSnapshotResponse snapshot(RpcController controller, TakeSnapshotRequest request) throws ServiceException { return stub.snapshot(controller, request); } @Override public ListSnapshotResponse getCompletedSnapshots( RpcController controller, ListSnapshotRequest request) throws ServiceException { return stub.getCompletedSnapshots(controller, request); } @Override public DeleteSnapshotResponse deleteSnapshot(RpcController controller, DeleteSnapshotRequest request) throws ServiceException { return stub.deleteSnapshot(controller, request); } @Override public IsSnapshotDoneResponse isSnapshotDone(RpcController controller, IsSnapshotDoneRequest request) throws ServiceException { return stub.isSnapshotDone(controller, request); } @Override public RestoreSnapshotResponse restoreSnapshot( RpcController controller, RestoreSnapshotRequest request) throws ServiceException { return stub.restoreSnapshot(controller, request); } @Override public IsRestoreSnapshotDoneResponse isRestoreSnapshotDone( RpcController controller, IsRestoreSnapshotDoneRequest request) throws ServiceException { return stub.isRestoreSnapshotDone(controller, request); } @Override public IsMasterRunningResponse isMasterRunning( RpcController controller, IsMasterRunningRequest request) throws ServiceException { return stub.isMasterRunning(controller, request); } @Override public void close() { release(this.mss); } }; } private static void release(MasterServiceState mss) { if (mss != null && mss.connection != null) { ((HConnectionImplementation)mss.connection).releaseMaster(mss); } } @Override public MasterMonitorKeepAliveConnection getKeepAliveMasterMonitorService() throws MasterNotRunningException { synchronized (masterAndZKLock) { if (!isKeepAliveMasterConnectedAndRunning(this.monitorMasterServiceState)) { MasterMonitorServiceStubMaker stubMaker = new MasterMonitorServiceStubMaker(); this.monitorMasterServiceState.stub = stubMaker.makeStub(); } resetMasterServiceState(this.monitorMasterServiceState); } // Ugly delegation just so can implement close final MasterMonitorService.BlockingInterface stub = this.monitorMasterServiceState.stub; return new MasterMonitorKeepAliveConnection() { final MasterMonitorServiceState mss = monitorMasterServiceState; @Override public GetSchemaAlterStatusResponse getSchemaAlterStatus( RpcController controller, GetSchemaAlterStatusRequest request) throws ServiceException { return stub.getSchemaAlterStatus(controller, request); } @Override public GetTableDescriptorsResponse getTableDescriptors( RpcController controller, GetTableDescriptorsRequest request) throws ServiceException { return stub.getTableDescriptors(controller, request); } @Override public GetClusterStatusResponse getClusterStatus( RpcController controller, GetClusterStatusRequest request) throws ServiceException { return stub.getClusterStatus(controller, request); } @Override public IsMasterRunningResponse isMasterRunning( RpcController controller, IsMasterRunningRequest request) throws ServiceException { return stub.isMasterRunning(controller, request); } @Override public void close() throws IOException { release(this.mss); } }; } private boolean isKeepAliveMasterConnectedAndRunning(MasterServiceState mss) { if (mss.getStub() == null){ return false; } try { return mss.isMasterRunning(); } catch (UndeclaredThrowableException e) { // It's somehow messy, but we can receive exceptions such as // java.net.ConnectException but they're not declared. So we catch it... LOG.info("Master connection is not running anymore", e.getUndeclaredThrowable()); return false; } catch (ServiceException se) { LOG.warn("Checking master connection", se); return false; } } void releaseMaster(MasterServiceState mss) { if (mss.getStub() == null) return; synchronized (masterAndZKLock) { --mss.userCount; if (mss.userCount <= 0) { mss.keepAliveUntil = System.currentTimeMillis() + keepAlive; } } } private void closeMasterService(MasterServiceState mss) { if (mss.getStub() != null) { LOG.info("Closing master protocol: " + mss); mss.clearStub(); } mss.userCount = 0; } /** * Immediate close of the shared master. Can be by the delayed close or when closing the * connection itself. */ private void closeMaster() { synchronized (masterAndZKLock) { closeMasterService(adminMasterServiceState); closeMasterService(monitorMasterServiceState); } } @Override public T getRegionServerWithRetries(ServerCallable callable) throws IOException, RuntimeException { return callable.withRetries(); } @Override public T getRegionServerWithoutRetries(ServerCallable callable) throws IOException, RuntimeException { return callable.withoutRetries(); } @Deprecated private Callable createCallable(final HRegionLocation loc, final MultiAction multi, final byte[] tableName) { // TODO: This does not belong in here!!! St.Ack HConnections should // not be dealing in Callables; Callables have HConnections, not other // way around. final HConnection connection = this; return new Callable() { @Override public MultiResponse call() throws Exception { ServerCallable callable = new MultiServerCallable(connection, tableName, loc, multi); return callable.withoutRetries(); } }; } void updateCachedLocation(HRegionInfo hri, HRegionLocation source, ServerName serverName, long seqNum) { HRegionLocation newHrl = new HRegionLocation(hri, serverName, seqNum); synchronized (this.cachedRegionLocations) { cacheLocation(hri.getTableName(), source, newHrl); } } /** * Deletes the cached location of the region if necessary, based on some error from source. * @param hri The region in question. * @param source The source of the error that prompts us to invalidate cache. */ void deleteCachedLocation(HRegionInfo hri, HRegionLocation source) { boolean isStaleDelete = false; HRegionLocation oldLocation; synchronized (this.cachedRegionLocations) { Map tableLocations = getTableLocations(hri.getTableName()); oldLocation = tableLocations.get(hri.getStartKey()); if (oldLocation != null) { // Do not delete the cache entry if it's not for the same server that gave us the error. isStaleDelete = (source != null) && !oldLocation.equals(source); if (!isStaleDelete) { tableLocations.remove(hri.getStartKey()); } } } } @Override public void deleteCachedRegionLocation(final HRegionLocation location) { if (location == null) { return; } synchronized (this.cachedRegionLocations) { byte[] tableName = location.getRegionInfo().getTableName(); Map tableLocations = getTableLocations(tableName); if (!tableLocations.isEmpty()) { // Delete if there's something in the cache for this region. HRegionLocation removedLocation = tableLocations.remove(location.getRegionInfo().getStartKey()); if (LOG.isDebugEnabled() && removedLocation != null) { LOG.debug("Removed " + location.getRegionInfo().getRegionNameAsString() + " for tableName=" + Bytes.toString(tableName) + " from cache"); } } } } /** * Update the location with the new value (if the exception is a RegionMovedException) * or delete it from the cache. * @param exception an object (to simplify user code) on which we will try to find a nested * or wrapped or both RegionMovedException * @param source server that is the source of the location update. */ private void updateCachedLocations(final byte[] tableName, Row row, final Object exception, final HRegionLocation source) { if (row == null || tableName == null) { LOG.warn("Coding error, see method javadoc. row=" + (row == null ? "null" : row) + ", tableName=" + (tableName == null ? "null" : Bytes.toString(tableName))); return; } // Is it something we have already updated? final HRegionLocation oldLocation = getCachedLocation(tableName, row.getRow()); if (oldLocation == null) { // There is no such location in the cache => it's been removed already => nothing to do return; } HRegionInfo regionInfo = oldLocation.getRegionInfo(); final RegionMovedException rme = RegionMovedException.find(exception); if (rme != null) { if (LOG.isTraceEnabled()){ LOG.trace("Region " + regionInfo.getRegionNameAsString() + " moved to " + rme.getHostname() + ":" + rme.getPort() + " according to " + source.getHostnamePort()); } updateCachedLocation( regionInfo, source, rme.getServerName(), rme.getLocationSeqNum()); } else if (RegionOpeningException.find(exception) != null) { if (LOG.isTraceEnabled()) { LOG.trace("Region " + regionInfo.getRegionNameAsString() + " is being opened on " + source.getHostnamePort() + "; not deleting the cache entry"); } } else { deleteCachedLocation(regionInfo, source); } } @Override @Deprecated public void processBatch(List list, final byte[] tableName, ExecutorService pool, Object[] results) throws IOException, InterruptedException { // This belongs in HTable!!! Not in here. St.Ack // results must be the same size as list if (results.length != list.size()) { throw new IllegalArgumentException( "argument results must be the same size as argument list"); } processBatchCallback(list, tableName, pool, results, null); } /** * Send the queries in parallel on the different region servers. Retries on failures. * If the method returns it means that there is no error, and the 'results' array will * contain no exception. On error, an exception is thrown, and the 'results' array will * contain results and exceptions. * @deprecated since 0.96 - Use {@link HTable#processBatchCallback} instead */ @Override @Deprecated public void processBatchCallback( List list, byte[] tableName, ExecutorService pool, Object[] results, Batch.Callback callback) throws IOException, InterruptedException { Process p = new Process(this, list, tableName, pool, results, callback); p.processBatchCallback(); } /** * Methods and attributes to manage a batch process are grouped into this single class. * This allows, by creating a Process per batch process to ensure multithread safety. * * This code should be move to HTable once processBatchCallback is not supported anymore in * the HConnection interface. */ private static class Process { // Info on the queries and their context private final HConnectionImplementation hci; private final List rows; private final byte[] tableName; private final ExecutorService pool; private final Object[] results; private final Batch.Callback callback; // Used during the batch process private final List> toReplay; private final LinkedList, HRegionLocation, Future>> inProgress; private ServerErrorTracker errorsByServer = null; private int curNumRetries; // Notified when a tasks is done private final List> finishedTasks = new ArrayList>(); private Process(HConnectionImplementation hci, List list, byte[] tableName, ExecutorService pool, Object[] results, Batch.Callback callback){ this.hci = hci; this.rows = list; this.tableName = tableName; this.pool = pool; this.results = results; this.callback = callback; this.toReplay = new ArrayList>(); this.inProgress = new LinkedList, HRegionLocation, Future>>(); this.curNumRetries = 0; } /** * Group a list of actions per region servers, and send them. The created MultiActions are * added to the inProgress list. * @param actionsList * @param isRetry Whether we are retrying these actions. If yes, backoff * time may be applied before new requests. * @throws IOException - if we can't locate a region after multiple retries. */ private void submit(List> actionsList, final boolean isRetry) throws IOException { // group per location => regions server final Map> actionsByServer = new HashMap>(); for (Action aAction : actionsList) { final Row row = aAction.getAction(); if (row != null) { final HRegionLocation loc = hci.locateRegion(this.tableName, row.getRow()); if (loc == null) { throw new IOException("No location found, aborting submit."); } final byte[] regionName = loc.getRegionInfo().getRegionName(); MultiAction actions = actionsByServer.get(loc); if (actions == null) { actions = new MultiAction(); actionsByServer.put(loc, actions); } actions.add(regionName, aAction); } } // Send the queries and add them to the inProgress list for (Entry> e : actionsByServer.entrySet()) { long backoffTime = 0; if (isRetry) { if (hci.isUsingServerTrackerForRetries()) { assert this.errorsByServer != null; backoffTime = this.errorsByServer.calculateBackoffTime(e.getKey(), hci.pause); } else { // curNumRetries starts with one, subtract to start from 0. backoffTime = ConnectionUtils.getPauseTime(hci.pause, curNumRetries - 1); } } Callable callable = createDelayedCallable(backoffTime, e.getKey(), e.getValue()); if (LOG.isTraceEnabled() && isRetry) { StringBuilder sb = new StringBuilder(); for (Action action : e.getValue().allActions()) { if (sb.length() > 0) sb.append(' '); sb.append(Bytes.toStringBinary(action.getAction().getRow())); } LOG.trace("Attempt #" + this.curNumRetries + " against " + e.getKey().getHostnamePort() + " after=" + backoffTime + "ms, row(s)=" + sb.toString()); } Triple, HRegionLocation, Future> p = new Triple, HRegionLocation, Future>( e.getValue(), e.getKey(), this.pool.submit(callable)); this.inProgress.addLast(p); } } /** * Resubmit the actions which have failed, after a sleep time. * @throws IOException */ private void doRetry() throws IOException{ submit(this.toReplay, true); this.toReplay.clear(); } /** * Parameterized batch processing, allowing varying return types for * different {@link Row} implementations. * Throws an exception on error. If there are no exceptions, it means that the 'results' * array is clean. */ private void processBatchCallback() throws IOException, InterruptedException { if (this.results.length != this.rows.size()) { throw new IllegalArgumentException( "argument results (size="+results.length+") must be the same size as " + "argument list (size="+this.rows.size()+")"); } if (this.rows.isEmpty()) { return; } boolean isTraceEnabled = LOG.isTraceEnabled(); BatchErrors errors = new BatchErrors(); BatchErrors retriedErrors = null; if (isTraceEnabled) { retriedErrors = new BatchErrors(); } // We keep the number of retry per action. int[] nbRetries = new int[this.results.length]; // Build the action list. This list won't change after being created, hence the // indexes will remain constant, allowing a direct lookup. final List> listActions = new ArrayList>(this.rows.size()); for (int i = 0; i < this.rows.size(); i++) { Action action = new Action(this.rows.get(i), i); listActions.add(action); } // execute the actions. We will analyze and resubmit the actions in a 'while' loop. submit(listActions, false); // LastRetry is true if, either: // we had an exception 'DoNotRetry' // we had more than numRetries for any action // In this case, we will finish the current retries but we won't start new ones. boolean lastRetry = false; // If hci.numTries is 1 or 0, we do not retry. boolean noRetry = (hci.numTries < 2); // Analyze and resubmit until all actions are done successfully or failed after numTries while (!this.inProgress.isEmpty()) { // We need the original multi action to find out what actions to replay if // we have a 'total' failure of the Future // We need the HRegionLocation as we give it back if we go out of retries Triple, HRegionLocation, Future> currentTask = removeFirstDone(); // Get the answer, keep the exception if any as we will use it for the analysis MultiResponse responses = null; ExecutionException exception = null; try { responses = currentTask.getThird().get(); } catch (ExecutionException e) { exception = e; } HRegionLocation location = currentTask.getSecond(); // Error case: no result at all for this multi action. We need to redo all actions if (responses == null) { for (List> actions : currentTask.getFirst().actions.values()) { for (Action action : actions) { Row row = action.getAction(); // Do not use the exception for updating cache because it might be coming from // any of the regions in the MultiAction. hci.updateCachedLocations(tableName, row, null, location); if (noRetry) { errors.add(exception, row, location); } else { if (isTraceEnabled) { retriedErrors.add(exception, row, location); } lastRetry = addToReplay(nbRetries, action, location); } } } } else { // Success or partial success // Analyze detailed results. We can still have individual failures to be redo. // two specific exceptions are managed: // - DoNotRetryIOException: we continue to retry for other actions // - RegionMovedException: we update the cache with the new region location for (Entry>> resultsForRS : responses.getResults().entrySet()) { for (Pair regionResult : resultsForRS.getValue()) { Action correspondingAction = listActions.get(regionResult.getFirst()); Object result = regionResult.getSecond(); this.results[correspondingAction.getOriginalIndex()] = result; // Failure: retry if it's make sense else update the errors lists if (result == null || result instanceof Throwable) { Row row = correspondingAction.getAction(); hci.updateCachedLocations(this.tableName, row, result, location); if (result instanceof DoNotRetryIOException || noRetry) { errors.add((Exception)result, row, location); } else { if (isTraceEnabled) { retriedErrors.add((Exception)result, row, location); } lastRetry = addToReplay(nbRetries, correspondingAction, location); } } else // success if (callback != null) { this.callback.update(resultsForRS.getKey(), this.rows.get(regionResult.getFirst()).getRow(), (R) result); } } } } // Retry all actions in toReplay then clear it. if (!noRetry && !toReplay.isEmpty()) { if (isTraceEnabled) { LOG.trace("Retrying #" + this.curNumRetries + (lastRetry ? " (one last time)": "") + " because " + retriedErrors.getDescriptionAndClear()); } doRetry(); if (lastRetry) { noRetry = true; } } } errors.rethrowIfAny(); } private class BatchErrors { private List exceptions = new ArrayList(); private List actions = new ArrayList(); private List addresses = new ArrayList(); public void add(Exception ex, Row row, HRegionLocation location) { exceptions.add(ex); actions.add(row); addresses.add(location.getHostnamePort()); } public void rethrowIfAny() throws RetriesExhaustedWithDetailsException { if (!exceptions.isEmpty()) { throw makeException(); } } public String getDescriptionAndClear(){ if (exceptions.isEmpty()) { return ""; } String result = makeException().getExhaustiveDescription(); exceptions.clear(); actions.clear(); addresses.clear(); return result; } private RetriesExhaustedWithDetailsException makeException() { return new RetriesExhaustedWithDetailsException(exceptions, actions, addresses); } } /** * Put the action that has to be retried in the Replay list. * @return true if we're out of numRetries and it's the last retry. */ private boolean addToReplay(int[] nbRetries, Action action, HRegionLocation source) { this.toReplay.add(action); nbRetries[action.getOriginalIndex()]++; if (nbRetries[action.getOriginalIndex()] > this.curNumRetries) { this.curNumRetries = nbRetries[action.getOriginalIndex()]; } if (hci.isUsingServerTrackerForRetries()) { if (this.errorsByServer == null) { this.errorsByServer = hci.createServerErrorTracker(); } this.errorsByServer.reportServerError(source); return !this.errorsByServer.canRetryMore(); } else { // We need to add 1 to make tries and retries comparable. And as we look for // the last try we compare with '>=' and not '>'. And we need curNumRetries // to means what it says as we don't want to initialize it to 1. return ((this.curNumRetries + 1) >= hci.numTries); } } /** * Wait for one of tasks to be done, and remove it from the list. * @return the tasks done. */ private Triple, HRegionLocation, Future> removeFirstDone() throws InterruptedException { while (true) { synchronized (finishedTasks) { if (!finishedTasks.isEmpty()) { MultiAction done = finishedTasks.remove(finishedTasks.size() - 1); // We now need to remove it from the inProgress part. Iterator, HRegionLocation, Future>> it = inProgress.iterator(); while (it.hasNext()) { Triple, HRegionLocation, Future> task = it.next(); if (task.getFirst() == done) { // We have the exact object. No java equals here. it.remove(); return task; } } LOG.error("Development error: We didn't see a task in the list. " + done.getRegions()); } finishedTasks.wait(10); } } } private Callable createDelayedCallable( final long delay, final HRegionLocation loc, final MultiAction multi) { final Callable delegate = hci.createCallable(loc, multi, tableName); return new Callable() { private final long creationTime = System.currentTimeMillis(); @Override public MultiResponse call() throws Exception { try { final long waitingTime = delay + creationTime - System.currentTimeMillis(); if (waitingTime > 0) { Thread.sleep(waitingTime); } return delegate.call(); } finally { synchronized (finishedTasks) { finishedTasks.add(multi); finishedTasks.notifyAll(); } } } }; } } /* * Return the number of cached region for a table. It will only be called * from a unit test. */ int getNumberOfCachedRegionLocations(final byte[] tableName) { Integer key = Bytes.mapKey(tableName); synchronized (this.cachedRegionLocations) { Map tableLocs = this.cachedRegionLocations.get(key); if (tableLocs == null) { return 0; } return tableLocs.values().size(); } } /** * Check the region cache to see whether a region is cached yet or not. * Called by unit tests. * @param tableName tableName * @param row row * @return Region cached or not. */ boolean isRegionCached(final byte[] tableName, final byte[] row) { HRegionLocation location = getCachedLocation(tableName, row); return location != null; } @Override public void setRegionCachePrefetch(final byte[] tableName, final boolean enable) { if (!enable) { regionCachePrefetchDisabledTables.add(Bytes.mapKey(tableName)); } else { regionCachePrefetchDisabledTables.remove(Bytes.mapKey(tableName)); } } @Override public boolean getRegionCachePrefetch(final byte[] tableName) { return !regionCachePrefetchDisabledTables.contains(Bytes.mapKey(tableName)); } @Override public void abort(final String msg, Throwable t) { if (t instanceof KeeperException.SessionExpiredException && keepAliveZookeeper != null) { synchronized (masterAndZKLock) { if (keepAliveZookeeper != null) { LOG.warn("This client just lost it's session with ZooKeeper," + " closing it." + " It will be recreated next time someone needs it", t); closeZooKeeperWatcher(); } } } else { if (t != null) { LOG.fatal(msg, t); } else { LOG.fatal(msg); } this.aborted = true; close(); this.closed = true; } } @Override public boolean isClosed() { return this.closed; } @Override public boolean isAborted(){ return this.aborted; } @Override public int getCurrentNrHRS() throws IOException { return this.registry.getCurrentNrHRS(); } /** * Increment this client's reference count. */ void incCount() { ++refCount; } /** * Decrement this client's reference count. */ void decCount() { if (refCount > 0) { --refCount; } } /** * Return if this client has no reference * * @return true if this client has no reference; false otherwise */ boolean isZeroReference() { return refCount == 0; } void internalClose() { if (this.closed) { return; } delayedClosing.stop("Closing connection"); closeMaster(); this.closed = true; closeZooKeeperWatcher(); this.stubs.clear(); if (clusterStatusListener != null) { clusterStatusListener.close(); } } @Override public void close() { if (managed) { if (aborted) { HConnectionManager.deleteStaleConnection(this); } else { HConnectionManager.deleteConnection(this, false); } } else { internalClose(); } } /** * Close the connection for good, regardless of what the current value of * {@link #refCount} is. Ideally, {@link #refCount} should be zero at this * point, which would be the case if all of its consumers close the * connection. However, on the off chance that someone is unable to close * the connection, perhaps because it bailed out prematurely, the method * below will ensure that this {@link HConnection} instance is cleaned up. * Caveat: The JVM may take an unknown amount of time to call finalize on an * unreachable object, so our hope is that every consumer cleans up after * itself, like any good citizen. */ @Override protected void finalize() throws Throwable { super.finalize(); // Pretend as if we are about to release the last remaining reference refCount = 1; close(); } @Override public HTableDescriptor[] listTables() throws IOException { MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService(); try { GetTableDescriptorsRequest req = RequestConverter.buildGetTableDescriptorsRequest(null); return ProtobufUtil.getHTableDescriptorArray(master.getTableDescriptors(null, req)); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } finally { master.close(); } } @Override public HTableDescriptor[] getHTableDescriptors(List tableNames) throws IOException { if (tableNames == null || tableNames.isEmpty()) return new HTableDescriptor[0]; MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService(); try { GetTableDescriptorsRequest req = RequestConverter.buildGetTableDescriptorsRequest(tableNames); return ProtobufUtil.getHTableDescriptorArray(master.getTableDescriptors(null, req)); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } finally { master.close(); } } /** * Connects to the master to get the table descriptor. * @param tableName table name * @return * @throws IOException if the connection to master fails or if the table * is not found. */ @Override public HTableDescriptor getHTableDescriptor(final byte[] tableName) throws IOException { if (tableName == null || tableName.length == 0) return null; if (Bytes.equals(tableName, HConstants.META_TABLE_NAME)) { return HTableDescriptor.META_TABLEDESC; } MasterMonitorKeepAliveConnection master = getKeepAliveMasterMonitorService(); GetTableDescriptorsResponse htds; try { GetTableDescriptorsRequest req = RequestConverter.buildGetTableDescriptorsRequest(null); htds = master.getTableDescriptors(null, req); } catch (ServiceException se) { throw ProtobufUtil.getRemoteException(se); } finally { master.close(); } for (TableSchema ts : htds.getTableSchemaList()) { if (Bytes.equals(tableName, ts.getName().toByteArray())) { return HTableDescriptor.convert(ts); } } throw new TableNotFoundException(Bytes.toString(tableName)); } /** * The record of errors for servers. Visible for testing. */ @VisibleForTesting static class ServerErrorTracker { private final Map errorsByServer = new HashMap(); private long canRetryUntil = 0; public ServerErrorTracker(long timeout) { LOG.trace("Server tracker timeout is " + timeout + "ms"); this.canRetryUntil = EnvironmentEdgeManager.currentTimeMillis() + timeout; } boolean canRetryMore() { return EnvironmentEdgeManager.currentTimeMillis() < this.canRetryUntil; } /** * Calculates the back-off time for a retrying request to a particular server. * This is here, and package private, for testing (no good way to get at it). * @param server The server in question. * @param basePause The default hci pause. * @return The time to wait before sending next request. */ long calculateBackoffTime(HRegionLocation server, long basePause) { long result = 0; ServerErrors errorStats = errorsByServer.get(server); if (errorStats != null) { result = ConnectionUtils.getPauseTime(basePause, errorStats.retries); // Adjust by the time we already waited since last talking to this server. long now = EnvironmentEdgeManager.currentTimeMillis(); long timeSinceLastError = now - errorStats.getLastErrorTime(); if (timeSinceLastError > 0) { result = Math.max(0, result - timeSinceLastError); } // Finally, see if the backoff time overshoots the timeout. if (result > 0 && (now + result > this.canRetryUntil)) { result = Math.max(0, this.canRetryUntil - now); } } return result; } /** * Reports that there was an error on the server to do whatever bean-counting necessary. * This is here, and package private, for testing (no good way to get at it). * @param server The server in question. */ void reportServerError(HRegionLocation server) { ServerErrors errors = errorsByServer.get(server); if (errors != null) { errors.addError(); } else { errorsByServer.put(server, new ServerErrors()); } } /** * The record of errors for a server. */ private static class ServerErrors { public long lastErrorTime; public int retries; public ServerErrors() { this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis(); this.retries = 0; } public void addError() { this.lastErrorTime = EnvironmentEdgeManager.currentTimeMillis(); ++this.retries; } public long getLastErrorTime() { return this.lastErrorTime; } } } public boolean isUsingServerTrackerForRetries() { return this.useServerTrackerForRetries; } /** * Creates the server error tracker to use inside process. * Currently, to preserve the main assumption about current retries, and to work well with * the retry-limit-based calculation, the calculation is local per Process object. * We may benefit from connection-wide tracking of server errors. * @return ServerErrorTracker to use. */ ServerErrorTracker createServerErrorTracker() { return new ServerErrorTracker(this.serverTrackerTimeout); } } /** * Set the number of retries to use serverside when trying to communicate * with another server over {@link HConnection}. Used updating catalog * tables, etc. Call this method before we create any Connections. * @param c The Configuration instance to set the retries into. * @param log Used to log what we set in here. */ public static void setServerSideHConnectionRetries(final Configuration c, final String sn, final Log log) { int hcRetries = c.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); // Go big. Multiply by 10. If we can't get to meta after this many retries // then something seriously wrong. int serversideMultiplier = c.getInt("hbase.client.serverside.retries.multiplier", 10); int retries = hcRetries * serversideMultiplier; c.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, retries); log.debug(sn + " HConnection server-to-server retries=" + retries); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy