/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication.regionserver;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationListener;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.replication.ReplicationTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;
/**
* This class is responsible for managing all the replication
* sources. There are two classes of sources:
*
* - Normal sources are persistent and one per peer cluster
* - Old sources are recovered from a failed region server and our
* only goal is to finish replicating the WAL queue it had up in ZK
*
*
* When a region server dies, this class uses a watcher to get notified and it
* tries to grab a lock in order to transfer all the queues to a local
* old source.
*
* This class implements the ReplicationListener interface so that it can track changes in
* replication state.
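*
* A minimal lifecycle sketch (the collaborator variables below are illustrative only and
* assumed to be built elsewhere):
* <pre>
* ReplicationSourceManager manager = new ReplicationSourceManager(queues, peers, tracker,
*     conf, server, fs, logDir, oldLogDir, clusterId, walFileLengthProvider);
* manager.init();  // one source per connected peer, plus adoption of abandoned queues
* ...
* manager.join();  // terminate all sources when the region server closes
* </pre>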
*/
@InterfaceAudience.Private
public class ReplicationSourceManager implements ReplicationListener {
private static final Log LOG =
LogFactory.getLog(ReplicationSourceManager.class);
// List of all the sources that read this RS's logs
private final List<ReplicationSourceInterface> sources;
// List of all the sources we got from dead RSs
private final List<ReplicationSourceInterface> oldsources;
private final ReplicationQueues replicationQueues;
private final ReplicationTracker replicationTracker;
private final ReplicationPeers replicationPeers;
// UUID for this cluster
private final UUID clusterId;
// All about stopping
private final Server server;
// All logs we are currently tracking
// Index structure of the map is: peer_id->logPrefix/logGroup->logs
private final Map<String, Map<String, SortedSet<String>>> walsById;
// Logs for recovered sources we are currently tracking
private final Map<String, Map<String, SortedSet<String>>> walsByIdRecoveredQueues;
private final Configuration conf;
private final FileSystem fs;
// The paths to the latest log of each wal group, for new coming peers
private Set<Path> latestPaths;
// Path to the wals directories
private final Path logDir;
// Path to the wal archive
private final Path oldLogDir;
private final WALFileLengthProvider walFileLengthProvider;
// The number of ms that we wait before moving znodes, HBASE-3596
private final long sleepBeforeFailover;
// Homemade executor service for replication
private final ThreadPoolExecutor executor;
private final boolean replicationForBulkLoadDataEnabled;
private Connection connection;
private long replicationWaitTime;
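// Shared accounting of the buffer space currently used by the replication sources on this RS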
private AtomicLong totalBufferUsed = new AtomicLong();
/**
* Creates a replication manager and sets the watch on all the other registered region servers
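* <p>
* Behaviour is tunable through the supplied configuration; for example (keys are the ones
* read in the constructor body, values illustrative):
* <pre>
* conf.setLong("replication.sleep.before.failover", 30000); // ms to wait before claiming queues
* conf.setInt("replication.executor.workers", 1);           // failover/adoption worker threads
* </pre>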
* @param replicationQueues the interface for manipulating replication queues
* @param replicationPeers the interface for manipulating replication peers
* @param replicationTracker the tracker that notifies us of replication state changes
* @param conf the configuration to use
* @param server the server for this region server
* @param fs the file system to use
* @param logDir the directory that contains all wal directories of live RSs
* @param oldLogDir the directory where old logs are archived
* @param clusterId the UUID of the local cluster
*/
public ReplicationSourceManager(ReplicationQueues replicationQueues,
ReplicationPeers replicationPeers, ReplicationTracker replicationTracker, Configuration conf,
Server server, FileSystem fs, Path logDir, Path oldLogDir, UUID clusterId,
WALFileLengthProvider walFileLengthProvider) throws IOException {
//CopyOnWriteArrayList is thread-safe.
//Generally, reads greatly outnumber writes here.
this.sources = new CopyOnWriteArrayList<>();
this.replicationQueues = replicationQueues;
this.replicationPeers = replicationPeers;
this.replicationTracker = replicationTracker;
this.server = server;
this.walsById = new HashMap<>();
this.walsByIdRecoveredQueues = new ConcurrentHashMap<>();
this.oldsources = new CopyOnWriteArrayList<>();
this.conf = conf;
this.fs = fs;
this.logDir = logDir;
this.oldLogDir = oldLogDir;
this.sleepBeforeFailover =
conf.getLong("replication.sleep.before.failover", 30000); // 30 seconds
this.clusterId = clusterId;
this.walFileLengthProvider = walFileLengthProvider;
this.replicationTracker.registerListener(this);
this.replicationPeers.getAllPeerIds();
// It's preferable to failover 1 RS at a time, but with good zk servers
// more could be processed at the same time.
int nbWorkers = conf.getInt("replication.executor.workers", 1);
// use a short 100ms sleep since this could be done inline with a RS startup
// even if we fail, other region servers can take care of it
this.executor = new ThreadPoolExecutor(nbWorkers, nbWorkers,
100, TimeUnit.MILLISECONDS, new LinkedBlockingQueue<>());
ThreadFactoryBuilder tfb = new ThreadFactoryBuilder();
tfb.setNameFormat("ReplicationExecutor-%d");
tfb.setDaemon(true);
this.executor.setThreadFactory(tfb.build());
this.latestPaths = new HashSet<>();
replicationForBulkLoadDataEnabled =
conf.getBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY,
HConstants.REPLICATION_BULKLOAD_ENABLE_DEFAULT);
this.replicationWaitTime = conf.getLong(HConstants.REPLICATION_SERIALLY_WAITING_KEY,
HConstants.REPLICATION_SERIALLY_WAITING_DEFAULT);
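// This connection is used to read serial replication barriers and positions from hbase:meta
// (see waitUntilCanBePushed)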
connection = ConnectionFactory.createConnection(conf);
}
/**
* Provide the id of the peer and a log key and this method will figure out which
* wal it belongs to and will log, for this region server, the current
* position. It will also clean old logs from the queue.
* @param log Path to the log currently being replicated
* @param id id of the peer cluster
* @param position current location in the log
* @param queueRecovered indicates if this queue comes from another region server
* @param holdLogInZK if true then the log is retained in ZK
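*
* A call sketch (argument values are illustrative): after shipping edits from a wal up to
* offset 4096 for peer "2" on a non-recovered queue, without retaining the log in ZK:
* <pre>
* manager.logPositionAndCleanOldLogs(walPath, "2", 4096L, false, false);
* </pre>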
*/
public void logPositionAndCleanOldLogs(Path log, String id, long position,
boolean queueRecovered, boolean holdLogInZK) {
String fileName = log.getName();
this.replicationQueues.setLogPosition(id, fileName, position);
if (holdLogInZK) {
return;
}
cleanOldLogs(fileName, id, queueRecovered);
}
/**
* Cleans a log file and all older files from ZK. Called when we are sure that a
* log file is closed and has no more entries.
* @param key Path to the log
* @param id id of the peer cluster
* @param queueRecovered Whether this is a recovered queue
*/
public void cleanOldLogs(String key, String id, boolean queueRecovered) {
String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(key);
if (queueRecovered) {
SortedSet<String> wals = walsByIdRecoveredQueues.get(id).get(logPrefix);
if (wals != null && !wals.first().equals(key)) {
cleanOldLogs(wals, key, id);
}
} else {
synchronized (this.walsById) {
SortedSet<String> wals = walsById.get(id).get(logPrefix);
if (wals != null && !wals.first().equals(key)) {
cleanOldLogs(wals, key, id);
}
}
}
}
private void cleanOldLogs(SortedSet<String> wals, String key, String id) {
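// headSet(key) is a live view of the entries strictly older than key, so clearing it below
// also removes those wals from the set held in walsById/walsByIdRecoveredQueues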
SortedSet<String> walSet = wals.headSet(key);
LOG.debug("Removing " + walSet.size() + " logs in the list: " + walSet);
for (String wal : walSet) {
this.replicationQueues.removeLog(id, wal);
}
walSet.clear();
}
/**
* Adds a normal source per registered peer cluster and tries to process all
* old region server wal queues
*/
void init() throws IOException, ReplicationException {
for (String id : this.replicationPeers.getConnectedPeerIds()) {
addSource(id);
if (replicationForBulkLoadDataEnabled) {
// Check if peer exists in hfile-refs queue, if not add it. This can happen in the case
// when a peer was added before replication for bulk loaded data was enabled.
this.replicationQueues.addPeerToHFileRefs(id);
}
}
AdoptAbandonedQueuesWorker adoptionWorker = new AdoptAbandonedQueuesWorker();
try {
this.executor.execute(adoptionWorker);
} catch (RejectedExecutionException ex) {
LOG.info("Cancelling the adoption of abandoned queues because of " + ex.getMessage());
}
}
/**
* Add a replication source for the given peer cluster on this region server. For the newly
* added peer, we only need to enqueue the latest log of each wal group and do replication
* @param id the id of the peer cluster
* @return the source that was created
* @throws IOException
*/
@VisibleForTesting
ReplicationSourceInterface addSource(String id) throws IOException, ReplicationException {
ReplicationPeerConfig peerConfig = replicationPeers.getReplicationPeerConfig(id);
ReplicationPeer peer = replicationPeers.getConnectedPeer(id);
ReplicationSourceInterface src = getReplicationSource(this.conf, this.fs, this,
this.replicationQueues, this.replicationPeers, server, id, this.clusterId, peerConfig, peer,
walFileLengthProvider);
synchronized (this.walsById) {
this.sources.add(src);
Map<String, SortedSet<String>> walsByGroup = new HashMap<>();
this.walsById.put(id, walsByGroup);
// Add the latest wal to that source's queue
synchronized (latestPaths) {
if (this.latestPaths.size() > 0) {
for (Path logPath : latestPaths) {
String name = logPath.getName();
String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(name);
SortedSet<String> logs = new TreeSet<>();
logs.add(name);
walsByGroup.put(walPrefix, logs);
try {
this.replicationQueues.addLog(id, name);
} catch (ReplicationException e) {
String message =
"Cannot add log to queue when creating a new source, queueId=" + id
+ ", filename=" + name;
server.stop(message);
throw e;
}
src.enqueueLog(logPath);
}
}
}
}
src.startup();
return src;
}
/**
* Delete a complete queue of wals associated with a peer cluster
* @param peerId Id of the peer cluster queue of wals to delete
* @param closeConnection whether to also disconnect from the peer cluster
*/
public void deleteSource(String peerId, boolean closeConnection) {
this.replicationQueues.removeQueue(peerId);
if (closeConnection) {
this.replicationPeers.peerDisconnected(peerId);
}
}
/**
* Terminate the replication on this region server
*/
public void join() {
this.executor.shutdown();
for (ReplicationSourceInterface source : this.sources) {
source.terminate("Region server is closing");
}
}
/**
* Get a copy of the wals of the normal sources on this rs
* @return a map of peer id to wal group to the sorted set of wal names
*/
@VisibleForTesting
Map<String, Map<String, SortedSet<String>>> getWALs() {
return Collections.unmodifiableMap(walsById);
}
/**
* Get a copy of the wals of the recovered sources on this rs
* @return a map of queue id to wal group to the sorted set of wal names
*/
@VisibleForTesting
Map<String, Map<String, SortedSet<String>>> getWalsByIdRecoveredQueues() {
return Collections.unmodifiableMap(walsByIdRecoveredQueues);
}
/**
* Get a list of all the normal sources of this rs
* @return list of all normal sources
*/
public List<ReplicationSourceInterface> getSources() {
return this.sources;
}
/**
* Get a list of all the old sources of this rs
* @return list of all old sources
*/
public List<ReplicationSourceInterface> getOldSources() {
return this.oldsources;
}
/**
* Get the normal source for a given peer
* @param peerId the id of the peer cluster
* @return the normal source for the given peer if it exists, otherwise null
*/
public ReplicationSourceInterface getSource(String peerId) {
return getSources().stream().filter(s -> s.getPeerId().equals(peerId)).findFirst().orElse(null);
}
@VisibleForTesting
List<String> getAllQueues() {
return replicationQueues.getAllQueues();
}
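/**
* Called before a wal is rolled: records the new log in every connected peer's replication
* queue (see recordLog) and remembers it as the latest path of its wal group, so that newly
* added peers can start replicating from it.
*/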
void preLogRoll(Path newLog) throws IOException {
recordLog(newLog);
String logName = newLog.getName();
String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(logName);
synchronized (latestPaths) {
Iterator<Path> iterator = latestPaths.iterator();
while (iterator.hasNext()) {
Path path = iterator.next();
if (path.getName().contains(logPrefix)) {
iterator.remove();
break;
}
}
this.latestPaths.add(newLog);
}
}
/**
* Record the given log in the replication queue of every connected peer and in the in-memory
* walsById map. If the log starts a wal group that is not yet tracked, begin tracking it
* @param logPath the log path to check and enqueue
* @throws IOException
*/
private void recordLog(Path logPath) throws IOException {
String logName = logPath.getName();
String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(logName);
// update replication queues on ZK
// synchronize on replicationPeers to avoid adding source for the to-be-removed peer
synchronized (replicationPeers) {
for (String id : replicationPeers.getConnectedPeerIds()) {
try {
this.replicationQueues.addLog(id, logName);
} catch (ReplicationException e) {
throw new IOException("Cannot add log to replication queue"
+ " when creating a new source, queueId=" + id + ", filename=" + logName, e);
}
}
}
// update walsById map
synchronized (walsById) {
for (Map.Entry<String, Map<String, SortedSet<String>>> entry : this.walsById.entrySet()) {
String peerId = entry.getKey();
Map<String, SortedSet<String>> walsByPrefix = entry.getValue();
boolean existingPrefix = false;
for (Map.Entry<String, SortedSet<String>> walsEntry : walsByPrefix.entrySet()) {
SortedSet<String> wals = walsEntry.getValue();
if (this.sources.isEmpty()) {
// If there are no sources, we don't need to keep the old wals since
// we only consider the last one when a new peer comes in
wals.clear();
}
if (logPrefix.equals(walsEntry.getKey())) {
wals.add(logName);
existingPrefix = true;
}
}
if (!existingPrefix) {
// The new log belongs to a new group, add it into this peer
LOG.debug("Start tracking logs for wal group " + logPrefix + " for peer " + peerId);
SortedSet<String> wals = new TreeSet<>();
wals.add(logName);
walsByPrefix.put(logPrefix, wals);
}
}
}
}
void postLogRoll(Path newLog) throws IOException {
// This only updates the sources we own, not the recovered ones
for (ReplicationSourceInterface source : this.sources) {
source.enqueueLog(newLog);
}
}
@VisibleForTesting
public AtomicLong getTotalBufferUsed() {
return totalBufferUsed;
}
/**
* Factory method to create a replication source
* @param conf the configuration to use
* @param fs the file system to use
* @param manager the manager to use
* @param server the server object for this region server
* @param peerId the id of the peer cluster
* @return the created source
* @throws IOException
*/
private ReplicationSourceInterface getReplicationSource(Configuration conf, FileSystem fs,
ReplicationSourceManager manager, ReplicationQueues replicationQueues,
ReplicationPeers replicationPeers, Server server, String peerId, UUID clusterId,
ReplicationPeerConfig peerConfig, ReplicationPeer replicationPeer,
WALFileLengthProvider walFileLengthProvider) throws IOException {
RegionServerCoprocessorHost rsServerHost = null;
TableDescriptors tableDescriptors = null;
if (server instanceof HRegionServer) {
rsServerHost = ((HRegionServer) server).getRegionServerCoprocessorHost();
tableDescriptors = ((HRegionServer) server).getTableDescriptors();
}
ReplicationSourceInterface src = ReplicationSourceFactory.create(conf, peerId);
ReplicationEndpoint replicationEndpoint = null;
try {
String replicationEndpointImpl = peerConfig.getReplicationEndpointImpl();
if (replicationEndpointImpl == null) {
// Default to HBase inter-cluster replication endpoint
replicationEndpointImpl = HBaseInterClusterReplicationEndpoint.class.getName();
}
@SuppressWarnings("rawtypes")
Class c = Class.forName(replicationEndpointImpl);
replicationEndpoint = (ReplicationEndpoint) c.newInstance();
if (rsServerHost != null) {
ReplicationEndpoint newReplicationEndPoint = rsServerHost
.postCreateReplicationEndPoint(replicationEndpoint);
if (newReplicationEndPoint != null) {
// Use the endpoint returned by the coprocessor hook in place of the configured one
replicationEndpoint = newReplicationEndPoint;
}
}
} catch (Exception e) {
LOG.warn("Passed replication endpoint implementation throws errors"
+ " while initializing ReplicationSource for peer: " + peerId, e);
throw new IOException(e);
}
MetricsSource metrics = new MetricsSource(peerId);
// init replication source
src.init(conf, fs, manager, replicationQueues, replicationPeers, server, peerId, clusterId,
replicationEndpoint, walFileLengthProvider, metrics);
// init replication endpoint
replicationEndpoint.init(new ReplicationEndpoint.Context(replicationPeer.getConfiguration(),
fs, peerId, clusterId, replicationPeer, metrics, tableDescriptors, server));
return src;
}
/**
* Transfer all the queues of the specified region server to this one.
* First it tries to grab a lock and if it works it will move the
* znodes and finally will delete the old znodes.
*
* It creates one old source for any type of source of the old rs.
* @param rsZnode znode of the dead region server
*/
private void transferQueues(String rsZnode) {
NodeFailoverWorker transfer =
new NodeFailoverWorker(rsZnode, this.replicationQueues, this.replicationPeers,
this.clusterId);
try {
this.executor.execute(transfer);
} catch (RejectedExecutionException ex) {
LOG.info("Cancelling the transfer of " + rsZnode + " because of " + ex.getMessage());
}
}
/**
* Clear the references to the specified old source
* @param src source to clear
*/
public void closeRecoveredQueue(ReplicationSourceInterface src) {
LOG.info("Done with the recovered queue " + src.getPeerClusterZnode());
if (src instanceof ReplicationSource) {
((ReplicationSource) src).getSourceMetrics().clear();
}
this.oldsources.remove(src);
deleteSource(src.getPeerClusterZnode(), false);
this.walsByIdRecoveredQueues.remove(src.getPeerClusterZnode());
}
/**
* Clear the references to the specified normal source
* @param src source to clear
*/
public void closeQueue(ReplicationSourceInterface src) {
LOG.info("Done with the queue " + src.getPeerClusterZnode());
src.getSourceMetrics().clear();
this.sources.remove(src);
deleteSource(src.getPeerClusterZnode(), true);
this.walsById.remove(src.getPeerClusterZnode());
}
/**
* This method first deletes all the recovered sources for the specified
* id, then deletes the normal source (deleting all related data in ZK).
* @param id The id of the peer cluster
*/
public void removePeer(String id) {
LOG.info("Closing the following queue " + id + ", currently have "
+ sources.size() + " and another "
+ oldsources.size() + " that were recovered");
String terminateMessage = "Replication stream was removed by a user";
List oldSourcesToDelete = new ArrayList<>();
// synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
// see NodeFailoverWorker.run
synchronized (oldsources) {
// First close all the recovered sources for this peer
for (ReplicationSourceInterface src : oldsources) {
if (id.equals(src.getPeerId())) {
oldSourcesToDelete.add(src);
}
}
for (ReplicationSourceInterface src : oldSourcesToDelete) {
src.terminate(terminateMessage);
closeRecoveredQueue(src);
}
}
LOG.info("Number of deleted recovered sources for " + id + ": "
+ oldSourcesToDelete.size());
// Now look for the one on this cluster
List srcToRemove = new ArrayList<>();
// synchronize on replicationPeers to avoid adding source for the to-be-removed peer
synchronized (this.replicationPeers) {
for (ReplicationSourceInterface src : this.sources) {
if (id.equals(src.getPeerId())) {
srcToRemove.add(src);
}
}
if (srcToRemove.isEmpty()) {
LOG.error("The peer we wanted to remove is missing a ReplicationSourceInterface. " +
"This could mean that ReplicationSourceInterface initialization failed for this peer " +
"and that replication on this peer may not be caught up. peerId=" + id);
}
for (ReplicationSourceInterface toRemove : srcToRemove) {
toRemove.terminate(terminateMessage);
closeQueue(toRemove);
}
deleteSource(id, true);
}
}
@Override
public void regionServerRemoved(String regionserver) {
transferQueues(regionserver);
}
@Override
public void peerRemoved(String peerId) {
removePeer(peerId);
this.replicationQueues.removePeerFromHFileRefs(peerId);
}
@Override
public void peerListChanged(List<String> peerIds) {
for (String id : peerIds) {
try {
boolean added = this.replicationPeers.peerConnected(id);
if (added) {
addSource(id);
if (replicationForBulkLoadDataEnabled) {
this.replicationQueues.addPeerToHFileRefs(id);
}
}
} catch (Exception e) {
LOG.error("Error while adding a new peer", e);
}
}
}
/**
* Class responsible for setting up new ReplicationSources to take care of the
* queues from dead region servers.
*/
class NodeFailoverWorker extends Thread {
private String rsZnode;
private final ReplicationQueues rq;
private final ReplicationPeers rp;
private final UUID clusterId;
/**
* @param rsZnode znode of the dead region server whose queues will be claimed
*/
public NodeFailoverWorker(String rsZnode) {
this(rsZnode, replicationQueues, replicationPeers, ReplicationSourceManager.this.clusterId);
}
public NodeFailoverWorker(String rsZnode, final ReplicationQueues replicationQueues,
final ReplicationPeers replicationPeers, final UUID clusterId) {
super("Failover-for-"+rsZnode);
this.rsZnode = rsZnode;
this.rq = replicationQueues;
this.rp = replicationPeers;
this.clusterId = clusterId;
}
@Override
public void run() {
if (this.rq.isThisOurRegionServer(rsZnode)) {
return;
}
// Wait a bit before transferring the queues, we may be shutting down.
// This sleep may not be enough in some cases.
try {
Thread.sleep(sleepBeforeFailover +
(long) (ThreadLocalRandom.current().nextFloat() * sleepBeforeFailover));
} catch (InterruptedException e) {
LOG.warn("Interrupted while waiting before transferring a queue.");
Thread.currentThread().interrupt();
}
// Don't bother claiming queues if this server is already shutting down
if (server.isStopped()) {
LOG.info("Not transferring queue since we are shutting down");
return;
}
Map<String, Set<String>> newQueues = new HashMap<>();
List<String> peers = rq.getUnClaimedQueueIds(rsZnode);
while (peers != null && !peers.isEmpty()) {
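// Claim one queue at a time, picked at random, presumably so that several RSs racing to
// adopt the same dead RS spread their attempts across its queues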
Pair<String, SortedSet<String>> peer = this.rq.claimQueue(rsZnode,
peers.get(ThreadLocalRandom.current().nextInt(peers.size())));
long sleep = sleepBeforeFailover / 2;
if (peer != null) {
newQueues.put(peer.getFirst(), peer.getSecond());
sleep = sleepBeforeFailover;
}
try {
Thread.sleep(sleep);
} catch (InterruptedException e) {
LOG.warn("Interrupted while waiting before transferring a queue.");
Thread.currentThread().interrupt();
}
peers = rq.getUnClaimedQueueIds(rsZnode);
}
if (peers != null) {
rq.removeReplicatorIfQueueIsEmpty(rsZnode);
}
// Copying over the failed queue is completed.
if (newQueues.isEmpty()) {
// We either didn't get the lock or the failed region server didn't have any outstanding
// WALs to replicate, so we are done.
return;
}
for (Map.Entry<String, Set<String>> entry : newQueues.entrySet()) {
String peerId = entry.getKey();
Set<String> walsSet = entry.getValue();
try {
// The claimed queue id may carry dead-RS suffixes, so it does not directly name an
// actual peer; extract the real peer id from it first.
ReplicationQueueInfo replicationQueueInfo = new ReplicationQueueInfo(peerId);
String actualPeerId = replicationQueueInfo.getPeerId();
ReplicationPeer peer = replicationPeers.getConnectedPeer(actualPeerId);
ReplicationPeerConfig peerConfig = null;
try {
peerConfig = replicationPeers.getReplicationPeerConfig(actualPeerId);
} catch (ReplicationException ex) {
LOG.warn("Received exception while getting replication peer config, skipping replay"
+ ex);
}
if (peer == null || peerConfig == null) {
LOG.warn("Skipping failover for peer:" + actualPeerId + " of node" + rsZnode);
replicationQueues.removeQueue(peerId);
continue;
}
// track sources in walsByIdRecoveredQueues
Map<String, SortedSet<String>> walsByGroup = new HashMap<>();
walsByIdRecoveredQueues.put(peerId, walsByGroup);
for (String wal : walsSet) {
String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
SortedSet<String> wals = walsByGroup.get(walPrefix);
if (wals == null) {
wals = new TreeSet<>();
walsByGroup.put(walPrefix, wals);
}
wals.add(wal);
}
// enqueue sources
ReplicationSourceInterface src =
getReplicationSource(conf, fs, ReplicationSourceManager.this, this.rq, this.rp,
server, peerId, this.clusterId, peerConfig, peer, walFileLengthProvider);
// synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
// see removePeer
synchronized (oldsources) {
if (!this.rp.getConnectedPeerIds().contains(src.getPeerId())) {
src.terminate("Recovered queue doesn't belong to any current peer");
closeRecoveredQueue(src);
continue;
}
oldsources.add(src);
for (String wal : walsSet) {
src.enqueueLog(new Path(oldLogDir, wal));
}
src.startup();
}
} catch (IOException e) {
// TODO manage it
LOG.error("Failed creating a source", e);
}
}
}
}
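/**
* Worker run once at startup: any replicator still registered in ZK that is no longer a live
* region server is considered abandoned, and its queues are transferred to this RS.
*/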
class AdoptAbandonedQueuesWorker extends Thread {
public AdoptAbandonedQueuesWorker() {}
@Override
public void run() {
List<String> currentReplicators = replicationQueues.getListOfReplicators();
if (currentReplicators == null || currentReplicators.isEmpty()) {
return;
}
List<String> otherRegionServers = replicationTracker.getListOfRegionServers();
LOG.info("Current list of replicators: " + currentReplicators + " other RSs: "
+ otherRegionServers);
// Look if there's anything to process after a restart
for (String rs : currentReplicators) {
if (!otherRegionServers.contains(rs)) {
transferQueues(rs);
}
}
}
}
/**
* Get the directory where wals are archived
* @return the directory where wals are archived
*/
public Path getOldLogDir() {
return this.oldLogDir;
}
/**
* Get the directory where wals are stored by their RSs
* @return the directory where wals are stored by their RSs
*/
public Path getLogDir() {
return this.logDir;
}
/**
* Get the handle on the local file system
* @return Handle on the local file system
*/
public FileSystem getFs() {
return this.fs;
}
public Connection getConnection() {
return this.connection;
}
/**
* Get the ReplicationPeers used by this ReplicationSourceManager
* @return the ReplicationPeers used by this ReplicationSourceManager
*/
public ReplicationPeers getReplicationPeers() {
return this.replicationPeers;
}
/**
* Get a string representation of all the sources' metrics
*/
public String getStats() {
StringBuilder stats = new StringBuilder();
for (ReplicationSourceInterface source : sources) {
stats.append("Normal source for cluster " + source.getPeerId() + ": ");
stats.append(source.getStats() + "\n");
}
for (ReplicationSourceInterface oldSource : oldsources) {
stats.append("Recovered source for cluster/machine(s) " + oldSource.getPeerId() + ": ");
stats.append(oldSource.getStats() + "\n");
}
return stats.toString();
}
public void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> pairs)
throws ReplicationException {
for (ReplicationSourceInterface source : this.sources) {
source.addHFileRefs(tableName, family, pairs);
}
}
public void cleanUpHFileRefs(String peerId, List<String> files) {
this.replicationQueues.removeHFileRefs(peerId, files);
}
/**
* Whether an entry can be pushed to the peer or not right now.
* If serial replication is enabled, we can not push the entry until all entries in its region
* whose sequence numbers are smaller than this entry's have been pushed.
* For each ReplicationSource, we only need to check the first entry of each region: as long
* as it can be pushed, everything behind it in this ReplicationSource can be pushed too.
* This method blocks until the entry can be pushed.
*/
void waitUntilCanBePushed(byte[] encodedName, long seq, String peerId)
throws IOException, InterruptedException {
/**
* There are barriers for this region and a position for this peer. N barriers form N
* intervals: (b1,b2) (b2,b3) ... (bn,max). Generally, no log has a seq id smaller than the
* first barrier, and the last interval starts from the last barrier.
*
* There are several conditions under which we can push now; otherwise we should block:
* 1) "Serial replication" is not enabled, so we can push all logs just like before. That
* case should not reach this method.
* 2) There are no barriers for this region, or the seq id is smaller than the first barrier.
* This mainly happens when REPLICATION_SCOPE was just altered to serial; we can not
* guarantee the order of logs written before the alter.
* 3) This entry is in the first interval of barriers. We can push it because it is the start
* of the region. But if the region was created by a region split, we should first check
* that the parent regions are fully pushed.
* 4) The entry's seq id and the saved position are in the same interval, or the position is
* the last number of the previous interval; when a region is opened we write a barrier
* whose number is the last log's seq id + 1.
* 5) The log's seq id is not greater than the position saved in meta, so we are retrying.
* This may happen when a RS crashes after saving the replication meta but before saving
* the zk offset.
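*
* A worked example with hypothetical numbers: with barriers [10, 20, 30] the intervals are
* (-inf,10) (10,20) (20,30) (30,max). An entry with seq id 5 hits case 2 and returns at once;
* seq id 15 falls in interval 1, so case 3 applies and any split parents must be fully pushed
* first; seq id 25 with a saved position of 19 passes case 4 because 19 == 20 - 1, the last
* number of the previous interval.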
*/
List<Long> barriers = MetaTableAccessor.getReplicationBarriers(connection, encodedName);
if (barriers.isEmpty() || seq <= barriers.get(0)) {
// Case 2
return;
}
int interval = Collections.binarySearch(barriers, seq);
if (interval < 0) {
interval = -interval - 1;// get the insert position if negative
}
if (interval == 1) {
// Case 3
// Check if there are parent regions
String parentValue = MetaTableAccessor.getSerialReplicationParentRegion(connection,
encodedName);
if (parentValue == null) {
// This region has no parent or the parent's log entries are fully pushed.
return;
}
while (true) {
boolean allParentDone = true;
String[] parentRegions = parentValue.split(",");
for (String parent : parentRegions) {
byte[] region = Bytes.toBytes(parent);
long pos = MetaTableAccessor.getReplicationPositionForOnePeer(connection, region, peerId);
List<Long> parentBarriers = MetaTableAccessor.getReplicationBarriers(connection, region);
if (parentBarriers.size() > 0
&& parentBarriers.get(parentBarriers.size() - 1) - 1 > pos) {
allParentDone = false;
// For a closed region, we will write a close event marker to WAL whose sequence id is
// larger than final barrier but still smaller than next region's openSeqNum.
// So if the pos is larger than last barrier, we can say we have read the event marker
// which means the parent region has been fully pushed.
LOG.info(Bytes.toString(encodedName) + " can not start pushing because parent region's"
+ " log has not been fully pushed: parent=" + Bytes.toString(region) + " pos=" + pos
+ " barriers=" + Arrays.toString(barriers.toArray()));
break;
}
}
if (allParentDone) {
return;
} else {
Thread.sleep(replicationWaitTime);
}
}
}
while (true) {
long pos = MetaTableAccessor.getReplicationPositionForOnePeer(connection, encodedName, peerId);
if (seq <= pos) {
// Case 5: seq is not greater than the position already saved in meta, so this is a retry
// of an entry that was recorded as pushed; it is safe to push it again.
return;
}
if (pos >= 0) {
// Case 4
int posInterval = Collections.binarySearch(barriers, pos);
if (posInterval < 0) {
posInterval = -posInterval - 1;// get the insert position if negative
}
if (posInterval == interval || pos == barriers.get(interval - 1) - 1) {
return;
}
}
LOG.info(Bytes.toString(encodedName) + " can not start pushing to peer " + peerId
+ " because previous log has not been pushed: sequence=" + seq + " pos=" + pos
+ " barriers=" + Arrays.toString(barriers.toArray()));
Thread.sleep(replicationWaitTime);
}
}
}