/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.replication.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.MetaTableAccessor;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableDescriptors;
import org.apache.hadoop.hbase.TableName;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationListener;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationPeerConfig;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.replication.ReplicationTracker;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;

import org.apache.hadoop.hbase.shaded.com.google.common.annotations.VisibleForTesting;
import org.apache.hadoop.hbase.shaded.com.google.common.util.concurrent.ThreadFactoryBuilder;

/**
 * This class is responsible for managing all the replication
 * sources. There are two classes of sources:
 * <ul>
 * <li>Normal sources are persistent and one per peer cluster</li>
 * <li>Old sources are recovered from a failed region server and our
 * only goal is to finish replicating the WAL queue it had up in ZK</li>
 * </ul>
 * When a region server dies, this class uses a watcher to get notified and it
 * tries to grab a lock in order to transfer all the queues into a local
 * old source.
 * <p>
 * This class implements the ReplicationListener interface so that it can track changes in
 * replication state.
 */
@InterfaceAudience.Private
public class ReplicationSourceManager implements ReplicationListener {
  private static final Log LOG = LogFactory.getLog(ReplicationSourceManager.class);
  // List of all the sources that read this RS's logs
  private final List<ReplicationSourceInterface> sources;
  // List of all the sources we got from dead RSs
  private final List<ReplicationSourceInterface> oldsources;
  private final ReplicationQueues replicationQueues;
  private final ReplicationTracker replicationTracker;
  private final ReplicationPeers replicationPeers;
  // UUID for this cluster
  private final UUID clusterId;
  // All about stopping
  private final Server server;
  // All logs we are currently tracking
  // Index structure of the map is: peer_id->logPrefix/logGroup->logs
  private final Map<String, Map<String, SortedSet<String>>> walsById;
  // Logs for recovered sources we are currently tracking
  private final Map<String, Map<String, SortedSet<String>>> walsByIdRecoveredQueues;
  private final Configuration conf;
  private final FileSystem fs;
  // The paths to the latest log of each wal group, for new coming peers
  private Set<Path> latestPaths;
  // Path to the wals directories
  private final Path logDir;
  // Path to the wal archive
  private final Path oldLogDir;
  private final WALFileLengthProvider walFileLengthProvider;
  // The number of ms that we wait before moving znodes, HBASE-3596
  private final long sleepBeforeFailover;
  // Homemade executor service for replication
  private final ThreadPoolExecutor executor;

  private final boolean replicationForBulkLoadDataEnabled;

  private Connection connection;
  private long replicationWaitTime;

  private AtomicLong totalBufferUsed = new AtomicLong();

  /**
   * Creates a replication manager and sets the watch on all the other registered region servers
   * @param replicationQueues the interface for manipulating replication queues
   * @param replicationPeers the interface for manipulating replication peers
   * @param replicationTracker the tracker used to watch replication state changes
   * @param conf the configuration to use
   * @param server the server for this region server
   * @param fs the file system to use
   * @param logDir the directory that contains all wal directories of live RSs
   * @param oldLogDir the directory where old logs are archived
   * @param clusterId the UUID of the local cluster
   */
  public ReplicationSourceManager(ReplicationQueues replicationQueues,
      ReplicationPeers replicationPeers, ReplicationTracker replicationTracker, Configuration conf,
      Server server, FileSystem fs, Path logDir, Path oldLogDir, UUID clusterId,
      WALFileLengthProvider walFileLengthProvider) throws IOException {
    // CopyOnWriteArrayList is thread-safe.
    // Generally, reading is more frequent than modifying.
    this.sources = new CopyOnWriteArrayList<>();
    this.replicationQueues = replicationQueues;
    this.replicationPeers = replicationPeers;
    this.replicationTracker = replicationTracker;
    this.server = server;
    this.walsById = new HashMap<>();
    this.walsByIdRecoveredQueues = new ConcurrentHashMap<>();
    this.oldsources = new CopyOnWriteArrayList<>();
    this.conf = conf;
    this.fs = fs;
    this.logDir = logDir;
    this.oldLogDir = oldLogDir;
    this.sleepBeforeFailover =
        conf.getLong("replication.sleep.before.failover", 30000); // 30 seconds
    this.clusterId = clusterId;
    this.walFileLengthProvider = walFileLengthProvider;
    this.replicationTracker.registerListener(this);
    this.replicationPeers.getAllPeerIds();
    // It's preferable to failover 1 RS at a time, but with good zk servers
    // more could be processed at the same time.
    int nbWorkers = conf.getInt("replication.executor.workers", 1);
    // use a short 100ms sleep since this could be done inline with a RS startup
    // even if we fail, other region servers can take care of it
    this.executor = new ThreadPoolExecutor(nbWorkers, nbWorkers, 100, TimeUnit.MILLISECONDS,
        new LinkedBlockingQueue<>());
    ThreadFactoryBuilder tfb = new ThreadFactoryBuilder();
    tfb.setNameFormat("ReplicationExecutor-%d");
    tfb.setDaemon(true);
    this.executor.setThreadFactory(tfb.build());
    this.latestPaths = new HashSet<Path>();
    replicationForBulkLoadDataEnabled = conf.getBoolean(HConstants.REPLICATION_BULKLOAD_ENABLE_KEY,
        HConstants.REPLICATION_BULKLOAD_ENABLE_DEFAULT);
    this.replicationWaitTime = conf.getLong(HConstants.REPLICATION_SERIALLY_WAITING_KEY,
        HConstants.REPLICATION_SERIALLY_WAITING_DEFAULT);
    connection = ConnectionFactory.createConnection(conf);
  }
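
  // Illustrative sketch, not part of the original source: walsById is indexed as
  // peerId -> walGroup (the wal name prefix) -> sorted wal names. For a hypothetical
  // region server whose wal prefix is "host1%2C16020%2C1234" replicating to peer "2",
  // the map could look like:
  //
  //   "2" -> { "host1%2C16020%2C1234" -> { "host1%2C16020%2C1234.1500000000001",
  //                                        "host1%2C16020%2C1234.1500000000002" } }
  //
  // The SortedSet ordering of wal names is what lets cleanOldLogs() below drop all
  // files older than a given wal with a single headSet() call.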

  /**
   * Provide the id of the peer and a log key and this method will figure out which
   * wal it belongs to and will log, for this region server, the current
   * position. It will also clean old logs from the queue.
   * @param log Path to the log currently being replicated
   * @param id id of the peer cluster
   * @param position current location in the log
   * @param queueRecovered indicates if this queue comes from another region server
   * @param holdLogInZK if true then the log is retained in ZK
   */
  public void logPositionAndCleanOldLogs(Path log, String id, long position,
      boolean queueRecovered, boolean holdLogInZK) {
    String fileName = log.getName();
    this.replicationQueues.setLogPosition(id, fileName, position);
    if (holdLogInZK) {
      return;
    }
    cleanOldLogs(fileName, id, queueRecovered);
  }

  /**
   * Cleans a log file and all older files from ZK. Called when we are sure that a
   * log file is closed and has no more entries.
   * @param key Path to the log
   * @param id id of the peer cluster
   * @param queueRecovered Whether this is a recovered queue
   */
  public void cleanOldLogs(String key, String id, boolean queueRecovered) {
    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(key);
    if (queueRecovered) {
      SortedSet<String> wals = walsByIdRecoveredQueues.get(id).get(logPrefix);
      if (wals != null && !wals.first().equals(key)) {
        cleanOldLogs(wals, key, id);
      }
    } else {
      synchronized (this.walsById) {
        SortedSet<String> wals = walsById.get(id).get(logPrefix);
        if (wals != null && !wals.first().equals(key)) {
          cleanOldLogs(wals, key, id);
        }
      }
    }
  }

  private void cleanOldLogs(SortedSet<String> wals, String key, String id) {
    SortedSet<String> walSet = wals.headSet(key);
    LOG.debug("Removing " + walSet.size() + " logs in the list: " + walSet);
    for (String wal : walSet) {
      this.replicationQueues.removeLog(id, wal);
    }
    walSet.clear();
  }

  /**
   * Adds a normal source per registered peer cluster and tries to process all
   * old region server wal queues
   */
  void init() throws IOException, ReplicationException {
    for (String id : this.replicationPeers.getConnectedPeerIds()) {
      addSource(id);
      if (replicationForBulkLoadDataEnabled) {
        // Check if peer exists in hfile-refs queue, if not add it. This can happen in the case
        // when a peer was added before replication for bulk loaded data was enabled.
        this.replicationQueues.addPeerToHFileRefs(id);
      }
    }
    AdoptAbandonedQueuesWorker adoptionWorker = new AdoptAbandonedQueuesWorker();
    try {
      this.executor.execute(adoptionWorker);
    } catch (RejectedExecutionException ex) {
      LOG.info("Cancelling the adoption of abandoned queues because of " + ex.getMessage());
    }
  }
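
  // A minimal worked example of the cleanOldLogs() contract above, with hypothetical
  // wal names: if the tracked set for a group is { "w.1", "w.5", "w.9" } and
  // replication has fully read "w.5", then wals.headSet("w.5") == { "w.1" }, so only
  // "w.1" is removed from ZK and from the in-memory set. SortedSet.headSet() is
  // exclusive of its argument, and "w.5" itself must stay since it carries the
  // recorded position.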

  /**
   * Add sources for the given peer cluster on this region server. For the newly added peer, we
   * only need to enqueue the latest log of each wal group and do replication
   * @param id the id of the peer cluster
   * @return the source that was created
   * @throws IOException
   */
  @VisibleForTesting
  ReplicationSourceInterface addSource(String id) throws IOException, ReplicationException {
    ReplicationPeerConfig peerConfig = replicationPeers.getReplicationPeerConfig(id);
    ReplicationPeer peer = replicationPeers.getConnectedPeer(id);
    ReplicationSourceInterface src = getReplicationSource(this.conf, this.fs, this,
      this.replicationQueues, this.replicationPeers, server, id, this.clusterId, peerConfig, peer,
      walFileLengthProvider);
    synchronized (this.walsById) {
      this.sources.add(src);
      Map<String, SortedSet<String>> walsByGroup = new HashMap<>();
      this.walsById.put(id, walsByGroup);
      // Add the latest wal to that source's queue
      synchronized (latestPaths) {
        if (this.latestPaths.size() > 0) {
          for (Path logPath : latestPaths) {
            String name = logPath.getName();
            String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(name);
            SortedSet<String> logs = new TreeSet<>();
            logs.add(name);
            walsByGroup.put(walPrefix, logs);
            try {
              this.replicationQueues.addLog(id, name);
            } catch (ReplicationException e) {
              String message = "Cannot add log to queue when creating a new source, queueId=" + id
                  + ", filename=" + name;
              server.stop(message);
              throw e;
            }
            src.enqueueLog(logPath);
          }
        }
      }
    }
    src.startup();
    return src;
  }

  /**
   * Delete a complete queue of wals associated with a peer cluster
   * @param peerId Id of the peer cluster queue of wals to delete
   */
  public void deleteSource(String peerId, boolean closeConnection) {
    this.replicationQueues.removeQueue(peerId);
    if (closeConnection) {
      this.replicationPeers.peerDisconnected(peerId);
    }
  }

  /**
   * Terminate the replication on this region server
   */
  public void join() {
    this.executor.shutdown();
    for (ReplicationSourceInterface source : this.sources) {
      source.terminate("Region server is closing");
    }
  }

  /**
   * Get a copy of the wals of the normal sources on this rs
   * @return a sorted set of wal names
   */
  @VisibleForTesting
  Map<String, Map<String, SortedSet<String>>> getWALs() {
    return Collections.unmodifiableMap(walsById);
  }

  /**
   * Get a copy of the wals of the recovered sources on this rs
   * @return a sorted set of wal names
   */
  @VisibleForTesting
  Map<String, Map<String, SortedSet<String>>> getWalsByIdRecoveredQueues() {
    return Collections.unmodifiableMap(walsByIdRecoveredQueues);
  }

  /**
   * Get a list of all the normal sources of this rs
   * @return list of all normal sources
   */
  public List<ReplicationSourceInterface> getSources() {
    return this.sources;
  }

  /**
   * Get a list of all the old sources of this rs
   * @return list of all old sources
   */
  public List<ReplicationSourceInterface> getOldSources() {
    return this.oldsources;
  }

  /**
   * Get the normal source for a given peer
   * @param peerId
   * @return the normal source for the given peer if it exists, otherwise null.
   */
  public ReplicationSourceInterface getSource(String peerId) {
    return getSources().stream().filter(s -> s.getPeerId().equals(peerId)).findFirst().orElse(null);
  }
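
  // Usage sketch (assumed, test-style; not from the original source): after
  // addSource("2") returns, getSource("2") yields the running source for peer "2"
  // and getWALs().get("2") exposes the walGroup -> wal name index that was seeded
  // from latestPaths.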

  @VisibleForTesting
  List<String> getAllQueues() {
    return replicationQueues.getAllQueues();
  }

  void preLogRoll(Path newLog) throws IOException {
    recordLog(newLog);
    String logName = newLog.getName();
    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(logName);
    synchronized (latestPaths) {
      Iterator<Path> iterator = latestPaths.iterator();
      while (iterator.hasNext()) {
        Path path = iterator.next();
        if (path.getName().contains(logPrefix)) {
          iterator.remove();
          break;
        }
      }
      this.latestPaths.add(newLog);
    }
  }

  /**
   * Check and enqueue the given log to the correct source. If there's still no source for the
   * group to which the given log belongs, create one
   * @param logPath the log path to check and enqueue
   * @throws IOException
   */
  private void recordLog(Path logPath) throws IOException {
    String logName = logPath.getName();
    String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(logName);
    // update replication queues on ZK
    // synchronize on replicationPeers to avoid adding source for the to-be-removed peer
    synchronized (replicationPeers) {
      for (String id : replicationPeers.getConnectedPeerIds()) {
        try {
          this.replicationQueues.addLog(id, logName);
        } catch (ReplicationException e) {
          throw new IOException("Cannot add log to replication queue"
              + " when creating a new source, queueId=" + id + ", filename=" + logName, e);
        }
      }
    }
    // update walsById map
    synchronized (walsById) {
      for (Map.Entry<String, Map<String, SortedSet<String>>> entry : this.walsById.entrySet()) {
        String peerId = entry.getKey();
        Map<String, SortedSet<String>> walsByPrefix = entry.getValue();
        boolean existingPrefix = false;
        for (Map.Entry<String, SortedSet<String>> walsEntry : walsByPrefix.entrySet()) {
          SortedSet<String> wals = walsEntry.getValue();
          if (this.sources.isEmpty()) {
            // If there's no slaves, don't need to keep the old wals since
            // we only consider the last one when a new slave comes in
            wals.clear();
          }
          if (logPrefix.equals(walsEntry.getKey())) {
            wals.add(logName);
            existingPrefix = true;
          }
        }
        if (!existingPrefix) {
          // The new log belongs to a new group, add it into this peer
          LOG.debug("Start tracking logs for wal group " + logPrefix + " for peer " + peerId);
          SortedSet<String> wals = new TreeSet<>();
          wals.add(logName);
          walsByPrefix.put(logPrefix, wals);
        }
      }
    }
  }

  void postLogRoll(Path newLog) throws IOException {
    // This only updates the sources we own, not the recovered ones
    for (ReplicationSourceInterface source : this.sources) {
      source.enqueueLog(newLog);
    }
  }

  @VisibleForTesting
  public AtomicLong getTotalBufferUsed() {
    return totalBufferUsed;
  }
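
  // Log-roll flow as implemented above: the WAL rolling code calls preLogRoll(newLog)
  // first, which registers the new wal under every connected peer in ZK (recordLog)
  // and swaps it into latestPaths for its wal group; postLogRoll(newLog) then hands
  // the path to each normal source. Recovered sources are deliberately skipped, since
  // their queues are frozen snapshots of a dead server's wals.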

  /**
   * Factory method to create a replication source
   * @param conf the configuration to use
   * @param fs the file system to use
   * @param manager the manager to use
   * @param server the server object for this region server
   * @param peerId the id of the peer cluster
   * @return the created source
   * @throws IOException
   */
  private ReplicationSourceInterface getReplicationSource(Configuration conf, FileSystem fs,
      ReplicationSourceManager manager, ReplicationQueues replicationQueues,
      ReplicationPeers replicationPeers, Server server, String peerId, UUID clusterId,
      ReplicationPeerConfig peerConfig, ReplicationPeer replicationPeer,
      WALFileLengthProvider walFileLengthProvider) throws IOException {
    RegionServerCoprocessorHost rsServerHost = null;
    TableDescriptors tableDescriptors = null;
    if (server instanceof HRegionServer) {
      rsServerHost = ((HRegionServer) server).getRegionServerCoprocessorHost();
      tableDescriptors = ((HRegionServer) server).getTableDescriptors();
    }
    ReplicationSourceInterface src = ReplicationSourceFactory.create(conf, peerId);

    ReplicationEndpoint replicationEndpoint = null;
    try {
      String replicationEndpointImpl = peerConfig.getReplicationEndpointImpl();
      if (replicationEndpointImpl == null) {
        // Default to HBase inter-cluster replication endpoint
        replicationEndpointImpl = HBaseInterClusterReplicationEndpoint.class.getName();
      }
      @SuppressWarnings("rawtypes")
      Class c = Class.forName(replicationEndpointImpl);
      replicationEndpoint = (ReplicationEndpoint) c.newInstance();
      if (rsServerHost != null) {
        ReplicationEndpoint newReplicationEndPoint =
            rsServerHost.postCreateReplicationEndPoint(replicationEndpoint);
        if (newReplicationEndPoint != null) {
          // Override the newly created endpoint from the hook with configured end point
          replicationEndpoint = newReplicationEndPoint;
        }
      }
    } catch (Exception e) {
      LOG.warn("Passed replication endpoint implementation throws errors"
          + " while initializing ReplicationSource for peer: " + peerId, e);
      throw new IOException(e);
    }

    MetricsSource metrics = new MetricsSource(peerId);
    // init replication source
    src.init(conf, fs, manager, replicationQueues, replicationPeers, server, peerId, clusterId,
      replicationEndpoint, walFileLengthProvider, metrics);

    // init replication endpoint
    replicationEndpoint.init(new ReplicationEndpoint.Context(replicationPeer.getConfiguration(),
      fs, peerId, clusterId, replicationPeer, metrics, tableDescriptors, server));

    return src;
  }

  /**
   * Transfer all the queues of the specified region server to this region server.
   * First it tries to grab a lock and if it works it will move the
   * znodes and finally will delete the old znodes.
   *
   * It creates one old source for any type of source of the old rs.
   * @param rsZnode
   */
  private void transferQueues(String rsZnode) {
    NodeFailoverWorker transfer = new NodeFailoverWorker(rsZnode, this.replicationQueues,
        this.replicationPeers, this.clusterId);
    try {
      this.executor.execute(transfer);
    } catch (RejectedExecutionException ex) {
      LOG.info("Cancelling the transfer of " + rsZnode + " because of " + ex.getMessage());
    }
  }

  /**
   * Clear the references to the specified old source
   * @param src source to clear
   */
  public void closeRecoveredQueue(ReplicationSourceInterface src) {
    LOG.info("Done with the recovered queue " + src.getPeerClusterZnode());
    if (src instanceof ReplicationSource) {
      ((ReplicationSource) src).getSourceMetrics().clear();
    }
    this.oldsources.remove(src);
    deleteSource(src.getPeerClusterZnode(), false);
    this.walsByIdRecoveredQueues.remove(src.getPeerClusterZnode());
  }

  /**
   * Clear the references to the specified source
   * @param src source to clear
   */
  public void closeQueue(ReplicationSourceInterface src) {
    LOG.info("Done with the queue " + src.getPeerClusterZnode());
    src.getSourceMetrics().clear();
    this.sources.remove(src);
    deleteSource(src.getPeerClusterZnode(), true);
    this.walsById.remove(src.getPeerClusterZnode());
  }
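
  // Sketch of how a custom endpoint is picked up by getReplicationSource() above.
  // "org.example.MyEndpoint" is a hypothetical class name, not part of HBase:
  //
  //   ReplicationPeerConfig peerConfig = new ReplicationPeerConfig();
  //   peerConfig.setClusterKey("zk1:2181:/hbase");
  //   peerConfig.setReplicationEndpointImpl("org.example.MyEndpoint");
  //
  // When getReplicationEndpointImpl() returns null, the manager falls back to
  // HBaseInterClusterReplicationEndpoint, and a region server coprocessor may still
  // replace the instance via postCreateReplicationEndPoint().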

  /**
   * This method first deletes all the recovered sources for the specified
   * id, then deletes the normal source (deleting all related data in ZK).
   * @param id The id of the peer cluster
   */
  public void removePeer(String id) {
    LOG.info("Closing the following queue " + id + ", currently have " + sources.size()
        + " and another " + oldsources.size() + " that were recovered");
    String terminateMessage = "Replication stream was removed by a user";
    List<ReplicationSourceInterface> oldSourcesToDelete = new ArrayList<>();
    // synchronized on oldsources to avoid adding recovered source for the to-be-removed peer
    // see NodeFailoverWorker.run
    synchronized (oldsources) {
      // First close all the recovered sources for this peer
      for (ReplicationSourceInterface src : oldsources) {
        if (id.equals(src.getPeerId())) {
          oldSourcesToDelete.add(src);
        }
      }
      for (ReplicationSourceInterface src : oldSourcesToDelete) {
        src.terminate(terminateMessage);
        closeRecoveredQueue(src);
      }
    }
    LOG.info("Number of deleted recovered sources for " + id + ": " + oldSourcesToDelete.size());
    // Now look for the one on this cluster
    List<ReplicationSourceInterface> srcToRemove = new ArrayList<>();
    // synchronize on replicationPeers to avoid adding source for the to-be-removed peer
    synchronized (this.replicationPeers) {
      for (ReplicationSourceInterface src : this.sources) {
        if (id.equals(src.getPeerId())) {
          srcToRemove.add(src);
        }
      }
      if (srcToRemove.isEmpty()) {
        LOG.error("The peer we wanted to remove is missing a ReplicationSourceInterface. "
            + "This could mean that ReplicationSourceInterface initialization failed for this "
            + "peer and that replication on this peer may not be caught up. peerId=" + id);
      }
      for (ReplicationSourceInterface toRemove : srcToRemove) {
        toRemove.terminate(terminateMessage);
        closeQueue(toRemove);
      }
      deleteSource(id, true);
    }
  }

  @Override
  public void regionServerRemoved(String regionserver) {
    transferQueues(regionserver);
  }

  @Override
  public void peerRemoved(String peerId) {
    removePeer(peerId);
    this.replicationQueues.removePeerFromHFileRefs(peerId);
  }

  @Override
  public void peerListChanged(List<String> peerIds) {
    for (String id : peerIds) {
      try {
        boolean added = this.replicationPeers.peerConnected(id);
        if (added) {
          addSource(id);
          if (replicationForBulkLoadDataEnabled) {
            this.replicationQueues.addPeerToHFileRefs(id);
          }
        }
      } catch (Exception e) {
        LOG.error("Error while adding a new peer", e);
      }
    }
  }
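
  // Summary of how the ReplicationListener callbacks above fire (a restatement of the
  // code paths, not new behavior): adding a peer, e.g. via the shell's add_peer,
  // eventually triggers peerListChanged() -> addSource(); removing one triggers
  // peerRemoved() -> removePeer(); a region server death triggers
  // regionServerRemoved() -> transferQueues(), which schedules a NodeFailoverWorker
  // on the executor.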

  /**
   * Class responsible for setting up new ReplicationSources to take care of the
   * queues from dead region servers.
   */
  class NodeFailoverWorker extends Thread {

    private String rsZnode;
    private final ReplicationQueues rq;
    private final ReplicationPeers rp;
    private final UUID clusterId;

    /**
     * @param rsZnode
     */
    public NodeFailoverWorker(String rsZnode) {
      this(rsZnode, replicationQueues, replicationPeers, ReplicationSourceManager.this.clusterId);
    }

    public NodeFailoverWorker(String rsZnode, final ReplicationQueues replicationQueues,
        final ReplicationPeers replicationPeers, final UUID clusterId) {
      super("Failover-for-" + rsZnode);
      this.rsZnode = rsZnode;
      this.rq = replicationQueues;
      this.rp = replicationPeers;
      this.clusterId = clusterId;
    }

    @Override
    public void run() {
      if (this.rq.isThisOurRegionServer(rsZnode)) {
        return;
      }
      // Wait a bit before transferring the queues, we may be shutting down.
      // This sleep may not be enough in some cases.
      try {
        Thread.sleep(sleepBeforeFailover
            + (long) (ThreadLocalRandom.current().nextFloat() * sleepBeforeFailover));
      } catch (InterruptedException e) {
        LOG.warn("Interrupted while waiting before transferring a queue.");
        Thread.currentThread().interrupt();
      }
      // We try to lock that rs' queue directory
      if (server.isStopped()) {
        LOG.info("Not transferring queue since we are shutting down");
        return;
      }
      Map<String, Set<String>> newQueues = new HashMap<>();
      List<String> peers = rq.getUnClaimedQueueIds(rsZnode);
      while (peers != null && !peers.isEmpty()) {
        Pair<String, SortedSet<String>> peer = this.rq.claimQueue(rsZnode,
          peers.get(ThreadLocalRandom.current().nextInt(peers.size())));
        long sleep = sleepBeforeFailover / 2;
        if (peer != null) {
          newQueues.put(peer.getFirst(), peer.getSecond());
          sleep = sleepBeforeFailover;
        }
        try {
          Thread.sleep(sleep);
        } catch (InterruptedException e) {
          LOG.warn("Interrupted while waiting before transferring a queue.");
          Thread.currentThread().interrupt();
        }
        peers = rq.getUnClaimedQueueIds(rsZnode);
      }
      if (peers != null) {
        rq.removeReplicatorIfQueueIsEmpty(rsZnode);
      }
      // Copying over the failed queue is completed.
      if (newQueues.isEmpty()) {
        // We either didn't get the lock or the failed region server didn't have any outstanding
        // WALs to replicate, so we are done.
        return;
      }

      for (Map.Entry<String, Set<String>> entry : newQueues.entrySet()) {
        String peerId = entry.getKey();
        Set<String> walsSet = entry.getValue();
        try {
          // there is not an actual peer defined corresponding to peerId for the failover.
          ReplicationQueueInfo replicationQueueInfo = new ReplicationQueueInfo(peerId);
          String actualPeerId = replicationQueueInfo.getPeerId();
          ReplicationPeer peer = replicationPeers.getConnectedPeer(actualPeerId);
          ReplicationPeerConfig peerConfig = null;
          try {
            peerConfig = replicationPeers.getReplicationPeerConfig(actualPeerId);
          } catch (ReplicationException ex) {
            LOG.warn("Received exception while getting replication peer config, skipping replay"
                + ex);
          }
          if (peer == null || peerConfig == null) {
            LOG.warn("Skipping failover for peer:" + actualPeerId + " of node" + rsZnode);
            replicationQueues.removeQueue(peerId);
            continue;
          }
          // track sources in walsByIdRecoveredQueues
          Map<String, SortedSet<String>> walsByGroup = new HashMap<>();
          walsByIdRecoveredQueues.put(peerId, walsByGroup);
          for (String wal : walsSet) {
            String walPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(wal);
            SortedSet<String> wals = walsByGroup.get(walPrefix);
            if (wals == null) {
              wals = new TreeSet<>();
              walsByGroup.put(walPrefix, wals);
            }
            wals.add(wal);
          }

          // enqueue sources
          ReplicationSourceInterface src = getReplicationSource(conf, fs,
            ReplicationSourceManager.this, this.rq, this.rp, server, peerId, this.clusterId,
            peerConfig, peer, walFileLengthProvider);
          // synchronized on oldsources to avoid adding recovered source for the to-be-removed
          // peer, see removePeer
          synchronized (oldsources) {
            if (!this.rp.getConnectedPeerIds().contains(src.getPeerId())) {
              src.terminate("Recovered queue doesn't belong to any current peer");
              closeRecoveredQueue(src);
              continue;
            }
            oldsources.add(src);
            for (String wal : walsSet) {
              src.enqueueLog(new Path(oldLogDir, wal));
            }
            src.startup();
          }
        } catch (IOException e) {
          // TODO manage it
          LOG.error("Failed creating a source", e);
        }
      }
    }
  }
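
  // The randomized wait at the top of NodeFailoverWorker.run() above,
  //   sleepBeforeFailover + (long) (ThreadLocalRandom.current().nextFloat() * sleepBeforeFailover),
  // is uniform over [sleepBeforeFailover, 2 * sleepBeforeFailover), i.e. 30-60 seconds
  // with the default "replication.sleep.before.failover" of 30000 ms. The jitter keeps
  // surviving region servers from all racing to claim the dead server's queues at the
  // same instant.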

  class AdoptAbandonedQueuesWorker extends Thread {

    public AdoptAbandonedQueuesWorker() {
    }

    @Override
    public void run() {
      List<String> currentReplicators = replicationQueues.getListOfReplicators();
      if (currentReplicators == null || currentReplicators.isEmpty()) {
        return;
      }
      List<String> otherRegionServers = replicationTracker.getListOfRegionServers();
      LOG.info("Current list of replicators: " + currentReplicators + " other RSs: "
          + otherRegionServers);

      // Look if there's anything to process after a restart
      for (String rs : currentReplicators) {
        if (!otherRegionServers.contains(rs)) {
          transferQueues(rs);
        }
      }
    }
  }

  /**
   * Get the directory where wals are archived
   * @return the directory where wals are archived
   */
  public Path getOldLogDir() {
    return this.oldLogDir;
  }

  /**
   * Get the directory where wals are stored by their RSs
   * @return the directory where wals are stored by their RSs
   */
  public Path getLogDir() {
    return this.logDir;
  }

  /**
   * Get the handle on the local file system
   * @return Handle on the local file system
   */
  public FileSystem getFs() {
    return this.fs;
  }

  public Connection getConnection() {
    return this.connection;
  }

  /**
   * Get the ReplicationPeers used by this ReplicationSourceManager
   * @return the ReplicationPeers used by this ReplicationSourceManager
   */
  public ReplicationPeers getReplicationPeers() {
    return this.replicationPeers;
  }

  /**
   * Get a string representation of all the sources' metrics
   */
  public String getStats() {
    StringBuffer stats = new StringBuffer();
    for (ReplicationSourceInterface source : sources) {
      stats.append("Normal source for cluster " + source.getPeerId() + ": ");
      stats.append(source.getStats() + "\n");
    }
    for (ReplicationSourceInterface oldSource : oldsources) {
      stats.append("Recovered source for cluster/machine(s) " + oldSource.getPeerId() + ": ");
      stats.append(oldSource.getStats() + "\n");
    }
    return stats.toString();
  }

  public void addHFileRefs(TableName tableName, byte[] family, List<Pair<Path, Path>> pairs)
      throws ReplicationException {
    for (ReplicationSourceInterface source : this.sources) {
      source.addHFileRefs(tableName, family, pairs);
    }
  }

  public void cleanUpHFileRefs(String peerId, List<String> files) {
    this.replicationQueues.removeHFileRefs(peerId, files);
  }

  /**
   * Whether an entry can be pushed to the peer or not right now.
   * If we enable serial replication, we can not push the entry until all entries in its region
   * whose sequence numbers are smaller than this entry have been pushed.
   * For each ReplicationSource, we need only check the first entry in each region, as long as it
   * can be pushed, we can push all in this ReplicationSource.
   * This method will be blocked until we can push.
   * @return the first barrier of entry's region, or -1 if there is no barrier. It is used to
   *         prevent saving positions in the region of no barrier.
   */
  void waitUntilCanBePushed(byte[] encodedName, long seq, String peerId)
      throws IOException, InterruptedException {
    /**
     * There are barriers for this region and a position for this peer. N barriers form N
     * intervals, (b1,b2) (b2,b3) ... (bn,max). Generally, there are no logs whose seq id is not
     * greater than the first barrier, and the last interval starts from the last barrier.
     *
     * There are several conditions under which we can push now, otherwise we should block:
     * 1) "Serial replication" is not enabled, we can push all logs just like before. This case
     *    should not call this method.
     * 2) There are no barriers for this region, or the seq id is smaller than the first barrier.
     *    This mainly happens when we alter REPLICATION_SCOPE = 2. We can not guarantee the
     *    order of logs that were written before altering.
     * 3) This entry is in the first interval of barriers. We can push it because it is the
     *    start of a region. But if the region was created by a region split, we should check
     *    whether the parent regions are fully pushed.
     * 4) The entry's seq id and the position are in the same section, or the pos is the last
     *    number of the previous section. Because when opening a region we put a barrier whose
     *    number is the last log's id + 1.
     * 5) Log's seq is smaller than pos in meta, we are retrying. It may happen when a RS crashes
     *    after saving replication meta and before saving the zk offset.
     */
    List<Long> barriers = MetaTableAccessor.getReplicationBarriers(connection, encodedName);
    if (barriers.isEmpty() || seq <= barriers.get(0)) {
      // Case 2
      return;
    }
    int interval = Collections.binarySearch(barriers, seq);
    if (interval < 0) {
      interval = -interval - 1; // get the insert position if negative
    }
    if (interval == 1) {
      // Case 3
      // Check if there are parent regions
      String parentValue =
          MetaTableAccessor.getSerialReplicationParentRegion(connection, encodedName);
      if (parentValue == null) {
        // This region has no parent or the parent's log entries are fully pushed.
        return;
      }
      while (true) {
        boolean allParentDone = true;
        String[] parentRegions = parentValue.split(",");
        for (String parent : parentRegions) {
          byte[] region = Bytes.toBytes(parent);
          long pos =
              MetaTableAccessor.getReplicationPositionForOnePeer(connection, region, peerId);
          List<Long> parentBarriers = MetaTableAccessor.getReplicationBarriers(connection, region);
          if (parentBarriers.size() > 0
              && parentBarriers.get(parentBarriers.size() - 1) - 1 > pos) {
            allParentDone = false;
            // For a closed region, we will write a close event marker to WAL whose sequence id is
            // larger than final barrier but still smaller than next region's openSeqNum.
            // So if the pos is larger than last barrier, we can say we have read the event marker
            // which means the parent region has been fully pushed.
            LOG.info(Bytes.toString(encodedName)
                + " can not start pushing because parent region's log has not been fully pushed:"
                + " parent=" + Bytes.toString(region) + " pos=" + pos + " barriers="
                + Arrays.toString(barriers.toArray()));
            break;
          }
        }
        if (allParentDone) {
          return;
        } else {
          Thread.sleep(replicationWaitTime);
        }
      }
    }

    while (true) {
      long pos =
          MetaTableAccessor.getReplicationPositionForOnePeer(connection, encodedName, peerId);
      if (seq <= pos) {
        // Case 5
      }
      if (pos >= 0) {
        // Case 4
        int posInterval = Collections.binarySearch(barriers, pos);
        if (posInterval < 0) {
          posInterval = -posInterval - 1; // get the insert position if negative
        }
        if (posInterval == interval || pos == barriers.get(interval - 1) - 1) {
          return;
        }
      }

      LOG.info(Bytes.toString(encodedName) + " can not start pushing to peer " + peerId
          + " because previous log has not been pushed: sequence=" + seq + " pos=" + pos
          + " barriers=" + Arrays.toString(barriers.toArray()));
      Thread.sleep(replicationWaitTime);
    }
  }
}
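
// Worked example for waitUntilCanBePushed() above, with hypothetical numbers: given
// barriers [10, 20, 30], an entry with seq=25 gets interval = 2 (the binarySearch
// insertion point). It can be pushed once the recorded pos for the peer falls in the
// same interval (21..30) or equals barriers.get(interval - 1) - 1 == 19, the last
// sequence id of the previous section; until then the method sleeps
// replicationWaitTime and re-reads the position from meta. A seq at or below the
// first barrier (e.g. seq=7) is Case 2 and returns immediately.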



