All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.replication.regionserver.ReplicationSource Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.UUID;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.concurrent.atomic.AtomicLong;

import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.replication.ChainWALEntryFilter;
import org.apache.hadoop.hbase.replication.ClusterMarkingEntryFilter;
import org.apache.hadoop.hbase.replication.ReplicationEndpoint;
import org.apache.hadoop.hbase.replication.ReplicationException;
import org.apache.hadoop.hbase.replication.ReplicationPeer;
import org.apache.hadoop.hbase.replication.ReplicationPeers;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.ReplicationQueues;
import org.apache.hadoop.hbase.replication.SystemTableWALEntryFilter;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.AbstractFSWALProvider;
import org.apache.hadoop.hbase.wal.WAL.Entry;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

/**
 * Class that handles the source of a replication stream.
 * Currently does not handle more than 1 slave
 * For each slave cluster it selects a random number of peers
 * using a replication ratio. For example, if replication ration = 0.1
 * and slave cluster has 100 region servers, 10 will be selected.
 * 

* A stream is considered down when we cannot contact a region server on the * peer cluster for more than 55 seconds by default. *

*/ @InterfaceAudience.Private public class ReplicationSource extends Thread implements ReplicationSourceInterface { private static final Logger LOG = LoggerFactory.getLogger(ReplicationSource.class); // Queues of logs to process, entry in format of walGroupId->queue, // each presents a queue for one wal group private Map> queues = new HashMap<>(); // per group queue size, keep no more than this number of logs in each wal group protected int queueSizePerGroup; protected ReplicationQueues replicationQueues; private ReplicationPeers replicationPeers; protected Configuration conf; protected ReplicationQueueInfo replicationQueueInfo; // id of the peer cluster this source replicates to private String peerId; // The manager of all sources to which we ping back our progress protected ReplicationSourceManager manager; // Should we stop everything? protected Server server; // How long should we sleep for each retry private long sleepForRetries; protected FileSystem fs; // id of this cluster private UUID clusterId; // id of the other cluster private UUID peerClusterId; // total number of edits we replicated private AtomicLong totalReplicatedEdits = new AtomicLong(0); // The znode we currently play with protected String peerClusterZnode; // Maximum number of retries before taking bold actions private int maxRetriesMultiplier; // Indicates if this particular source is running private volatile boolean sourceRunning = false; // Metrics for this source private MetricsSource metrics; //WARN threshold for the number of queued logs, defaults to 2 private int logQueueWarnThreshold; // ReplicationEndpoint which will handle the actual replication private ReplicationEndpoint replicationEndpoint; // A filter (or a chain of filters) for the WAL entries. 
protected WALEntryFilter walEntryFilter; // throttler private ReplicationThrottler throttler; private long defaultBandwidth; private long currentBandwidth; private WALFileLengthProvider walFileLengthProvider; protected final ConcurrentHashMap workerThreads = new ConcurrentHashMap<>(); private AtomicLong totalBufferUsed; public static final String WAIT_ON_ENDPOINT_SECONDS = "hbase.replication.wait.on.endpoint.seconds"; public static final int DEFAULT_WAIT_ON_ENDPOINT_SECONDS = 30; private int waitOnEndpointSeconds = -1; /** * Instantiation method used by region servers * * @param conf configuration to use * @param fs file system to use * @param manager replication manager to ping to * @param server the server for this region server * @param peerClusterZnode the name of our znode * @param clusterId unique UUID for the cluster * @param replicationEndpoint the replication endpoint implementation * @param metrics metrics for replication source * @throws IOException */ @Override public void init(Configuration conf, FileSystem fs, ReplicationSourceManager manager, ReplicationQueues replicationQueues, ReplicationPeers replicationPeers, Server server, String peerClusterZnode, UUID clusterId, ReplicationEndpoint replicationEndpoint, WALFileLengthProvider walFileLengthProvider, MetricsSource metrics) throws IOException { this.server = server; this.conf = HBaseConfiguration.create(conf); this.waitOnEndpointSeconds = this.conf.getInt(WAIT_ON_ENDPOINT_SECONDS, DEFAULT_WAIT_ON_ENDPOINT_SECONDS); decorateConf(); this.sleepForRetries = this.conf.getLong("replication.source.sleepforretries", 1000); // 1 second this.maxRetriesMultiplier = this.conf.getInt("replication.source.maxretriesmultiplier", 300); // 5 minutes @ 1 sec per this.queueSizePerGroup = this.conf.getInt("hbase.regionserver.maxlogs", 32); this.replicationQueues = replicationQueues; this.replicationPeers = replicationPeers; this.manager = manager; this.fs = fs; this.metrics = metrics; this.clusterId = clusterId; 
this.peerClusterZnode = peerClusterZnode; this.replicationQueueInfo = new ReplicationQueueInfo(peerClusterZnode); // ReplicationQueueInfo parses the peerId out of the znode for us this.peerId = this.replicationQueueInfo.getPeerId(); this.logQueueWarnThreshold = this.conf.getInt("replication.source.log.queue.warn", 2); this.replicationEndpoint = replicationEndpoint; defaultBandwidth = this.conf.getLong("replication.source.per.peer.node.bandwidth", 0); currentBandwidth = getCurrentBandwidth(); this.throttler = new ReplicationThrottler((double) currentBandwidth / 10.0); this.totalBufferUsed = manager.getTotalBufferUsed(); this.walFileLengthProvider = walFileLengthProvider; LOG.info("peerClusterZnode=" + peerClusterZnode + ", ReplicationSource : " + peerId + ", currentBandwidth=" + this.currentBandwidth); } private void decorateConf() { String replicationCodec = this.conf.get(HConstants.REPLICATION_CODEC_CONF_KEY); if (StringUtils.isNotEmpty(replicationCodec)) { this.conf.set(HConstants.RPC_CODEC_CONF_KEY, replicationCodec); } } @Override public void enqueueLog(Path log) { String logPrefix = AbstractFSWALProvider.getWALPrefixFromWALName(log.getName()); PriorityBlockingQueue queue = queues.get(logPrefix); if (queue == null) { queue = new PriorityBlockingQueue<>(queueSizePerGroup, new LogsComparator()); queues.put(logPrefix, queue); if (this.sourceRunning) { // new wal group observed after source startup, start a new worker thread to track it // notice: it's possible that log enqueued when this.running is set but worker thread // still not launched, so it's necessary to check workerThreads before start the worker tryStartNewShipper(logPrefix, queue); } } queue.put(log); this.metrics.incrSizeOfLogQueue(); // This will log a warning for each new log that gets created above the warn threshold int queueSize = queue.size(); if (queueSize > this.logQueueWarnThreshold) { LOG.warn("WAL group " + logPrefix + " queue size: " + queueSize + " exceeds value of 
replication.source.log.queue.warn: " + logQueueWarnThreshold); } } @Override public void addHFileRefs(TableName tableName, byte[] family, List> pairs) throws ReplicationException { String peerId = peerClusterZnode; if (peerId.contains("-")) { // peerClusterZnode will be in the form peerId + "-" + rsZNode. // A peerId will not have "-" in its name, see HBASE-11394 peerId = peerClusterZnode.split("-")[0]; } Map> tableCFMap = replicationPeers.getConnectedPeer(peerId).getTableCFs(); if (tableCFMap != null) { List tableCfs = tableCFMap.get(tableName); if (tableCFMap.containsKey(tableName) && (tableCfs == null || tableCfs.contains(Bytes.toString(family)))) { this.replicationQueues.addHFileRefs(peerId, pairs); metrics.incrSizeOfHFileRefsQueue(pairs.size()); } else { LOG.debug("HFiles will not be replicated belonging to the table " + tableName + " family " + Bytes.toString(family) + " to peer id " + peerId); } } else { // user has explicitly not defined any table cfs for replication, means replicate all the // data this.replicationQueues.addHFileRefs(peerId, pairs); metrics.incrSizeOfHFileRefsQueue(pairs.size()); } } @Override public void run() { // mark we are running now this.sourceRunning = true; try { // start the endpoint, connect to the cluster this.replicationEndpoint.start(); this.replicationEndpoint.awaitRunning(this.waitOnEndpointSeconds, TimeUnit.SECONDS); } catch (Exception ex) { LOG.warn("Error starting ReplicationEndpoint, exiting", ex); uninitialize(); throw new RuntimeException(ex); } int sleepMultiplier = 1; // delay this until we are in an asynchronous thread while (this.isSourceActive() && this.peerClusterId == null) { this.peerClusterId = replicationEndpoint.getPeerUUID(); if (this.isSourceActive() && this.peerClusterId == null) { if (sleepForRetries("Cannot contact the peer's zk ensemble", sleepMultiplier)) { sleepMultiplier++; } } } // In rare case, zookeeper setting may be messed up. 
That leads to the incorrect // peerClusterId value, which is the same as the source clusterId if (clusterId.equals(peerClusterId) && !replicationEndpoint.canReplicateToSameCluster()) { this.terminate("ClusterId " + clusterId + " is replicating to itself: peerClusterId " + peerClusterId + " which is not allowed by ReplicationEndpoint:" + replicationEndpoint.getClass().getName(), null, false); this.manager.closeQueue(this); return; } LOG.info("Replicating " + clusterId + " -> " + peerClusterId); initializeWALEntryFilter(); // start workers for (Map.Entry> entry : queues.entrySet()) { String walGroupId = entry.getKey(); PriorityBlockingQueue queue = entry.getValue(); tryStartNewShipper(walGroupId, queue); } } private void initializeWALEntryFilter() { // get the WALEntryFilter from ReplicationEndpoint and add it to default filters ArrayList filters = Lists.newArrayList( (WALEntryFilter)new SystemTableWALEntryFilter()); WALEntryFilter filterFromEndpoint = this.replicationEndpoint.getWALEntryfilter(); if (filterFromEndpoint != null) { filters.add(filterFromEndpoint); } filters.add(new ClusterMarkingEntryFilter(clusterId, peerClusterId, replicationEndpoint)); this.walEntryFilter = new ChainWALEntryFilter(filters); } protected void tryStartNewShipper(String walGroupId, PriorityBlockingQueue queue) { final ReplicationSourceShipper worker = new ReplicationSourceShipper(conf, walGroupId, queue, this); ReplicationSourceShipper extant = workerThreads.putIfAbsent(walGroupId, worker); if (extant != null) { LOG.debug("Someone has beat us to start a worker thread for wal group " + walGroupId); } else { LOG.debug("Starting up worker for wal group " + walGroupId); worker.startup(getUncaughtExceptionHandler()); worker.setWALReader(startNewWALReader(worker.getName(), walGroupId, queue, worker.getStartPosition())); workerThreads.put(walGroupId, worker); } } protected ReplicationSourceWALReader startNewWALReader(String threadName, String walGroupId, PriorityBlockingQueue queue, long 
startPosition) { ReplicationSourceWALReader walReader = new ReplicationSourceWALReader(fs, conf, queue, startPosition, walEntryFilter, this); return (ReplicationSourceWALReader) Threads.setDaemonThreadRunning(walReader, threadName + ".replicationSource.wal-reader." + walGroupId + "," + peerClusterZnode, getUncaughtExceptionHandler()); } public Thread.UncaughtExceptionHandler getUncaughtExceptionHandler() { return new Thread.UncaughtExceptionHandler() { @Override public void uncaughtException(final Thread t, final Throwable e) { RSRpcServices.exitIfOOME(e); LOG.error("Unexpected exception in " + t.getName() + " currentPath=" + getCurrentPath(), e); server.stop("Unexpected exception in " + t.getName()); } }; } @Override public ReplicationEndpoint getReplicationEndpoint() { return this.replicationEndpoint; } @Override public ReplicationSourceManager getSourceManager() { return this.manager; } @Override public void tryThrottle(int batchSize) throws InterruptedException { checkBandwidthChangeAndResetThrottler(); if (throttler.isEnabled()) { long sleepTicks = throttler.getNextSleepInterval(batchSize); if (sleepTicks > 0) { if (LOG.isTraceEnabled()) { LOG.trace("To sleep " + sleepTicks + "ms for throttling control"); } Thread.sleep(sleepTicks); // reset throttler's cycle start tick when sleep for throttling occurs throttler.resetStartTick(); } } } private void checkBandwidthChangeAndResetThrottler() { long peerBandwidth = getCurrentBandwidth(); if (peerBandwidth != currentBandwidth) { currentBandwidth = peerBandwidth; throttler.setBandwidth((double) currentBandwidth / 10.0); LOG.info("ReplicationSource : " + peerId + " bandwidth throttling changed, currentBandWidth=" + currentBandwidth); } } private long getCurrentBandwidth() { ReplicationPeer replicationPeer = this.replicationPeers.getConnectedPeer(peerId); long peerBandwidth = replicationPeer != null ? 
replicationPeer.getPeerBandwidth() : 0; // user can set peer bandwidth to 0 to use default bandwidth return peerBandwidth != 0 ? peerBandwidth : defaultBandwidth; } private void uninitialize() { LOG.debug("Source exiting " + this.peerId); metrics.clear(); if (this.replicationEndpoint.isRunning() || this.replicationEndpoint.isStarting()) { this.replicationEndpoint.stop(); try { this.replicationEndpoint.awaitTerminated(this.waitOnEndpointSeconds, TimeUnit.SECONDS); } catch (TimeoutException e) { LOG.warn("Failed termination after " + this.waitOnEndpointSeconds + " seconds."); } } } /** * Do the sleeping logic * @param msg Why we sleep * @param sleepMultiplier by how many times the default sleeping time is augmented * @return True if sleepMultiplier is < maxRetriesMultiplier */ protected boolean sleepForRetries(String msg, int sleepMultiplier) { try { if (LOG.isTraceEnabled()) { LOG.trace(msg + ", sleeping " + sleepForRetries + " times " + sleepMultiplier); } Thread.sleep(this.sleepForRetries * sleepMultiplier); } catch (InterruptedException e) { LOG.debug("Interrupted while sleeping between retries"); Thread.currentThread().interrupt(); } return sleepMultiplier < maxRetriesMultiplier; } /** * check whether the peer is enabled or not * * @return true if the peer is enabled, otherwise false */ @Override public boolean isPeerEnabled() { return this.replicationPeers.getStatusOfPeer(this.peerId); } @Override public void startup() { String n = Thread.currentThread().getName(); Thread.UncaughtExceptionHandler handler = new Thread.UncaughtExceptionHandler() { @Override public void uncaughtException(final Thread t, final Throwable e) { LOG.error("Unexpected exception in ReplicationSource", e); } }; Threads .setDaemonThreadRunning(this, n + ".replicationSource," + this.peerClusterZnode, handler); } @Override public void terminate(String reason) { terminate(reason, null); } @Override public void terminate(String reason, Exception cause) { terminate(reason, cause, true); } 
public void terminate(String reason, Exception cause, boolean join) { if (cause == null) { LOG.info("Closing source " + this.peerClusterZnode + " because: " + reason); } else { LOG.error("Closing source " + this.peerClusterZnode + " because an error occurred: " + reason, cause); } this.sourceRunning = false; Collection workers = workerThreads.values(); for (ReplicationSourceShipper worker : workers) { worker.stopWorker(); worker.entryReader.interrupt(); worker.interrupt(); } if (this.replicationEndpoint != null) { this.replicationEndpoint.stop(); } if (join) { for (ReplicationSourceShipper worker : workers) { Threads.shutdown(worker, this.sleepForRetries); LOG.info("ReplicationSourceWorker " + worker.getName() + " terminated"); } if (this.replicationEndpoint != null) { try { this.replicationEndpoint .awaitTerminated(sleepForRetries * maxRetriesMultiplier, TimeUnit.MILLISECONDS); } catch (TimeoutException te) { LOG.warn("Got exception while waiting for endpoint to shutdown for replication source :" + this.peerClusterZnode, te); } } } } @Override public String getPeerClusterZnode() { return this.peerClusterZnode; } @Override public String getPeerId() { return this.peerId; } @Override public Path getCurrentPath() { // only for testing for (ReplicationSourceShipper worker : workerThreads.values()) { if (worker.getCurrentPath() != null) { return worker.getCurrentPath(); } } return null; } @Override public boolean isSourceActive() { return !this.server.isStopped() && this.sourceRunning; } /** * Comparator used to compare logs together based on their start time */ public static class LogsComparator implements Comparator { @Override public int compare(Path o1, Path o2) { return Long.compare(getTS(o1), getTS(o2)); } /** * Split a path to get the start time * For example: 10.20.20.171%3A60020.1277499063250 * @param p path to split * @return start time */ private static long getTS(Path p) { int tsIndex = p.getName().lastIndexOf('.') + 1; return 
Long.parseLong(p.getName().substring(tsIndex)); } } @Override public String getStats() { StringBuilder sb = new StringBuilder(); sb.append("Total replicated edits: ").append(totalReplicatedEdits) .append(", current progress: \n"); for (Map.Entry entry : workerThreads.entrySet()) { String walGroupId = entry.getKey(); ReplicationSourceShipper worker = entry.getValue(); long position = worker.getCurrentPosition(); Path currentPath = worker.getCurrentPath(); sb.append("walGroup [").append(walGroupId).append("]: "); if (currentPath != null) { sb.append("currently replicating from: ").append(currentPath).append(" at position: ") .append(position).append("\n"); } else { sb.append("no replication ongoing, waiting for new log"); } } return sb.toString(); } @Override public MetricsSource getSourceMetrics() { return this.metrics; } @Override public void postShipEdits(List entries, int batchSize) { if (throttler.isEnabled()) { throttler.addPushSize(batchSize); } totalReplicatedEdits.addAndGet(entries.size()); totalBufferUsed.addAndGet(-batchSize); } @Override public WALFileLengthProvider getWALFileLengthProvider() { return walFileLengthProvider; } @Override public ServerName getServerWALsBelongTo() { return server.getServerName(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy