org.apache.hadoop.hbase.replication.regionserver.ReplicationSourceWALReaderThread (hbase-server)
Server functionality for HBase
/**
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.replication.regionserver;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.PriorityBlockingQueue;
import java.util.concurrent.atomic.AtomicLong;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;
import org.apache.hadoop.hbase.regionserver.RSRpcServices;
import org.apache.hadoop.hbase.regionserver.wal.WALEdit;
import org.apache.hadoop.hbase.replication.ReplicationQueueInfo;
import org.apache.hadoop.hbase.replication.WALEntryFilter;
import org.apache.hadoop.hbase.replication.regionserver.WALEntryStream.WALEntryStreamRuntimeException;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.hadoop.hbase.wal.WAL.Entry;
/**
* Reads and filters WAL entries, groups the filtered entries into batches, and puts the batches onto a queue
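 *
 * <p>A rough usage sketch (illustrative only; the manager, queue, filter, and metrics
 * objects come from the surrounding replication source setup):
 * <pre>
 * ReplicationSourceWALReaderThread reader = new ReplicationSourceWALReaderThread(
 *     manager, queueInfo, logQueue, startPosition, fs, conf, filter, metrics);
 * reader.start();
 * WALEntryBatch batch = reader.take(); // blocks until a batch of entries is ready
 * </pre>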
*
*/
@InterfaceAudience.Private
@InterfaceStability.Evolving
public class ReplicationSourceWALReaderThread extends Thread {
private static final Log LOG = LogFactory.getLog(ReplicationSourceWALReaderThread.class);
  private PriorityBlockingQueue<Path> logQueue;
private FileSystem fs;
private Configuration conf;
  private BlockingQueue<WALEntryBatch> entryBatchQueue;
// max (heap) size of each batch - multiply by number of batches in queue to get total
private long replicationBatchSizeCapacity;
// max count of each batch - multiply by number of batches in queue to get total
private int replicationBatchCountCapacity;
// position in the WAL to start reading at
private long currentPosition;
private WALEntryFilter filter;
private long sleepForRetries;
  // Indicates whether this particular worker is running
private boolean isReaderRunning = true;
private ReplicationQueueInfo replicationQueueInfo;
private int maxRetriesMultiplier;
private MetricsSource metrics;
private AtomicLong totalBufferUsed;
private long totalBufferQuota;
/**
* Creates a reader worker for a given WAL queue. Reads WAL entries off a given queue, batches the
* entries, and puts them on a batch queue.
* @param manager replication manager
 * @param replicationQueueInfo info about the replication queue being read
* @param logQueue The WAL queue to read off of
* @param startPosition position in the first WAL to start reading from
 * @param fs the file system to use
* @param conf configuration to use
* @param filter The filter to use while reading
* @param metrics replication metrics
*/
public ReplicationSourceWALReaderThread(ReplicationSourceManager manager,
      ReplicationQueueInfo replicationQueueInfo, PriorityBlockingQueue<Path> logQueue,
long startPosition,
FileSystem fs, Configuration conf, WALEntryFilter filter, MetricsSource metrics) {
this.replicationQueueInfo = replicationQueueInfo;
this.logQueue = logQueue;
this.currentPosition = startPosition;
this.fs = fs;
this.conf = conf;
this.filter = filter;
this.replicationBatchSizeCapacity =
this.conf.getLong("replication.source.size.capacity", 1024 * 1024 * 64);
this.replicationBatchCountCapacity = this.conf.getInt("replication.source.nb.capacity", 25000);
// memory used will be batchSizeCapacity * (nb.batches + 1)
// the +1 is for the current thread reading before placing onto the queue
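    // e.g. with the default 64 MB size capacity and the default of 1 queued batch,
    // up to ~128 MB of WAL entries may be buffered by this reader at once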
int batchCount = conf.getInt("replication.source.nb.batches", 1);
this.totalBufferUsed = manager.getTotalBufferUsed();
this.totalBufferQuota = conf.getLong(HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_KEY,
HConstants.REPLICATION_SOURCE_TOTAL_BUFFER_DFAULT);
this.sleepForRetries =
this.conf.getLong("replication.source.sleepforretries", 1000); // 1 second
this.maxRetriesMultiplier =
this.conf.getInt("replication.source.maxretriesmultiplier", 300); // 5 minutes @ 1 sec per
this.metrics = metrics;
this.entryBatchQueue = new LinkedBlockingQueue<>(batchCount);
LOG.info("peerClusterZnode=" + replicationQueueInfo.getPeerClusterZnode()
+ ", ReplicationSourceWALReaderThread : " + replicationQueueInfo.getPeerId()
+ " inited, replicationBatchSizeCapacity=" + replicationBatchSizeCapacity
+ ", replicationBatchCountCapacity=" + replicationBatchCountCapacity
+ ", replicationBatchQueueCapacity=" + batchCount);
}
@Override
public void run() {
int sleepMultiplier = 1;
while (isReaderRunning()) { // we only loop back here if something fatal happened to our stream
try (WALEntryStream entryStream =
new WALEntryStream(logQueue, fs, conf, currentPosition, metrics)) {
while (isReaderRunning()) { // loop here to keep reusing stream while we can
if (!checkQuota()) {
continue;
}
WALEntryBatch batch = null;
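          // Accumulate filtered entries into the batch until we hit the batch size or
          // count limit, the global buffer quota, a serial replication REGION_CLOSE
          // marker, or the end of the stream.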
while (entryStream.hasNext()) {
if (batch == null) {
batch = new WALEntryBatch(replicationBatchCountCapacity, entryStream.getCurrentPath());
}
Entry entry = entryStream.next();
if (updateSerialReplPos(batch, entry)) {
batch.lastWalPosition = entryStream.getPosition();
break;
}
entry = filterEntry(entry);
if (entry != null) {
WALEdit edit = entry.getEdit();
if (edit != null && !edit.isEmpty()) {
long entrySize = getEntrySize(entry);
batch.addEntry(entry);
updateBatchStats(batch, entry, entryStream.getPosition(), entrySize);
boolean totalBufferTooLarge = acquireBufferQuota(entrySize);
// Stop if too many entries or too big
if (totalBufferTooLarge || batch.getHeapSize() >= replicationBatchSizeCapacity
|| batch.getNbEntries() >= replicationBatchCountCapacity) {
break;
}
}
}
}
if (batch != null && (!batch.getLastSeqIds().isEmpty() || batch.getNbEntries() > 0)) {
if (LOG.isTraceEnabled()) {
LOG.trace(String.format("Read %s WAL entries eligible for replication",
batch.getNbEntries()));
}
entryBatchQueue.put(batch);
sleepMultiplier = 1;
} else { // got no entries and didn't advance position in WAL
LOG.trace("Didn't read any new entries from WAL");
if (replicationQueueInfo.isQueueRecovered()) {
// we're done with queue recovery, shut ourself down
setReaderRunning(false);
// shuts down shipper thread immediately
entryBatchQueue.put(batch != null ? batch
: new WALEntryBatch(replicationBatchCountCapacity, entryStream.getCurrentPath()));
} else {
Thread.sleep(sleepForRetries);
}
}
currentPosition = entryStream.getPosition();
entryStream.reset(); // reuse stream
}
} catch (IOException | WALEntryStreamRuntimeException e) { // stream related
if (sleepMultiplier < maxRetriesMultiplier) {
LOG.debug("Failed to read stream of replication entries: " + e);
sleepMultiplier++;
} else {
LOG.error("Failed to read stream of replication entries", e);
}
Threads.sleep(sleepForRetries * sleepMultiplier);
} catch (InterruptedException e) {
LOG.trace("Interrupted while sleeping between WAL reads");
Thread.currentThread().interrupt();
}
}
}
  /**
   * @return false if we've already exceeded the global buffer quota; sleeps before
   *         returning to give the shipper threads time to drain the queued batches
   */
private boolean checkQuota() {
// try not to go over total quota
if (totalBufferUsed.get() > totalBufferQuota) {
Threads.sleep(sleepForRetries);
return false;
}
return true;
}
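  /**
   * Applies the configured {@link WALEntryFilter} to the entry and counts entries that
   * are filtered out entirely in the replication metrics.
   * @param entry the entry to filter
   * @return the filtered entry, or null if it should not be replicated
   */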
private Entry filterEntry(Entry entry) {
Entry filtered = filter.filter(entry);
if (entry != null && filtered == null) {
metrics.incrLogEditsFiltered();
}
return filtered;
}
  /**
   * Records the entry's sequence id in the batch's per-region position map if the entry
   * has serial replication scope.
   * @return true if we should stop reading because we're at a REGION_CLOSE marker
   */
private boolean updateSerialReplPos(WALEntryBatch batch, Entry entry) throws IOException {
if (entry.hasSerialReplicationScope()) {
String key = Bytes.toString(entry.getKey().getEncodedRegionName());
batch.setLastPosition(key, entry.getKey().getSequenceId());
if (!entry.getEdit().getCells().isEmpty()) {
WALProtos.RegionEventDescriptor maybeEvent =
WALEdit.getRegionEventDescriptor(entry.getEdit().getCells().get(0));
if (maybeEvent != null && maybeEvent
.getEventType() == WALProtos.RegionEventDescriptor.EventType.REGION_CLOSE) {
        // In serial replication, if we move a region to another RS and then move it
        // back, we may read logs crossing two sections. We should break at REGION_CLOSE
        // and push the first section first, so that we do not miss the middle section
        // belonging to the other RS. In a worker thread, once we can push the first log
        // of a region, we can push all logs in the same region without waiting for a
        // close marker, because the next time we read logs for this region they must
        // belong to a new section that is not adjacent to this one. Mark the position
        // negative to record this.
batch.setLastPosition(key, -entry.getKey().getSequenceId());
return true;
}
}
}
return false;
}
  /**
   * Retrieves the next batch of WAL entries from the queue, blocking until one becomes
   * available.
   * @return a batch of entries, along with the position in the log after reading the batch
   * @throws InterruptedException if interrupted while waiting
   */
public WALEntryBatch take() throws InterruptedException {
return entryBatchQueue.take();
}
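  /**
   * @return the heap size of the entry's edit plus the total size of any store files it
   *         references via bulk load markers
   */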
private long getEntrySize(Entry entry) {
WALEdit edit = entry.getEdit();
return edit.heapSize() + calculateTotalSizeOfStoreFiles(edit);
}
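  /**
   * Updates the batch's heap size, distinct row key count, HFile count, and last WAL
   * position to reflect the given entry.
   */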
private void updateBatchStats(WALEntryBatch batch, Entry entry, long entryPosition, long entrySize) {
WALEdit edit = entry.getEdit();
if (edit != null && !edit.isEmpty()) {
batch.incrementHeapSize(entrySize);
      Pair<Integer, Integer> nbRowsAndHFiles = countDistinctRowKeysAndHFiles(edit);
batch.incrementNbRowKeys(nbRowsAndHFiles.getFirst());
batch.incrementNbHFiles(nbRowsAndHFiles.getSecond());
}
batch.lastWalPosition = entryPosition;
}
  /**
   * Counts the number of distinct row keys in the given edit (needed because of
   * mini-batching), as well as the number of HFiles referenced by bulk load markers.
   * Assumes there is at least one Cell in the WALEdit.
   * @param edit edit to count row keys and HFiles from
   * @return the number of distinct row keys and the number of HFiles
   */
  private Pair<Integer, Integer> countDistinctRowKeysAndHFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
int distinctRowKeys = 1;
int totalHFileEntries = 0;
Cell lastCell = cells.get(0);
int totalCells = edit.size();
for (int i = 0; i < totalCells; i++) {
// Count HFiles to be replicated
if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
try {
BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
          List<StoreDescriptor> stores = bld.getStoresList();
int totalStores = stores.size();
for (int j = 0; j < totalStores; j++) {
totalHFileEntries += stores.get(j).getStoreFileList().size();
}
} catch (IOException e) {
LOG.error("Failed to deserialize bulk load entry from wal edit. "
+ "Then its hfiles count will not be added into metric.");
}
}
if (!CellUtil.matchingRows(cells.get(i), lastCell)) {
distinctRowKeys++;
}
lastCell = cells.get(i);
}
    Pair<Integer, Integer> result = new Pair<>(distinctRowKeys, totalHFileEntries);
return result;
}
  /**
   * Calculates the total size of all the store files referenced by bulk load markers in
   * the given edit.
   * @param edit edit to calculate store file sizes from
   * @return the total size of the store files, in bytes
   */
private int calculateTotalSizeOfStoreFiles(WALEdit edit) {
    List<Cell> cells = edit.getCells();
int totalStoreFilesSize = 0;
int totalCells = edit.size();
for (int i = 0; i < totalCells; i++) {
if (CellUtil.matchingQualifier(cells.get(i), WALEdit.BULK_LOAD)) {
try {
BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cells.get(i));
          List<StoreDescriptor> stores = bld.getStoresList();
int totalStores = stores.size();
for (int j = 0; j < totalStores; j++) {
totalStoreFilesSize += stores.get(j).getStoreFileSizeBytes();
}
} catch (IOException e) {
LOG.error("Failed to deserialize bulk load entry from wal edit. "
+ "Size of HFiles part of cell will not be considered in replication "
+ "request size calculation.",
e);
}
}
}
return totalStoreFilesSize;
}
  /**
   * Adds the given size to the total buffer usage.
   * @param size delta size for grown buffer
   * @return true if the total buffer quota has been exceeded, in which case the current
   *         batch should be shipped so the buffer can drain
   */
private boolean acquireBufferQuota(long size) {
return totalBufferUsed.addAndGet(size) >= totalBufferQuota;
}
/**
* @return whether the reader thread is running
*/
public boolean isReaderRunning() {
return isReaderRunning && !isInterrupted();
}
/**
* @param readerRunning the readerRunning to set
*/
public void setReaderRunning(boolean readerRunning) {
this.isReaderRunning = readerRunning;
}
/**
* Holds a batch of WAL entries to replicate, along with some statistics
*
*/
static class WALEntryBatch {
    private List<Entry> walEntries;
// last WAL that was read
private Path lastWalPath;
// position in WAL of last entry in this batch
private long lastWalPosition = 0;
// number of distinct row keys in this batch
private int nbRowKeys = 0;
// number of HFiles
private int nbHFiles = 0;
// heap size of data we need to replicate
private long heapSize = 0;
// save the last sequenceid for each region if the table has serial-replication scope
    private Map<String, Long> lastSeqIds = new HashMap<>();
    /**
     * @param maxNbEntries the maximum number of entries this batch can hold
     * @param lastWalPath Path of the WAL the last entry in this batch was read from
     */
private WALEntryBatch(int maxNbEntries, Path lastWalPath) {
this.walEntries = new ArrayList<>(maxNbEntries);
this.lastWalPath = lastWalPath;
}
public void addEntry(Entry entry) {
walEntries.add(entry);
}
/**
* @return the WAL Entries.
*/
    public List<Entry> getWalEntries() {
return walEntries;
}
/**
* @return the path of the last WAL that was read.
*/
public Path getLastWalPath() {
return lastWalPath;
}
/**
* @return the position in the last WAL that was read.
*/
public long getLastWalPosition() {
return lastWalPosition;
}
public int getNbEntries() {
return walEntries.size();
}
/**
* @return the number of distinct row keys in this batch
*/
public int getNbRowKeys() {
return nbRowKeys;
}
/**
* @return the number of HFiles in this batch
*/
public int getNbHFiles() {
return nbHFiles;
}
/**
* @return total number of operations in this batch
*/
public int getNbOperations() {
return getNbRowKeys() + getNbHFiles();
}
/**
* @return the heap size of this batch
*/
public long getHeapSize() {
return heapSize;
}
/**
* @return the last sequenceid for each region if the table has serial-replication scope
*/
    public Map<String, Long> getLastSeqIds() {
return lastSeqIds;
}
private void incrementNbRowKeys(int increment) {
nbRowKeys += increment;
}
private void incrementNbHFiles(int increment) {
nbHFiles += increment;
}
private void incrementHeapSize(long increment) {
heapSize += increment;
}
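    // Records the last sequence id seen for a region; a negative value marks a
    // REGION_CLOSE boundary in serial replication (see updateSerialReplPos).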
private void setLastPosition(String region, Long sequenceId) {
getLastSeqIds().put(region, sequenceId);
}
}
}