org.apache.hadoop.hbase.replication.regionserver.ReplicationSink

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.replication.regionserver;

import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.OFFSET_COLUMN;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.REPLICATION_SINK_TRACKER_ENABLED_DEFAULT;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.REPLICATION_SINK_TRACKER_ENABLED_KEY;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.REPLICATION_SINK_TRACKER_INFO_FAMILY;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.REPLICATION_SINK_TRACKER_TABLE_NAME;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.RS_COLUMN;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.TIMESTAMP_COLUMN;
import static org.apache.hadoop.hbase.replication.master.ReplicationSinkTrackerTableCreator.WAL_NAME_COLUMN;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;
import java.util.UUID;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import org.apache.commons.lang3.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.CellUtil;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.TableNotFoundException;
import org.apache.hadoop.hbase.client.AsyncClusterConnection;
import org.apache.hadoop.hbase.client.AsyncTable;
import org.apache.hadoop.hbase.client.ClusterConnectionFactory;
import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.RetriesExhaustedException;
import org.apache.hadoop.hbase.client.Row;
import org.apache.hadoop.hbase.regionserver.RegionServerCoprocessorHost;
import org.apache.hadoop.hbase.replication.ReplicationUtils;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.FutureUtils;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.wal.WALEdit;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.Lists;

import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.WALEntry;
import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.BulkLoadDescriptor;
import org.apache.hadoop.hbase.shaded.protobuf.generated.WALProtos.StoreDescriptor;

/**
 * <p>
 * This class is responsible for replicating the edits coming from another cluster.
 * </p>
 * <p>
 * This replication process is currently waiting for the edits to be applied before the method can
 * return. This means that the replication of edits is synchronized (after reading from WALs in
 * ReplicationSource) and that a single region server cannot receive edits from two sources at the
 * same time
 * </p>
 * <p>
 * This class uses the native HBase client in order to replicate entries.
 * </p>
 * TODO make this class more like ReplicationSource wrt log handling
 */
@InterfaceAudience.Private
public class ReplicationSink {

  private static final Logger LOG = LoggerFactory.getLogger(ReplicationSink.class);
  private final Configuration conf;
  // Volatile because of note in here -- look for double-checked locking:
  // http://www.oracle.com/technetwork/articles/javase/bloch-effective-08-qa-140880.html
  private volatile AsyncClusterConnection sharedConn;
  private final MetricsSink metrics;
  private final AtomicLong totalReplicatedEdits = new AtomicLong();
  private final Object sharedConnLock = new Object();
  // Number of hfiles that we successfully replicated
  private long hfilesReplicated = 0;
  private SourceFSConfigurationProvider provider;
  private WALEntrySinkFilter walEntrySinkFilter;

  /**
   * Row size threshold for multi requests above which a warning is logged
   */
  private final int rowSizeWarnThreshold;
  private boolean replicationSinkTrackerEnabled;

  private final RegionServerCoprocessorHost rsServerHost;

  /**
   * Create a sink for replication
   * @param conf conf object
   * @throws IOException thrown when HDFS goes bad or bad file name
   */
  public ReplicationSink(Configuration conf, RegionServerCoprocessorHost rsServerHost)
    throws IOException {
    this.conf = HBaseConfiguration.create(conf);
    this.rsServerHost = rsServerHost;
    rowSizeWarnThreshold =
      conf.getInt(HConstants.BATCH_ROWS_THRESHOLD_NAME, HConstants.BATCH_ROWS_THRESHOLD_DEFAULT);
    replicationSinkTrackerEnabled = conf.getBoolean(REPLICATION_SINK_TRACKER_ENABLED_KEY,
      REPLICATION_SINK_TRACKER_ENABLED_DEFAULT);
    decorateConf();
    this.metrics = new MetricsSink();
    this.walEntrySinkFilter = setupWALEntrySinkFilter();
    String className = conf.get("hbase.replication.source.fs.conf.provider",
      DefaultSourceFSConfigurationProvider.class.getCanonicalName());
    try {
      Class<? extends SourceFSConfigurationProvider> c =
        Class.forName(className).asSubclass(SourceFSConfigurationProvider.class);
      this.provider = c.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      throw new IllegalArgumentException(
        "Configured source fs configuration provider class " + className + " throws error.", e);
    }
  }

  private WALEntrySinkFilter setupWALEntrySinkFilter() throws IOException {
    Class<?> walEntryFilterClass =
      this.conf.getClass(WALEntrySinkFilter.WAL_ENTRY_FILTER_KEY, null);
    WALEntrySinkFilter filter = null;
    try {
      filter = walEntryFilterClass == null
        ? null
        : (WALEntrySinkFilter) walEntryFilterClass.getDeclaredConstructor().newInstance();
    } catch (Exception e) {
      LOG.warn("Failed to instantiate " + walEntryFilterClass);
    }
    if (filter != null) {
      filter.init(getConnection());
    }
    return filter;
  }

  /**
   * decorate the Configuration object to make replication more receptive to delays: lessen the
   * timeout and numTries.
   */
  private void decorateConf() {
    this.conf.setInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER,
      this.conf.getInt("replication.sink.client.retries.number", 4));
    this.conf.setInt(HConstants.HBASE_CLIENT_OPERATION_TIMEOUT,
      this.conf.getInt("replication.sink.client.ops.timeout", 10000));
    String replicationCodec = this.conf.get(HConstants.REPLICATION_CODEC_CONF_KEY);
    if (StringUtils.isNotEmpty(replicationCodec)) {
      this.conf.set(HConstants.RPC_CODEC_CONF_KEY, replicationCodec);
    }
    // use server ZK cluster for replication, so we unset the client ZK related properties if any
    if (this.conf.get(HConstants.CLIENT_ZOOKEEPER_QUORUM) != null) {
      this.conf.unset(HConstants.CLIENT_ZOOKEEPER_QUORUM);
    }
  }

  /**
   * Replicate this array of entries directly into the local cluster using the native client. Only
   * operates against raw protobuf type saving on a conversion from pb to pojo.
   * @param entries                    WAL entries to be replicated.
   * @param cells                      cell scanner for iteration.
   * @param replicationClusterId       Id which will uniquely identify source cluster FS client
   *                                   configurations in the replication configuration directory
   * @param sourceBaseNamespaceDirPath Path that point to the source cluster base namespace
   *                                   directory
   * @param sourceHFileArchiveDirPath  Path that point to the source cluster hfile archive
   *                                   directory
   * @throws IOException If failed to replicate the data
   */
  public void replicateEntries(List<WALEntry> entries, final CellScanner cells,
    String replicationClusterId, String sourceBaseNamespaceDirPath,
    String sourceHFileArchiveDirPath) throws IOException {
    if (entries.isEmpty()) {
      return;
    }
    // Very simple optimization where we batch sequences of rows going
    // to the same table.
    try {
      long totalReplicated = 0;
      // Map of table => list of Rows, grouped by cluster id, we only want to flushCommits once per
      // invocation of this method per table and cluster id.
      Map<TableName, Map<List<UUID>, List<Row>>> rowMap = new TreeMap<>();

      Map<List<String>, Map<String, List<Pair<byte[], List<String>>>>> bulkLoadsPerClusters = null;
      Pair<List<Mutation>, List<WALEntry>> mutationsToWalEntriesPairs =
        new Pair<>(new ArrayList<>(), new ArrayList<>());
      for (WALEntry entry : entries) {
        TableName table = TableName.valueOf(entry.getKey().getTableName().toByteArray());
        if (this.walEntrySinkFilter != null) {
          if (this.walEntrySinkFilter.filter(table, entry.getKey().getWriteTime())) {
            // Skip Cells in CellScanner associated with this entry.
            int count = entry.getAssociatedCellCount();
            for (int i = 0; i < count; i++) {
              // Throw index out of bounds if our cell count is off
              if (!cells.advance()) {
                this.metrics.incrementFailedBatches();
                throw new ArrayIndexOutOfBoundsException("Expected=" + count + ", index=" + i);
              }
            }
            continue;
          }
        }
        Cell previousCell = null;
        Mutation mutation = null;
        int count = entry.getAssociatedCellCount();
        for (int i = 0; i < count; i++) {
          // Throw index out of bounds if our cell count is off
          if (!cells.advance()) {
            this.metrics.incrementFailedBatches();
            throw new ArrayIndexOutOfBoundsException("Expected=" + count + ", index=" + i);
          }
          Cell cell = cells.current();
          // Handle bulk load hfiles replication
          if (CellUtil.matchingQualifier(cell, WALEdit.BULK_LOAD)) {
            BulkLoadDescriptor bld = WALEdit.getBulkLoadDescriptor(cell);
            if (bld.getReplicate()) {
              if (bulkLoadsPerClusters == null) {
                bulkLoadsPerClusters = new HashMap<>();
              }
              // Map of table name Vs list of pair of family and list of
              // hfile paths from its namespace
              Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap =
                bulkLoadsPerClusters.computeIfAbsent(bld.getClusterIdsList(), k -> new HashMap<>());
              buildBulkLoadHFileMap(bulkLoadHFileMap, table, bld);
            }
          } else if (CellUtil.matchingQualifier(cell, WALEdit.REPLICATION_MARKER)) {
            Mutation put = processReplicationMarkerEntry(cell);
            if (put == null) {
              continue;
            }
            table = REPLICATION_SINK_TRACKER_TABLE_NAME;
            List<UUID> clusterIds = new ArrayList<>();
            for (HBaseProtos.UUID clusterId : entry.getKey().getClusterIdsList()) {
              clusterIds.add(toUUID(clusterId));
            }
            put.setClusterIds(clusterIds);
            addToHashMultiMap(rowMap, table, clusterIds, put);
          } else {
            // Handle wal replication
            if (isNewRowOrType(previousCell, cell)) {
              // Create new mutation
              mutation = CellUtil.isDelete(cell)
                ? new Delete(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength())
                : new Put(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
              List<UUID> clusterIds = new ArrayList<>(entry.getKey().getClusterIdsList().size());
              for (HBaseProtos.UUID clusterId : entry.getKey().getClusterIdsList()) {
                clusterIds.add(toUUID(clusterId));
              }
              mutation.setClusterIds(clusterIds);
              mutation.setAttribute(ReplicationUtils.REPLICATION_ATTR_NAME,
                HConstants.EMPTY_BYTE_ARRAY);
              if (rsServerHost != null) {
                rsServerHost.preReplicationSinkBatchMutate(entry, mutation);
                mutationsToWalEntriesPairs.getFirst().add(mutation);
                mutationsToWalEntriesPairs.getSecond().add(entry);
              }
              addToHashMultiMap(rowMap, table, clusterIds, mutation);
            }
            if (CellUtil.isDelete(cell)) {
              ((Delete) mutation).add(cell);
            } else {
              ((Put) mutation).add(cell);
            }
            previousCell = cell;
          }
        }
        totalReplicated++;
      }

      // TODO Replicating mutations and bulk loaded data can be made parallel
      if (!rowMap.isEmpty()) {
        LOG.debug("Started replicating mutations.");
        for (Entry<TableName, Map<List<UUID>, List<Row>>> entry : rowMap.entrySet()) {
          batch(entry.getKey(), entry.getValue().values(), rowSizeWarnThreshold);
        }
        LOG.debug("Finished replicating mutations.");
      }

      if (rsServerHost != null) {
        List<Mutation> mutations = mutationsToWalEntriesPairs.getFirst();
        List<WALEntry> walEntries = mutationsToWalEntriesPairs.getSecond();
        for (int i = 0; i < mutations.size(); i++) {
          rsServerHost.postReplicationSinkBatchMutate(walEntries.get(i), mutations.get(i));
        }
      }

      if (bulkLoadsPerClusters != null) {
        for (Entry<List<String>,
          Map<String, List<Pair<byte[], List<String>>>>> entry : bulkLoadsPerClusters.entrySet()) {
          Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap = entry.getValue();
          if (bulkLoadHFileMap != null && !bulkLoadHFileMap.isEmpty()) {
            LOG.debug("Replicating {} bulk loaded data", entry.getKey().toString());
            Configuration providerConf = this.provider.getConf(this.conf, replicationClusterId);
            try (HFileReplicator hFileReplicator = new HFileReplicator(providerConf,
              sourceBaseNamespaceDirPath, sourceHFileArchiveDirPath, bulkLoadHFileMap, conf,
              getConnection(), entry.getKey())) {
              hFileReplicator.replicate();
              LOG.debug("Finished replicating {} bulk loaded data", entry.getKey().toString());
            }
          }
        }
      }

      int size = entries.size();
      this.metrics.setAgeOfLastAppliedOp(entries.get(size - 1).getKey().getWriteTime());
      this.metrics.applyBatch(size + hfilesReplicated, hfilesReplicated);
      this.totalReplicatedEdits.addAndGet(totalReplicated);
    } catch (IOException ex) {
      LOG.error("Unable to accept edit because:", ex);
      this.metrics.incrementFailedBatches();
      throw ex;
    }
  }

  /*
   * First check if config key hbase.regionserver.replication.sink.tracker.enabled is true or not.
   * If false, then ignore this cell. If set to true, de-serialize value into
   * ReplicationTrackerDescriptor. Create a Put mutation with regionserver name, walname, offset
   * and timestamp from ReplicationMarkerDescriptor.
   */
  private Put processReplicationMarkerEntry(Cell cell) throws IOException {
    // If source is emitting replication marker rows but sink is not accepting them,
    // ignore the edits.
    if (!replicationSinkTrackerEnabled) {
      return null;
    }
    WALProtos.ReplicationMarkerDescriptor descriptor =
      WALProtos.ReplicationMarkerDescriptor.parseFrom(new ByteArrayInputStream(
        cell.getValueArray(), cell.getValueOffset(), cell.getValueLength()));
    Put put = new Put(cell.getRowArray(), cell.getRowOffset(), cell.getRowLength());
    put.addColumn(REPLICATION_SINK_TRACKER_INFO_FAMILY, RS_COLUMN, cell.getTimestamp(),
      (Bytes.toBytes(descriptor.getRegionServerName())));
    put.addColumn(REPLICATION_SINK_TRACKER_INFO_FAMILY, WAL_NAME_COLUMN, cell.getTimestamp(),
      Bytes.toBytes(descriptor.getWalName()));
    put.addColumn(REPLICATION_SINK_TRACKER_INFO_FAMILY, TIMESTAMP_COLUMN, cell.getTimestamp(),
      Bytes.toBytes(cell.getTimestamp()));
    put.addColumn(REPLICATION_SINK_TRACKER_INFO_FAMILY, OFFSET_COLUMN, cell.getTimestamp(),
      Bytes.toBytes(descriptor.getOffset()));
    return put;
  }

  private void buildBulkLoadHFileMap(
    final Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap, TableName table,
    BulkLoadDescriptor bld) throws IOException {
    List<StoreDescriptor> storesList = bld.getStoresList();
    int storesSize = storesList.size();
    for (int j = 0; j < storesSize; j++) {
      StoreDescriptor storeDescriptor = storesList.get(j);
      List<String> storeFileList = storeDescriptor.getStoreFileList();
      int storeFilesSize = storeFileList.size();
      hfilesReplicated += storeFilesSize;
      for (int k = 0; k < storeFilesSize; k++) {
        byte[] family = storeDescriptor.getFamilyName().toByteArray();

        // Build hfile relative path from its namespace
        String pathToHfileFromNS = getHFilePath(table, bld, storeFileList.get(k), family);
        String tableName = table.getNameWithNamespaceInclAsString();
        List<Pair<byte[], List<String>>> familyHFilePathsList = bulkLoadHFileMap.get(tableName);
        if (familyHFilePathsList != null) {
          boolean foundFamily = false;
          for (Pair<byte[], List<String>> familyHFilePathsPair : familyHFilePathsList) {
            if (Bytes.equals(familyHFilePathsPair.getFirst(), family)) {
              // Found family already present, just add the path to the existing list
              familyHFilePathsPair.getSecond().add(pathToHfileFromNS);
              foundFamily = true;
              break;
            }
          }
          if (!foundFamily) {
            // Family not found, add this family and its hfile paths pair to the list
            addFamilyAndItsHFilePathToTableInMap(family, pathToHfileFromNS, familyHFilePathsList);
          }
        } else {
          // Add this table entry into the map
          addNewTableEntryInMap(bulkLoadHFileMap, family, pathToHfileFromNS, tableName);
        }
      }
    }
  }

  private void addFamilyAndItsHFilePathToTableInMap(byte[] family, String pathToHfileFromNS,
    List<Pair<byte[], List<String>>> familyHFilePathsList) {
    List<String> hfilePaths = new ArrayList<>(1);
    hfilePaths.add(pathToHfileFromNS);
    familyHFilePathsList.add(new Pair<>(family, hfilePaths));
  }

  private void addNewTableEntryInMap(
    final Map<String, List<Pair<byte[], List<String>>>> bulkLoadHFileMap, byte[] family,
    String pathToHfileFromNS, String tableName) {
    List<String> hfilePaths = new ArrayList<>(1);
    hfilePaths.add(pathToHfileFromNS);
    Pair<byte[], List<String>> newFamilyHFilePathsPair = new Pair<>(family, hfilePaths);
    List<Pair<byte[], List<String>>> newFamilyHFilePathsList = new ArrayList<>();
    newFamilyHFilePathsList.add(newFamilyHFilePathsPair);
    bulkLoadHFileMap.put(tableName, newFamilyHFilePathsList);
  }

  private String getHFilePath(TableName table, BulkLoadDescriptor bld, String storeFile,
    byte[] family) {
    return new StringBuilder(100).append(table.getNamespaceAsString()).append(Path.SEPARATOR)
      .append(table.getQualifierAsString()).append(Path.SEPARATOR)
      .append(Bytes.toString(bld.getEncodedRegionName().toByteArray())).append(Path.SEPARATOR)
      .append(Bytes.toString(family)).append(Path.SEPARATOR).append(storeFile).toString();
  }

  /** Returns True if we have crossed over onto a new row or type */
  private boolean isNewRowOrType(final Cell previousCell, final Cell cell) {
    return previousCell == null || previousCell.getTypeByte() != cell.getTypeByte()
      || !CellUtil.matchingRows(previousCell, cell);
  }

  private java.util.UUID toUUID(final HBaseProtos.UUID uuid) {
    return new java.util.UUID(uuid.getMostSigBits(), uuid.getLeastSigBits());
  }

  /**
   * Simple helper to a map from key to (a list of) values TODO: Make a general utility method
   * @return the list of values corresponding to key1 and key2
   */
  private <K1, K2, V> List<V> addToHashMultiMap(Map<K1, Map<K2, List<V>>> map, K1 key1, K2 key2,
    V value) {
    Map<K2, List<V>> innerMap = map.computeIfAbsent(key1, k -> new HashMap<>());
    List<V> values = innerMap.computeIfAbsent(key2, k -> new ArrayList<>());
    values.add(value);
    return values;
  }

  /**
   * stop the thread pool executor. It is called when the regionserver is stopped.
   */
  public void stopReplicationSinkServices() {
    try {
      if (this.sharedConn != null) {
        synchronized (sharedConnLock) {
          if (this.sharedConn != null) {
            this.sharedConn.close();
            this.sharedConn = null;
          }
        }
      }
    } catch (IOException e) {
      LOG.warn("IOException while closing the connection", e); // ignoring as we are closing.
    }
  }

  /**
   * Do the changes and handle the pool
   * @param tableName             table to insert into
   * @param allRows               list of actions
   * @param batchRowSizeThreshold rowSize threshold for batch mutation
   */
  private void batch(TableName tableName, Collection<List<Row>> allRows, int batchRowSizeThreshold)
    throws IOException {
    if (allRows.isEmpty()) {
      return;
    }
    AsyncTable<?> table = getConnection().getTable(tableName);
    List<Future<?>> futures = new ArrayList<>();
    for (List<Row> rows : allRows) {
      List<List<Row>> batchRows;
      if (rows.size() > batchRowSizeThreshold) {
        batchRows = Lists.partition(rows, batchRowSizeThreshold);
      } else {
        batchRows = Collections.singletonList(rows);
      }
      futures.addAll(batchRows.stream().map(table::batchAll).collect(Collectors.toList()));
    }

    for (Future<?> future : futures) {
      try {
        FutureUtils.get(future);
      } catch (RetriesExhaustedException e) {
        if (e.getCause() instanceof TableNotFoundException) {
          throw new TableNotFoundException("'" + tableName + "'");
        }
        throw e;
      }
    }
  }

  private AsyncClusterConnection getConnection() throws IOException {
    // See https://en.wikipedia.org/wiki/Double-checked_locking
    AsyncClusterConnection connection = sharedConn;
    if (connection == null) {
      synchronized (sharedConnLock) {
        connection = sharedConn;
        if (connection == null) {
          connection = ClusterConnectionFactory.createAsyncClusterConnection(conf, null,
            UserProvider.instantiate(conf).getCurrent());
          sharedConn = connection;
        }
      }
    }
    return connection;
  }

  /**
   * Get a string representation of this sink's metrics
   * @return string with the total replicated edits count and the date of the last edit that was
   *         applied
   */
  public String getStats() {
    long total = this.totalReplicatedEdits.get();
    return total == 0
      ? ""
      : "Sink: " + "age in ms of last applied edit: " + this.metrics.refreshAgeOfLastAppliedOp()
        + ", total replicated edits: " + total;
  }

  /**
   * Get replication Sink Metrics
   */
  public MetricsSink getSinkMetrics() {
    return this.metrics;
  }
}