All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tephra.hbase.txprune.HBaseTransactionPruningPlugin Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.tephra.hbase.txprune;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.Iterables;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.TableExistsException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Admin;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.tephra.TxConstants;
import org.apache.tephra.hbase.coprocessor.TransactionProcessor;
import org.apache.tephra.txprune.TransactionPruningPlugin;
import org.apache.tephra.util.TxUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.TreeSet;

/**
 * Default implementation of the {@link TransactionPruningPlugin} for HBase.
 *
 * This plugin determines the prune upper bound for transactional HBase tables that use
 * coprocessor {@link TransactionProcessor}.
 *
 * 

State storage:

* * This plugin expects the TransactionProcessor to save the prune upper bound for invalid transactions * after every major compaction of a region. Let's call this (region, prune upper bound). * In addition, the plugin also persists the following information on a run at time t *
    *
  • * (t, set of regions): Set of transactional regions at time t. * Transactional regions are regions of the tables that have the coprocessor TransactionProcessor * attached to them. *
  • *
  • * (t, inactive transaction bound): This is the smallest not in-progress transaction that * will not have writes in any HBase regions that are created after time t. * This value is determined by the Transaction Service based on the transaction state at time t * and passed on to the plugin. *
  • *
* *

Computing prune upper bound:

* * In a typical HBase instance, there can be a constant change in the number of regions due to region creations, * splits and merges. At any given time there can always be a region on which a major compaction has not been run. * Since the prune upper bound will get recorded for a region only after a major compaction, * using only the latest set of regions we may not be able to find the * prune upper bounds for all the current regions. Hence we persist the set of regions that exist at that time * of each run of the plugin, and use historical region set for time t, t - 1, etc. * to determine the prune upper bound. * * From the regions saved at time t, t - 1, etc., * the plugin tries to find the latest (t, set of regions) where all regions have been major compacted, * i.e, all regions have prune upper bound recorded in (region, prune upper bound). *
* If such a set is found for time t1, the prune upper bound returned by the plugin is the minimum of *
    *
  • Prune upper bounds of regions in set (t1, set of regions)
  • *
  • Inactive transaction bound from (t1, inactive transaction bound)
  • *
* *

* Above, when we find (t1, set of regions), there may a region that was created after time t1, * but has a data write from an invalid transaction that is smaller than the prune upper bounds of all * regions in (t1, set of regions). This is possible because (region, prune upper bound) persisted by * TransactionProcessor is always the latest prune upper bound for a region. *
* However a region created after time t1 cannot have writes from an invalid transaction that is smaller than * inactive transaction bound at the time the region was created. * Since we limit the plugin prune upper bound using (t1, inactive transaction bound), * there should be no invalid transactions smaller than the plugin prune upper bound with writes in any * transactional region of this HBase instance. * *

* Note: If your tables uses a transactional coprocessor other than TransactionProcessor, * then you may need to write a new plugin to compute prune upper bound for those tables. */ @SuppressWarnings("WeakerAccess") public class HBaseTransactionPruningPlugin implements TransactionPruningPlugin { public static final Logger LOG = LoggerFactory.getLogger(HBaseTransactionPruningPlugin.class); protected Configuration conf; protected Connection connection; protected DataJanitorState dataJanitorState; @Override public void initialize(Configuration conf) throws IOException { this.conf = conf; this.connection = ConnectionFactory.createConnection(conf); final TableName stateTable = TableName.valueOf(conf.get(TxConstants.TransactionPruning.PRUNE_STATE_TABLE, TxConstants.TransactionPruning.DEFAULT_PRUNE_STATE_TABLE)); LOG.info("Initializing plugin with state table {}", stateTable.getNameWithNamespaceInclAsString()); createPruneTable(stateTable); this.dataJanitorState = new DataJanitorState(new DataJanitorState.TableSupplier() { @Override public Table get() throws IOException { return connection.getTable(stateTable); } }); } /** * Determines prune upper bound for the data store as mentioned above. */ @Override public long fetchPruneUpperBound(long time, long inactiveTransactionBound) throws IOException { LOG.debug("Fetching prune upper bound for time {} and inactive transaction bound {}", time, inactiveTransactionBound); if (time < 0 || inactiveTransactionBound < 0) { return -1; } // Get all the current transactional regions SortedSet transactionalRegions = getTransactionalRegions(); if (!transactionalRegions.isEmpty()) { LOG.debug("Saving {} transactional regions for time {}", transactionalRegions.size(), time); dataJanitorState.saveRegionsForTime(time, transactionalRegions); // Save inactive transaction bound for time as the final step. // We can then use its existence to make sure that the data for a given time is complete or not LOG.debug("Saving inactive transaction bound {} for time {}", inactiveTransactionBound, time); dataJanitorState.saveInactiveTransactionBoundForTime(time, inactiveTransactionBound); } return computePruneUpperBound(new TimeRegions(time, transactionalRegions)); } /** * After invalid list has been pruned, this cleans up state information that is no longer required. * This includes - *

    *
  • * (region, prune upper bound) - prune upper bound for regions that are older * than maxPrunedInvalid *
  • *
  • * (t, set of regions) - Regions set that were recorded on or before the start time * of maxPrunedInvalid *
  • *
  • * (t, inactive transaction bound) - Smallest not in-progress transaction without any writes in new regions * information recorded on or before the start time of maxPrunedInvalid *
  • *
*/ @Override public void pruneComplete(long time, long maxPrunedInvalid) throws IOException { LOG.debug("Prune complete for time {} and prune upper bound {}", time, maxPrunedInvalid); if (time < 0 || maxPrunedInvalid < 0) { return; } // Get regions for the current time, so as to not delete the prune upper bounds for them. // The prune upper bounds for regions are recorded by TransactionProcessor and the deletion // is done by this class. To avoid update/delete race condition, we only delete prune upper // bounds for the stale regions. TimeRegions regionsToExclude = dataJanitorState.getRegionsOnOrBeforeTime(time); if (regionsToExclude != null) { LOG.debug("Deleting prune upper bounds smaller than {} for stale regions", maxPrunedInvalid); dataJanitorState.deletePruneUpperBounds(maxPrunedInvalid, regionsToExclude.getRegions()); } else { LOG.warn("Cannot find saved regions on or before time {}", time); } long pruneTime = TxUtils.getTimestamp(maxPrunedInvalid); LOG.debug("Deleting regions recorded before time {}", pruneTime); dataJanitorState.deleteAllRegionsOnOrBeforeTime(pruneTime); LOG.debug("Deleting inactive transaction bounds recorded on or before time {}", pruneTime); dataJanitorState.deleteInactiveTransactionBoundsOnOrBeforeTime(pruneTime); LOG.debug("Deleting empty regions recorded on or before time {}", pruneTime); dataJanitorState.deleteEmptyRegionsOnOrBeforeTime(pruneTime); } @Override public void destroy() { LOG.info("Stopping plugin..."); try { connection.close(); } catch (IOException e) { LOG.error("Got exception while closing HBase connection", e); } } /** * Create the prune state table given the {@link TableName} if the table doesn't exist already. * * @param stateTable prune state table name */ protected void createPruneTable(TableName stateTable) throws IOException { try (Admin admin = this.connection.getAdmin()) { if (admin.tableExists(stateTable)) { LOG.debug("Not creating pruneStateTable {} since it already exists.", stateTable.getNameWithNamespaceInclAsString()); return; } HTableDescriptor htd = new HTableDescriptor(stateTable); htd.addFamily(new HColumnDescriptor(DataJanitorState.FAMILY).setMaxVersions(1)); admin.createTable(htd); LOG.info("Created pruneTable {}", stateTable.getNameWithNamespaceInclAsString()); } catch (TableExistsException ex) { // Expected if the prune state table is being created at the same time by another client LOG.debug("Not creating pruneStateTable {} since it already exists.", stateTable.getNameWithNamespaceInclAsString(), ex); } } /** * Returns whether the table is a transactional table. By default, it is a table is identified as a transactional * table if it has a the coprocessor {@link TransactionProcessor} attached to it. Should be overriden if the users * attach a different coprocessor. * * @param tableDescriptor {@link HTableDescriptor} of the table * @return true if the table is transactional */ protected boolean isTransactionalTable(HTableDescriptor tableDescriptor) { return tableDescriptor.hasCoprocessor(TransactionProcessor.class.getName()); } protected SortedSet getTransactionalRegions() throws IOException { SortedSet regions = new TreeSet<>(Bytes.BYTES_COMPARATOR); try (Admin admin = connection.getAdmin()) { HTableDescriptor[] tableDescriptors = admin.listTables(); LOG.debug("Got {} tables to process", tableDescriptors == null ? 0 : tableDescriptors.length); if (tableDescriptors != null) { for (HTableDescriptor tableDescriptor : tableDescriptors) { if (isTransactionalTable(tableDescriptor)) { List tableRegions = admin.getTableRegions(tableDescriptor.getTableName()); LOG.debug("Regions for table {}: {}", tableDescriptor.getTableName(), tableRegions); if (tableRegions != null) { for (HRegionInfo region : tableRegions) { regions.add(region.getRegionName()); } } } else { LOG.debug("{} is not a transactional table", tableDescriptor.getTableName()); } } } } return regions; } /** * Try to find the latest set of regions in which all regions have been major compacted, and * compute prune upper bound from them. Starting from newest to oldest, this looks into the * region set that has been saved periodically, and joins it with the prune upper bound data * for a region recorded after a major compaction. * * @param timeRegions the latest set of regions * @return prune upper bound * @throws IOException when not able to talk to HBase */ private long computePruneUpperBound(TimeRegions timeRegions) throws IOException { // Get the tables for the current time from the latest regions set final Set existingTables = getTableNamesForRegions(timeRegions.getRegions()); LOG.debug("Tables for time {} = {}", timeRegions.getTime(), existingTables); do { LOG.debug("Computing prune upper bound for {}", timeRegions); SortedSet transactionalRegions = timeRegions.getRegions(); long time = timeRegions.getTime(); long inactiveTransactionBound = dataJanitorState.getInactiveTransactionBoundForTime(time); LOG.debug("Got inactive transaction bound {}", inactiveTransactionBound); // If inactiveTransactionBound is not recorded then that means the data is not complete for these regions if (inactiveTransactionBound == -1) { if (LOG.isDebugEnabled()) { LOG.debug("Ignoring regions for time {} as no inactiveTransactionBound was found for that time, " + "and hence the data must be incomplete", time); } continue; } // Remove non-existing tables from the transactional regions set, so that we don't lookup prune upper bounds // for them. Since the deleted tables do not exist anymore, there is no need to make sure they have been // compacted. This ensures that transient tables do not block pruning progress. transactionalRegions = filterDeletedTableRegions(existingTables, transactionalRegions); if (LOG.isDebugEnabled()) { LOG.debug("Transactional regions after removing the regions of non-existing tables = {}", Iterables.transform(transactionalRegions, TimeRegions.BYTE_ARR_TO_STRING_FN)); } // Get the prune upper bounds for all the transactional regions Map pruneUpperBoundRegions = dataJanitorState.getPruneUpperBoundForRegions(transactionalRegions); logPruneUpperBoundRegions(pruneUpperBoundRegions); // Use inactiveTransactionBound as the prune upper bound for the empty regions since the regions that are // recorded as empty after inactiveTransactionBoundTime will not have invalid data // for transactions started on or before inactiveTransactionBoundTime pruneUpperBoundRegions = handleEmptyRegions(inactiveTransactionBound, transactionalRegions, pruneUpperBoundRegions); // If prune upper bounds are found for all the transactional regions, then compute the prune upper bound // across all regions if (!transactionalRegions.isEmpty() && pruneUpperBoundRegions.size() == transactionalRegions.size()) { Long minPruneUpperBoundRegions = Collections.min(pruneUpperBoundRegions.values()); long pruneUpperBound = Math.min(inactiveTransactionBound, minPruneUpperBoundRegions); LOG.debug("Found prune upper bound {} for time {}", pruneUpperBound, time); return pruneUpperBound; } else { if (LOG.isDebugEnabled()) { Sets.SetView difference = Sets.difference(transactionalRegions, pruneUpperBoundRegions.keySet()); LOG.debug("Ignoring regions for time {} because the following regions did not record a pruneUpperBound: {}", time, Iterables.transform(difference, TimeRegions.BYTE_ARR_TO_STRING_FN)); } } timeRegions = dataJanitorState.getRegionsOnOrBeforeTime(time - 1); } while (timeRegions != null); return -1; } private SortedSet filterDeletedTableRegions(final Set existingTables, SortedSet transactionalRegions) { return Sets.filter(transactionalRegions, new Predicate() { @Override public boolean apply(byte[] region) { return existingTables.contains(HRegionInfo.getTable(region)); } }); } private Set getTableNamesForRegions(Set regions) { Set tableNames = new HashSet<>(regions.size()); for (byte[] region : regions) { tableNames.add(HRegionInfo.getTable(region)); } return tableNames; } private Map handleEmptyRegions(long inactiveTransactionBound, SortedSet transactionalRegions, Map pruneUpperBoundRegions) throws IOException { long inactiveTransactionBoundTime = TxUtils.getTimestamp(inactiveTransactionBound); SortedSet emptyRegions = dataJanitorState.getEmptyRegionsAfterTime(inactiveTransactionBoundTime, transactionalRegions); LOG.debug("Got empty transactional regions for inactive transaction bound time {}: {}", inactiveTransactionBoundTime, Iterables.transform(emptyRegions, TimeRegions.BYTE_ARR_TO_STRING_FN)); // The regions that are recorded as empty after inactiveTransactionBoundTime will not have invalid data // for transactions started before or on inactiveTransactionBoundTime. Hence we can consider the prune upper bound // for these empty regions as inactiveTransactionBound Map pubWithEmptyRegions = new TreeMap<>(Bytes.BYTES_COMPARATOR); pubWithEmptyRegions.putAll(pruneUpperBoundRegions); for (byte[] emptyRegion : emptyRegions) { if (!pruneUpperBoundRegions.containsKey(emptyRegion)) { pubWithEmptyRegions.put(emptyRegion, inactiveTransactionBound); } } return Collections.unmodifiableMap(pubWithEmptyRegions); } private void logPruneUpperBoundRegions(Map pruneUpperBoundRegions) { if (LOG.isDebugEnabled()) { LOG.debug("Got region - prune upper bound map: {}", Iterables.transform(pruneUpperBoundRegions.entrySet(), new Function, Map.Entry>() { @Override public Map.Entry apply(Map.Entry input) { String regionName = TimeRegions.BYTE_ARR_TO_STRING_FN.apply(input.getKey()); return Maps.immutableEntry(regionName, input.getValue()); } })); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy