All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.hadoop.hbase.quotas.FileArchiverNotifierImpl Maven / Gradle / Ivy

There is a newer version: 3.0.0-beta-1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to you under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.quotas;

import java.io.IOException;
import java.io.InterruptedIOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.locks.ReentrantReadWriteLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.ReadLock;
import java.util.concurrent.locks.ReentrantReadWriteLock.WriteLock;
import java.util.function.Predicate;
import java.util.stream.Collectors;

import org.apache.commons.lang3.builder.HashCodeBuilder;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.snapshot.SnapshotDescriptionUtils;
import org.apache.hadoop.hbase.snapshot.SnapshotManifest;
import org.apache.hadoop.hbase.util.FSUtils;
import org.apache.hadoop.hbase.util.HFileArchiveUtil;
import org.apache.hadoop.util.StringUtils;
import org.apache.yetus.audience.InterfaceAudience;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hbase.thirdparty.com.google.common.collect.HashMultimap;
import org.apache.hbase.thirdparty.com.google.common.collect.Multimap;
import org.apache.hbase.thirdparty.com.google.protobuf.InvalidProtocolBufferException;

import org.apache.hadoop.hbase.shaded.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotDescription;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest.FamilyFiles;
import org.apache.hadoop.hbase.shaded.protobuf.generated.SnapshotProtos.SnapshotRegionManifest.StoreFile;

/**
 * Tracks file archiving and updates the hbase quota table.
 */
@InterfaceAudience.Private
public class FileArchiverNotifierImpl implements FileArchiverNotifier {
  private static final Logger LOG = LoggerFactory.getLogger(FileArchiverNotifierImpl.class);
  private final Connection conn;
  private final Configuration conf;
  private final FileSystem fs;
  private final TableName tn;
  private final ReadLock readLock;
  private final WriteLock writeLock;
  private volatile long lastFullCompute = Long.MIN_VALUE;
  private List currentSnapshots = Collections.emptyList();
  private static final Map NAMESPACE_LOCKS = new HashMap<>();

  /**
   * An Exception thrown when SnapshotSize updates to hbase:quota fail to be written.
   */
  @InterfaceAudience.Private
  public static class QuotaSnapshotSizeSerializationException extends IOException {
    private static final long serialVersionUID = 1L;

    public QuotaSnapshotSizeSerializationException(String msg) {
      super(msg);
    }
  }

  public FileArchiverNotifierImpl(
      Connection conn, Configuration conf, FileSystem fs, TableName tn) {
    this.conn = conn;
    this.conf = conf;
    this.fs = fs;
    this.tn = tn;
    ReentrantReadWriteLock lock = new ReentrantReadWriteLock();
    readLock = lock.readLock();
    writeLock = lock.writeLock();
  }

  static synchronized Object getLockForNamespace(String namespace) {
    return NAMESPACE_LOCKS.computeIfAbsent(namespace, (ns) -> new Object());
  }

  /**
   * Returns a strictly-increasing measure of time extracted by {@link System#nanoTime()}.
   */
  long getLastFullCompute() {
    return lastFullCompute;
  }

  @Override
  public void addArchivedFiles(Set> fileSizes) throws IOException {
    long start = System.nanoTime();
    readLock.lock();
    try {
      // We want to catch the case where we got an archival request, but there was a full
      // re-computation in progress that was blocking us. Most likely, the full computation is going
      // to already include the changes we were going to make.
      //
      // Same as "start < lastFullCompute" but avoiding numeric overflow per the
      // System.nanoTime() javadoc
      if (lastFullCompute != Long.MIN_VALUE && start - lastFullCompute < 0) {
        if (LOG.isTraceEnabled()) {
          LOG.trace("A full computation was performed after this request was received."
              + " Ignoring requested updates: " + fileSizes);
        }
        return;
      }

      if (LOG.isTraceEnabled()) {
        LOG.trace("currentSnapshots: " + currentSnapshots + " fileSize: "+ fileSizes);
      }

      // Write increment to quota table for the correct snapshot. Only do this if we have snapshots
      // and some files that were archived.
      if (!currentSnapshots.isEmpty() && !fileSizes.isEmpty()) {
        // We get back the files which no snapshot referenced (the files which will be deleted soon)
        groupArchivedFiledBySnapshotAndRecordSize(currentSnapshots, fileSizes);
      }
    } finally {
      readLock.unlock();
    }
  }

  /**
   * For each file in the map, this updates the first snapshot (lexicographic snapshot name) that
   * references this file. The result of this computation is serialized to the quota table.
   *
   * @param snapshots A collection of HBase snapshots to group the files into
   * @param fileSizes A map of file names to their sizes
   */
  void groupArchivedFiledBySnapshotAndRecordSize(
      List snapshots, Set> fileSizes) throws IOException {
    // Make a copy as we'll modify it.
    final Map filesToUpdate = new HashMap<>(fileSizes.size());
    for (Entry entry : fileSizes) {
      filesToUpdate.put(entry.getKey(), entry.getValue());
    }
    // Track the change in size to each snapshot
    final Map snapshotSizeChanges = new HashMap<>();
    for (String snapshot : snapshots) {
      // For each file in `filesToUpdate`, check if `snapshot` refers to it.
      // If `snapshot` does, remove it from `filesToUpdate` and add it to `snapshotSizeChanges`.
      bucketFilesToSnapshot(snapshot, filesToUpdate, snapshotSizeChanges);
      if (filesToUpdate.isEmpty()) {
        // If we have no more files recently archived, we have nothing more to check
        break;
      }
    }
    // We have computed changes to the snapshot size, we need to record them.
    if (!snapshotSizeChanges.isEmpty()) {
      if (LOG.isTraceEnabled()) {
        LOG.trace("Writing snapshot size changes for: " + snapshotSizeChanges);
      }
      persistSnapshotSizeChanges(snapshotSizeChanges);
    }
  }

  /**
   * For the given snapshot, find all files which this {@code snapshotName} references. After a file
   * is found to be referenced by the snapshot, it is removed from {@code filesToUpdate} and
   * {@code snapshotSizeChanges} is updated in concert.
   *
   * @param snapshotName The snapshot to check
   * @param filesToUpdate A mapping of archived files to their size
   * @param snapshotSizeChanges A mapping of snapshots and their change in size
   */
  void bucketFilesToSnapshot(
      String snapshotName, Map filesToUpdate, Map snapshotSizeChanges)
          throws IOException {
    // A quick check to avoid doing work if the caller unnecessarily invoked this method.
    if (filesToUpdate.isEmpty()) {
      return;
    }

    Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(
        snapshotName, FSUtils.getRootDir(conf));
    SnapshotDescription sd = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
    SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, sd);
    // For each region referenced by the snapshot
    for (SnapshotRegionManifest rm : manifest.getRegionManifests()) {
      // For each column family in this region
      for (FamilyFiles ff : rm.getFamilyFilesList()) {
        // And each store file in that family
        for (StoreFile sf : ff.getStoreFilesList()) {
          Long valueOrNull = filesToUpdate.remove(sf.getName());
          if (valueOrNull != null) {
            // This storefile was recently archived, we should update this snapshot with its size
            snapshotSizeChanges.merge(snapshotName, valueOrNull, Long::sum);
          }
          // Short-circuit, if we have no more files that were archived, we don't need to iterate
          // over the rest of the snapshot.
          if (filesToUpdate.isEmpty()) {
            return;
          }
        }
      }
    }
  }

  /**
   * Reads the current size for each snapshot to update, generates a new update based on that value,
   * and then writes the new update.
   *
   * @param snapshotSizeChanges A map of snapshot name to size change
   */
  void persistSnapshotSizeChanges(Map snapshotSizeChanges) throws IOException {
    try (Table quotaTable = conn.getTable(QuotaTableUtil.QUOTA_TABLE_NAME)) {
      // Create a list (with a more typical ordering implied)
      final List> snapshotSizeEntries = new ArrayList<>(
          snapshotSizeChanges.entrySet());
      // Create the Gets for each snapshot we need to update
      final List snapshotSizeGets = snapshotSizeEntries.stream()
          .map((e) -> QuotaTableUtil.makeGetForSnapshotSize(tn, e.getKey()))
          .collect(Collectors.toList());
      final Iterator> iterator = snapshotSizeEntries.iterator();
      // A List to store each Put we'll create from the Get's we retrieve
      final List updates = new ArrayList<>(snapshotSizeEntries.size());

      // TODO Push this down to the RegionServer with a coprocessor:
      //
      // We would really like to piggy-back on the row-lock already being grabbed
      // to handle the update of the row in the quota table. However, because the value
      // is a serialized protobuf, the standard Increment API doesn't work for us. With a CP, we
      // can just send the size deltas to the RS and atomically update the serialized PB object
      // while relying on the row-lock for synchronization.
      //
      // Synchronizing on the namespace string is a "minor smell" but passable as this is
      // only invoked via a single caller (the active Master). Using the namespace name lets us
      // have some parallelism without worry of on caller seeing stale data from the quota table.
      synchronized (getLockForNamespace(tn.getNamespaceAsString())) {
        final Result[] existingSnapshotSizes = quotaTable.get(snapshotSizeGets);
        long totalSizeChange = 0;
        // Read the current size values (if they exist) to generate the new value
        for (Result result : existingSnapshotSizes) {
          Entry entry = iterator.next();
          String snapshot = entry.getKey();
          Long size = entry.getValue();
          // Track the total size change for the namespace this table belongs in
          totalSizeChange += size;
          // Get the size of the previous value (or zero)
          long previousSize = getSnapshotSizeFromResult(result);
          // Create an update. A file was archived from the table, so the table's size goes
          // down, but the snapshot's size goes up.
          updates.add(QuotaTableUtil.createPutForSnapshotSize(tn, snapshot, previousSize + size));
        }

        // Create an update for the summation of all snapshots in the namespace
        if (totalSizeChange != 0) {
          long previousSize = getPreviousNamespaceSnapshotSize(
              quotaTable, tn.getNamespaceAsString());
          updates.add(QuotaTableUtil.createPutForNamespaceSnapshotSize(
              tn.getNamespaceAsString(), previousSize + totalSizeChange));
        }

        // Send all of the quota table updates in one batch.
        List failures = new ArrayList<>();
        final Object[] results = new Object[updates.size()];
        quotaTable.batch(updates, results);
        for (Object result : results) {
          // A null result is an error condition (all RPC attempts failed)
          if (!(result instanceof Result)) {
            failures.add(result);
          }
        }
        // Propagate a failure if any updates failed
        if (!failures.isEmpty()) {
          throw new QuotaSnapshotSizeSerializationException(
              "Failed to write some snapshot size updates: " + failures);
        }
      }
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return;
    }
  }

  /**
   * Fetches the current size of all snapshots in the given {@code namespace}.
   *
   * @param quotaTable The HBase quota table
   * @param namespace Namespace to fetch the sum of snapshot sizes for
   * @return The size of all snapshot sizes for the namespace in bytes.
   */
  long getPreviousNamespaceSnapshotSize(Table quotaTable, String namespace) throws IOException {
    // Update the size of each snapshot for all snapshots in a namespace.
    Result r = quotaTable.get(
        QuotaTableUtil.createGetNamespaceSnapshotSize(namespace));
    return getSnapshotSizeFromResult(r);
  }

  /**
   * Extracts the size component from a serialized {@link SpaceQuotaSnapshot} protobuf.
   *
   * @param r A Result containing one cell with a SpaceQuotaSnapshot protobuf
   * @return The size in bytes of the snapshot.
   */
  long getSnapshotSizeFromResult(Result r) throws InvalidProtocolBufferException {
    // Per javadoc, Result should only be null if an exception was thrown. So, if we're here,
    // we should be non-null. If we can't advance to the first cell, same as "no cell".
    if (!r.isEmpty() && r.advance()) {
      return QuotaTableUtil.parseSnapshotSize(r.current());
    }
    return 0L;
  }

  @Override
  public long computeAndStoreSnapshotSizes(
      Collection currentSnapshots) throws IOException {
    // Record what the current snapshots are
    this.currentSnapshots = new ArrayList<>(currentSnapshots);
    Collections.sort(this.currentSnapshots);

    // compute new size for table + snapshots for that table
    List snapshotSizes = computeSnapshotSizes(this.currentSnapshots);
    if (LOG.isTraceEnabled()) {
      LOG.trace("Computed snapshot sizes for " + tn + " of " + snapshotSizes);
    }

    // Compute the total size of all snapshots against our table
    final long totalSnapshotSize = snapshotSizes.stream().mapToLong((sws) -> sws.getSize()).sum();

    writeLock.lock();
    try {
      // Persist the size of each snapshot
      try (Table quotaTable = conn.getTable(QuotaTableUtil.QUOTA_TABLE_NAME)) {
        persistSnapshotSizes(quotaTable, snapshotSizes);
      }

      // Report the last time we did a recomputation
      lastFullCompute = System.nanoTime();

      return totalSnapshotSize;
    } finally {
      writeLock.unlock();
    }
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append(getClass().getSimpleName()).append("[");
    sb.append("tableName=").append(tn).append(", currentSnapshots=");
    sb.append(currentSnapshots).append(", lastFullCompute=").append(lastFullCompute);
    return sb.append("]").toString();
  }

  /**
   * Computes the size of each snapshot against the table referenced by {@code this}.
   *
   * @param snapshots A sorted list of snapshots against {@code tn}.
   * @return A list of the size for each snapshot against {@code tn}.
   */
  List computeSnapshotSizes(List snapshots) throws IOException {
    final List snapshotSizes = new ArrayList<>(snapshots.size());
    final Path rootDir = FSUtils.getRootDir(conf);

    // Get the map of store file names to store file path for this table
    final Set tableReferencedStoreFiles;
    try {
      tableReferencedStoreFiles = FSUtils.getTableStoreFilePathMap(fs, rootDir).keySet();
    } catch (InterruptedException e) {
      Thread.currentThread().interrupt();
      return null;
    }

    if (LOG.isTraceEnabled()) {
      LOG.trace("Paths for " + tn + ": " + tableReferencedStoreFiles);
    }

    // For each snapshot on this table, get the files which the snapshot references which
    // the table does not.
    Set snapshotReferencedFiles = new HashSet<>();
    for (String snapshotName : snapshots) {
      Path snapshotDir = SnapshotDescriptionUtils.getCompletedSnapshotDir(snapshotName, rootDir);
      SnapshotDescription sd = SnapshotDescriptionUtils.readSnapshotInfo(fs, snapshotDir);
      SnapshotManifest manifest = SnapshotManifest.open(conf, fs, snapshotDir, sd);

      if (LOG.isTraceEnabled()) {
        LOG.trace("Files referenced by other snapshots: " + snapshotReferencedFiles);
      }

      // Get the set of files from the manifest that this snapshot references which are not also
      // referenced by the originating table.
      Set unreferencedStoreFileNames = getStoreFilesFromSnapshot(
          manifest, (sfn) -> !tableReferencedStoreFiles.contains(sfn)
              && !snapshotReferencedFiles.contains(sfn));

      if (LOG.isTraceEnabled()) {
        LOG.trace("Snapshot " + snapshotName + " solely references the files: "
            + unreferencedStoreFileNames);
      }

      // Compute the size of the store files for this snapshot
      long size = getSizeOfStoreFiles(tn, unreferencedStoreFileNames);
      if (LOG.isTraceEnabled()) {
        LOG.trace("Computed size of " + snapshotName + " to be " + size);
      }

      // Persist this snapshot's size into the map
      snapshotSizes.add(new SnapshotWithSize(snapshotName, size));

      // Make sure that we don't double-count the same file
      for (StoreFileReference ref : unreferencedStoreFileNames) {
        for (String fileNames : ref.getFamilyToFilesMapping().values()) {
          snapshotReferencedFiles.add(fileNames);
        }
      }
    }

    return snapshotSizes;
  }

  /**
   * Computes the size of each store file in {@code storeFileNames}
   */
  long getSizeOfStoreFiles(TableName tn, Set storeFileNames) {
    return storeFileNames.stream()
        .collect(Collectors.summingLong((sfr) -> getSizeOfStoreFile(tn, sfr)));
  }

  /**
   * Computes the size of the store files for a single region.
   */
  long getSizeOfStoreFile(TableName tn, StoreFileReference storeFileName) {
    String regionName = storeFileName.getRegionName();
    return storeFileName.getFamilyToFilesMapping()
        .entries().stream()
        .collect(Collectors.summingLong((e) ->
            getSizeOfStoreFile(tn, regionName, e.getKey(), e.getValue())));
  }

  /**
   * Computes the size of the store file given its name, region and family name in
   * the archive directory.
   */
  long getSizeOfStoreFile(
      TableName tn, String regionName, String family, String storeFile) {
    Path familyArchivePath;
    try {
      familyArchivePath = HFileArchiveUtil.getStoreArchivePath(conf, tn, regionName, family);
    } catch (IOException e) {
      LOG.warn("Could not compute path for the archive directory for the region", e);
      return 0L;
    }
    Path fileArchivePath = new Path(familyArchivePath, storeFile);
    try {
      if (fs.exists(fileArchivePath)) {
        FileStatus[] status = fs.listStatus(fileArchivePath);
        if (1 != status.length) {
          LOG.warn("Expected " + fileArchivePath +
              " to be a file but was a directory, ignoring reference");
          return 0L;
        }
        return status[0].getLen();
      }
    } catch (IOException e) {
      LOG.warn("Could not obtain the status of " + fileArchivePath, e);
      return 0L;
    }
    LOG.warn("Expected " + fileArchivePath + " to exist but does not, ignoring reference.");
    return 0L;
  }

  /**
   * Extracts the names of the store files referenced by this snapshot which satisfy the given
   * predicate (the predicate returns {@code true}).
   */
  Set getStoreFilesFromSnapshot(
      SnapshotManifest manifest, Predicate filter) {
    Set references = new HashSet<>();
    // For each region referenced by the snapshot
    for (SnapshotRegionManifest rm : manifest.getRegionManifests()) {
      StoreFileReference regionReference = new StoreFileReference(
          ProtobufUtil.toRegionInfo(rm.getRegionInfo()).getEncodedName());

      // For each column family in this region
      for (FamilyFiles ff : rm.getFamilyFilesList()) {
        final String familyName = ff.getFamilyName().toStringUtf8();
        // And each store file in that family
        for (StoreFile sf : ff.getStoreFilesList()) {
          String storeFileName = sf.getName();
          // A snapshot only "inherits" a files size if it uniquely refers to it (no table
          // and no other snapshot references it).
          if (filter.test(storeFileName)) {
            regionReference.addFamilyStoreFile(familyName, storeFileName);
          }
        }
      }
      // Only add this Region reference if we retained any files.
      if (!regionReference.getFamilyToFilesMapping().isEmpty()) {
        references.add(regionReference);
      }
    }
    return references;
  }

  /**
   * Writes the snapshot sizes to the provided {@code table}.
   */
  void persistSnapshotSizes(
      Table table, List snapshotSizes) throws IOException {
    // Convert each entry in the map to a Put and write them to the quota table
    table.put(snapshotSizes
        .stream()
        .map(sws -> QuotaTableUtil.createPutForSnapshotSize(
            tn, sws.getName(), sws.getSize()))
        .collect(Collectors.toList()));
  }

  /**
   * A struct encapsulating the name of a snapshot and its "size" on the filesystem. This size is
   * defined as the amount of filesystem space taken by the files the snapshot refers to which
   * the originating table no longer refers to.
   */
  static class SnapshotWithSize {
    private final String name;
    private final long size;

    SnapshotWithSize(String name, long size) {
      this.name = Objects.requireNonNull(name);
      this.size = size;
    }

    String getName() {
      return name;
    }

    long getSize() {
      return size;
    }

    @Override
    public int hashCode() {
      return new HashCodeBuilder().append(name).append(size).toHashCode();
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }

      if (!(o instanceof SnapshotWithSize)) {
        return false;
      }

      SnapshotWithSize other = (SnapshotWithSize) o;
      return name.equals(other.name) && size == other.size;
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder(32);
      return sb.append("SnapshotWithSize:[").append(name).append(" ")
          .append(StringUtils.byteDesc(size)).append("]").toString();
    }
  }

  /**
   * A reference to a collection of files in the archive directory for a single region.
   */
  static class StoreFileReference {
    private final String regionName;
    private final Multimap familyToFiles;

    StoreFileReference(String regionName) {
      this.regionName = Objects.requireNonNull(regionName);
      familyToFiles = HashMultimap.create();
    }

    String getRegionName() {
      return regionName;
    }

    Multimap getFamilyToFilesMapping() {
      return familyToFiles;
    }

    void addFamilyStoreFile(String family, String storeFileName) {
      familyToFiles.put(family, storeFileName);
    }

    @Override
    public int hashCode() {
      return new HashCodeBuilder().append(regionName).append(familyToFiles).toHashCode();
    }

    @Override
    public boolean equals(Object o) {
      if (this == o) {
        return true;
      }
      if (!(o instanceof StoreFileReference)) {
        return false;
      }
      StoreFileReference other = (StoreFileReference) o;
      return regionName.equals(other.regionName) && familyToFiles.equals(other.familyToFiles);
    }

    @Override
    public String toString() {
      StringBuilder sb = new StringBuilder();
      return sb.append("StoreFileReference[region=").append(regionName).append(", files=")
          .append(familyToFiles).append("]").toString();
    }
  }
}