/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.hbase.util.ReflectionUtils;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.PeekingIterator;

/**
 * HBASE-15181 This is a simple implementation of date-based tiered compaction similar to
 * Cassandra's for the following benefits:
 * 1. Improve date-range-based scan by structuring store files in date-based tiered layout.
 * 2. Reduce compaction overhead.
 * 3. Improve TTL efficiency.
 * It is a perfect fit for use cases that:
 * 1. have mostly date-based data writes and scans, with a focus on the most recent data.
 * 2. never or rarely delete data.
 * Out-of-order writes are handled gracefully. Time range overlapping among store files is
 * tolerated and the performance impact is minimized. Configuration can be set in hbase-site or
 * overridden at the per-table or per-column-family level via the hbase shell.
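 * For illustration (hypothetical numbers, not defaults): with a base window of 6 hours and 4
 * windows per tier, recent data is grouped into 6-hour windows, older data into 24-hour windows,
 * then 96-hour windows, and so on, so older data ends up in progressively larger windows.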
 * Design spec is at
 * https://docs.google.com/document/d/1_AmlNb2N8Us1xICsTeGDLKIqL6T-oHoRLZ323MG_uy8/
 */
public class DateTieredCompactionPolicy extends RatioBasedCompactionPolicy {
  private static final Log LOG = LogFactory.getLog(DateTieredCompactionPolicy.class);

  private RatioBasedCompactionPolicy compactionPolicyPerWindow;

  public DateTieredCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo)
      throws IOException {
    super(conf, storeConfigInfo);
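    // The policy used to pick files within a single window is itself configurable and is loaded
    // reflectively, so it can be overridden like the other settings described above.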
    try {
      compactionPolicyPerWindow =
          ReflectionUtils.instantiateWithCustomCtor(comConf.getCompactionPolicyForTieredWindow(),
            new Class[] { Configuration.class, StoreConfigInformation.class }, new Object[] { conf,
                storeConfigInfo });
    } catch (Exception e) {
      throw new IOException("Unable to load configured compaction policy '"
          + comConf.getCompactionPolicyForTieredWindow() + "'", e);
    }
  }

  @Override
  public boolean isMajorCompaction(Collection<StoreFile> filesToCompact) throws IOException {
    // TODO: major compaction with tiered output. Never do major compaction unless forced for now.
    return false;
  }

  @Override
  /**
   * Heuristics for guessing whether we need compaction.
   */
  public boolean needsCompaction(final Collection<StoreFile> storeFiles,
      final List<StoreFile> filesCompacting) {
    return needsCompaction(storeFiles, filesCompacting, System.currentTimeMillis());
  }

  @VisibleForTesting
  public boolean needsCompaction(final Collection<StoreFile> storeFiles,
      final List<StoreFile> filesCompacting, long now) {
    ArrayList<StoreFile> candidates = new ArrayList<StoreFile>(storeFiles);
    candidates = filterBulk(candidates);
    candidates = skipLargeFiles(candidates);
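    // Run the same selection logic as an actual compaction request; compaction is needed
    // whenever some window yields a non-null set of candidates.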
    try {
      candidates = applyCompactionPolicy(candidates, true, false, now);
    } catch (Exception e) {
      LOG.error("Can not check for compaction: ", e);
      return false;
    }
    return candidates != null;
  }

  @Override
  /**
   * Input candidates are sorted from oldest to newest by seqId
   * Could return null if no candidates are found
   */
  public ArrayList<StoreFile> applyCompactionPolicy(ArrayList<StoreFile> candidates,
      boolean mayUseOffPeak, boolean mayBeStuck) throws IOException {
    return applyCompactionPolicy(candidates, mayUseOffPeak, mayBeStuck,
      System.currentTimeMillis());
  }

  @VisibleForTesting
  public ArrayList<StoreFile> applyCompactionPolicy(ArrayList<StoreFile> candidates,
      boolean mayUseOffPeak, boolean mayBeStuck, long now) throws IOException {
    Iterable<StoreFile> candidatesInWindow =
      filterOldStoreFiles(Lists.newArrayList(candidates), comConf.getMaxStoreFileAgeMillis(), now);

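    // Group the surviving files into per-window buckets, ordered from the newest window to the
    // oldest.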
    List<ArrayList<StoreFile>> buckets =
        partitionFilesToBuckets(candidatesInWindow, comConf.getBaseWindowMillis(),
          comConf.getWindowsPerTier(), now);
    LOG.debug("Compaction buckets are: " + buckets);
    if (buckets.size() >= storeConfigInfo.getBlockingFileCount()) {
      LOG.warn("Number of compaction buckets:" +  buckets.size()
        + ", exceeds blocking file count setting: "
        + storeConfigInfo.getBlockingFileCount()
        + ", either increase hbase.hstore.blockingStoreFiles or "
        + "reduce the number of tiered compaction windows");
    }
    
    return newestBucket(buckets, comConf.getIncomingWindowMin(), now, comConf.getBaseWindowMillis(),
      mayUseOffPeak);
  }

  /**
   * @param buckets the list of buckets, sorted from newest to oldest, from which to return the
   *          newest bucket within thresholds.
   * @param incomingWindowThreshold minimum number of store files in a bucket to qualify.
   * @param now current time in milliseconds, used to locate the incoming (newest) window.
   * @param baseWindowMillis size in milliseconds of the base compaction window.
   * @param mayUseOffPeak whether off-peak compaction settings may be used.
   * @return a bucket (a list of store files within a window to be compacted), or null if no
   *          bucket qualifies.
   * @throws IOException if the per-window compaction policy fails.
   */
  private ArrayList<StoreFile> newestBucket(List<ArrayList<StoreFile>> buckets,
      int incomingWindowThreshold, long now, long baseWindowMillis, boolean mayUseOffPeak)
      throws IOException {
    Window incomingWindow = getInitialWindow(now, baseWindowMillis);
    for (ArrayList<StoreFile> bucket : buckets) {
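      // The newest (incoming) window uses the incoming-window file-count threshold; older windows
      // use the regular minFilesToCompact.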
      int minThreshold = incomingWindow.compareToTimestamp(bucket.get(0).getMaximumTimestamp())
        <= 0? comConf.getIncomingWindowMin() : comConf.minFilesToCompact;
      compactionPolicyPerWindow.setMinThreshold(minThreshold);
      ArrayList<StoreFile> candidates = compactionPolicyPerWindow.applyCompactionPolicy(bucket,
        mayUseOffPeak, false);
      if (candidates != null && !candidates.isEmpty()) {
        return candidates;
      }
    }
    return null;
  }

  /**
   * We receive store files sorted in ascending order by seqId, then scan the list of files. If the
   * current file has a maxTimestamp older than the last known maximum, treat this file as if it
   * carried the last known maximum. This way both seqId and timestamp are in the same order. If
   * files carry the same maxTimestamp, they are ordered by seqId. We then reverse the list so it is
   * ordered by seqId and maxTimestamp in descending order, and build the time windows. All
   * out-of-order data goes into the same compaction windows, guaranteeing contiguous compaction
   * based on sequence id.
   */
  private static List<ArrayList<StoreFile>> partitionFilesToBuckets(Iterable<StoreFile> storeFiles,
      long baseWindowSizeMillis, int windowsPerTier, long now) {
    List<ArrayList<StoreFile>> buckets = Lists.newArrayList();
    Window window = getInitialWindow(now, baseWindowSizeMillis);

    List<Pair<StoreFile, Long>> storefileMaxTimestampPairs =
        Lists.newArrayListWithCapacity(Iterables.size(storeFiles));
    long maxTimestampSeen = Long.MIN_VALUE;
    for (StoreFile storeFile : storeFiles) {
      // if there is out-of-order data,
      // we put them in the same window as the last file in increasing order
      maxTimestampSeen = Math.max(maxTimestampSeen, storeFile.getMaximumTimestamp());
      storefileMaxTimestampPairs.add(new Pair<StoreFile, Long>(storeFile, maxTimestampSeen));
    }

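    // Reverse so iteration runs from the newest file to the oldest (descending seqId and adjusted
    // max timestamp), matching the backward walk of the compaction windows.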
    Collections.reverse(storefileMaxTimestampPairs);
    PeekingIterator<Pair<StoreFile, Long>> it =
        Iterators.peekingIterator(storefileMaxTimestampPairs.iterator());

    while (it.hasNext()) {
      int compResult = window.compareToTimestamp(it.peek().getSecond());
      if (compResult > 0) {
        // If the file is too old for the window, switch to the next window
        window = window.nextWindow(windowsPerTier);
      } else {
        // The file is within the target window
        ArrayList<StoreFile> bucket = Lists.newArrayList();
        // Add all files in the same window to current bucket. For incoming window
        // we tolerate files with future data although it is sub-optimal
        while (it.hasNext() && window.compareToTimestamp(it.peek().getSecond()) <= 0) {
          bucket.add(it.next().getFirst());
        }
        if (!bucket.isEmpty()) {
          buckets.add(bucket);
        }
      }
    }

    return buckets;
  }

  /**
   * Removes all store files with max timestamp older than (current - maxAge).
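   * For example (illustrative numbers), a maxAge of 30 days excludes any file whose maximum cell
   * timestamp is older than (now - 30 days), and a maxAge of 0 excludes every file.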
   * @param storeFiles all store files to consider
   * @param maxAge the age in milliseconds when a store file stops participating in compaction.
   * @param now current time. store files with max timestamp less than (now - maxAge) are filtered.
   * @return a list of storeFiles with the store file older than maxAge excluded
   */
  private static Iterable<StoreFile> filterOldStoreFiles(List<StoreFile> storeFiles, long maxAge,
      long now) {
    if (maxAge == 0) return ImmutableList.of();
    final long cutoff = now - maxAge;
    return Iterables.filter(storeFiles, new Predicate<StoreFile>() {
      @Override
      public boolean apply(StoreFile storeFile) {
        // This is for findbugs' issue with Guava. We know this won't happen.
        if (storeFile == null) {
          return false;
        }
        return storeFile.getMaximumTimestamp() >= cutoff;
      }
    });
  }

  /**
   * Returns the initial (newest) window, i.e. the one that contains {@code now}. The
   * {@link Window} class below partitions the time from epoch to now into tiers of exponentially
   * sized windows.
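   * For example (illustrative numbers), with now = 1000000 and timeUnit = 100000 the initial
   * window has divPosition 10 and covers timestamps 1000000 through 1099999.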
   */
  private static Window getInitialWindow(long now, long timeUnit) {
    return new Window(timeUnit, now / timeUnit);
  }

  private static class Window {
    /**
     * How big a range of timestamps fits inside the window, in milliseconds.
     */
    private final long windowMillis;
    /**
     * A timestamp t is within the window iff t / windowMillis == divPosition.
     */
    private final long divPosition;

    public Window(long baseWindowMillis, long divPosition) {
      this.windowMillis = baseWindowMillis;
      this.divPosition = divPosition;
    }

    /**
     * Compares the window to a timestamp.
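     * For example (illustrative numbers), a window with windowMillis = 10 and divPosition = 5
     * covers timestamps 50 through 59, so compareToTimestamp(45) returns 1 and
     * compareToTimestamp(62) returns -1.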
     * @param timestamp the timestamp to compare.
     * @return a negative integer, zero, or a positive integer as the window lies before, covers,
     *         or lies after the timestamp.
     */
    public int compareToTimestamp(long timestamp) {
      long pos = timestamp / windowMillis;
      return divPosition == pos ? 0 : divPosition < pos ? -1 : 1;
    }

    /**
     * Move to the new window of the same tier or of the next tier, which represents an earlier time
     * span.
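     * For example (illustrative numbers), with windowsPerTier = 4 and divPosition = 10, successive
     * calls walk back through (windowMillis, 9) and (windowMillis, 8); the next call then returns
     * (windowMillis * 4, 1), i.e. once divPosition reaches a multiple of windowsPerTier the window
     * size grows by a factor of windowsPerTier.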
     * @param windowsPerTier The number of contiguous windows that will have the same size. Windows
     *          following those will be tierBase times as big.
     * @return The next window
     */
    public Window nextWindow(int windowsPerTier) {
      if (divPosition % windowsPerTier > 0) return new Window(windowMillis, divPosition - 1);
      else return new Window(windowMillis * windowsPerTier, divPosition / windowsPerTier - 1);
    }
  }
}