All downloads are free. The search and download functions use the official Maven repository.

org.apache.hadoop.hbase.regionserver.compactions.FIFOCompactionPolicy Maven / Gradle / Ivy

The newest version!
/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.regionserver.compactions;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.regionserver.StoreConfigInformation;
import org.apache.hadoop.hbase.regionserver.StoreFile;
import org.apache.hadoop.hbase.regionserver.StoreUtils;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;

/**
 * FIFO compaction policy: selects only store files in which all cells have expired.
 * <p>
 * The column family MUST have a non-default TTL. One of the use cases for this policy is
 * storing raw data which will be post-processed later and discarded completely after a fairly
 * short period of time, e.g. raw time-series vs. time-based roll-up aggregates and compacted
 * time-series. Raw time-series are collected into a column family that uses the FIFO compaction
 * policy; a periodic task creates the roll-up aggregates and compacts the time-series, after
 * which the original raw data can be discarded.
 * <p>
 * When the store contains reference files (logged here as "Split detected"), every decision is
 * delegated to the parent {@link ExploringCompactionPolicy}.
 */
@InterfaceAudience.Private
public class FIFOCompactionPolicy extends ExploringCompactionPolicy {

  private static final Log LOG = LogFactory.getLog(FIFOCompactionPolicy.class);

  public FIFOCompactionPolicy(Configuration conf, StoreConfigInformation storeConfigInfo) {
    super(conf, storeConfigInfo);
  }

  /**
   * Builds a compaction request containing only the empty or TTL-expired store files that are
   * not already being compacted.
   * @param candidateFiles store files to choose from
   * @param filesCompacting files currently being compacted; may be {@code null}
   * @param isUserCompaction only forwarded to the parent policy when references are present
   * @param mayUseOffPeak only forwarded to the parent policy when references are present
   * @param forceMajor not supported by this policy; a warning is logged and the flag is ignored
   *          unless the selection is delegated to the parent policy
   * @return the request to run; its file collection is empty when nothing has expired
   * @throws IOException if the parent policy fails while handling the delegated case
   */
  @Override
  public CompactionRequest selectCompaction(Collection<StoreFile> candidateFiles,
      List<StoreFile> filesCompacting, boolean isUserCompaction, boolean mayUseOffPeak,
      boolean forceMajor) throws IOException {
    if (forceMajor) {
      LOG.warn("Major compaction is not supported for FIFO compaction policy. Ignore the flag.");
    }
    if (StoreUtils.hasReferences(candidateFiles)) {
      LOG.info("Split detected, delegate selection to the parent policy.");
      return super.selectCompaction(candidateFiles, filesCompacting, isUserCompaction,
        mayUseOffPeak, forceMajor);
    }

    // Only empty or fully expired files are selected; the resulting request may be empty.
    Collection<StoreFile> toCompact = getExpiredStores(candidateFiles, filesCompacting);
    return new CompactionRequest(toCompact);
  }

  /**
   * This policy never asks for a major compaction itself.
   * @param filesToCompact files under consideration
   * @return the parent policy's answer when reference files are present, otherwise
   *         {@code false}
   * @throws IOException if the parent policy fails while handling the delegated case
   */
  @Override
  public boolean shouldPerformMajorCompaction(Collection<StoreFile> filesToCompact)
    throws IOException {
    if (StoreUtils.hasReferences(filesToCompact)) {
      LOG.info("Split detected, delegate to the parent policy.");
      return super.shouldPerformMajorCompaction(filesToCompact);
    }
    return false;
  }

  /**
   * Reports whether at least one store file is empty or TTL-expired.
   * @param storeFiles all store files of the store
   * @param filesCompacting files currently being compacted; only used when the decision is
   *          delegated to the parent policy
   * @return the parent policy's answer when reference files are present, otherwise whether any
   *         file is empty or expired
   */
  @Override
  public boolean needsCompaction(Collection<StoreFile> storeFiles,
      List<StoreFile> filesCompacting) {
    if (StoreUtils.hasReferences(storeFiles)) {
      LOG.info("Split detected, delegate to the parent policy.");
      return super.needsCompaction(storeFiles, filesCompacting);
    }
    return hasExpiredStores(storeFiles);
  }

  /**
   * The FIFOCompactionPolicy only chooses TTL-expired HFiles as compaction candidates. So if all
   * HFiles are TTL expired, the compaction will generate a new empty HFile whose max timestamp
   * is Long.MAX_VALUE. If not considered separately, that HFile would never be archived because
   * its TTL would never expire. So empty store files are checked separately.
   * (See HBASE-21504)
   */
  private boolean isEmptyStoreFile(StoreFile sf) {
    return sf.getReader().getEntries() == 0;
  }

  /**
   * Tells whether every cell of the file is older than the TTL.
   * @param sf store file to inspect
   * @param currentTime reference "now", in the same unit as the file's max timestamp
   * @param maxTtl store file TTL; {@code Long.MAX_VALUE} means the files never expire
   * @return {@code true} only if the file has a max timestamp, a finite TTL is configured and
   *         {@code currentTime - maxTtl} has reached that timestamp
   */
  private boolean isTtlExpired(StoreFile sf, long currentTime, long maxTtl) {
    // Check MIN_VERSIONS is in HStore removeUnneededFiles
    Long maxTs = sf.getReader().getMaxTimestamp();
    if (maxTs == null || maxTtl == Long.MAX_VALUE) {
      return false;
    }
    return currentTime - maxTtl >= maxTs;
  }

  /**
   * Single definition of "this file can be dropped", shared by {@link #hasExpiredStores} and
   * {@link #getExpiredStores} so the two cannot drift apart.
   */
  private boolean isEmptyOrExpired(StoreFile sf, long currentTime, long maxTtl) {
    return isEmptyStoreFile(sf) || isTtlExpired(sf, currentTime, maxTtl);
  }

  /**
   * @param files store files to inspect
   * @return {@code true} as soon as one empty or TTL-expired file is found
   */
  private boolean hasExpiredStores(Collection<StoreFile> files) {
    long currentTime = EnvironmentEdgeManager.currentTime();
    // Read the TTL once per call instead of once per file.
    long maxTtl = storeConfigInfo.getStoreFileTtl();
    for (StoreFile sf : files) {
      if (isEmptyOrExpired(sf, currentTime, maxTtl)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Collects the empty or TTL-expired files that are not already being compacted.
   * @param files store files to inspect
   * @param filesCompacting files currently being compacted; {@code null} is treated as "none"
   * @return a new, possibly empty, collection in the iteration order of {@code files}
   */
  private Collection<StoreFile> getExpiredStores(Collection<StoreFile> files,
      Collection<StoreFile> filesCompacting) {
    long currentTime = EnvironmentEdgeManager.currentTime();
    // Read the TTL once per call instead of once per file.
    long maxTtl = storeConfigInfo.getStoreFileTtl();
    Collection<StoreFile> expiredStores = new ArrayList<StoreFile>();
    for (StoreFile sf : files) {
      if (!isEmptyOrExpired(sf, currentTime, maxTtl)) {
        continue;
      }
      // Applies to empty files as well as expired ones: a file that is already part of a
      // running compaction must not be selected a second time.
      if (filesCompacting == null || !filesCompacting.contains(sf)) {
        expiredStores.add(sf);
      }
    }
    return expiredStores;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy