
org.apache.hudi.org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactor

/**
 *
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase.mob.compactions;

import static org.apache.hadoop.hbase.regionserver.HStoreFile.BULKLOAD_TIME_KEY;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.MOB_CELLS_COUNT;
import static org.apache.hadoop.hbase.regionserver.HStoreFile.SKIP_RESET_SEQ_ID;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.NavigableMap;
import java.util.Objects;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.ArrayBackedTag;
import org.apache.hadoop.hbase.Cell;
import org.apache.hadoop.hbase.CellComparator;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.Tag;
import org.apache.hadoop.hbase.TagType;
import org.apache.hadoop.hbase.TagUtil;
import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.ConnectionFactory;
import org.apache.hadoop.hbase.client.MobCompactPartitionPolicy;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.io.HFileLink;
import org.apache.hadoop.hbase.io.crypto.Encryption;
import org.apache.hadoop.hbase.io.hfile.CacheConfig;
import org.apache.hadoop.hbase.io.hfile.HFile;
import org.apache.hadoop.hbase.io.hfile.HFile.Reader;
import org.apache.hadoop.hbase.mob.MobConstants;
import org.apache.hadoop.hbase.mob.MobFileName;
import org.apache.hadoop.hbase.mob.MobUtils;
import org.apache.hadoop.hbase.mob.compactions.MobCompactionRequest.CompactionType;
import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartition;
import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionDelPartitionId;
import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartition;
import org.apache.hadoop.hbase.mob.compactions.PartitionedMobCompactionRequest.CompactionPartitionId;
import org.apache.hadoop.hbase.regionserver.BloomType;
import org.apache.hadoop.hbase.regionserver.HStore;
import org.apache.hadoop.hbase.regionserver.HStoreFile;
import org.apache.hadoop.hbase.regionserver.ScanInfo;
import org.apache.hadoop.hbase.regionserver.ScanType;
import org.apache.hadoop.hbase.regionserver.ScannerContext;
import org.apache.hadoop.hbase.regionserver.StoreFileInfo;
import org.apache.hadoop.hbase.regionserver.StoreFileScanner;
import org.apache.hadoop.hbase.regionserver.StoreFileWriter;
import org.apache.hadoop.hbase.regionserver.StoreScanner;
import org.apache.hadoop.hbase.security.EncryptionUtil;
import org.apache.hadoop.hbase.tool.LoadIncrementalHFiles;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * An implementation of {@link MobCompactor} that compacts the mob files in partitions.
 */
@InterfaceAudience.Private
public class PartitionedMobCompactor extends MobCompactor {

  private static final Logger LOG = LoggerFactory.getLogger(PartitionedMobCompactor.class);
  protected long mergeableSize;
  protected int delFileMaxCount;
  /** The number of files compacted in a batch */
  protected int compactionBatchSize;
  protected int compactionKVMax;

  private final Path tempPath;
  private final Path bulkloadPath;
  private final CacheConfig compactionCacheConfig;
  private final byte[] refCellTags;
  private Encryption.Context cryptoContext = Encryption.Context.NONE;

  public PartitionedMobCompactor(Configuration conf, FileSystem fs, TableName tableName,
                                 ColumnFamilyDescriptor column, ExecutorService pool) throws IOException {
    super(conf, fs, tableName, column, pool);
    mergeableSize = conf.getLong(MobConstants.MOB_COMPACTION_MERGEABLE_THRESHOLD,
      MobConstants.DEFAULT_MOB_COMPACTION_MERGEABLE_THRESHOLD);
    delFileMaxCount = conf.getInt(MobConstants.MOB_DELFILE_MAX_COUNT,
      MobConstants.DEFAULT_MOB_DELFILE_MAX_COUNT);
    // default is 100
    compactionBatchSize = conf.getInt(MobConstants.MOB_COMPACTION_BATCH_SIZE,
      MobConstants.DEFAULT_MOB_COMPACTION_BATCH_SIZE);
    tempPath = new Path(MobUtils.getMobHome(conf), MobConstants.TEMP_DIR_NAME);
    bulkloadPath = new Path(tempPath, new Path(MobConstants.BULKLOAD_DIR_NAME, new Path(
      tableName.getNamespaceAsString(), tableName.getQualifierAsString())));
    compactionKVMax = this.conf.getInt(HConstants.COMPACTION_KV_MAX,
      HConstants.COMPACTION_KV_MAX_DEFAULT);
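    // Use a copy of the configuration with the block cache size set to 0 so that readers
    // opened during compaction do not pollute the block cache.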
    Configuration copyOfConf = new Configuration(conf);
    copyOfConf.setFloat(HConstants.HFILE_BLOCK_CACHE_SIZE_KEY, 0f);
    compactionCacheConfig = new CacheConfig(copyOfConf);
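    // Serialize the mob ref tag and the table name tag once up front; they are attached to
    // every reference cell written during the compaction (see createMobRefCell below).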
    List<Tag> tags = new ArrayList<>(2);
    tags.add(MobConstants.MOB_REF_TAG);
    Tag tableNameTag = new ArrayBackedTag(TagType.MOB_TABLE_NAME_TAG_TYPE, tableName.getName());
    tags.add(tableNameTag);
    this.refCellTags = TagUtil.fromList(tags);
    cryptoContext = EncryptionUtil.createEncryptionContext(copyOfConf, column);
  }

  @Override
  public List<Path> compact(List<FileStatus> files, boolean allFiles) throws IOException {
    if (files == null || files.isEmpty()) {
      LOG.info("No candidate mob files");
      return null;
    }
    LOG.info("is allFiles: " + allFiles);

    // find the files to compact.
    PartitionedMobCompactionRequest request = select(files, allFiles);
    // compact the files.
    return performCompaction(request);
  }

  /**
   * Selects the mob/del files to compact.
   * Iterates over the candidates to find all the del files and small mob files.
   * @param candidates All the candidates.
   * @param allFiles Whether to include all mob files in the compaction.
   * @return A compaction request.
   * @throws IOException if IO failure is encountered
   */
  protected PartitionedMobCompactionRequest select(List<FileStatus> candidates,
    boolean allFiles) throws IOException {
    final Map<CompactionPartitionId, CompactionPartition> filesToCompact = new HashMap<>();
    final CompactionPartitionId id = new CompactionPartitionId();
    final NavigableMap<CompactionDelPartitionId, CompactionDelPartition> delFilesToCompact =
        new TreeMap<>();
    final CompactionDelPartitionId delId = new CompactionDelPartitionId();
    final ArrayList<CompactionDelPartition> allDelPartitions = new ArrayList<>();
    int selectedFileCount = 0;
    int irrelevantFileCount = 0;
    int totalDelFiles = 0;
    MobCompactPartitionPolicy policy = column.getMobCompactPartitionPolicy();

    Calendar calendar = Calendar.getInstance();
    Date currentDate = new Date();
    Date firstDayOfCurrentMonth = null;
    Date firstDayOfCurrentWeek = null;

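    // For the MONTHLY policy the first day of the current week is needed as well: files that
    // are too recent to belong to a finished monthly partition are grouped at a finer
    // granularity (see MobUtils.fillPartitionId, used below).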
    if (policy == MobCompactPartitionPolicy.MONTHLY) {
      firstDayOfCurrentMonth = MobUtils.getFirstDayOfMonth(calendar, currentDate);
      firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    } else if (policy == MobCompactPartitionPolicy.WEEKLY) {
      firstDayOfCurrentWeek = MobUtils.getFirstDayOfWeek(calendar, currentDate);
    }

    // Check up front whether there are any del files, so the pass below can be optimized:
    // if del files exist, each partition must read its startKey and endKey from its files;
    // if there are none, that read can be skipped entirely.
    boolean withDelFiles = false;
    for (FileStatus file : candidates) {
      if (!file.isFile()) {
        continue;
      }
      // group the del files and small files.
      FileStatus linkedFile = file;
      if (HFileLink.isHFileLink(file.getPath())) {
        HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
        linkedFile = getLinkedFileStatus(link);
        if (linkedFile == null) {
          continue;
        }
      }
      if (StoreFileInfo.isDelFile(linkedFile.getPath())) {
        withDelFiles = true;
        break;
      }
    }

    for (FileStatus file : candidates) {
      if (!file.isFile()) {
        irrelevantFileCount++;
        continue;
      }
      // group the del files and small files.
      FileStatus linkedFile = file;
      if (HFileLink.isHFileLink(file.getPath())) {
        HFileLink link = HFileLink.buildFromHFileLinkPattern(conf, file.getPath());
        linkedFile = getLinkedFileStatus(link);
        if (linkedFile == null) {
          // If the linked file cannot be found, count it as an irrelevant file
          irrelevantFileCount++;
          continue;
        }
      }
      if (withDelFiles && StoreFileInfo.isDelFile(linkedFile.getPath())) {
        // File in the Del Partition List

        // Get delId from the file
        try (Reader reader = HFile.createReader(fs, linkedFile.getPath(), conf)) {
          delId.setStartKey(reader.getFirstRowKey().get());
          delId.setEndKey(reader.getLastRowKey().get());
        }
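        // delId is a mutable probe used for the lookup only; a fresh id is allocated below
        // when a new del partition is inserted into the map.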
        CompactionDelPartition delPartition = delFilesToCompact.get(delId);
        if (delPartition == null) {
          CompactionDelPartitionId newDelId =
              new CompactionDelPartitionId(delId.getStartKey(), delId.getEndKey());
          delPartition = new CompactionDelPartition(newDelId);
          delFilesToCompact.put(newDelId, delPartition);
        }
        delPartition.addDelFile(file);
        totalDelFiles++;
      } else {
        String fileName = linkedFile.getPath().getName();
        String date = MobFileName.getDateFromName(fileName);
        boolean skipCompaction = MobUtils
            .fillPartitionId(id, firstDayOfCurrentMonth, firstDayOfCurrentWeek, date, policy,
                calendar, mergeableSize);
        if (allFiles || (!skipCompaction && (linkedFile.getLen() < id.getThreshold()))) {
          // add all files if allFiles is true,
          // otherwise add the small files to the merge pool
          // filter out files which are not supposed to be compacted with the
          // current policy

          id.setStartKey(MobFileName.getStartKeyFromName(fileName));
          CompactionPartition compactionPartition = filesToCompact.get(id);
          if (compactionPartition == null) {
            CompactionPartitionId newId = new CompactionPartitionId(id.getStartKey(), id.getDate());
            compactionPartition = new CompactionPartition(newId);
            compactionPartition.addFile(file);
            filesToCompact.put(newId, compactionPartition);
            newId.updateLatestDate(date);
          } else {
            compactionPartition.addFile(file);
            compactionPartition.getPartitionId().updateLatestDate(date);
          }

          if (withDelFiles) {
            // get startKey and endKey from the file and update partition
            // TODO: is it possible to skip read of most hfiles?
            try (Reader reader = HFile.createReader(fs, linkedFile.getPath(), conf)) {
              compactionPartition.setStartKey(reader.getFirstRowKey().get());
              compactionPartition.setEndKey(reader.getLastRowKey().get());
            }
          }

          selectedFileCount++;
        }
      }
    }

    /*
     * Merge overlapping del partitions so the result is a list of non-overlapping key ranges.
     */
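    // delFilesToCompact is a TreeMap ordered by start key, so a new entry can only overlap
    // the partition most recently added to allDelPartitions.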
    for (Map.Entry<CompactionDelPartitionId, CompactionDelPartition> entry
        : delFilesToCompact.entrySet()) {
      if (allDelPartitions.size() > 0) {
        // check if the current key range overlaps the previous one
        CompactionDelPartition prev = allDelPartitions.get(allDelPartitions.size() - 1);
        if (Bytes.compareTo(prev.getId().getEndKey(), entry.getKey().getStartKey()) >= 0) {
          // merge them together
          prev.getId().setEndKey(entry.getValue().getId().getEndKey());
          prev.addDelFileList(entry.getValue().listDelFiles());
        } else {
          allDelPartitions.add(entry.getValue());
        }
      } else {
        allDelPartitions.add(entry.getValue());
      }
    }

    PartitionedMobCompactionRequest request = new PartitionedMobCompactionRequest(
      filesToCompact.values(), allDelPartitions);
    if (candidates.size() == (totalDelFiles + selectedFileCount + irrelevantFileCount)) {
      // all the files are selected
      request.setCompactionType(CompactionType.ALL_FILES);
    }
    LOG.info("The compaction type is {}, the request has {} del files, {} selected files, and {} " +
        "irrelevant files table '{}' and column '{}'", request.getCompactionType(), totalDelFiles,
        selectedFileCount, irrelevantFileCount, tableName, column.getNameAsString());
    return request;
  }

  /**
   * Performs the compaction on the selected files.
   * <ol>
   * <li>Compacts the del files.</li>
   * <li>Compacts the selected small mob files and all the del files.</li>
   * <li>If all the candidates are selected, delete the del files.</li>
   * </ol>
   * @param request The compaction request.
   * @return The paths of new mob files generated in the compaction.
   * @throws IOException if IO failure is encountered
   */
  protected List<Path> performCompaction(PartitionedMobCompactionRequest request)
      throws IOException {
    // merge the del files, it is per del partition
    for (CompactionDelPartition delPartition : request.getDelPartitions()) {
      if (delPartition.getDelFileCount() <= 1) continue;
      List<Path> newDelPaths = compactDelFiles(request, delPartition.listDelFiles());
      delPartition.cleanDelFiles();
      delPartition.addDelFileList(newDelPaths);
    }

    List<Path> paths = null;
    int totalDelFileCount = 0;
    try {
      for (CompactionDelPartition delPartition : request.getDelPartitions()) {
        for (Path newDelPath : delPartition.listDelFiles()) {
          HStoreFile sf =
              new HStoreFile(fs, newDelPath, conf, compactionCacheConfig, BloomType.NONE, true);
          // pre-create reader of a del file to avoid race condition when opening the reader in
          // each partition.
          sf.initReader();
          delPartition.addStoreFile(sf);
          totalDelFileCount++;
        }
      }
      LOG.info("After merging, there are {} del files. table='{}' column='{}'", totalDelFileCount,
          tableName, column.getNameAsString());
      // compact the mob files by partitions.
      paths = compactMobFiles(request);
      LOG.info("After compaction, there are {} mob files. table='{}' column='{}'", paths.size(),
          tableName, column.getNameAsString());
    } finally {
      for (CompactionDelPartition delPartition : request.getDelPartitions()) {
        closeStoreFileReaders(delPartition.getStoreFiles());
      }
    }

    // archive the del files if all the mob files are selected.
    if (request.type == CompactionType.ALL_FILES && !request.getDelPartitions().isEmpty()) {
      LOG.info("After a mob compaction with all files selected, archiving the del files for " +
          "table='{}' and column='{}'", tableName, column.getNameAsString());
      for (CompactionDelPartition delPartition : request.getDelPartitions()) {
        LOG.info(Objects.toString(delPartition.listDelFiles()));
        try {
          MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(),
              delPartition.getStoreFiles());
        } catch (IOException e) {
          LOG.error("Failed to archive the del files {} for partition {} table='{}' and " +
              "column='{}'", delPartition.getStoreFiles(), delPartition.getId(), tableName,
              column.getNameAsString(), e);
        }
      }
    }
    return paths;
  }

  static class DelPartitionComparator implements Comparator<CompactionDelPartition> {
    private boolean compareStartKey;

    DelPartitionComparator(boolean compareStartKey) {
      this.compareStartKey = compareStartKey;
    }

    public boolean getCompareStartKey() {
      return this.compareStartKey;
    }

    public void setCompareStartKey(final boolean compareStartKey) {
      this.compareStartKey = compareStartKey;
    }

    @Override
    public int compare(CompactionDelPartition o1, CompactionDelPartition o2) {
      if (compareStartKey) {
        return Bytes.compareTo(o1.getId().getStartKey(), o2.getId().getStartKey());
      } else {
        return Bytes.compareTo(o1.getId().getEndKey(), o2.getId().getEndKey());
      }
    }
  }

  List<HStoreFile> getListOfDelFilesForPartition(final CompactionPartition partition,
      final List<CompactionDelPartition> delPartitions) {
    // Binary search for startKey and endKey
    List<HStoreFile> result = new ArrayList<>();

    DelPartitionComparator comparator = new DelPartitionComparator(false);
    CompactionDelPartitionId id = new CompactionDelPartitionId(null, partition.getStartKey());
    CompactionDelPartition target = new CompactionDelPartition(id);
    int start = Collections.binarySearch(delPartitions, target, comparator);

    // Get the start index for partition
    if (start < 0) {
      // Collections.binarySearch returns -(insertion point) - 1 when the key is not found;
      // calculate the insert point.
      start = (start + 1) * (-1);
      if (start == delPartitions.size()) {
        // no overlap
        return result;
      } else {
        // Check another case which has no overlap
        if (Bytes.compareTo(partition.getEndKey(),
            delPartitions.get(start).getId().getStartKey()) < 0) {
          return result;
        }
      }
    }

    // Search for the end index of the partition. Note that target still wraps id, so updating
    // id re-purposes target for the end-key search.
    comparator.setCompareStartKey(true);
    id.setStartKey(partition.getEndKey());
    int end = Collections.binarySearch(delPartitions, target, comparator);

    if (end < 0) {
      end = (end + 1) * (-1);
      if (end == 0) {
        return result;
      } else {
        --end;
        if (Bytes.compareTo(partition.getStartKey(),
            delPartitions.get(end).getId().getEndKey()) > 0) {
          return result;
        }
      }
    }

    for (int i = start; i <= end; ++i) {
      result.addAll(delPartitions.get(i).getStoreFiles());
    }

    return result;
  }

  /**
   * Compacts the selected small mob files and all the del files.
   * @param request The compaction request.
   * @return The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
  protected List<Path> compactMobFiles(final PartitionedMobCompactionRequest request)
      throws IOException {
    Collection<CompactionPartition> partitions = request.compactionPartitions;
    if (partitions == null || partitions.isEmpty()) {
      LOG.info("No partitions of mob files in table='{}' and column='{}'", tableName,
          column.getNameAsString());
      return Collections.emptyList();
    }
    List<Path> paths = new ArrayList<>();
    final Connection c = ConnectionFactory.createConnection(conf);
    final Table table = c.getTable(tableName);

    try {
      Map<CompactionPartitionId, Future<List<Path>>> results = new HashMap<>();
      // compact the mob files by partitions in parallel.
      for (final CompactionPartition partition : partitions) {

        // How to efficiently come up a list of delFiles for one partition?
        // Search the delPartitions and collect all the delFiles for the partition
        // One optimization can do is that if there is no del file, we do not need to
        // come up with startKey/endKey.
        List<HStoreFile> delFiles =
            getListOfDelFilesForPartition(partition, request.getDelPartitions());

        results.put(partition.getPartitionId(), pool.submit(new Callable<List<Path>>() {
          @Override
          public List<Path> call() throws Exception {
            LOG.info("Compacting mob files for partition {} for table='{}' and column='{}'",
                partition.getPartitionId(), tableName, column.getNameAsString());
            return compactMobFilePartition(request, partition, delFiles, c, table);
          }
        }));
      }
      // compact the partitions in parallel.
      List<CompactionPartitionId> failedPartitions = new ArrayList<>();
      for (Entry<CompactionPartitionId, Future<List<Path>>> result : results.entrySet()) {
        try {
          paths.addAll(result.getValue().get());
        } catch (Exception e) {
          // just log the error
          LOG.error("Failed to compact the partition {} for table='{}' and column='{}'",
              result.getKey(), tableName, column.getNameAsString(), e);
          failedPartitions.add(result.getKey());
        }
      }
      if (!failedPartitions.isEmpty()) {
        // if any partition fails in the compaction, directly throw an exception.
        throw new IOException("Failed to compact the partitions " + failedPartitions +
            " for table='" + tableName + "' column='" + column.getNameAsString() + "'");
      }
    } finally {
      try {
        table.close();
      } catch (IOException e) {
        LOG.error("Failed to close the Table", e);
      }
    }
    return paths;
  }

  /**
   * Compacts a partition of selected small mob files and all the del files.
   * @param request The compaction request.
   * @param partition A compaction partition.
   * @param delFiles The del files.
   * @param connection The connection to use.
   * @param table The current table.
   * @return The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
  private List<Path> compactMobFilePartition(PartitionedMobCompactionRequest request,
                                             CompactionPartition partition,
                                             List<HStoreFile> delFiles,
                                             Connection connection,
                                             Table table) throws IOException {
    if (MobUtils.isMobFileExpired(column, EnvironmentEdgeManager.currentTime(),
        partition.getPartitionId().getDate())) {
      // If the files in the partition are expired, do not compact them and directly
      // return an empty list.
      return Collections.emptyList();
    }

    List<Path> newFiles = new ArrayList<>();
    List<FileStatus> files = partition.listFiles();
    int offset = 0;
    Path bulkloadPathOfPartition = new Path(bulkloadPath, partition.getPartitionId().toString());
    Path bulkloadColumnPath = new Path(bulkloadPathOfPartition, column.getNameAsString());

    while (offset < files.size()) {
      int batch = compactionBatchSize;
      if (files.size() - offset < compactionBatchSize) {
        batch = files.size() - offset;
      }
      if (batch == 1 && delFiles.isEmpty()) {
        // only one file left and no del files, do not compact it,
        // and directly add it to the new files.
        newFiles.add(files.get(offset).getPath());
        offset++;
        continue;
      }
      // clean the bulkload directory to avoid loading old files.
      fs.delete(bulkloadPathOfPartition, true);
      // add the selected mob files and del files into filesToCompact
      List<HStoreFile> filesToCompact = new ArrayList<>();
      for (int i = offset; i < batch + offset; i++) {
        HStoreFile sf = new HStoreFile(fs, files.get(i).getPath(), conf, compactionCacheConfig,
            BloomType.NONE, true);
        filesToCompact.add(sf);
      }
      filesToCompact.addAll(delFiles);
      // compact the mob files in a batch.
      compactMobFilesInBatch(request, partition, connection, table, filesToCompact, batch,
          bulkloadPathOfPartition, bulkloadColumnPath, newFiles);
      // move to the next batch.
      offset += batch;
    }
    LOG.info("Compaction is finished. The number of mob files is changed from {} to {} for " +
        "partition={} for table='{}' and column='{}'", files.size(), newFiles.size(),
        partition.getPartitionId(), tableName, column.getNameAsString());
    return newFiles;
  }

  /**
   * Closes the readers of store files.
   * @param storeFiles The store files to be closed.
   */
  private void closeStoreFileReaders(List<HStoreFile> storeFiles) {
    for (HStoreFile storeFile : storeFiles) {
      try {
        storeFile.closeStoreFile(true);
      } catch (IOException e) {
        LOG.warn("Failed to close the reader on store file " + storeFile.getPath(), e);
      }
    }
  }

  /**
   * Compacts a partition of selected small mob files and all the del files in a batch.
   * @param request The compaction request.
   * @param partition A compaction partition.
   * @param connection To use for transport
   * @param table The current table.
   * @param filesToCompact The files to be compacted.
   * @param batch The number of mob files to be compacted in a batch.
   * @param bulkloadPathOfPartition The directory where the bulkload column of the current
   *   partition is saved.
   * @param bulkloadColumnPath The directory where the bulkload files of current partition
   *   are saved.
   * @param newFiles The paths of new mob files after compactions.
   * @throws IOException if IO failure is encountered
   */
  private void compactMobFilesInBatch(PartitionedMobCompactionRequest request,
                                      CompactionPartition partition,
                                      Connection connection, Table table,
                                      List<HStoreFile> filesToCompact, int batch,
                                      Path bulkloadPathOfPartition, Path bulkloadColumnPath,
                                      List<Path> newFiles) throws IOException {
    // open scanner to the selected mob files and del files.
    StoreScanner scanner = createScanner(filesToCompact, ScanType.COMPACT_DROP_DELETES);
    // the mob files to be compacted, not include the del files.
    List<HStoreFile> mobFilesToCompact = filesToCompact.subList(0, batch);
    // Pair(maxSeqId, cellsCount)
    Pair<Long, Long> fileInfo = getFileInfo(mobFilesToCompact);
    // open writers for the mob files and new ref store files.
    StoreFileWriter writer = null;
    StoreFileWriter refFileWriter = null;
    Path filePath = null;
    long mobCells = 0;
    boolean cleanupTmpMobFile = false;
    boolean cleanupBulkloadDirOfPartition = false;
    boolean cleanupCommittedMobFile = false;
    boolean closeReaders = true;

    try {
      try {
        writer = MobUtils
            .createWriter(conf, fs, column, partition.getPartitionId().getLatestDate(), tempPath,
                Long.MAX_VALUE, column.getCompactionCompressionType(),
                partition.getPartitionId().getStartKey(), compactionCacheConfig, cryptoContext,
                true);
        cleanupTmpMobFile = true;
        filePath = writer.getPath();
        byte[] fileName = Bytes.toBytes(filePath.getName());
        // create a temp file and open a writer for it in the bulkloadPath
        refFileWriter = MobUtils.createRefFileWriter(conf, fs, column, bulkloadColumnPath,
            fileInfo.getSecond().longValue(), compactionCacheConfig, cryptoContext, true);
        cleanupBulkloadDirOfPartition = true;
        List<Cell> cells = new ArrayList<>();
        boolean hasMore;
        ScannerContext scannerContext =
            ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
        do {
          hasMore = scanner.next(cells, scannerContext);
          for (Cell cell : cells) {
            // write the mob cell to the mob file.
            writer.append(cell);
            // write the new reference cell to the store file.
            Cell reference = MobUtils.createMobRefCell(cell, fileName, this.refCellTags);
            refFileWriter.append(reference);
            mobCells++;
          }
          cells.clear();
        } while (hasMore);
      } finally {
        // close the scanner.
        scanner.close();

        if (cleanupTmpMobFile) {
          // append metadata to the mob file, and close the mob file writer.
          closeMobFileWriter(writer, fileInfo.getFirst(), mobCells);
        }

        if (cleanupBulkloadDirOfPartition) {
          // append metadata and bulkload info to the ref mob file, and close the writer.
          closeRefFileWriter(refFileWriter, fileInfo.getFirst(), request.selectionTime);
        }
      }

      if (mobCells > 0) {
        // commit mob file
        MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
        cleanupTmpMobFile = false;
        cleanupCommittedMobFile = true;
        // bulkload the ref file
        LOG.info("start MOB ref bulkload for partition {} table='{}' column='{}'",
            partition.getPartitionId(), tableName, column.getNameAsString());
        bulkloadRefFile(connection, table, bulkloadPathOfPartition, filePath.getName());
        cleanupCommittedMobFile = false;
        LOG.info("end MOB ref bulkload for partition {} table='{}' column='{}'",
            partition.getPartitionId(), tableName, column.getNameAsString());
        newFiles.add(new Path(mobFamilyDir, filePath.getName()));
      }

      // archive the old mob files, do not archive the del files.
      try {
        closeStoreFileReaders(mobFilesToCompact);
        closeReaders = false;
        MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(),
            mobFilesToCompact);
      } catch (IOException e) {
        LOG.error("Failed to archive the files " + mobFilesToCompact, e);
      }
    } finally {
      if (closeReaders) {
        closeStoreFileReaders(mobFilesToCompact);
      }

      if (cleanupTmpMobFile) {
        deletePath(filePath);
      }

      if (cleanupBulkloadDirOfPartition) {
        // delete the bulkload files in bulkloadPath
        deletePath(bulkloadPathOfPartition);
      }

      if (cleanupCommittedMobFile) {
        LOG.error("failed MOB ref bulkload for partition {} table='{}' column='{}'",
            partition.getPartitionId(), tableName, column.getNameAsString());
        MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(),
            Collections.singletonList(new HStoreFile(fs,
                new Path(mobFamilyDir, filePath.getName()), conf, compactionCacheConfig,
                BloomType.NONE, true)));
      }
    }
  }

  /**
   * Compacts the del files in batches which avoids opening too many files.
   * @param request The compaction request.
   * @param delFilePaths Del file paths to compact
   * @return The paths of new del files after merging or the original files if no merging
   *   is necessary.
   * @throws IOException if IO failure is encountered
   */
  protected List<Path> compactDelFiles(PartitionedMobCompactionRequest request,
    List<Path> delFilePaths) throws IOException {
    if (delFilePaths.size() <= delFileMaxCount) {
      return delFilePaths;
    }
    // when there are more del files than the number that is allowed, merge it firstly.
    int offset = 0;
    List<Path> paths = new ArrayList<>();
    while (offset < delFilePaths.size()) {
      // get the batch
      int batch = compactionBatchSize;
      if (delFilePaths.size() - offset < compactionBatchSize) {
        batch = delFilePaths.size() - offset;
      }
      List<HStoreFile> batchedDelFiles = new ArrayList<>();
      if (batch == 1) {
        // only one file left, do not compact it, directly add it to the new files.
        paths.add(delFilePaths.get(offset));
        offset++;
        continue;
      }
      for (int i = offset; i < batch + offset; i++) {
        batchedDelFiles.add(new HStoreFile(fs, delFilePaths.get(i), conf, compactionCacheConfig,
          BloomType.NONE, true));
      }
      // compact the del files in a batch.
      paths.add(compactDelFilesInBatch(request, batchedDelFiles));
      // move to the next batch.
      offset += batch;
    }
    return compactDelFiles(request, paths);
  }

  /**
   * Compacts the del files in a batch.
   * @param request The compaction request.
   * @param delFiles The del files.
   * @return The path of new del file after merging.
   * @throws IOException if IO failure is encountered
   */
  private Path compactDelFilesInBatch(PartitionedMobCompactionRequest request,
    List<HStoreFile> delFiles) throws IOException {
    // create a scanner for the del files.
    StoreScanner scanner = createScanner(delFiles, ScanType.COMPACT_RETAIN_DELETES);
    StoreFileWriter writer = null;
    Path filePath = null;
    try {
      writer = MobUtils.createDelFileWriter(conf, fs, column,
        MobUtils.formatDate(new Date(request.selectionTime)), tempPath, Long.MAX_VALUE,
        column.getCompactionCompressionType(), HConstants.EMPTY_START_ROW, compactionCacheConfig,
        cryptoContext);
      filePath = writer.getPath();
      List<Cell> cells = new ArrayList<>();
      boolean hasMore;
      ScannerContext scannerContext =
          ScannerContext.newBuilder().setBatchLimit(compactionKVMax).build();
      do {
        hasMore = scanner.next(cells, scannerContext);
        for (Cell cell : cells) {
          writer.append(cell);
        }
        cells.clear();
      } while (hasMore);
    } finally {
      scanner.close();
      if (writer != null) {
        try {
          writer.close();
        } catch (IOException e) {
          LOG.error("Failed to close the writer of the file " + filePath, e);
        }
      }
    }
    // commit the new del file
    Path path = MobUtils.commitFile(conf, fs, filePath, mobFamilyDir, compactionCacheConfig);
    // archive the old del files
    try {
      MobUtils.removeMobFiles(conf, fs, tableName, mobTableDir, column.getName(), delFiles);
    } catch (IOException e) {
      LOG.error("Failed to archive the old del files " + delFiles, e);
    }
    return path;
  }

  /**
   * Creates a store scanner.
   * @param filesToCompact The files to be compacted.
   * @param scanType The scan type.
   * @return The store scanner.
   * @throws IOException if IO failure is encountered
   */
  private StoreScanner createScanner(List<HStoreFile> filesToCompact, ScanType scanType)
    throws IOException {
    List<StoreFileScanner> scanners = StoreFileScanner.getScannersForStoreFiles(filesToCompact,
      false, true, false, false, HConstants.LATEST_TIMESTAMP);
    long ttl = HStore.determineTTLFromFamily(column);
    ScanInfo scanInfo = new ScanInfo(conf, column, ttl, 0, CellComparator.getInstance());
    return new StoreScanner(scanInfo, scanType, scanners);
  }

  /**
   * Bulkloads the current file.
   * @param connection to use to get admin/RegionLocator
   * @param table The current table.
   * @param bulkloadDirectory The path of bulkload directory.
   * @param fileName The current file name.
   * @throws IOException if IO failure is encountered
   */
  private void bulkloadRefFile(Connection connection, Table table, Path bulkloadDirectory,
    String fileName) throws IOException {
    // bulkload the ref file
    try {
      LoadIncrementalHFiles bulkload = new LoadIncrementalHFiles(conf);
      bulkload.disableReplication();
      bulkload.doBulkLoad(bulkloadDirectory, connection.getAdmin(), table,
        connection.getRegionLocator(table.getName()));
    } catch (Exception e) {
      throw new IOException(e);
    }
  }

  /**
   * Closes the mob file writer.
   * @param writer The mob file writer.
   * @param maxSeqId Maximum sequence id.
   * @param mobCellsCount The number of mob cells.
   * @throws IOException if IO failure is encountered
   */
  private void closeMobFileWriter(StoreFileWriter writer, long maxSeqId, long mobCellsCount)
    throws IOException {
    if (writer != null) {
      writer.appendMetadata(maxSeqId, false, mobCellsCount);
      try {
        writer.close();
      } catch (IOException e) {
        LOG.error("Failed to close the writer of the file " + writer.getPath(), e);
      }
    }
  }

  /**
   * Closes the ref file writer.
   * @param writer The ref file writer.
   * @param maxSeqId Maximum sequence id.
   * @param bulkloadTime The timestamp at which the bulk load file is created.
   * @throws IOException if IO failure is encountered
   */
  private void closeRefFileWriter(StoreFileWriter writer, long maxSeqId, long bulkloadTime)
    throws IOException {
    if (writer != null) {
      writer.appendMetadata(maxSeqId, false);
      writer.appendFileInfo(BULKLOAD_TIME_KEY, Bytes.toBytes(bulkloadTime));
      writer.appendFileInfo(SKIP_RESET_SEQ_ID, Bytes.toBytes(true));
      try {
        writer.close();
      } catch (IOException e) {
        LOG.error("Failed to close the writer of the ref file " + writer.getPath(), e);
      }
    }
  }

  /**
   * Gets the max seqId and number of cells of the store files.
   * @param storeFiles The store files.
   * @return The pair of the max seqId and number of cells of the store files.
   * @throws IOException if IO failure is encountered
   */
  private Pair<Long, Long> getFileInfo(List<HStoreFile> storeFiles) throws IOException {
    long maxSeqId = 0;
    long maxKeyCount = 0;
    for (HStoreFile sf : storeFiles) {
      // the readers will be closed later after the merge.
      maxSeqId = Math.max(maxSeqId, sf.getMaxSequenceId());
      sf.initReader();
      byte[] count = sf.getReader().loadFileInfo().get(MOB_CELLS_COUNT);
      if (count != null) {
        maxKeyCount += Bytes.toLong(count);
      }
    }
    return new Pair<>(maxSeqId, maxKeyCount);
  }

  /**
   * Deletes a file.
   * @param path The path of the file to be deleted.
   */
  private void deletePath(Path path) {
    LOG.debug("Cleanup, delete path '{}'", path);
    try {
      if (path != null) {
        fs.delete(path, true);
      }
    } catch (IOException e) {
      LOG.error("Failed to delete the file " + path, e);
    }
  }

  private FileStatus getLinkedFileStatus(HFileLink link) throws IOException {
    Path[] locations = link.getLocations();
    FileStatus file;
    for (Path location : locations) {
      if (location != null) {
        try {
          file = fs.getFileStatus(location);
          if (file != null) {
            return file;
          }
        } catch (FileNotFoundException e) {
          // ignore this location and try the next one
        }
      }
    }
    LOG.warn("The file that " + link + " links to cannot be found");
    return null;
  }
}