
/*
 *  Copyright (c) 2019 Uber Technologies, Inc. (hoodie-dev-group@uber.com)
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *           http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 */

package com.uber.hoodie.common.table.view;

import com.google.common.base.Preconditions;
import com.uber.hoodie.common.model.CompactionOperation;
import com.uber.hoodie.common.model.FileSlice;
import com.uber.hoodie.common.model.HoodieDataFile;
import com.uber.hoodie.common.model.HoodieFileGroup;
import com.uber.hoodie.common.model.HoodieFileGroupId;
import com.uber.hoodie.common.model.HoodieLogFile;
import com.uber.hoodie.common.table.HoodieTableMetaClient;
import com.uber.hoodie.common.table.HoodieTimeline;
import com.uber.hoodie.common.util.Option;
import com.uber.hoodie.common.util.RocksDBDAO;
import com.uber.hoodie.common.util.RocksDBSchemaHelper;
import com.uber.hoodie.common.util.collection.Pair;
import java.io.Serializable;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.Path;
import org.apache.log4j.LogManager;
import org.apache.log4j.Logger;

/**
 * A file-system view implementation on top of an embedded RocksDB store.
 * For each dataset, three column families are added, storing:
 *  (1) File-slices and data files, for view lookups
 *  (2) Pending compaction operations
 *  (3) Partitions tracked
 *
 *  Fine-grained retrieval APIs to fetch the latest file-slice and data-file, the common operations
 *  for ingestion/compaction, are supported.
 *
 *  TODO: vb The current implementation works in embedded server mode where each restart blows away the view stores.
 *  To support view-state preservation across restarts, the Hoodie timeline also needs to be stored
 *  in order to detect changes to the timeline across restarts.
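 *
 *  A minimal construction sketch (illustrative only: it assumes a HoodieTableMetaClient is already
 *  available, that FileSystemViewStorageConfig exposes a newBuilder() factory, and that the inherited
 *  view API exposes getLatestDataFiles(partition); the partition path is a placeholder):
 *  <pre>
 *    FileSystemViewStorageConfig viewConf = FileSystemViewStorageConfig.newBuilder().build();
 *    HoodieTimeline timeline = metaClient.getActiveTimeline().filterCompletedInstants();
 *    RocksDbBasedFileSystemView view = new RocksDbBasedFileSystemView(metaClient, timeline, viewConf);
 *    view.getLatestDataFiles("2019/01/01").forEach(df -> System.out.println(df.getPath()));
 *    view.close();
 *  </pre>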
 */
public class RocksDbBasedFileSystemView extends IncrementalTimelineSyncFileSystemView {

  private static Logger log = LogManager.getLogger(RocksDbBasedFileSystemView.class);

  private final FileSystemViewStorageConfig config;

  private final RocksDBSchemaHelper schemaHelper;

  private RocksDBDAO rocksDB;

  private boolean closed = false;

  public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
      HoodieTimeline visibleActiveTimeline, FileSystemViewStorageConfig config) {
    super(config.isIncrementalTimelineSyncEnabled());
    this.config = config;
    this.schemaHelper = new RocksDBSchemaHelper(metaClient);
    this.rocksDB = new RocksDBDAO(metaClient.getBasePath(), config);
    init(metaClient, visibleActiveTimeline);
  }

  public RocksDbBasedFileSystemView(HoodieTableMetaClient metaClient,
      HoodieTimeline visibleActiveTimeline, FileStatus[] fileStatuses, FileSystemViewStorageConfig config) {
    this(metaClient, visibleActiveTimeline, config);
    addFilesToView(fileStatuses);
  }

  @Override
  protected void init(HoodieTableMetaClient metaClient, HoodieTimeline visibleActiveTimeline) {
    schemaHelper.getAllColumnFamilies().stream().forEach(rocksDB::addColumnFamily);
    super.init(metaClient, visibleActiveTimeline);
    log.info("Created ROCKSDB based file-system view at " + config.getRocksdbBasePath());
  }

  @Override
  protected boolean isPendingCompactionScheduledForFileId(HoodieFileGroupId fgId) {
    return getPendingCompactionOperationWithInstant(fgId).isPresent();
  }

  @Override
  protected void resetPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations) {
    rocksDB.writeBatch(batch -> {
      operations.forEach(opPair -> {
        rocksDB.putInBatch(batch, schemaHelper.getColFamilyForPendingCompaction(),
            schemaHelper.getKeyForPendingCompactionLookup(opPair.getValue().getFileGroupId()), opPair);
      });
      log.info("Initializing pending compaction operations. Count=" + batch.count());
    });
  }

  @Override
  protected void addPendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations) {
    rocksDB.writeBatch(batch -> {
      operations.forEach(opInstantPair -> {
        Preconditions.checkArgument(!isPendingCompactionScheduledForFileId(opInstantPair.getValue().getFileGroupId()),
            "Duplicate FileGroupId found in pending compaction operations. FgId :"
                + opInstantPair.getValue().getFileGroupId());
        rocksDB.putInBatch(batch, schemaHelper.getColFamilyForPendingCompaction(),
            schemaHelper.getKeyForPendingCompactionLookup(opInstantPair.getValue().getFileGroupId()), opInstantPair);
      });
    });
  }

  @Override
  void removePendingCompactionOperations(Stream<Pair<String, CompactionOperation>> operations) {
    rocksDB.writeBatch(batch -> {
      operations.forEach(opInstantPair -> {
        Preconditions.checkArgument(
            getPendingCompactionOperationWithInstant(opInstantPair.getValue().getFileGroupId()).isPresent(),
            "Trying to remove a FileGroupId which is not found in pending compaction operations. FgId :"
                + opInstantPair.getValue().getFileGroupId());
        rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForPendingCompaction(),
            schemaHelper.getKeyForPendingCompactionLookup(opInstantPair.getValue().getFileGroupId()));
      });
    });
  }

  @Override
  protected void resetViewState() {
    log.info("Deleting all rocksdb data associated with dataset filesystem view");
    rocksDB.close();
    rocksDB = new RocksDBDAO(metaClient.getBasePath(), config);
  }

  @Override
  protected Option<Pair<String, CompactionOperation>> getPendingCompactionOperationWithInstant(HoodieFileGroupId fgId) {
    String lookupKey = schemaHelper.getKeyForPendingCompactionLookup(fgId);
    Pair<String, CompactionOperation> instantOperationPair =
        rocksDB.get(schemaHelper.getColFamilyForPendingCompaction(), lookupKey);
    return Option.ofNullable(instantOperationPair);
  }

  @Override
  protected boolean isPartitionAvailableInStore(String partitionPath) {
    String lookupKey = schemaHelper.getKeyForPartitionLookup(partitionPath);
    Serializable obj = rocksDB.get(schemaHelper.getColFamilyForStoredPartitions(), lookupKey);
    return obj != null;
  }

  @Override
  protected void storePartitionView(String partitionPath, List<HoodieFileGroup> fileGroups) {
    log.info("Resetting and adding new partition (" + partitionPath + ") to ROCKSDB based file-system view at "
        + config.getRocksdbBasePath() + ", Total file-groups=" + fileGroups.size());

    String lookupKey = schemaHelper.getKeyForPartitionLookup(partitionPath);
    rocksDB.delete(schemaHelper.getColFamilyForStoredPartitions(), lookupKey);

    // First delete partition views
    rocksDB.prefixDelete(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForSliceViewByPartition(partitionPath));
    rocksDB.prefixDelete(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForDataFileViewByPartition(partitionPath));

    // Now add them
    fileGroups.stream().forEach(fg -> {
      rocksDB.writeBatch(batch -> {
        fg.getAllFileSlicesIncludingInflight().forEach(fs -> {
          rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForSliceView(fg, fs), fs);
          fs.getDataFile().ifPresent(df -> {
            rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForDataFileView(fg, fs),
                df);
          });
        });
      });
    });

    // record that partition is loaded.
    rocksDB.put(schemaHelper.getColFamilyForStoredPartitions(), lookupKey, Boolean.TRUE);
    log.info("Finished adding new partition (" + partitionPath + ") to ROCKSDB based file-system view at "
        + config.getRocksdbBasePath() + ", Total file-groups=" + fileGroups.size());
  }

  /**
   * Overridden to incrementally apply file-slices to the RocksDB store.
   */
  @Override
  protected void applyDeltaFileSlicesToPartitionView(String partition, List<HoodieFileGroup> deltaFileGroups,
      DeltaApplyMode mode) {
    rocksDB.writeBatch(batch -> {
      deltaFileGroups.stream().forEach(fg -> {
        fg.getAllRawFileSlices().map(fs -> {
          FileSlice oldSlice = getFileSlice(partition, fs.getFileId(), fs.getBaseInstantTime());
          if (null == oldSlice) {
            return fs;
          } else {
            // First remove the file-slice
            log.info("Removing old Slice in DB. FS=" + oldSlice);
            rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                schemaHelper.getKeyForSliceView(fg, oldSlice));
            rocksDB.deleteInBatch(batch, schemaHelper.getColFamilyForView(),
                schemaHelper.getKeyForDataFileView(fg, oldSlice));

            Map<String, HoodieLogFile> logFiles = oldSlice.getLogFiles()
                .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                .collect(Collectors.toMap(Pair::getKey, Pair::getValue));
            Map<String, HoodieLogFile> deltaLogFiles = fs.getLogFiles()
                .map(lf -> Pair.of(Path.getPathWithoutSchemeAndAuthority(lf.getPath()).toString(), lf))
                .collect(Collectors.toMap(Pair::getKey, Pair::getValue));

            switch (mode) {
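              // ADD merges the delta slice into the stored slice: the data-file is overwritten when the delta
              // carries one, and the delta's log-files are unioned with the existing ones. REMOVE strips the
              // delta's data-file/log-files from the stored slice and drops the slice if nothing remains.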
              case ADD: {
                FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(), oldSlice.getBaseInstantTime());
                oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                fs.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                Map<String, HoodieLogFile> newLogFiles = new HashMap<>(logFiles);
                deltaLogFiles.entrySet().stream().filter(e -> !logFiles.containsKey(e.getKey()))
                    .forEach(p -> newLogFiles.put(p.getKey(), p.getValue()));
                newLogFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                log.info("Adding back new File Slice after add FS=" + newFileSlice);
                return newFileSlice;
              }
              case REMOVE: {
                log.info("Removing old File Slice =" + fs);
                FileSlice newFileSlice = new FileSlice(oldSlice.getFileGroupId(), oldSlice.getBaseInstantTime());
                // Retain the old data-file only when the delta slice does not carry one to be removed
                if (!fs.getDataFile().isPresent()) {
                  oldSlice.getDataFile().ifPresent(df -> newFileSlice.setDataFile(df));
                }

                deltaLogFiles.keySet().stream().forEach(p -> logFiles.remove(p));
                //Add remaining log files back
                logFiles.values().stream().forEach(lf -> newFileSlice.addLogFile(lf));
                if (newFileSlice.getDataFile().isPresent() || (newFileSlice.getLogFiles().count() > 0)) {
                  log.info("Adding back new file-slice after remove FS=" + newFileSlice);
                  return newFileSlice;
                }
                return null;
              }
              default:
                throw new IllegalStateException("Unknown diff apply mode=" + mode);
            }
          }
        }).filter(Objects::nonNull).forEach(fs -> {
          rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForSliceView(fg, fs), fs);
          fs.getDataFile().ifPresent(df -> {
            rocksDB.putInBatch(batch, schemaHelper.getColFamilyForView(), schemaHelper.getKeyForDataFileView(fg, fs),
                df);
          });
        });
      });
    });
  }

  @Override
  Stream<Pair<String, CompactionOperation>> fetchPendingCompactionOperations() {
    // An empty prefix scans the entire pending-compaction column family
    return rocksDB.<Pair<String, CompactionOperation>>prefixSearch(
        schemaHelper.getColFamilyForPendingCompaction(), "").map(Pair::getValue);
  }

  @Override
  Stream<HoodieDataFile> fetchAllDataFiles(String partitionPath) {
    return rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForDataFileViewByPartition(partitionPath))
        .map(Pair::getValue);
  }

  @Override
  Stream<HoodieFileGroup> fetchAllStoredFileGroups(String partitionPath) {
    return getFileGroups(rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForSliceViewByPartition(partitionPath)).map(Pair::getValue));
  }

  @Override
  Stream<HoodieFileGroup> fetchAllStoredFileGroups() {
    return getFileGroups(rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForSliceView()).map(Pair::getValue));
  }

  @Override
  protected Option<FileSlice> fetchLatestFileSlice(String partitionPath, String fileId) {
    // Retrieves only the file-slices of this file-group and reduces them to the one with the greatest base instant time
    return Option.ofNullable(rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
        .map(Pair::getValue)
        .reduce(null, (x, y) ->
            ((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getBaseInstantTime(),
                y.getBaseInstantTime(), HoodieTimeline.GREATER) ? x : y)));
  }

  @Override
  protected Option<HoodieDataFile> fetchLatestDataFile(String partitionPath, String fileId) {
    // Retrieves only the data-files of this file-group and reduces them to the one with the greatest commit time
    return Option.ofNullable(rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
        schemaHelper.getPrefixForDataFileViewByPartitionFile(partitionPath, fileId))
        .map(Pair::getValue)
        .reduce(null, (x, y) ->
            ((x == null) ? y : (y == null) ? null : HoodieTimeline.compareTimestamps(x.getCommitTime(),
                y.getCommitTime(), HoodieTimeline.GREATER) ? x : y)));
  }

  @Override
  Option<HoodieFileGroup> fetchHoodieFileGroup(String partitionPath, String fileId) {
    return Option.fromJavaOptional(
        getFileGroups(rocksDB.prefixSearch(schemaHelper.getColFamilyForView(),
            schemaHelper.getPrefixForSliceViewByPartitionFile(partitionPath, fileId))
            .map(Pair::getValue)).findFirst());
  }

  /**
   * Groups the given file-slices by (partition-path, file-id) and materializes one HoodieFileGroup per key,
   * attaching every slice that belongs to it.
   */
  private Stream<HoodieFileGroup> getFileGroups(Stream<FileSlice> sliceStream) {
    return sliceStream.map(s -> Pair.of(Pair.of(s.getPartitionPath(), s.getFileId()), s))
        .collect(Collectors.groupingBy(Pair::getKey)).entrySet().stream().map(slicePair -> {
          HoodieFileGroup fg =
              new HoodieFileGroup(slicePair.getKey().getKey(), slicePair.getKey().getValue(), visibleActiveTimeline);
          slicePair.getValue().forEach(e -> fg.addFileSlice(e.getValue()));
          return fg;
        });
  }

  private FileSlice getFileSlice(String partitionPath, String fileId, String instantTime) {
    String key = schemaHelper.getKeyForSliceView(partitionPath, fileId, instantTime);
    return rocksDB.get(schemaHelper.getColFamilyForView(), key);
  }

  @Override
  public void close() {
    closed = true;
    rocksDB.close();
  }

  @Override
  boolean isClosed() {
    return closed;
  }
}