All downloads are free. Search and download functionality uses the official Maven repository.

com.sap.hana.datalake.files.HdlfsBaseFileSystem Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting SAP HANA Data Lake Files.

There is a newer version: 3.0.27
Show newest version
// © 2021-2024 SAP SE or an SAP affiliate company. All rights reserved.
package com.sap.hana.datalake.files;

import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.JsonNode;
import com.sap.hana.datalake.files.shaded.com.fasterxml.jackson.databind.ObjectMapper;
import com.sap.hana.datalake.files.classification.InterfaceAudience;
import com.sap.hana.datalake.files.enumeration.CreateOperationMode;
import com.sap.hana.datalake.files.enumeration.MkdirsOperationMode;
import com.sap.hana.datalake.files.utils.HdlfsRetryUtils;
import com.sap.hana.datalake.files.utils.LRUCache;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileAlreadyExistsException;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;
import com.sap.hana.datalake.files.shaded.org.apache.hadoop.hdfs.web.WebHdfsFileSystem;
import org.apache.hadoop.io.retry.RetryPolicy;
import org.apache.hadoop.security.Credentials;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.Progressable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

@InterfaceAudience.Private
public class HdlfsBaseFileSystem extends FileSystem implements HdlfsFileSystemCapabilities {

  /** URI scheme handled by this file system; same value as {@link HdlfsFileSystem#SCHEME}. */
  public static final String SCHEME = HdlfsFileSystem.SCHEME;

  private static final Logger LOG = LoggerFactory.getLogger(HdlfsBaseFileSystem.class);

  // Static caches shared by every instance; created lazily by the first initialize() call.
  // Key: qualified directory path string. Value: synthetic directory status stored by mkdirs() in DEFAULT mode.
  private static LRUCache<String, FileStatus> LOCAL_MKDIR_CACHE;
  // Key: scheme-less path registered through addPathToPendingFilesCreated(). Value: always true (presence marker).
  private static LRUCache<Path, Boolean> LOCAL_PENDING_FILES_CREATED_CACHE;
  // Shared empty result for listings of directories that only exist in LOCAL_MKDIR_CACHE.
  private static final FileStatus[] NO_FILES = {};

  // Builds the JSON patch documents sent to the FsCache.
  private final ObjectMapper mapper = new ObjectMapper();
  // File system that actually performs the remote operations.
  private final WebHdfsFileSystem delegateFS;

  // Paths (without scheme/authority) matching any of these patterns are recorded in the FsCache on write.
  protected List<Pattern> consistentWritePathPatterns;

  // True when an endpoint is configured and the URI authority is not a valid host (it then names the file container).
  private boolean fsUriIsFileContainer;
  private CreateOperationMode createOperationMode;
  // True when the file system URI carries an explicit port.
  private boolean fsUriRequiresPort;
  private boolean hdlfsOutputCommitterEnabled;
  private MkdirsOperationMode mkdirsOperationMode;
  private HdlfsMultipartUpload.Config multipartUploadConfig;
  private RetryPolicy retryPolicy;
  private URI uri;
  private boolean fsCacheEnabled;
  // Maximum number of attempts and pause between attempts for the FsCache consistency loops.
  private int listStatusRetryMax;
  private int listStatusRetryTimeoutMs;
  // Retry policy applied to OPEN when the file is missing remotely but present in the FsCache.
  private RetryPolicy openFileNotFoundRetryPolicy;
  // OPEN first performs a GETFILESTATUS on delegate paths fully matching this pattern.
  private Pattern openCheckExistsPattern;

  /**
   * Creates a file system that forwards its operations to the given delegate.
   * Configuration-dependent state is only set up later, in {@link #initialize(URI, Configuration)}.
   *
   * @param webHdfsFileSystem the delegate file system; stored as-is, no null check is performed
   */
  public HdlfsBaseFileSystem(final WebHdfsFileSystem webHdfsFileSystem) {
    super();
    this.delegateFS = webHdfsFileSystem;
  }

  /**
   * Initializes this file system from its URI and the Hadoop configuration.
   *
   * <p>Reads the committer, multipart-upload, create/mkdirs mode, open-check pattern, local-cache and FsCache
   * settings, and lazily creates the two static LRU caches if no earlier instance has done so.
   *
   * @param fsUri the URI this file system was requested for
   * @param conf  the configuration to read all {@code fs.hdlfs} settings from
   * @throws IOException           if the superclass initialization fails
   * @throws IllegalStateException if consistent-write path patterns are configured while the FsCache is disabled,
   *                               or if {@link #setupMultipartUpload(Configuration)} rejects the configuration
   */
  @Override
  public void initialize(final URI fsUri, final Configuration conf) throws IOException {
    super.initialize(fsUri, conf);
    this.setConf(conf);

    // The HDLFS committer counts as enabled only when its factory class is the one configured for this scheme
    final String configuredFactory = conf.get(HdlfsConstants.MAPREDUCE_OUTPUTCOMMITTER_FACTORY_SCHEME_HDLFS_KEY);
    this.hdlfsOutputCommitterEnabled = (configuredFactory != null && configuredFactory.equals(HdlfsCommitterFactory.class.getName()));

    this.uri = fsUri;
    this.fsUriRequiresPort = fsUri.getPort() > 0;

    final String endpoint = conf.get(HdlfsConstants.FS_HDLFS_ENDPOINT_KEY);
    final String[] parsedHost = HdlfsFileSystem.parseFsUriAuthority(fsUri);
    final boolean endpointConfigured = endpoint != null && !endpoint.isEmpty();
    // With an endpoint configured, the endpoint is the host and the URI authority names the FileContainer
    this.fsUriIsFileContainer = endpointConfigured && !HdlfsFileSystem.isParsedHostValid(parsedHost);

    // All relevant fs.hdlfs configurations

    this.retryPolicy = HdlfsRetryPolicies.createDefaultRetryPolicy(conf);

    LOG.info("Retry policy: [{}]", this.retryPolicy);

    this.setupMultipartUpload(conf);

    this.createOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_CREATE_MODE_KEY, CreateOperationMode.DEFAULT);
    this.mkdirsOperationMode = conf.getEnum(HdlfsConstants.FS_HDLFS_OPERATION_MKDIRS_MODE_KEY, MkdirsOperationMode.DEFAULT);

    final String openCheckExistsRegex = conf.get(HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHECK_EXISTS_REGEX, HdlfsConstants.FS_HDLFS_OPERATION_OPEN_CHECK_EXISTS_REGEX_DEFAULT);
    this.openCheckExistsPattern = Pattern.compile(openCheckExistsRegex);

    // The static caches are sized by whichever instance initializes first; later configurations are ignored
    if (LOCAL_MKDIR_CACHE == null) {
      final int mkdirCacheCapacity = conf.getInt(HdlfsConstants.FS_LOCAL_CACHE_MKDIR_ENTRIES_MAX_SIZE, HdlfsConstants.FS_LOCAL_CACHE_MKDIR_ENTRIES_MAX_SIZE_DEFAULT);
      LOCAL_MKDIR_CACHE = new LRUCache<>(mkdirCacheCapacity);
    }

    if (LOCAL_PENDING_FILES_CREATED_CACHE == null) {
      final int pendingFilesCacheCapacity = conf.getInt(HdlfsConstants.FS_LOCAL_CACHE_PENDING_FILES_CREATED_ENTRIES_MAX_SIZE, HdlfsConstants.FS_LOCAL_CACHE_PENDING_FILES_CREATED_ENTRIES_MAX_SIZE_DEFAULT);
      LOCAL_PENDING_FILES_CREATED_CACHE = new LRUCache<>(pendingFilesCacheCapacity);
    }

    this.fsCacheEnabled = conf.getBoolean(HdlfsConstants.FSCACHE_ENABLED, HdlfsConstants.FSCACHE_ENABLED_DEFAULT);
    this.listStatusRetryMax = conf.getInt(HdlfsConstants.FSCACHE_LISTSTATUS_RETRIES_KEY, HdlfsConstants.FSCACHE_MAX_LISTSTATUS_RETRIES_DEFAULT);
    this.listStatusRetryTimeoutMs = conf.getInt(HdlfsConstants.FSCACHE_LISTSTATUS_RETRY_TIMEOUT_KEY, HdlfsConstants.FSCACHE_LISTSTATUS_RETRY_TIMEOUT_MS_DEFAULT);

    this.openFileNotFoundRetryPolicy = HdlfsRetryPolicies.createOpenFileNotFoundRetryPolicy(conf);

    // Synchronized list because patterns can be added/removed after initialization
    this.consistentWritePathPatterns = conf.getStringCollection(HdlfsConstants.FS_HDLFS_CONSISTENT_WRITE_PATH_PATTERNS)
        .stream()
        .map(Pattern::compile)
        .collect(Collectors.toCollection(() -> Collections.synchronizedList(new ArrayList<>())));

    final boolean patternsConfigured = !this.consistentWritePathPatterns.isEmpty();

    if (patternsConfigured && !this.fsCacheEnabled) {
      throw new IllegalStateException("Consistent write path patterns should not be passed when FsCache is disabled.");
    }
  }

  /**
   * Returns the URI scheme of this file system.
   *
   * @return the value of {@link #SCHEME}
   */
  @Override
  public String getScheme() {
    return HdlfsBaseFileSystem.SCHEME;
  }

  /**
   * Returns the URI this file system was initialized with.
   *
   * @return the URI passed to {@code initialize}, or {@code null} if {@code initialize} has not run yet
   */
  @Override
  public URI getUri() {
    final URI fileSystemUri = this.uri;
    return fileSystemUri;
  }

  /**
   * Opens the file at {@code path} through the delegate file system.
   *
   * <p>When the delegate reports the file as missing and the FsCache is enabled, the FsCache is consulted: if it
   * knows the file, the open is retried under the open-file-not-found retry policy (the existence pre-check is
   * not repeated for the retries). Otherwise the original exception is rethrown.
   *
   * @param path       the file to open
   * @param bufferSize the buffer size handed to the delegate
   * @return the input stream produced by the delegate
   * @throws FileNotFoundException if the file is missing and no retry applies
   * @throws IOException           on any other I/O failure
   */
  @Override
  public FSDataInputStream open(final Path path, final int bufferSize) throws IOException {
    LOG.debug("Performing OPEN on: {}", path);

    final Path delegatePath = this.rewritePathToDelegateFs(path);

    try {
      this.assertObjectExistsIfNeeded(delegatePath);

      return this.delegateFS.open(delegatePath, bufferSize);
    } catch (final FileNotFoundException notFound) {
      // Without an FsCache there is nothing to cross-check against
      if (!this.fsCacheEnabled) {
        throw notFound;
      }

      LOG.info("OPEN operation on path {} threw FileNotFoundException. Checking FsCache to see if we need to retry the operation", path);
      final Path relativePath = this.getRelativePathFromSchemaPath(path);

      // Unknown to the FsCache as well: the file is really missing
      if (!this.fileExistsInFsCache(relativePath)) {
        throw notFound;
      }

      // Known to the FsCache: retry to ride out eventual-consistency delays
      final String operationName = String.format("Performing OPEN with retry on: %s", path);

      return HdlfsRetryUtils.execWithRetry(operationName, this.openFileNotFoundRetryPolicy, /* idempotent */ true, () -> this.delegateFS.open(delegatePath, bufferSize));
    }
  }

  /**
   * Reads the Multipart Upload configuration and, when enabled, initializes the global upload thread pool.
   *
   * <p>Subclasses may override this (e.g. when Direct Access is enabled and Multipart Upload may legitimately be
   * disabled even though the committer is active).
   *
   * @param conf the configuration to read the Multipart Upload settings from
   * @throws IllegalStateException if the HDLFS output committer is enabled but Multipart Upload is disabled
   */
  protected void setupMultipartUpload(final Configuration conf) {
    final HdlfsMultipartUpload.Config config = new HdlfsMultipartUpload.Config(conf);
    this.multipartUploadConfig = config;

    if (config.isEnabled()) {
      HdlfsMultipartUpload.initializeGlobalThreadPool(config, conf);
    }

    // The committer relies on Multipart Upload; a user who disabled it manually gets an explicit error
    final boolean committerWithoutMultipartUpload = this.hdlfsOutputCommitterEnabled && !config.isEnabled();

    if (committerWithoutMultipartUpload) {
      throw new IllegalStateException("HdlfsOutputCommitter can only be enabled if Multipart Upload is also enabled.");
    }
  }

  /**
   * Performs a GETFILESTATUS on the delegate for paths fully matching the configured open-check pattern, so that
   * a missing object surfaces as an exception before the actual open.
   *
   * @param path the delegate file system path about to be opened
   * @throws IOException whatever the delegate's {@code getFileStatus} throws (e.g. when the path is missing)
   */
  protected void assertObjectExistsIfNeeded(final Path path) throws IOException {
    final String pathText = path.toString();

    if (!this.openCheckExistsPattern.matcher(pathText).matches()) {
      return;
    }

    // delegateFS.open() does not throw a FileNotFoundException and Delta Lake relies on this behavior
    // Backlog BDSGOLD-2748 was created to review this
    this.delegateFS.getFileStatus(path);
  }

  /**
   * Builds the {@link HdlfsOutputStream} used for writes when Multipart Upload is enabled.
   *
   * <p>The stream is asked to keep its upload pending after close only when the HDLFS output committer is
   * enabled and the target lies under a pending prefix without itself being a pending or pending-set file.
   *
   * @param path        target path (already rewritten to the delegate file system by the caller)
   * @param permission  permission for the new file
   * @param overwrite   overwrite flag forwarded to the stream builder
   * @param replication replication factor forwarded to the stream builder
   * @param progress    progress callback forwarded to the stream builder
   * @return the configured output stream
   * @throws IOException if the stream cannot be built
   */
  private HdlfsOutputStream createHdlfsOutputStream(final Path path, final FsPermission permission, final boolean overwrite,
                                                    final short replication, final Progressable progress) throws IOException {
    final boolean keepPendingAfterClose = this.hdlfsOutputCommitterEnabled
            && this.isPendingPath(path)
            && !(this.isPendingFile(path) || this.isPendingSetFile(path));

    return new HdlfsOutputStream.Builder()
            .withFileSystem(this)
            .withMultipartUploadConfig(this.multipartUploadConfig)
            .withOverwrite(overwrite)
            .withPermission(permission)
            .withProgress(progress)
            .withReplication(replication)
            .withRetryPolicy(this.retryPolicy)
            .withTargetPath(path)
            .withKeepPendingAfterClose(keepPendingAfterClose)
            .build();
  }

  /** Returns whether the last path component ends with {@code HdlfsConstants.PENDING_SUFFIX}. */
  private boolean isPendingFile(final Path path) {
    final String fileName = path.getName();
    return fileName.endsWith(HdlfsConstants.PENDING_SUFFIX);
  }

  /** Returns whether the last path component ends with {@code HdlfsConstants.PENDINGSET_SUFFIX}. */
  private boolean isPendingSetFile(final Path path) {
    final String fileName = path.getName();
    return fileName.endsWith(HdlfsConstants.PENDINGSET_SUFFIX);
  }

  /** Returns whether {@code HdlfsConstants.PENDING_PREFIX_NAME} occurs anywhere in the path's string form. */
  private boolean isPendingPath(final Path path) {
    final String fullPath = path.toString();
    return fullPath.contains(HdlfsConstants.PENDING_PREFIX_NAME);
  }

  /**
   * Creates a file at {@code path} and returns a stream to write it.
   *
   * <p>The {@code overwrite} argument is only evaluated by {@link #checkCreateOperation(Path, boolean)}; once
   * that check passes, the write itself is always issued with overwrite enabled. Pending-set files under a
   * pending prefix bypass Multipart Upload and are written through the delegate directly.
   *
   * @param path         the file to create
   * @param fsPermission permission for the new file
   * @param overwrite    whether an existing file may be replaced (interpreted per the configured create mode)
   * @param bufferSize   buffer size (used only for the direct delegate write)
   * @param replication  replication factor
   * @param blockSize    block size (used only for the direct delegate write)
   * @param progress     progress callback
   * @return the output stream for the new file
   * @throws IOException if the create check or the underlying create fails
   */
  @Override
  public FSDataOutputStream create(final Path path, final FsPermission fsPermission, final boolean overwrite, final int bufferSize,
                                   final short replication, final long blockSize, final Progressable progress) throws IOException {
    this.checkCreateOperation(path, overwrite);

    final Path delegatePath = this.rewritePathToDelegateFs(path);
    final Path relativePath = this.getRelativePathFromSchemaPath(path);

    // Record the upcoming file in the FsCache so later reads can detect stale listings
    if (this.fsCacheEnabled && this.pathNeedsConsistentWrite(relativePath)) {
      this.savePathToFsCache(relativePath, false);
    }

    final boolean isPendingSetTarget = this.isPendingPath(path) && this.isPendingSetFile(path);
    final boolean useMultipartUpload = !isPendingSetTarget && this.multipartUploadConfig.isEnabled();

    if (!useMultipartUpload) {
      return this.delegateFS.create(delegatePath, fsPermission, true, bufferSize, replication, blockSize, progress);
    }

    final HdlfsOutputStream multipartStream = this.createHdlfsOutputStream(delegatePath, fsPermission, true, replication, progress);

    return new FSDataOutputStream(multipartStream, /* stats */ null);
  }

  /**
   * Appends to the file at {@code path} by forwarding to the delegate file system.
   *
   * @param path       the file to append to
   * @param bufferSize buffer size handed to the delegate
   * @param progress   progress callback handed to the delegate
   * @return the output stream returned by the delegate
   * @throws IOException if the delegate fails
   */
  @Override
  public FSDataOutputStream append(final Path path, final int bufferSize, final Progressable progress) throws IOException {
    LOG.debug("Performing APPEND on: {}", path);

    return this.delegateFS.append(this.rewritePathToDelegateFs(path), bufferSize, progress);
  }

  /**
   * Renames {@code pathFrom} to {@code pathTo} through the delegate file system.
   *
   * <p>With the FsCache enabled, a source matching a consistent-write pattern is recorded as deleted and a
   * matching destination is recorded as present, both before the rename is issued.
   *
   * @param pathFrom the existing path
   * @param pathTo   the new path
   * @return the delegate's result
   * @throws IOException if the FsCache update or the rename fails
   */
  @Override
  public boolean rename(final Path pathFrom, final Path pathTo) throws IOException {
    final Path delegateSource = this.rewritePathToDelegateFs(pathFrom);
    final Path delegateTarget = this.rewritePathToDelegateFs(pathTo);

    if (!this.fsCacheEnabled) {
      return this.delegateFS.rename(delegateSource, delegateTarget);
    }

    final Path relativeSource = this.getRelativePathFromSchemaPath(pathFrom);
    final Path relativeTarget = this.getRelativePathFromSchemaPath(pathTo);

    if (this.pathNeedsConsistentWrite(relativeSource)) {
      this.savePathToFsCache(relativeSource, true);
    }

    if (this.pathNeedsConsistentWrite(relativeTarget)) {
      this.savePathToFsCache(relativeTarget, false);
    }

    return this.delegateFS.rename(delegateSource, delegateTarget);
  }

  /**
   * Deletes {@code path} through the delegate file system.
   *
   * <p>With the FsCache enabled, a path matching a consistent-write pattern is first recorded as deleted.
   * Whether or not the remote delete succeeds, the path is evicted from the local mkdir cache; for recursive
   * deletes every cached directory below it is evicted too.
   *
   * @param path      the path to delete
   * @param recursive whether to delete children as well
   * @return the delegate's result
   * @throws IOException if the FsCache update or the delete fails
   */
  @Override
  public boolean delete(final Path path, final boolean recursive) throws IOException {
    final Path delegateFsPath;

    try {
      delegateFsPath = this.rewritePathToDelegateFs(path);

      final Path relativePath = this.getRelativePathFromSchemaPath(path);

      if (this.fsCacheEnabled && this.pathNeedsConsistentWrite(relativePath)) {
        this.savePathToFsCache(relativePath, true);
      }

      return this.delegateFS.delete(delegateFsPath, recursive);
    } finally {
      final String qualifiedPathStr = this.makeQualified(path).toString();
      LOCAL_MKDIR_CACHE.remove(qualifiedPathStr);

      if (recursive) {
        // A root path already renders with a trailing '/'; appending another one would yield "…//"
        // and no cached child would ever match. The prefix is computed once, outside the predicate.
        final String childPrefix = qualifiedPathStr.endsWith("/") ? qualifiedPathStr : qualifiedPathStr + "/";
        LOCAL_MKDIR_CACHE.removeAll(key -> key.startsWith(childPrefix));
      }
    }
  }

  /**
   * Lists the direct children of {@code path}.
   *
   * <p>With the FsCache enabled the listing is cross-checked against the cache; otherwise the delegate's listing
   * is returned directly.
   *
   * @param path the directory to list
   * @return the statuses of the entries under {@code path}
   * @throws FileNotFoundException if the path does not exist
   * @throws IOException           on any other I/O failure
   */
  @Override
  public FileStatus[] listStatus(final Path path) throws IOException {
    LOG.debug("Performing LISTSTATUS on: {}", path);

    if (!this.fsCacheEnabled) {
      return this.regularListStatus(path);
    }

    // If FsCache is enabled, we assume that path is always a "directory"
    final Path relativePath = this.getRelativePathFromSchemaPath(path);
    final FileStatus[] consistentStatuses = this.consistentListStatus(relativePath);

    if (consistentStatuses != null) {
      return consistentStatuses;
    }

    throw new FileNotFoundException(String.format("Path %s was not found", path));
  }

  /**
   * Lists one batch of entries under {@code path}, converting each status with {@code HdlfsFileStatus.create}.
   *
   * <p>The conversion overwrites the elements of the array returned by {@code getEntries()} in place. A path
   * unknown to the delegate but present in the local mkdir cache yields an empty, final batch.
   *
   * @param path  the directory to list
   * @param token continuation token from a previous batch
   * @return the batch of directory entries
   * @throws FileNotFoundException if the path exists neither remotely nor in the local mkdir cache
   * @throws IOException           on any other I/O failure
   */
  @Override
  protected DirectoryEntries listStatusBatch(final Path path, final byte[] token) throws IOException {
    LOG.debug("Performing LISTSTATUS_BATCH on: {}", path);

    try {
      final DirectoryEntries batch = this.delegateFS.listStatusBatch(this.rewritePathToDelegateFs(path), token);
      final FileStatus[] entries = batch.getEntries();

      for (int i = 0; i < entries.length; i++) {
        entries[i] = HdlfsFileStatus.create(entries[i], this.fsUriRequiresPort, this.uri);
      }

      return batch;
    } catch (final FileNotFoundException notFound) {
      final String qualifiedPath = this.makeQualified(path).toString();

      if (!LOCAL_MKDIR_CACHE.has(qualifiedPath)) {
        throw notFound;
      }

      return new DirectoryEntries(NO_FILES, null, false);
    }
  }

  /**
   * Lists one batch of a recursive listing under {@code path}, converting each status with
   * {@code HdlfsFileStatus.create}.
   *
   * <p>The conversion overwrites the elements of the array returned by {@code getEntries()} in place. A path
   * unknown to the delegate but present in the local mkdir cache yields an empty, final batch.
   *
   * @param path  the directory to list recursively
   * @param token continuation token from a previous batch
   * @return the batch of directory entries
   * @throws FileNotFoundException if the path exists neither remotely nor in the local mkdir cache
   * @throws IOException           on any other I/O failure
   */
  @Override
  public DirectoryEntries listStatusRecursive(final Path path, final byte[] token) throws IOException {
    LOG.debug("Performing LISTSTATUS_RECURSIVE on: {}", path);

    try {
      final Path delegatePath = this.rewritePathToDelegateFs(path);
      final HdlfsFileSystemCapabilities capableDelegate = (HdlfsFileSystemCapabilities) this.delegateFS;
      final DirectoryEntries batch = capableDelegate.listStatusRecursive(delegatePath, token);
      final FileStatus[] entries = batch.getEntries();

      for (int i = 0; i < entries.length; i++) {
        entries[i] = HdlfsFileStatus.create(entries[i], this.fsUriRequiresPort, this.uri);
      }

      return batch;
    } catch (final FileNotFoundException notFound) {
      final String qualifiedPath = this.makeQualified(path).toString();

      if (!LOCAL_MKDIR_CACHE.has(qualifiedPath)) {
        throw notFound;
      }

      return new DirectoryEntries(NO_FILES, null, false);
    }
  }

  /**
   * Sets the working directory on the delegate file system.
   *
   * @param path the new working directory; rewritten to the delegate's scheme and authority first
   */
  @Override
  public void setWorkingDirectory(final Path path) {
    this.delegateFS.setWorkingDirectory(this.rewritePathToDelegateFs(path));
  }

  /**
   * Returns the delegate's working directory, rewritten back to this file system's scheme.
   *
   * @return the current working directory
   */
  @Override
  public Path getWorkingDirectory() {
    return this.rewritePathFromDelegateFs(this.delegateFS.getWorkingDirectory());
  }

  /**
   * Creates a directory according to the configured MKDIRS operation mode.
   *
   * <p>In {@code DEFAULT} mode nothing is sent to the delegate: a synthetic directory status is stored in the
   * local mkdir cache and {@code true} is returned. In {@code COMPATIBLE} mode the call is forwarded to the
   * delegate.
   *
   * @param path the directory to create
   * @param fp   permission for the directory (only used in {@code COMPATIBLE} mode)
   * @return {@code true} in {@code DEFAULT} mode, otherwise the delegate's result
   * @throws IOException           if the delegate fails
   * @throws IllegalStateException if the operation mode is neither {@code DEFAULT} nor {@code COMPATIBLE}
   */
  @Override
  public boolean mkdirs(final Path path, final FsPermission fp) throws IOException {
    final MkdirsOperationMode mode = this.mkdirsOperationMode;

    if (mode == MkdirsOperationMode.COMPATIBLE) {
      LOG.debug("Performing MKDIRS on: {}", path);

      return this.delegateFS.mkdirs(this.rewritePathToDelegateFs(path), fp);
    }

    if (mode == MkdirsOperationMode.DEFAULT) {
      LOG.debug("MKDIRS operation is not supported, doing nothing but caching the dir locally.");
      final Path qualifiedPath = this.makeQualified(path);
      // Zero-length directory entry stamped with the current time
      final FileStatus directoryStatus = new FileStatus(0, true, 0, 0, System.currentTimeMillis(), qualifiedPath);

      LOCAL_MKDIR_CACHE.put(qualifiedPath.toString(), directoryStatus);

      return true;
    }

    final String errorMessage = String.format("Unrecognized MKDIRS operation mode: [%s]", mode);
    LOG.error(errorMessage);
    throw new IllegalStateException(errorMessage);
  }

  /**
   * Returns the status of {@code path}.
   *
   * <p>Paths registered in the pending-files cache are reported as not found without contacting the delegate.
   * With the FsCache enabled the status is cross-checked against the cache; otherwise the delegate's answer
   * (with the local mkdir cache as fallback) is returned.
   *
   * @param path the path to look up
   * @return the status of the path
   * @throws FileNotFoundException if the path is a registered pending file or does not exist
   * @throws IOException           on any other I/O failure
   */
  @Override
  public FileStatus getFileStatus(final Path path) throws IOException {
    LOG.debug("Performing GETFILESTATUS on: {}", path);

    final Path relativePath = this.getRelativePathFromSchemaPath(path);

    if (LOCAL_PENDING_FILES_CREATED_CACHE.has(relativePath)) {
      LOG.debug("Path {} found in pending files cache, skipping the GETFILESTATUS call", relativePath);
      throw new FileNotFoundException("File was created by the pending algorithm");
    }

    if (!this.fsCacheEnabled) {
      return this.regularGetFileStatus(path);
    }

    final FileStatus consistentStatus = this.consistentGetFileStatus(relativePath);

    if (consistentStatus != null) {
      return consistentStatus;
    }

    throw new FileNotFoundException(String.format("Path %s was not found", relativePath));
  }

  /**
   * Obtains delegation tokens from the delegate file system and adds them to {@code credentials}.
   *
   * @param renewer     the designated renewer for the tokens
   * @param credentials the credentials object the tokens are added to
   * @return the tokens returned by the delegate
   * @throws IOException if the delegate fails
   */
  @Override
  public Token<?>[] addDelegationTokens(final String renewer, final Credentials credentials) throws IOException {
    return this.delegateFS.addDelegationTokens(renewer, credentials);
  }

  /**
   * Forwards a batch delete to the delegate file system.
   *
   * <p>NOTE(review): the paths are passed through without {@code rewritePathToDelegateFs}, and neither the
   * FsCache nor the local mkdir cache is updated — confirm this is intended.
   *
   * @param files the paths to delete
   * @return the delegate's batch result
   * @throws IOException if the delegate fails
   */
  @Override
  public DeleteBatchResult deleteBatch(final Collection files) throws IOException {
    final DeleteBatchResult batchResult = this.delegateFS.deleteBatch(files);
    return batchResult;
  }

  /**
   * Forwards a batch delete to the delegate file system, optionally waiting for its outcome.
   *
   * <p>NOTE(review): the paths are passed through without {@code rewritePathToDelegateFs} — confirm this is
   * intended.
   *
   * @param files               the paths to delete
   * @param shouldWaitForResult flag handed to the delegate unchanged
   * @return the delegate's batch result
   * @throws IOException if the delegate fails
   */
  @Override
  public DeleteBatchResult deleteBatch(final Collection files, final boolean shouldWaitForResult) throws IOException {
    final DeleteBatchResult batchResult = this.delegateFS.deleteBatch(files, shouldWaitForResult);
    return batchResult;
  }

  /**
   * Forwards the completion of a previously started batch delete to the delegate file system.
   *
   * @param token           token identifying the batch, handed to the delegate unchanged
   * @param waitTimeSeconds wait time handed to the delegate unchanged
   * @return the delegate's batch result
   * @throws IOException if the delegate fails
   */
  @Override
  public DeleteBatchResult completeDeleteBatch(final String token, final long waitTimeSeconds) throws IOException {
    final DeleteBatchResult batchResult = this.delegateFS.completeDeleteBatch(token, waitTimeSeconds);
    return batchResult;
  }

  /**
   * Forwards a merge of {@code sources} into {@code path} to the delegate file system.
   *
   * <p>NOTE(review): the paths are passed through without {@code rewritePathToDelegateFs} — confirm this is
   * intended.
   *
   * @param path    the merge target
   * @param sources the paths to merge
   * @return the delegate's merge result
   * @throws IOException if the delegate fails
   */
  @Override
  public MergeResult merge(final Path path, final Collection sources) throws IOException {
    final MergeResult mergeResult = this.delegateFS.merge(path, sources);
    return mergeResult;
  }

  /**
   * Forwards a copy from {@code path} to {@code destination} to the delegate file system.
   *
   * <p>NOTE(review): the paths are passed through without {@code rewritePathToDelegateFs} — confirm this is
   * intended.
   *
   * @param path        the source path
   * @param destination the destination path
   * @return the delegate's copy result
   * @throws IOException if the delegate fails
   */
  @Override
  public CopyResult copy(final Path path, final Path destination) throws IOException {
    final CopyResult copyResult = this.delegateFS.copy(path, destination);
    return copyResult;
  }

  /**
   * Exposes the delegate file system this instance forwards to.
   *
   * @return the delegate passed to the constructor
   */
  public WebHdfsFileSystem getWebHdfsFileSystem() {
    final WebHdfsFileSystem delegate = this.delegateFS;
    return delegate;
  }

  /** Returns the FsCache held by the delegate file system. */
  FsCache getFsCache() {
    final FsCache delegateCache = this.delegateFS.getFsCache();
    return delegateCache;
  }

  /** Returns the Catalog held by the delegate file system. */
  Catalog getCatalog() {
    final Catalog delegateCatalog = this.delegateFS.getCatalog();
    return delegateCatalog;
  }

  /** Returns the default retry policy created during {@code initialize}. */
  RetryPolicy getRetryPolicy() {
    final RetryPolicy defaultPolicy = this.retryPolicy;
    return defaultPolicy;
  }

  /**
   * Registers a path in the static pending-files cache; {@link #getFileStatus(Path)} reports registered paths
   * as not found without contacting the delegate.
   *
   * @param path the path to register; only its path component (no scheme/authority) is used as the key
   */
  void addPathToPendingFilesCreated(final Path path) {
    final Path cacheKey = this.getRelativePathFromSchemaPath(path);

    LOCAL_PENDING_FILES_CREATED_CACHE.put(cacheKey, true);
    LOG.debug("File with path {} was added to pending files cache", cacheKey);
  }

  /**
   * Translates a path of this file system into the equivalent path on the delegate file system.
   *
   * @param path the path to translate
   * @return {@code path} itself when its scheme is not {@link #SCHEME} (including scheme-less paths); otherwise
   *         a path with the delegate's scheme and authority and the same path component
   */
  protected Path rewritePathToDelegateFs(final Path path) {
    final URI sourceUri = path.toUri();

    if (SCHEME.equals(sourceUri.getScheme())) {
      final URI targetUri = this.delegateFS.getUri();

      return new Path(targetUri.getScheme(), targetUri.getAuthority(), sourceUri.getPath());
    }

    return path;
  }

  /**
   * Translates a delegate file system path back into a path of this file system.
   *
   * <p>The authority of the result is this file system's own authority when the URI authority names the file
   * container; otherwise it is the delegate's authority (when the URI requires a port) or just the delegate's
   * host.
   *
   * @param path the path to translate
   * @return {@code path} itself when it has no scheme or already uses {@link #SCHEME}; otherwise the rewritten
   *         path
   */
  protected Path rewritePathFromDelegateFs(final Path path) {
    final URI sourceUri = path.toUri();
    final String sourceScheme = sourceUri.getScheme();

    if (sourceScheme == null || SCHEME.equals(sourceScheme)) {
      return path;
    }

    final String authority;

    if (this.fsUriIsFileContainer) {
      authority = this.getUri().getAuthority();
    } else {
      final URI delegateUri = this.delegateFS.getUri();
      authority = this.fsUriRequiresPort ? delegateUri.getAuthority() : delegateUri.getHost();
    }

    return new Path(HdlfsBaseFileSystem.SCHEME, authority, sourceUri.getPath());
  }

  /**
   * Validates a CREATE request against the configured create operation mode before any data is written.
   *
   * <ul>
   *   <li>{@code OVERWRITE}: always allowed; {@code overwrite=false} is ignored.</li>
   *   <li>{@code DEFAULT}: allowed without a remote lookup when {@code overwrite} is set; otherwise rejected
   *       only when the path exists and is not a directory.</li>
   *   <li>{@code COMPATIBLE}: rejected when the path exists and either {@code overwrite} is not set or the path
   *       is a directory.</li>
   * </ul>
   *
   * @param path      the path about to be created
   * @param overwrite the overwrite flag requested by the caller
   * @throws FileAlreadyExistsException if the mode forbids creating over the existing path
   * @throws IOException                if the status lookup on the delegate fails
   */
  protected void checkCreateOperation(final Path path, final boolean overwrite) throws IOException {
    if (this.createOperationMode == CreateOperationMode.OVERWRITE) {
      if (!overwrite) {
        LOG.debug("Operation CREATE(path={}); ignoring parameter [overwrite=false]", path);
      }

      return;
    }

    if (this.createOperationMode == CreateOperationMode.DEFAULT && overwrite) {
      return;
    }

    final boolean isDirectory;

    try {
      final Path delegateFsPath = this.rewritePathToDelegateFs(path);
      final FileStatus fileStatus = this.delegateFS.getFileStatus(delegateFsPath);
      isDirectory = fileStatus.isDirectory();
    } catch (final FileNotFoundException ex) {
      // Nothing exists at the path, so there is nothing to protect.
      // Parameterized message: the trailing exception is still logged with its stack trace.
      LOG.debug("Path not found: {}", path, ex);
      return;
    }

    if (this.createOperationMode == CreateOperationMode.DEFAULT) {
      if (!isDirectory) {
        throw new FileAlreadyExistsException(path + " already exists");
      }

      return;
    }

    if (this.createOperationMode == CreateOperationMode.COMPATIBLE && (!overwrite || isDirectory)) {
      final String errorMessage = path + (isDirectory ? " is a directory" : " already exists");
      throw new FileAlreadyExistsException(errorMessage);
    }
  }

  /**
   * Lists {@code path} on the delegate file system without consulting the FsCache.
   *
   * <p>Each returned status is converted in place with {@code HdlfsFileStatus.create}. A path unknown to the
   * delegate but present in the local mkdir cache is reported as an empty directory.
   *
   * @param path the directory to list
   * @return the converted statuses, or the shared empty array for a locally cached directory
   * @throws FileNotFoundException if the path exists neither remotely nor in the local mkdir cache
   * @throws IOException           on any other I/O failure
   */
  private FileStatus[] regularListStatus(final Path path) throws IOException {
    try {
      final FileStatus[] statuses = this.delegateFS.listStatus(this.rewritePathToDelegateFs(path));
      int index = 0;

      for (final FileStatus delegateStatus : statuses) {
        statuses[index++] = HdlfsFileStatus.create(delegateStatus, this.fsUriRequiresPort, this.uri);
      }

      return statuses;
    } catch (final FileNotFoundException notFound) {
      final String qualifiedPath = this.makeQualified(path).toString();

      if (!LOCAL_MKDIR_CACHE.has(qualifiedPath)) {
        throw notFound;
      }

      return NO_FILES;
    }
  }

  /**
   * Fetches the status of {@code path} from the delegate file system without consulting the FsCache.
   *
   * <p>When the delegate reports the path as missing, the status stored in the local mkdir cache is used
   * instead, if there is one. The result is converted with {@code HdlfsFileStatus.create}.
   *
   * @param path the path to look up
   * @return the converted status
   * @throws FileNotFoundException if the path exists neither remotely nor in the local mkdir cache
   * @throws IOException           on any other I/O failure
   */
  private FileStatus regularGetFileStatus(final Path path) throws IOException {
    final Path delegatePath = this.rewritePathToDelegateFs(path);
    FileStatus status;

    try {
      status = this.delegateFS.getFileStatus(delegatePath);
    } catch (final FileNotFoundException notFound) {
      final String qualifiedPath = this.makeQualified(path).toString();

      if (!LOCAL_MKDIR_CACHE.has(qualifiedPath)) {
        throw notFound;
      }

      status = LOCAL_MKDIR_CACHE.get(qualifiedPath);
    }

    return HdlfsFileStatus.create(status, this.fsUriRequiresPort, this.uri);
  }

  /**
   * Checks whether the FsCache entry of the parent directory contains a node named like the file.
   *
   * <p>NOTE(review): entries flagged as deleted also count as "existing" here — confirm that retrying OPEN for
   * them is intended.
   * NOTE(review): {@code findPath} looks like a search through the whole subtree rather than a direct-child
   * lookup — verify against the Jackson documentation.
   * NOTE(review): {@code getParent()} is presumably {@code null} for a root path — confirm {@code getEntry}
   * tolerates that.
   *
   * @param path the path (without scheme/authority) to look up
   * @return {@code true} if a node named like the file was found
   * @throws IOException if the FsCache lookup fails
   */
  private boolean fileExistsInFsCache(final Path path) throws IOException {
    final FsCache fsCache = ((FsCacheProvider) this.delegateFS).getFsCache();
    final JsonNode parentNode = fsCache.getEntry(path.getParent()).getNode();
    final boolean missing = parentNode.findPath(path.getName()).isMissingNode();

    return !missing;
  }

  /**
   * Fetches the status of {@code path} and cross-checks it against the FsCache, retrying while the two disagree.
   *
   * <p>Each attempt reads the delegate status, the FsCache entry of the path itself (directory view) and the
   * node for the path in its parent's FsCache entry (file view). Up to {@code listStatusRetryMax} attempts are
   * made, pausing {@code listStatusRetryTimeoutMs} after every failed one. Any {@code IllegalStateException}
   * raised during an attempt counts as a failed attempt.
   *
   * @param path the path (without scheme/authority) to look up
   * @return the status once consistent, or {@code null} when the path consistently does not exist
   * @throws IllegalStateException if FileSystem and FsCache still disagree after the last attempt
   * @throws IOException           on I/O failures of the delegate or the FsCache
   */
  protected FileStatus consistentGetFileStatus(final Path path) throws IOException {
    final FsCache fsCache = ((FsCacheProvider) this.delegateFS).getFsCache();

    for (int attempt = 1; attempt <= this.listStatusRetryMax; attempt++) {
      try {
        FileStatus fileSystemStatus = null;

        try {
          fileSystemStatus = this.regularGetFileStatus(path);
        } catch (final FileNotFoundException notFound) {
          LOG.debug("Path {} was not found when performing consistent GetFileStatus operation", path);
        }

        // Path might either be a directory, a file, or not exist
        final JsonNode directoryView = fsCache.getEntry(path).getNode();
        final JsonNode fileView = fsCache.getEntry(path.getParent()).getNode().findPath(path.getName());

        final boolean cacheKnowsDirectory = !directoryView.isNull();
        final boolean cacheKnowsFile = !fileView.isMissingNode();

        // The cache has nothing on this path: the filesystem is the source of truth
        if (!cacheKnowsDirectory && !cacheKnowsFile) {
          return fileSystemStatus;
        }

        // Live (not deleted) entries below the path mean it is a directory, so the filesystem must know it
        if (cacheKnowsDirectory && !areAllEntriesDeleted(directoryView) && fileSystemStatus == null) {
          throw new IllegalStateException("Inconsistency detected between FsCache and FileSystem");
        }

        // An entry in the parent directory must agree with the filesystem: exists <=> not marked as deleted
        if (cacheKnowsFile && (fileSystemStatus != null) == isEntryDeleted(fileView)) {
          throw new IllegalStateException("Inconsistency detected between FsCache and FileSystem");
        }

        // No inconsistencies detected, so the filesystem can be trusted
        return fileSystemStatus;
      } catch (final IllegalStateException inconsistency) {
        sleepAndRetry(attempt, this.listStatusRetryTimeoutMs, this.listStatusRetryMax);
      }
    }

    throw new IllegalStateException("Inconsistency detected between FsCache and FileSystem");
  }

  // Throws IllegalStateException when FileSystem and FsCache can not agree with each other
  // Returns null when path does not contain data and achieves consistency
  // Returns FileStatus[] when path contains data and achieves consistency
  protected FileStatus[] consistentListStatus(final Path path) throws IOException {
    final FsCache fsCache = ((FsCacheProvider) this.delegateFS).getFsCache();

    for (int retryAttempt = 1; retryAttempt <= this.listStatusRetryMax; retryAttempt++) {
      FileStatus[] fileSystemResult = null;

      try {
        fileSystemResult = this.regularListStatus(path);
      } catch (final FileNotFoundException ex) {
        LOG.debug("Path {} was not found when performing consistent ListStatus operation", path);
      }

      final Set fsResultSet = fileSystemResult == null ? new HashSet<>() : Arrays.stream(fileSystemResult).map(fs -> fs.getPath().getName()).collect(Collectors.toSet());
      final JsonNode fsCacheDirectoryResult = fsCache.getEntry(path).getNode();

      if (fsCacheDirectoryResult.isNull()) {
        return fileSystemResult;
      }

      try {

        final Iterator> iterator = fsCacheDirectoryResult.fields();

        while (iterator.hasNext()) {
          final Map.Entry field = iterator.next();
          final JsonNode markAsDeletedField = field.getValue().get(HdlfsConstants.MARK_AS_DELETED_FIELD_NAME);
          final boolean markAsDeletedFieldValue;

          if (markAsDeletedField != null) {
            markAsDeletedFieldValue = markAsDeletedField.asBoolean();
          } else {
            markAsDeletedFieldValue = false;
          }

          final boolean resultHasField = fsResultSet.contains(field.getKey());

          if (markAsDeletedFieldValue == resultHasField) {
            throw new IllegalStateException("Inconsistency detected between FsCache and FileSystem");
          }
        }

        return fileSystemResult;
      } catch (final IllegalStateException ex) {
        sleepAndRetry(retryAttempt, this.listStatusRetryTimeoutMs, this.listStatusRetryMax);
      }
    }

    throw new IllegalStateException("Inconsistency detected between FsCache and FileSystem");
  }

  /**
   * Logs a failed consistency attempt and pauses before the caller's next attempt.
   *
   * <p>If the pause is interrupted, the thread's interrupt flag is restored and the method returns normally.
   *
   * @param retry          number of the attempt that just failed
   * @param retryTimeoutMs pause length in milliseconds
   * @param maxRetry       total number of attempts, used for the log message only
   */
  private void sleepAndRetry(final int retry, final int retryTimeoutMs, final int maxRetry) throws IllegalStateException {
    LOG.warn("Retry attempt: {}/{} - Inconsistency detected between FsCache and FileSystem.", retry, maxRetry);

    final long pauseMs = retryTimeoutMs;

    try {
      TimeUnit.MILLISECONDS.sleep(pauseMs);
    } catch (final InterruptedException interrupted) {
      Thread.currentThread().interrupt();
    }
  }

  /**
   * Reads the mark-as-deleted flag of an FsCache entry node.
   *
   * @param node the entry node to inspect
   * @return the flag's boolean value, or {@code false} when the node has no such field
   */
  private boolean isEntryDeleted(final JsonNode node) {
    final JsonNode deletedFlag = node.get(HdlfsConstants.MARK_AS_DELETED_FIELD_NAME);

    if (deletedFlag == null) {
      return false;
    }

    return deletedFlag.asBoolean();
  }

  /**
   * Checks whether every child element of an FsCache directory node is marked as deleted.
   *
   * @param node the directory node whose child elements are inspected
   * @return {@code true} if all children are marked as deleted (also when there are no children)
   */
  private boolean areAllEntriesDeleted(final JsonNode node) {
    // Iterating a JsonNode visits the same child elements as elements(), with a typed loop variable
    for (final JsonNode entry : node) {
      if (!this.isEntryDeleted(entry)) {
        return false;
      }
    }

    return true;
  }

  /**
   * Strips scheme and authority from a path.
   *
   * @param outputPath the path to strip
   * @return a new path built from the path component of {@code outputPath}'s URI
   */
  private Path getRelativePathFromSchemaPath(final Path outputPath) {
    final String pathComponent = outputPath.toUri().getPath();
    return new Path(pathComponent);
  }

  /**
   * Appends patterns to the consistent-write path patterns of this instance.
   *
   * @param patterns the patterns to add
   */
  void addConsistentWritePathPatterns(final Collection patterns) {
    final List targetPatterns = this.consistentWritePathPatterns;
    targetPatterns.addAll(patterns);
  }

  /**
   * Removes patterns from the consistent-write path patterns of this instance. Each given element triggers one
   * {@code remove} call on the list.
   *
   * @param patterns the patterns to remove
   */
  void removeConsistentWritePathPatterns(final Collection patterns) {
    for (final Object pattern : patterns) {
      this.consistentWritePathPatterns.remove(pattern);
    }
  }

  /**
   * Tells whether writes to a path must be recorded in the FsCache.
   *
   * @param path the path (without scheme/authority) to test
   * @return {@code true} if the path's string form fully matches at least one consistent-write pattern
   */
  private boolean pathNeedsConsistentWrite(final Path path) {
    final String candidate = path.toString();

    return this.consistentWritePathPatterns.stream()
        .anyMatch(pattern -> pattern.matcher(candidate).matches());
  }

  /**
   * Records a file in the FsCache by patching the entry of its parent directory.
   *
   * @param path          the file (without scheme/authority) to record
   * @param markAsDeleted value stored in the entry's mark-as-deleted field
   * @throws IOException if the patch operation fails
   */
  private void savePathToFsCache(final Path path, final boolean markAsDeleted) throws IOException {
    final Path parentDirectory = path.getParent();
    final JsonNode patchDocument = this.createPatch(path.getName(), markAsDeleted);
    final FsCache fsCache = ((FsCacheProvider) this.delegateFS).getFsCache();
    final FsCache.FsCacheOperationResult patchResult = fsCache.applyPatch(parentDirectory, patchDocument);

    LOG.debug("Added file to FsCache due to matching pattern, got result = [{}] from patch operation", patchResult.getNode().toString());
  }

  private JsonNode createPatch(final String filename, final boolean markAsDeleted) {
    final Map entry = new HashMap<>();
    entry.put(HdlfsConstants.MARK_AS_DELETED_FIELD_NAME, markAsDeleted);
    entry.put(HdlfsConstants.IS_PREFIX_FIELD_NAME, false);

    final Map patchOp = new HashMap<>();
    patchOp.put(HdlfsConstants.JSON_PATCH_OP_KEY, HdlfsConstants.JSON_PATCH_ADD_OP);
    patchOp.put(HdlfsConstants.JSON_PATCH_PATH_KEY, String.format("/%s", filename));
    patchOp.put(HdlfsConstants.JSON_PATCH_VALUE_KEY, entry);

    final List> patch = Collections.singletonList(patchOp);

    return this.mapper.valueToTree(patch);
  }

}

// © 2021-2024 SAP SE or an SAP affiliate company. All rights reserved.




© 2015 - 2025 Weber Informatics LLC | Privacy Policy