All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.expediagroup.beekeeper.cleanup.aws.S3PathCleaner Maven / Gradle / Ivy

The newest version!
/**
 * Copyright (C) ${license.git.copyrightYears} Expedia, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.expediagroup.beekeeper.cleanup.aws;

import static java.lang.String.format;

import java.util.List;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.base.Strings;

import com.expediagroup.beekeeper.cleanup.monitoring.BytesDeletedReporter;
import com.expediagroup.beekeeper.cleanup.path.PathCleaner;
import com.expediagroup.beekeeper.cleanup.path.SentinelFilesCleaner;
import com.expediagroup.beekeeper.core.config.FileSystemType;
import com.expediagroup.beekeeper.core.error.BeekeeperException;
import com.expediagroup.beekeeper.core.model.HousekeepingEntity;
import com.expediagroup.beekeeper.core.monitoring.TimedTaggable;

/**
 * {@link PathCleaner} that removes the S3 object, or all objects under the S3 prefix, referenced by a
 * {@link HousekeepingEntity}, and reports the number of bytes deleted.
 *
 * <p>When the path is treated as a directory, sentinel files for the path and for its parent directories are
 * also cleaned up on a best-effort basis via the {@link SentinelFilesCleaner}.
 */
public class S3PathCleaner implements PathCleaner {

  private static final Logger log = LoggerFactory.getLogger(S3PathCleaner.class);

  private final S3Client s3Client;
  private final SentinelFilesCleaner sentinelFilesCleaner;
  private final BytesDeletedReporter bytesDeletedReporter;

  /**
   * @param s3Client client used for every S3 lookup, listing and delete
   * @param sentinelFilesCleaner used to delete sentinel files for the cleaned path and its parents
   * @param bytesDeletedReporter receives the number of bytes deleted per cleaned entity
   */
  public S3PathCleaner(S3Client s3Client, SentinelFilesCleaner sentinelFilesCleaner,
      BytesDeletedReporter bytesDeletedReporter) {
    this.s3Client = s3Client;
    this.sentinelFilesCleaner = sentinelFilesCleaner;
    this.bytesDeletedReporter = bytesDeletedReporter;
  }

  /**
   * Deletes the path of the given entity from S3.
   *
   * <p>If an object exists at exactly the entity's key it is deleted as a single file; otherwise the key is
   * treated as a directory prefix, everything listed under it is deleted, and sentinel files are cleaned up.
   * Bytes deleted are reported in a {@code finally} block, so a partially successful deletion is still
   * reported before the failure propagates.
   *
   * @param housekeepingEntity entity providing the S3 path and table name
   * @throws BeekeeperException if some of the objects under a directory prefix could not be deleted
   */
  @Override
  @TimedTaggable("s3-paths-deleted")
  public void cleanupPath(HousekeepingEntity housekeepingEntity) {
    S3SchemeURI s3SchemeURI = new S3SchemeURI(housekeepingEntity.getPath());
    String key = s3SchemeURI.getKey();
    String bucket = s3SchemeURI.getBucket();
    S3BytesDeletedCalculator bytesDeletedCalculator = new S3BytesDeletedCalculator(s3Client);
    boolean isFile = s3Client.doesObjectExist(bucket, key);
    try {
      if (isFile) {
        deleteFile(bucket, key, bytesDeletedCalculator);
      } else {
        deleteFilesInDirectory(bucket, key, bytesDeletedCalculator);
        deleteSentinelFiles(s3SchemeURI, key, bucket, housekeepingEntity.getTableName());
      }
    } finally {
      long bytesDeleted = bytesDeletedCalculator.getBytesDeleted();
      // nothing is reported when no bytes were deleted
      if (bytesDeleted > 0) {
        bytesDeletedReporter.reportTaggable(bytesDeleted, housekeepingEntity, FileSystemType.S3);
      }
    }
  }

  /**
   * Deletes a single object. Its size is stored before the delete so that it can still be counted afterwards.
   */
  private void deleteFile(String bucket, String key, S3BytesDeletedCalculator bytesDeletedCalculator) {
    bytesDeletedCalculator.storeFileSize(bucket, key);
    s3Client.deleteObject(bucket, key);
    bytesDeletedCalculator.calculateBytesDeleted(List.of(key));
  }

  /**
   * Deletes every object listed under {@code key} (with a trailing "/" appended when missing).
   *
   * <p>The keys returned by {@code s3Client.deleteObjects} are treated as the successfully deleted ones; only
   * those are counted towards bytes deleted.
   *
   * @throws BeekeeperException if fewer keys were deleted than were listed; the message names the keys that
   *         were not deleted
   */
  private void deleteFilesInDirectory(String bucket, String key, S3BytesDeletedCalculator bytesDeletedCalculator) {
    String prefix = key.endsWith("/") ? key : key + "/";
    List<S3ObjectSummary> objectSummaries = s3Client.listObjects(bucket, prefix);
    bytesDeletedCalculator.storeFileSizes(objectSummaries);
    List<String> keys = objectSummaries.stream()
        .map(S3ObjectSummary::getKey)
        .collect(Collectors.toList());
    List<String> deletedKeys = s3Client.deleteObjects(bucket, keys);
    bytesDeletedCalculator.calculateBytesDeleted(deletedKeys);
    int totalFiles = keys.size();
    int successfulDeletes = deletedKeys.size();
    if (successfulDeletes != totalFiles) {
      // Filter instead of keys.removeAll(...): Collectors.toList() does not guarantee a mutable list.
      String failedDeletions = keys.stream()
          .filter(k -> !deletedKeys.contains(k))
          .map(k -> format("'%s'", k))
          .collect(Collectors.joining(", "));
      throw new BeekeeperException(
          format("Not all files could be deleted at path \"%s/%s\"; deleted %s/%s objects. Objects not deleted: %s.",
              bucket, prefix, successfulDeletes, totalFiles, failedDeletions));
    }
  }

  /**
   * Deletes the sentinel files for the cleaned path and then, if the key has at least one parent, for its
   * parent directories. Best effort: any exception is logged as a warning and not rethrown.
   */
  private void deleteSentinelFiles(S3SchemeURI s3SchemeURI, String key, String bucket, String tableName) {
    try {
      String path = s3SchemeURI.getPath();
      if (path.endsWith("/")) {
        path = path.substring(0, path.length() - 1);
      }
      sentinelFilesCleaner.deleteSentinelFiles(path);

      // NOTE(review): only `path` has its trailing slash stripped. If `key` can end with "/", the parent key
      // computed in deleteParentSentinelFiles would be the key minus the slash rather than its real parent,
      // and would no longer line up with the parent path - confirm what S3SchemeURI.getKey() returns.
      // attempt to delete parents if there is at least one parent
      if (key.contains("/")) {
        deleteParentSentinelFiles(bucket, key, path, tableName);
      }
    } catch (Exception e) {
      log.warn("Sentinel file(s) could not be deleted", e);
    }
  }

  /**
   * Walks up one directory level at a time, deleting the parent's sentinel files while the parent path is
   * still valid for the table (see {@link #pathHasValidTableName}) and {@code s3Client.isEmpty} accepts the
   * parent key. Stops at the first parent that fails either check or when the key has no further parent.
   *
   * @param bucket bucket containing the key
   * @param key key of the directory whose parent is being examined
   * @param absolutePath path corresponding to {@code key}
   * @param tableName table name used to bound how far up the walk may go
   */
  private void deleteParentSentinelFiles(String bucket, String key, String absolutePath, String tableName) {
    String parentPath = absolutePath.substring(0, absolutePath.lastIndexOf("/"));
    String parentKey = key.substring(0, key.lastIndexOf("/"));

    if (pathHasValidTableName(parentPath, tableName) && s3Client.isEmpty(bucket, parentKey, key)) {
      sentinelFilesCleaner.deleteSentinelFiles(parentPath);

      if (parentKey.contains("/")) {
        deleteParentSentinelFiles(bucket, parentKey, parentPath, tableName);
      }
    }
  }

  /**
   * Returns {@code true} only when {@code tableName} is non-empty, {@code parent} contains
   * {@code "/<tableName>/"} and {@code parent} does not itself end with {@code "/<tableName>"}.
   */
  // stop deleting if the path doesn't contain the table name or we got to the table directory
  private boolean pathHasValidTableName(String parent, String tableName) {
    String tableDirectory = "/" + tableName + "/";
    return !Strings.isNullOrEmpty(tableName) && parent.contains(tableDirectory) && !parent.endsWith("/" + tableName);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy