All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yelp.nrtsearch.tools.nrt_utils.backup.CleanupSnapshotsCommand Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2022 Yelp Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yelp.nrtsearch.tools.nrt_utils.backup;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.annotations.VisibleForTesting;
import com.yelp.nrtsearch.server.remote.s3.S3Backend;
import com.yelp.nrtsearch.server.utils.TimeStringUtils;
import com.yelp.nrtsearch.tools.nrt_utils.state.StateCommandUtils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import picocli.CommandLine;

@CommandLine.Command(
    name = CleanupSnapshotsCommand.CLEANUP_SNAPSHOTS,
    description = "Delete snapshots of index data.")
public class CleanupSnapshotsCommand implements Callable {
  public static final String CLEANUP_SNAPSHOTS = "cleanupSnapshots";
  private static final int DELETE_BATCH_SIZE = 1000;

  @CommandLine.Option(
      names = {"-s", "--serviceName"},
      description = "Name of nrtsearch cluster",
      required = true)
  private String serviceName;

  @CommandLine.Option(
      names = {"-i", "--indexName"},
      description = "Name of cluster index",
      required = true)
  private String indexName;

  @CommandLine.Option(
      names = {"--exactResourceName"},
      description = "If index resource name already has unique identifier")
  private boolean exactResourceName;

  @CommandLine.Option(
      names = {"-b", "--bucketName"},
      description = "Name of bucket containing state files",
      required = true)
  private String bucketName;

  @CommandLine.Option(
      names = {"--region"},
      description = "AWS region name, such as us-west-1, us-west-2, us-east-1")
  private String region;

  @CommandLine.Option(
      names = {"-c", "--credsFile"},
      description =
          "File holding AWS credentials; Will use DefaultCredentialProvider if this is unset.")
  private String credsFile;

  @CommandLine.Option(
      names = {"-p", "--credsProfile"},
      description = "Profile to use from creds file; Neglected when credsFile is unset.",
      defaultValue = "default")
  private String credsProfile;

  @CommandLine.Option(
      names = {"--snapshotRoot"},
      description = "Root s3 snapshot path, defaults to /snapshots/")
  private String snapshotRoot;

  @CommandLine.Option(
      names = {"-d", "--deleteAfter"},
      description =
          "Delete snapshot data that is older than this, in the form <#> "
              + "with valid units (s)econds, (m)inutes, (h)ours, (d)ays. (60m, 7h, 3d, etc.)",
      required = true)
  private String deleteAfter;

  @CommandLine.Option(
      names = {"--minSnapshots"},
      description =
          "Minimum number of latest snapshots to keep, regardless of age. default: ${DEFAULT-VALUE}",
      defaultValue = "1")
  private int minSnapshots;

  @CommandLine.Option(
      names = {"--dryRun"},
      description = "Print file deletions, instead of applying to S3")
  private boolean dryRun;

  @CommandLine.Option(
      names = {"--maxRetry"},
      description = "Maximum number of retry attempts for S3 failed requests",
      defaultValue = "20")
  private int maxRetry;

  private AmazonS3 s3Client;

  record TimestampAndTimeString(long timestampMs, String timeString) {}

  @VisibleForTesting
  void setS3Client(AmazonS3 s3Client) {
    this.s3Client = s3Client;
  }

  @Override
  public Integer call() throws Exception {
    if (s3Client == null) {
      s3Client =
          StateCommandUtils.createS3Client(bucketName, region, credsFile, credsProfile, maxRetry);
    }
    S3Backend s3Backend = new S3Backend(bucketName, false, s3Client);

    long deleteAfterMs = BackupCommandUtils.getTimeIntervalMs(deleteAfter);
    long minTimestampMs = System.currentTimeMillis() - deleteAfterMs;
    System.out.println("minTimestampMs: " + minTimestampMs);

    String resolvedIndexResource =
        StateCommandUtils.getResourceName(s3Backend, serviceName, indexName, exactResourceName);
    String resolvedSnapshotRoot = BackupCommandUtils.getSnapshotRoot(snapshotRoot, serviceName);
    List snapshotTimestamps =
        getSnapshotTimestamps(s3Client, resolvedIndexResource, resolvedSnapshotRoot);
    System.out.println("Found metadata: " + snapshotTimestamps);

    if (minSnapshots > 0 && !snapshotTimestamps.isEmpty()) {
      // timestamps are sorted in natural order
      int index = Math.max(0, snapshotTimestamps.size() - minSnapshots);
      if (snapshotTimestamps.get(index).timestampMs < minTimestampMs) {
        minTimestampMs = snapshotTimestamps.get(index).timestampMs;
      }
      System.out.println("Adjusted min timestamp: " + minTimestampMs);
    }

    final long finalMinTimestampMs = minTimestampMs;
    List deleteTimestamps =
        snapshotTimestamps.stream()
            .filter(l -> l.timestampMs < finalMinTimestampMs)
            .collect(Collectors.toList());

    deleteSnapshotMetadata(s3Client, resolvedIndexResource, resolvedSnapshotRoot, deleteTimestamps);
    deleteSnapshotIndexData(s3Client, resolvedIndexResource, resolvedSnapshotRoot, minTimestampMs);

    return 0;
  }

  private void deleteSnapshotIndexData(
      AmazonS3 s3Client,
      String resolvedIndexResource,
      String resolvedSnapshotRoot,
      long minTimestampMs) {
    String dataPrefix = getIndexSnapshotDataPrefix(resolvedSnapshotRoot, resolvedIndexResource);
    ListObjectsV2Request req =
        new ListObjectsV2Request()
            .withBucketName(bucketName)
            .withDelimiter("/")
            .withPrefix(dataPrefix);
    ListObjectsV2Result result = s3Client.listObjectsV2(req);
    List dataPrefixes = result.getCommonPrefixes();
    System.out.println("Data prefixes: " + dataPrefixes);

    List deletePrefixes = new ArrayList<>();
    for (String prefix : dataPrefixes) {
      String[] splits = prefix.split("/");
      String timeString = splits[splits.length - 1];
      if (TimeStringUtils.isTimeStringMs(timeString)) {
        long dataTimestamp = TimeStringUtils.parseTimeStringMs(timeString).toEpochMilli();
        if (dataTimestamp < minTimestampMs) {
          deletePrefixes.add(prefix);
        }
      } else {
        System.out.println("Skipping invalid timestamp: " + timeString);
      }
    }
    deletePrefixes(s3Client, deletePrefixes);
  }

  private void deletePrefixes(AmazonS3 s3Client, List prefixes) {
    List deleteList = new ArrayList<>(DELETE_BATCH_SIZE);
    for (String prefix : prefixes) {
      System.out.println("Deleting data prefix: " + prefix);
      ListObjectsV2Request req =
          new ListObjectsV2Request().withBucketName(bucketName).withPrefix(prefix);
      ListObjectsV2Result result;

      do {
        result = s3Client.listObjectsV2(req);

        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
          System.out.println("Delete: " + objectSummary.getKey());
          deleteList.add(objectSummary.getKey());
          if (deleteList.size() == DELETE_BATCH_SIZE) {
            if (!dryRun) {
              BackupCommandUtils.deleteObjects(s3Client, bucketName, deleteList);
            }
            deleteList.clear();
          }
        }
        String token = result.getNextContinuationToken();
        req.setContinuationToken(token);
      } while (result.isTruncated());
    }
    if (!deleteList.isEmpty() && !dryRun) {
      BackupCommandUtils.deleteObjects(s3Client, bucketName, deleteList);
    }
  }

  private void deleteSnapshotMetadata(
      AmazonS3 s3Client,
      String resolvedIndexResource,
      String resolvedSnapshotRoot,
      List versions) {
    for (TimestampAndTimeString version : versions) {
      String key =
          BackupCommandUtils.getSnapshotIndexMetadataKey(
              resolvedSnapshotRoot, resolvedIndexResource, version.timeString);
      System.out.println("Deleting snapshot metadata: " + key);
      if (!dryRun) {
        s3Client.deleteObject(bucketName, key);
      }
    }
  }

  private List getSnapshotTimestamps(
      AmazonS3 s3Client, String resolvedIndexResource, String resolvedSnapshotRoot) {
    String metadataPrefix =
        BackupCommandUtils.getSnapshotIndexMetadataPrefix(
            resolvedSnapshotRoot, resolvedIndexResource);
    List snapshotTimestamps = new ArrayList<>();

    ListObjectsV2Request req =
        new ListObjectsV2Request().withBucketName(bucketName).withPrefix(metadataPrefix);
    ListObjectsV2Result result;

    do {
      result = s3Client.listObjectsV2(req);

      for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
        String snapshotSuffix = objectSummary.getKey().split(metadataPrefix)[1];
        if (TimeStringUtils.isTimeStringMs(snapshotSuffix)) {
          long timestampMs = TimeStringUtils.parseTimeStringMs(snapshotSuffix).toEpochMilli();
          snapshotTimestamps.add(new TimestampAndTimeString(timestampMs, snapshotSuffix));
        } else {
          System.out.println("Skipping invalid timestamp: " + snapshotSuffix);
        }
      }
      String token = result.getNextContinuationToken();
      req.setContinuationToken(token);
    } while (result.isTruncated());

    snapshotTimestamps.sort(Comparator.comparingLong(e -> e.timestampMs));
    return snapshotTimestamps;
  }

  private String getIndexSnapshotDataPrefix(
      String resolvedSnapshotRoot, String resolvedIndexResource) {
    return resolvedSnapshotRoot + resolvedIndexResource + "/";
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy