All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yelp.nrtsearch.tools.nrt_utils.legacy.incremental.DeleteIncrementalSnapshotsCommand Maven / Gradle / Ivy

There is a newer version: 1.0.0-beta.1
Show newest version
/*
 * Copyright 2022 Yelp Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.yelp.nrtsearch.tools.nrt_utils.legacy.incremental;

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.model.ListObjectsV2Request;
import com.amazonaws.services.s3.model.ListObjectsV2Result;
import com.amazonaws.services.s3.model.S3ObjectSummary;
import com.google.common.annotations.VisibleForTesting;
import com.yelp.nrtsearch.tools.nrt_utils.legacy.LegacyVersionManager;
import com.yelp.nrtsearch.tools.nrt_utils.legacy.state.LegacyStateCommandUtils;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.Callable;
import java.util.stream.Collectors;
import picocli.CommandLine;

@CommandLine.Command(
    name = DeleteIncrementalSnapshotsCommand.DELETE_INCREMENTAL_SNAPSHOTS,
    description =
        "Delete snapshots of incremental index data. Legacy command for use with v0 cluster data.")
public class DeleteIncrementalSnapshotsCommand implements Callable {
  public static final String DELETE_INCREMENTAL_SNAPSHOTS = "deleteIncrementalSnapshots";
  private static final int DELETE_BATCH_SIZE = 1000;

  @CommandLine.Option(
      names = {"-s", "--serviceName"},
      description = "Name of nrtsearch cluster",
      required = true)
  private String serviceName;

  @CommandLine.Option(
      names = {"-i", "--indexName"},
      description = "Name of cluster index",
      required = true)
  private String indexName;

  @CommandLine.Option(
      names = {"--exactResourceName"},
      description = "If index resource name already has unique identifier")
  private boolean exactResourceName;

  @CommandLine.Option(
      names = {"-b", "--bucketName"},
      description = "Name of bucket containing state files",
      required = true)
  private String bucketName;

  @CommandLine.Option(
      names = {"--region"},
      description = "AWS region name, such as us-west-1, us-west-2, us-east-1")
  private String region;

  @CommandLine.Option(
      names = {"-c", "--credsFile"},
      description =
          "File holding AWS credentials; Will use DefaultCredentialProvider if this is unset.")
  private String credsFile;

  @CommandLine.Option(
      names = {"-p", "--credsProfile"},
      description = "Profile to use from creds file; Neglected when credsFile is unset.",
      defaultValue = "default")
  private String credsProfile;

  @CommandLine.Option(
      names = {"--snapshotRoot"},
      description = "Root s3 snapshot path, defaults to /snapshots/")
  private String snapshotRoot;

  @CommandLine.Option(
      names = {"-d", "--deleteAfter"},
      description =
          "Delete snapshot data that is older than this, in the form <#> "
              + "with valid units (s)econds, (m)inutes, (h)ours, (d)ays. (60m, 7h, 3d, etc.)",
      required = true)
  private String deleteAfter;

  @CommandLine.Option(
      names = {"--minSnapshots"},
      description =
          "Minimum number of latest snapshots to keep, regardless of age. default: ${DEFAULT-VALUE}",
      defaultValue = "1")
  private int minSnapshots;

  @CommandLine.Option(
      names = {"--dryRun"},
      description = "Print file deletions, instead of applying to S3")
  private boolean dryRun;

  @CommandLine.Option(
      names = {"--maxRetry"},
      description = "Maximum number of retry attempts for S3 failed requests",
      defaultValue = "20")
  private int maxRetry;

  private AmazonS3 s3Client;

  @VisibleForTesting
  void setS3Client(AmazonS3 s3Client) {
    this.s3Client = s3Client;
  }

  @Override
  public Integer call() throws Exception {
    if (s3Client == null) {
      s3Client =
          LegacyStateCommandUtils.createS3Client(
              bucketName, region, credsFile, credsProfile, maxRetry);
    }
    LegacyVersionManager versionManager = new LegacyVersionManager(s3Client, bucketName);

    long deleteAfterMs = IncrementalDataCleanupCommand.getTimeIntervalMs(deleteAfter);
    long minTimestampMs = System.currentTimeMillis() - deleteAfterMs;
    System.out.println("minTimestampMs: " + minTimestampMs);

    String resolvedIndexResource =
        LegacyStateCommandUtils.getResourceName(
            versionManager, serviceName, indexName, exactResourceName);
    String resolvedSnapshotRoot =
        IncrementalCommandUtils.getSnapshotRoot(snapshotRoot, serviceName);
    List snapshotTimestamps =
        getSnapshotTimestamps(s3Client, resolvedIndexResource, resolvedSnapshotRoot);
    System.out.println("Found metadata: " + snapshotTimestamps);

    if (minSnapshots > 0 && !snapshotTimestamps.isEmpty()) {
      // timestamps are sorted in natural order
      int index = Math.max(0, snapshotTimestamps.size() - minSnapshots);
      if (snapshotTimestamps.get(index) < minTimestampMs) {
        minTimestampMs = snapshotTimestamps.get(index);
      }
      System.out.println("Adjusted min timestamp: " + minTimestampMs);
    }

    final long finalMinTimestampMs = minTimestampMs;
    List deleteTimestamps =
        snapshotTimestamps.stream()
            .filter(l -> l < finalMinTimestampMs)
            .collect(Collectors.toList());

    deleteSnapshotMetadata(s3Client, resolvedIndexResource, resolvedSnapshotRoot, deleteTimestamps);
    deleteSnapshotIndexData(s3Client, resolvedIndexResource, resolvedSnapshotRoot, minTimestampMs);

    return 0;
  }

  private void deleteSnapshotIndexData(
      AmazonS3 s3Client,
      String resolvedIndexResource,
      String resolvedSnapshotRoot,
      long minTimestampMs) {
    String dataPrefix = getIndexSnapshotDataPrefix(resolvedSnapshotRoot, resolvedIndexResource);
    ListObjectsV2Request req =
        new ListObjectsV2Request()
            .withBucketName(bucketName)
            .withDelimiter("/")
            .withPrefix(dataPrefix);
    ListObjectsV2Result result = s3Client.listObjectsV2(req);
    List dataPrefixes = result.getCommonPrefixes();
    System.out.println("Data prefixes: " + dataPrefixes);

    List deletePrefixes = new ArrayList<>();
    for (String prefix : dataPrefixes) {
      String[] splits = prefix.split("/");
      long dataTimestamp = Long.parseLong(splits[splits.length - 1]);
      if (dataTimestamp < minTimestampMs) {
        deletePrefixes.add(prefix);
      }
    }
    deletePrefixes(s3Client, deletePrefixes);
  }

  private void deletePrefixes(AmazonS3 s3Client, List prefixes) {
    List deleteList = new ArrayList<>(DELETE_BATCH_SIZE);
    for (String prefix : prefixes) {
      System.out.println("Deleting data prefix: " + prefix);
      ListObjectsV2Request req =
          new ListObjectsV2Request().withBucketName(bucketName).withPrefix(prefix);
      ListObjectsV2Result result;

      do {
        result = s3Client.listObjectsV2(req);

        for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
          System.out.println("Delete: " + objectSummary.getKey());
          deleteList.add(objectSummary.getKey());
          if (deleteList.size() == DELETE_BATCH_SIZE) {
            if (!dryRun) {
              IncrementalDataCleanupCommand.deleteObjects(s3Client, bucketName, deleteList);
            }
            deleteList.clear();
          }
        }
        String token = result.getNextContinuationToken();
        req.setContinuationToken(token);
      } while (result.isTruncated());
    }
    if (!deleteList.isEmpty() && !dryRun) {
      IncrementalDataCleanupCommand.deleteObjects(s3Client, bucketName, deleteList);
    }
  }

  private void deleteSnapshotMetadata(
      AmazonS3 s3Client,
      String resolvedIndexResource,
      String resolvedSnapshotRoot,
      List versions) {
    for (Long version : versions) {
      String key =
          IncrementalCommandUtils.getSnapshotIndexMetadataKey(
              resolvedSnapshotRoot, resolvedIndexResource, version);
      System.out.println("Deleting snapshot metadata: " + key);
      if (!dryRun) {
        s3Client.deleteObject(bucketName, key);
      }
    }
  }

  private List getSnapshotTimestamps(
      AmazonS3 s3Client, String resolvedIndexResource, String resolvedSnapshotRoot) {
    String metadataPrefix =
        getIndexSnapshotMetadataPrefix(resolvedSnapshotRoot, resolvedIndexResource);
    List snapshotTimestamps = new ArrayList<>();

    ListObjectsV2Request req =
        new ListObjectsV2Request().withBucketName(bucketName).withPrefix(metadataPrefix);
    ListObjectsV2Result result;

    do {
      result = s3Client.listObjectsV2(req);

      for (S3ObjectSummary objectSummary : result.getObjectSummaries()) {
        String snapshotSuffix = objectSummary.getKey().split(metadataPrefix)[1];
        snapshotTimestamps.add(Long.parseLong(snapshotSuffix));
      }
      String token = result.getNextContinuationToken();
      req.setContinuationToken(token);
    } while (result.isTruncated());

    snapshotTimestamps.sort(Comparator.comparingLong(e -> e));
    return snapshotTimestamps;
  }

  private String getIndexSnapshotMetadataPrefix(
      String resolvedSnapshotRoot, String resolvedIndexResource) {
    return resolvedSnapshotRoot
        + IncrementalCommandUtils.METADATA_DIR
        + "/"
        + resolvedIndexResource
        + "/";
  }

  private String getIndexSnapshotDataPrefix(
      String resolvedSnapshotRoot, String resolvedIndexResource) {
    return resolvedSnapshotRoot + resolvedIndexResource + "/";
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy