/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.s3;

import com.amazonaws.AmazonServiceException;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import com.amazonaws.services.s3.model.MultiObjectDeleteException;
import com.google.common.base.Predicates;
import com.google.common.base.Supplier;
import com.google.common.collect.Lists;
import com.google.inject.Inject;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.MapUtils;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.segment.loading.DataSegmentKiller;
import org.apache.druid.segment.loading.SegmentLoadingException;
import org.apache.druid.timeline.DataSegment;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

/**
 *
 */
public class S3DataSegmentKiller implements DataSegmentKiller
{
  private static final Logger log = new Logger(S3DataSegmentKiller.class);

  // AWS has a max limit of 1000 objects that can be requested to be deleted at a time.
  private static final int MAX_MULTI_OBJECT_DELETE_SIZE = 1000;

  private static final String MULTI_OBJECT_DELETE_EXCEPTION_ERROR_FORMAT = "message: [%s], code: [%s]";

  /**
   * Any implementation of DataSegmentKiller is initialized when an ingestion job starts if the extension is loaded,
   * even when the implementation of DataSegmentKiller is not used. As a result, if we hold an s3 client instead
   * of a supplier of one, it can cause unnecessary config validation for s3 even when it's not used at all.
   * To perform the config validation only when it is actually used, we use a supplier.
   *
   * See OmniDataSegmentKiller for how DataSegmentKillers are initialized.
   */
  private final Supplier<ServerSideEncryptingAmazonS3> s3ClientSupplier;
  private final S3DataSegmentPusherConfig segmentPusherConfig;
  private final S3InputDataConfig inputDataConfig;

  @Inject
  public S3DataSegmentKiller(
      Supplier<ServerSideEncryptingAmazonS3> s3Client,
      S3DataSegmentPusherConfig segmentPusherConfig,
      S3InputDataConfig inputDataConfig
  )
  {
    this.s3ClientSupplier = s3Client;
    this.segmentPusherConfig = segmentPusherConfig;
    this.inputDataConfig = inputDataConfig;
  }

  @Override
  public void kill(List<DataSegment> segments) throws SegmentLoadingException
  {
    if (segments.isEmpty()) {
      return;
    }
    if (segments.size() == 1) {
      kill(segments.get(0));
      return;
    }

    // Create a map of bucket to keys to delete.
    Map<String, List<DeleteObjectsRequest.KeyVersion>> bucketToKeysToDelete = new HashMap<>();
    for (DataSegment segment : segments) {
      String s3Bucket = MapUtils.getString(segment.getLoadSpec(), S3DataSegmentPuller.BUCKET);
      String path = MapUtils.getString(segment.getLoadSpec(), S3DataSegmentPuller.KEY);
      List<DeleteObjectsRequest.KeyVersion> keysToDelete = bucketToKeysToDelete.computeIfAbsent(
          s3Bucket,
          k -> new ArrayList<>()
      );
      keysToDelete.add(new DeleteObjectsRequest.KeyVersion(path));
      keysToDelete.add(new DeleteObjectsRequest.KeyVersion(DataSegmentKiller.descriptorPath(path)));
    }

    final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
    boolean shouldThrowException = false;
    for (Map.Entry<String, List<DeleteObjectsRequest.KeyVersion>> bucketToKeys : bucketToKeysToDelete.entrySet()) {
      String s3Bucket = bucketToKeys.getKey();
      List<DeleteObjectsRequest.KeyVersion> keysToDelete = bucketToKeys.getValue();
      boolean hadException = deleteKeysForBucket(s3Client, s3Bucket, keysToDelete);
      if (hadException) {
        shouldThrowException = true;
      }
    }
    if (shouldThrowException) {
      // The exception's error message gets cut off without providing any details; look at the logs for more details.
      // This was a shortcut to handle the many different ways there could potentially be failures and handle them
      // reasonably.
      throw new SegmentLoadingException(
          "Couldn't delete segments from S3. See the task logs for more details."
      );
    }
  }
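
  /*
   * For reference, an illustrative (assumed) S3 load spec as it appears in segment metadata; the "bucket" and
   * "key" field names match S3DataSegmentPuller.BUCKET and S3DataSegmentPuller.KEY read above, while the other
   * values are hypothetical:
   *
   *   "loadSpec" : { "type" : "s3_zip", "bucket" : "my-bucket", "key" : "prefix/partition/index.zip" }
   *
   * kill() reads "bucket" and "key" from this map and also queues the matching descriptor path for deletion.
   */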
  /**
   * Delete all given keys in a bucket from s3.
   *
   * @param s3Client     client used to communicate with s3
   * @param s3Bucket     the bucket where the keys exist
   * @param keysToDelete the keys to delete
   * @return true if there was an issue deleting one or more keys, false if all deletions succeeded
   */
  private boolean deleteKeysForBucket(
      ServerSideEncryptingAmazonS3 s3Client,
      String s3Bucket,
      List<DeleteObjectsRequest.KeyVersion> keysToDelete
  )
  {
    boolean hadException = false;
    DeleteObjectsRequest deleteObjectsRequest = new DeleteObjectsRequest(s3Bucket);
    // Quiet mode: the multi-object delete response only reports keys that failed to delete.
    deleteObjectsRequest.setQuiet(true);
    List<List<DeleteObjectsRequest.KeyVersion>> keysChunks = Lists.partition(
        keysToDelete,
        MAX_MULTI_OBJECT_DELETE_SIZE
    );
    for (List<DeleteObjectsRequest.KeyVersion> chunkOfKeys : keysChunks) {
      List<String> keysToDeleteStrings = chunkOfKeys.stream()
                                                    .map(DeleteObjectsRequest.KeyVersion::getKey)
                                                    .collect(Collectors.toList());
      try {
        deleteObjectsRequest.setKeys(chunkOfKeys);
        log.info(
            "Removing from bucket: [%s] the following index files: [%s] from s3!",
            s3Bucket,
            keysToDeleteStrings
        );
        S3Utils.retryS3Operation(
            () -> {
              s3Client.deleteObjects(deleteObjectsRequest);
              return null;
            },
            3
        );
      }
      catch (MultiObjectDeleteException e) {
        hadException = true;
        // Group the failed keys by (message, code) so that each distinct failure reason is logged once.
        Map<String, List<String>> errorToKeys = new HashMap<>();
        for (MultiObjectDeleteException.DeleteError error : e.getErrors()) {
          errorToKeys.computeIfAbsent(
              StringUtils.format(
                  MULTI_OBJECT_DELETE_EXCEPTION_ERROR_FORMAT,
                  error.getMessage(),
                  error.getCode()
              ),
              k -> new ArrayList<>()
          ).add(error.getKey());
        }
        errorToKeys.forEach((key, value) -> log.error(
            "Unable to delete from bucket [%s], the following keys [%s], because [%s]",
            s3Bucket,
            String.join(", ", value),
            key
        ));
      }
      catch (AmazonServiceException e) {
        hadException = true;
        log.noStackTrace().warn(
            e,
            "Unable to delete from bucket [%s], the following keys [%s]",
            s3Bucket,
            chunkOfKeys.stream().map(DeleteObjectsRequest.KeyVersion::getKey).collect(Collectors.joining(", "))
        );
      }
      catch (Exception e) {
        hadException = true;
        log.noStackTrace().warn(
            e,
            "Unexpected exception occurred when deleting from bucket [%s], the following keys [%s]",
            s3Bucket,
            chunkOfKeys.stream().map(DeleteObjectsRequest.KeyVersion::getKey).collect(Collectors.joining(", "))
        );
      }
    }
    return hadException;
  }
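
  /*
   * Worked example (hypothetical bucket, keys, and error details) of the grouped error logging in
   * deleteKeysForBucket above: if a batch delete fails for keys "a" and "b" with code AccessDenied and for
   * key "c" with code InternalError, two log lines are emitted, one per distinct (message, code) pair:
   *
   *   Unable to delete from bucket [my-bucket], the following keys [a, b], because [message: [Access Denied], code: [AccessDenied]]
   *   Unable to delete from bucket [my-bucket], the following keys [c], because [message: [...], code: [InternalError]]
   */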
  @Override
  public void kill(DataSegment segment) throws SegmentLoadingException
  {
    try {
      Map<String, Object> loadSpec = segment.getLoadSpec();
      String s3Bucket = MapUtils.getString(loadSpec, S3DataSegmentPuller.BUCKET);
      String s3Path = MapUtils.getString(loadSpec, S3DataSegmentPuller.KEY);
      String s3DescriptorPath = DataSegmentKiller.descriptorPath(s3Path);

      final ServerSideEncryptingAmazonS3 s3Client = this.s3ClientSupplier.get();
      if (s3Client.doesObjectExist(s3Bucket, s3Path)) {
        log.info("Removing index file[s3://%s/%s] from s3!", s3Bucket, s3Path);
        s3Client.deleteObject(s3Bucket, s3Path);
      }
      // descriptor.json is a file that stored segment metadata in deep storage. The file is deprecated and no
      // longer written, but we still delete it if it exists.
      if (s3Client.doesObjectExist(s3Bucket, s3DescriptorPath)) {
        log.info("Removing descriptor file[s3://%s/%s] from s3!", s3Bucket, s3DescriptorPath);
        s3Client.deleteObject(s3Bucket, s3DescriptorPath);
      }
    }
    catch (AmazonServiceException e) {
      throw new SegmentLoadingException(e, "Couldn't kill segment[%s]: [%s]", segment.getId(), e);
    }
  }

  @Override
  public void killAll() throws IOException
  {
    if (segmentPusherConfig.getBucket() == null || segmentPusherConfig.getBaseKey() == null) {
      throw new ISE(
          "Cannot delete all segments from S3 deep storage since druid.storage.bucket and druid.storage.baseKey are not both set."
      );
    }
    log.info(
        "Deleting all segment files from s3 location [bucket: '%s' prefix: '%s']",
        segmentPusherConfig.getBucket(),
        segmentPusherConfig.getBaseKey()
    );
    try {
      S3Utils.deleteObjectsInPath(
          s3ClientSupplier.get(),
          inputDataConfig.getMaxListingLength(),
          segmentPusherConfig.getBucket(),
          segmentPusherConfig.getBaseKey(),
          Predicates.alwaysTrue()
      );
    }
    catch (Exception e) {
      log.error("Error occurred while deleting segment files from s3. Error: %s", e.getMessage());
      throw new IOException(e);
    }
  }
}
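
/*
 * Illustrative runtime properties (assumed example values) under which killAll() can run; killAll() throws an
 * ISE when druid.storage.bucket or druid.storage.baseKey is unset:
 *
 *   druid.storage.type=s3
 *   druid.storage.bucket=my-deep-storage-bucket
 *   druid.storage.baseKey=druid/segments
 */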