All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardDynamoTable Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

There is a newer version: 3.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import javax.annotation.Nullable;
import java.io.IOException;
import java.net.URI;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import com.amazonaws.services.dynamodbv2.xspec.ExpressionSpecBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.lang3.tuple.Pair;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3AFileSystem;
import org.apache.hadoop.service.Service;
import org.apache.hadoop.service.launcher.LauncherExitCodes;
import org.apache.hadoop.service.launcher.ServiceLaunchException;
import org.apache.hadoop.service.launcher.ServiceLauncher;
import org.apache.hadoop.util.DurationInfo;
import org.apache.hadoop.util.ExitUtil;

import static org.apache.hadoop.thirdparty.com.google.common.base.Preconditions.checkNotNull;
import static org.apache.hadoop.fs.s3a.s3guard.DumpS3GuardDynamoTable.serviceMain;
import static org.apache.hadoop.fs.s3a.s3guard.PathMetadataDynamoDBTranslation.PARENT;

/**
 * Purge the S3Guard table of a FileSystem from all entries related to
 * that table.
 * Will fail if there is no table, or the store is in auth mode.
 * 
 *   hadoop org.apache.hadoop.fs.s3a.s3guard.PurgeS3GuardDynamoTable \
 *   -force s3a://example-bucket/
 * 
* */ @InterfaceAudience.Private @InterfaceStability.Unstable public class PurgeS3GuardDynamoTable extends AbstractS3GuardDynamoDBDiagnostic { private static final Logger LOG = LoggerFactory.getLogger(PurgeS3GuardDynamoTable.class); public static final String NAME = "PurgeS3GuardDynamoTable"; /** * Name of the force option. */ public static final String FORCE = "-force"; /** * Usage message. */ private static final String USAGE_MESSAGE = NAME + " [-force] "; /** * Flag which actually triggers the delete. */ private boolean force; private long filesFound; private long filesDeleted; public PurgeS3GuardDynamoTable(final String name) { super(name); } public PurgeS3GuardDynamoTable() { this(NAME); } public PurgeS3GuardDynamoTable( final S3AFileSystem filesystem, final DynamoDBMetadataStore store, final URI uri, final boolean force) { super(NAME, filesystem, store, uri); this.force = force; } /** * Bind to the argument list, including validating the CLI. * @throws Exception failure. */ @Override protected void serviceStart() throws Exception { if (getStore() == null) { List arg = getArgumentList(1, 2, USAGE_MESSAGE); String fsURI = arg.get(0); if (arg.size() == 2) { if (!arg.get(0).equals(FORCE)) { throw new ServiceLaunchException(LauncherExitCodes.EXIT_USAGE, USAGE_MESSAGE); } force = true; fsURI = arg.get(1); } bindFromCLI(fsURI); } } /** * Extract the host from the FS URI, then scan and * delete all entries from that bucket. * @return the exit code. * @throws ServiceLaunchException on failure. * @throws IOException IO failure. */ @Override public int execute() throws ServiceLaunchException, IOException { URI uri = getUri(); String host = uri.getHost(); String prefix = "/" + host + "/"; DynamoDBMetadataStore ddbms = getStore(); S3GuardTableAccess tableAccess = new S3GuardTableAccess(ddbms); ExpressionSpecBuilder builder = new ExpressionSpecBuilder(); builder.withKeyCondition( ExpressionSpecBuilder.S(PARENT).beginsWith(prefix)); LOG.info("Scanning for entries with prefix {} to delete from {}", prefix, ddbms); Iterable entries = ddbms.wrapWithRetries(tableAccess.scanMetadata(builder)); List list = new ArrayList<>(); entries.iterator().forEachRemaining(e -> { if (!(e instanceof S3GuardTableAccess.VersionMarker)) { Path p = e.getFileStatus().getPath(); String type = e.getFileStatus().isFile() ? "file" : "directory"; boolean tombstone = e.isDeleted(); if (tombstone) { type = "tombstone " + type; } LOG.info("{} {}", type, p); list.add(p); } }); int count = list.size(); filesFound = count; LOG.info("Found {} entries{}", count, (count == 0 ? " -nothing to purge": "")); if (count > 0) { if (force) { DurationInfo duration = new DurationInfo(LOG, "deleting %s entries from %s", count, ddbms.toString()); // sending this in one by one for more efficient retries for (Path path: list) { ddbms.getInvoker() .retry("delete", prefix, true, () -> tableAccess.delete(path)); } duration.close(); long durationMillis = duration.value(); long timePerEntry = durationMillis / count; LOG.info("Time per entry: {} ms", timePerEntry); filesDeleted = count; } else { LOG.info("Delete process will only be executed when " + FORCE + " is set"); } } return LauncherExitCodes.EXIT_SUCCESS; } /** * This is the Main entry point for the service launcher. * * Converts the arguments to a list, instantiates a instance of the class * then executes it. * @param args command line arguments. */ public static void main(String[] args) { try { serviceMain(Arrays.asList(args), new PurgeS3GuardDynamoTable()); } catch (ExitUtil.ExitException e) { ExitUtil.terminate(e); } } /** * API Entry point to dump the metastore and S3 store world views *

* Both the FS and the store will be dumped: the store is scanned * before and after the sequence to show what changes were made to * the store during the list operation. * @param fs fs to dump. If null a store must be provided. * @param store store to dump (fallback to FS) * @param conf configuration to use (fallback to fs) * @param uri URI of store -only needed if FS is null. * @param force force the actual delete * @return (filesFound, filesDeleted) * @throws ExitUtil.ExitException failure. */ @InterfaceAudience.Private @InterfaceStability.Unstable public static Pair purgeStore( @Nullable final S3AFileSystem fs, @Nullable DynamoDBMetadataStore store, @Nullable Configuration conf, @Nullable URI uri, boolean force) throws ExitUtil.ExitException { ServiceLauncher serviceLauncher = new ServiceLauncher<>(NAME); if (conf == null) { conf = checkNotNull(fs, "No filesystem").getConf(); } if (store == null) { store = (DynamoDBMetadataStore) checkNotNull(fs, "No filesystem") .getMetadataStore(); } PurgeS3GuardDynamoTable purge = new PurgeS3GuardDynamoTable(fs, store, uri, force); ExitUtil.ExitException ex = serviceLauncher.launchService( conf, purge, Collections.emptyList(), false, true); if (ex != null && ex.getExitCode() != 0) { throw ex; } return Pair.of(purge.filesFound, purge.filesDeleted); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy