All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.hadoop.fs.s3a.s3guard.DelayedUpdateRenameTracker Maven / Gradle / Ivy

Go to download

This module contains code to support integration with Amazon Web Services. It also declares the dependencies needed to work with AWS services.

There is a newer version: 3.4.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.s3a.s3guard;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import com.amazonaws.SdkBaseException;

import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.fs.s3a.impl.StoreContext;
import org.apache.hadoop.util.DurationInfo;

import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.addMoveAncestors;
import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.addMoveDir;
import static org.apache.hadoop.fs.s3a.s3guard.S3Guard.addMoveFile;

/**
 * This is the rename updating strategy originally used:
 * a collection of source paths and a list of destinations are created,
 * then updated at the end (possibly slow).
 * 

* It is not currently instantiated by any of the active trackers, * but is preserved to show that the original rename strategy * can be implemented via the tracker model. */ public class DelayedUpdateRenameTracker extends RenameTracker { private final MetadataStore metadataStore; private final Collection sourcePaths = new HashSet<>(); private final List destMetas = new ArrayList<>(); private final List deletedPaths = new ArrayList<>(); public DelayedUpdateRenameTracker( final StoreContext storeContext, final MetadataStore metadataStore, final Path sourceRoot, final Path dest, final BulkOperationState operationState) { super("DelayedUpdateRenameTracker", storeContext, metadataStore, sourceRoot, dest, operationState); this.metadataStore = storeContext.getMetadataStore(); } @Override public synchronized void fileCopied( final Path sourcePath, final S3ObjectAttributes sourceAttributes, final S3ObjectAttributes destAttributes, final Path destPath, final long blockSize, final boolean addAncestors) throws IOException { addMoveFile(metadataStore, sourcePaths, destMetas, sourcePath, destPath, sourceAttributes.getLen(), blockSize, getOwner(), destAttributes.getETag(), destAttributes.getVersionId()); // Ancestor directories may not be listed, so we explicitly add them if (addAncestors) { addMoveAncestors(metadataStore, sourcePaths, destMetas, getSourceRoot(), sourcePath, destPath, getOwner()); } } @Override public synchronized void directoryMarkerCopied(final Path sourcePath, final Path destPath, final boolean addAncestors) throws IOException { addMoveDir(metadataStore, sourcePaths, destMetas, sourcePath, destPath, getOwner()); // Ancestor directories may not be listed, so we explicitly add them if (addAncestors) { addMoveAncestors(metadataStore, sourcePaths, destMetas, getSourceRoot(), sourcePath, destPath, getOwner()); } } @Override public synchronized void moveSourceDirectory() throws IOException { if (!sourcePaths.contains(getSourceRoot())) { addMoveDir(metadataStore, 
sourcePaths, destMetas, getSourceRoot(), getDest(), getOwner()); } } @Override public synchronized void sourceObjectsDeleted( final Collection paths) throws IOException { // add to the list of deleted paths. deletedPaths.addAll(paths); } @Override public void completeRename() throws IOException { metadataStore.move(sourcePaths, destMetas, getOperationState()); super.completeRename(); } @Override public IOException renameFailed(final Exception ex) { LOG.warn("Rename has failed; updating s3guard with destination state"); try (DurationInfo ignored = new DurationInfo(LOG, "Cleaning up deleted paths")) { // the destination paths are updated; the source is left alone. metadataStore.move(new ArrayList<>(0), destMetas, getOperationState()); for (Path deletedPath : deletedPaths) { // this is not ideal in that it may leave parent stuff around. metadataStore.delete(deletedPath, getOperationState()); } deleteParentPaths(); } catch (IOException | SdkBaseException e) { LOG.warn("Ignoring error raised in AWS SDK ", e); } return super.renameFailed(ex); } /** * Delete all the parent paths we know to be empty (by walking up the tree * deleting as appropriate). * @throws IOException failure */ private void deleteParentPaths() throws IOException { Set parentPaths = new HashSet<>(); for (Path deletedPath : deletedPaths) { Path parent = deletedPath.getParent(); if (!parent.equals(getSourceRoot())) { parentPaths.add(parent); } } // now there's a set of parent paths. We now want to // get them ordered by depth, so that deeper entries come first // that way: when we check for a parent path existing we can // see if it really is empty. List parents = new ArrayList<>(parentPaths); parents.sort(PathOrderComparators.TOPMOST_PATH_LAST); for (Path parent : parents) { PathMetadata md = metadataStore.get(parent, true); if (md != null && md.isEmptyDirectory() == Tristate.TRUE) { // if were confident that this is empty: delete it. metadataStore.delete(parent, getOperationState()); } } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy