org.apache.hadoop.fs.s3a.impl.RenameOperation Maven / Gradle / Ivy
Show all versions of hadoop-aws Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.fs.s3a.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.atomic.AtomicLong;
import com.amazonaws.AmazonClientException;
import com.amazonaws.SdkBaseException;
import com.amazonaws.services.s3.model.DeleteObjectsRequest;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.RemoteIterator;
import org.apache.hadoop.fs.s3a.Invoker;
import org.apache.hadoop.fs.s3a.RenameFailedException;
import org.apache.hadoop.fs.s3a.Retries;
import org.apache.hadoop.fs.s3a.S3AFileStatus;
import org.apache.hadoop.fs.s3a.S3ALocatedFileStatus;
import org.apache.hadoop.fs.s3a.S3AReadOpContext;
import org.apache.hadoop.fs.s3a.S3ObjectAttributes;
import org.apache.hadoop.fs.s3a.Tristate;
import org.apache.hadoop.util.DurationInfo;
import org.apache.hadoop.util.OperationDuration;
import static org.apache.hadoop.fs.s3a.S3AUtils.translateException;
import static org.apache.hadoop.fs.store.audit.AuditingFunctions.callableWithinAuditSpan;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.submit;
import static org.apache.hadoop.fs.s3a.impl.CallableSupplier.waitForCompletion;
import static org.apache.hadoop.fs.s3a.impl.InternalConstants.RENAME_PARALLEL_LIMIT;
import static org.apache.hadoop.util.Preconditions.checkArgument;
/**
* A parallelized rename operation.
*
* The parallel execution is in groups of size
* {@link InternalConstants#RENAME_PARALLEL_LIMIT}; it is only
* after one group completes that the next group is initiated.
*
* Once enough files have been copied that they meet the
* {@link InternalConstants#MAX_ENTRIES_TO_DELETE} threshold, a delete
* is initiated.
* If it succeeds, the rename continues with the next group of files.
*
* Directory Markers which have child entries are never copied; only those
* which represent empty directories are copied in the rename.
* The {@link DirMarkerTracker} tracks which markers must be copied, and
* which can simply be deleted from the source.
* As a result: rename always purges all non-leaf directory markers from
* the copied tree. This is to ensure that even if a directory tree
* is copied from an authoritative path to a non-authoritative one
* there is never any contamination of the non-auth path with markers.
*
* The rename operation implements the classic HDFS rename policy of
* rename(file, dir) renames the file under the directory.
*
*
* There is no validation of input and output paths.
* Callers are required to themselves verify that destination is not under
* the source, above the source, the source itself, etc, etc.
*/
public class RenameOperation extends ExecutingStoreOperation {
private static final Logger LOG = LoggerFactory.getLogger(
RenameOperation.class);
private final Path sourcePath;
private final String sourceKey;
private final S3AFileStatus sourceStatus;
private final Path destPath;
private final String destKey;
private final S3AFileStatus destStatus;
/**
* Callbacks into the filesystem.
*/
private final OperationCallbacks callbacks;
/**
* Counter of bytes copied.
*/
private final AtomicLong bytesCopied = new AtomicLong();
/**
* Page size for bulk deletes.
*/
private final int pageSize;
/**
* List of active copies.
*/
private final List> activeCopies =
new ArrayList<>(RENAME_PARALLEL_LIMIT);
/**
* list of keys to delete on the next (bulk) delete call.
*/
private final List keysToDelete =
new ArrayList<>();
/**
* Initiate the rename.
*
* @param storeContext store context
* @param sourcePath source path
* @param sourceKey key of source
* @param sourceStatus pre-fetched source status
* @param destPath destination path.
* @param destKey destination key
* @param destStatus destination status.
* @param callbacks callback provider
* @param pageSize size of delete requests
*/
public RenameOperation(
final StoreContext storeContext,
final Path sourcePath,
final String sourceKey,
final S3AFileStatus sourceStatus,
final Path destPath,
final String destKey,
final S3AFileStatus destStatus,
final OperationCallbacks callbacks,
final int pageSize) {
super(storeContext);
this.sourcePath = sourcePath;
this.sourceKey = sourceKey;
this.sourceStatus = sourceStatus;
this.destPath = destPath;
this.destKey = destKey;
this.destStatus = destStatus;
this.callbacks = callbacks;
checkArgument(pageSize > 0
&& pageSize <= InternalConstants.MAX_ENTRIES_TO_DELETE,
"page size out of range: %s", pageSize);
this.pageSize = pageSize;
}
/**
* Wait for the active copies to complete then reset the list.
* @param reason for messages
* @throws IOException if one of the called futures raised an IOE.
* @throws RuntimeException if one of the futures raised one.
*/
@Retries.OnceTranslated
private void completeActiveCopies(String reason) throws IOException {
LOG.debug("Waiting for {} active copies to complete: {}",
activeCopies.size(), reason);
waitForCompletion(activeCopies);
activeCopies.clear();
}
/**
* Queue an object for deletion.
*
* This object will be deleted when the next page of objects to delete
* is posted to S3. Therefore, the COPY must have finished
* before that deletion operation takes place.
* This is managed by:
*
* -
* The delete operation only being executed once all active
* copies have completed.
*
* -
* Only queuing objects here whose copy operation has
* been submitted and so is in that thread pool.
*
*
* This method must only be called from the primary thread.
* @param path path to the object.
* @param key key of the object.
*/
private void queueToDelete(Path path, String key) {
LOG.debug("Queueing to delete {}", path);
keysToDelete.add(new DeleteObjectsRequest.KeyVersion(key));
}
/**
* Queue a list of markers for deletion.
*
* no-op if the list is empty.
*
* See {@link #queueToDelete(Path, String)} for
* details on safe use of this method.
*
* @param markersToDelete markers
*/
private void queueToDelete(
List markersToDelete) {
markersToDelete.forEach(m -> queueToDelete(
null,
m.getKey()));
}
/**
* Queue a single marker for deletion.
*
* See {@link #queueToDelete(Path, String)} for
* details on safe use of this method.
*
* @param marker markers
*/
private void queueToDelete(final DirMarkerTracker.Marker marker) {
queueToDelete(marker.getPath(), marker.getKey());
}
/**
* Block waiting for ay active copies to finish
* then delete all queued keys + paths to delete.
* @param reason reason for logs
* @throws IOException failure.
*/
@Retries.RetryTranslated
private void completeActiveCopiesAndDeleteSources(String reason)
throws IOException {
completeActiveCopies(reason);
removeSourceObjects(
keysToDelete
);
// now reset the lists.
keysToDelete.clear();
}
@Retries.RetryMixed
public Long execute() throws IOException {
executeOnlyOnce();
// The path to whichever file or directory is created by the
// rename. When deleting markers all parents of
// this path will need their markers pruned.
Path destCreated = destPath;
// Ok! Time to start
try {
if (sourceStatus.isFile()) {
// rename the file. The destination path will be different
// from that passed in if the destination is a directory;
// the final value is needed to completely delete parent markers
// when they are not being retained.
destCreated = renameFileToDest();
} else {
recursiveDirectoryRename();
}
} catch (AmazonClientException | IOException ex) {
// rename failed.
// block for all ongoing copies to complete, successfully or not
try {
completeActiveCopies("failure handling");
} catch (IOException e) {
// Downgrading to warn because an exception is already
// about to be thrown.
LOG.warn("While completing all active copies", e);
}
throw convertToIOException(ex);
}
callbacks.finishRename(sourcePath, destCreated);
return bytesCopied.get();
}
/**
* The source is a file: rename it to the destination, which
* will be under the current destination path if that is a directory.
* @return the path of the object created.
* @throws IOException failure
*/
protected Path renameFileToDest() throws IOException {
final StoreContext storeContext = getStoreContext();
// the source is a file.
Path copyDestinationPath = destPath;
String copyDestinationKey = destKey;
S3ObjectAttributes sourceAttributes =
callbacks.createObjectAttributes(sourceStatus);
S3AReadOpContext readContext = callbacks.createReadContext(sourceStatus);
if (destStatus != null && destStatus.isDirectory()) {
// destination is a directory: build the final destination underneath
String newDestKey = maybeAddTrailingSlash(destKey);
String filename = sourceKey.substring(
storeContext.pathToKey(sourcePath.getParent()).length() + 1);
newDestKey = newDestKey + filename;
copyDestinationKey = newDestKey;
copyDestinationPath = storeContext.keyToPath(newDestKey);
}
// destination either does not exist or is a file to overwrite.
LOG.debug("rename: renaming file {} to {}", sourcePath,
copyDestinationPath);
copySource(
sourceKey,
sourceAttributes,
readContext,
copyDestinationPath,
copyDestinationKey);
bytesCopied.addAndGet(sourceStatus.getLen());
// delete the source
callbacks.deleteObjectAtPath(sourcePath, sourceKey, true);
return copyDestinationPath;
}
/**
* Execute a full recursive rename.
* There is a special handling of directly markers here -only leaf markers
* are copied. This reduces incompatibility "regions" across versions.
* @throws IOException failure
*/
protected void recursiveDirectoryRename() throws IOException {
final StoreContext storeContext = getStoreContext();
LOG.debug("rename: renaming directory {} to {}", sourcePath, destPath);
// This is a directory-to-directory copy
String dstKey = maybeAddTrailingSlash(destKey);
String srcKey = maybeAddTrailingSlash(sourceKey);
// Verify dest is not a child of the source directory
if (dstKey.startsWith(srcKey)) {
throw new RenameFailedException(srcKey, dstKey,
"cannot rename a directory to a subdirectory of itself ");
}
if (destStatus != null
&& destStatus.isEmptyDirectory() == Tristate.TRUE) {
// delete unnecessary fake directory at the destination.
LOG.debug("Deleting fake directory marker at destination {}",
destStatus.getPath());
// Although the dir marker policy doesn't always need to do this,
// it's simplest just to be consistent here.
callbacks.deleteObjectAtPath(destStatus.getPath(), dstKey, false);
}
Path parentPath = storeContext.keyToPath(srcKey);
// Track directory markers so that we know which leaf directories need to be
// recreated
DirMarkerTracker dirMarkerTracker = new DirMarkerTracker(parentPath,
false);
final RemoteIterator iterator =
callbacks.listFilesAndDirectoryMarkers(parentPath,
sourceStatus,
true);
while (iterator.hasNext()) {
// get the next entry in the listing.
S3ALocatedFileStatus child = iterator.next();
LOG.debug("To rename {}", child);
// convert it to an S3 key.
String k = storeContext.pathToKey(child.getPath());
// possibly adding a "/" if it represents directory and it does
// not have a trailing slash already.
String key = (child.isDirectory() && !k.endsWith("/"))
? k + "/"
: k;
// the source object to copy as a path.
Path childSourcePath = storeContext.keyToPath(key);
List markersToDelete;
boolean isMarker = key.endsWith("/");
if (isMarker) {
// add the marker to the tracker.
// it will not be deleted _yet_ but it may find a list of parent
// markers which may now be deleted.
markersToDelete = dirMarkerTracker.markerFound(
childSourcePath, key, child);
} else {
// it is a file.
// note that it has been found -this may find a list of parent
// markers which may now be deleted.
markersToDelete = dirMarkerTracker.fileFound(
childSourcePath, key, child);
// the destination key is that of the key under the source tree,
// remapped under the new destination path.
String newDestKey =
dstKey + key.substring(srcKey.length());
Path childDestPath = storeContext.keyToPath(newDestKey);
// mark the source file for deletion on a successful copy.
queueToDelete(childSourcePath, key);
// now begin the single copy
CompletableFuture copy = initiateCopy(child, key,
newDestKey, childDestPath);
activeCopies.add(copy);
bytesCopied.addAndGet(sourceStatus.getLen());
}
// add any markers to delete to the operation so they get cleaned
// incrementally
queueToDelete(markersToDelete);
// and trigger any end of loop operations
endOfLoopActions();
} // end of iteration through the list
// finally process remaining directory markers
copyEmptyDirectoryMarkers(srcKey, dstKey, dirMarkerTracker);
// await the final set of copies and their deletion
// This will notify the renameTracker that these objects
// have been deleted.
completeActiveCopiesAndDeleteSources("final copy and delete");
}
/**
* Operations to perform at the end of every loop iteration.
*
* This may block the thread waiting for copies to complete
* and/or delete a page of data.
*/
private void endOfLoopActions() throws IOException {
if (keysToDelete.size() == pageSize) {
// finish ongoing copies then delete all queued keys.
completeActiveCopiesAndDeleteSources("paged delete");
} else {
if (activeCopies.size() == RENAME_PARALLEL_LIMIT) {
// the limit of active copies has been reached;
// wait for completion or errors to surface.
LOG.debug("Waiting for active copies to complete");
completeActiveCopies("batch threshold reached");
}
}
}
/**
* Process all directory markers at the end of the rename.
* All leaf markers are queued to be copied in the store;
*
* Why not simply create new markers? All the metadata
* gets copied too, so if there was anything relevant then
* it would be preserved.
*
* At the same time: markers aren't valued much and may
* be deleted without any safety checks -so if there was relevant
* data it is at risk of destruction at any point.
* If there are lots of empty directory rename operations taking place,
* the decision to copy the source may need revisiting.
* Be advised though: the costs of the copy not withstanding,
* it is a lot easier to have one single type of scheduled copy operation
* than have copy and touch calls being scheduled.
*
* The duration returned is the time to initiate all copy/delete operations,
* including any blocking waits for active copies and paged deletes
* to execute. There may still be outstanding operations
* queued by this method -the duration may be an underestimate
* of the time this operation actually takes.
*
* @param srcKey source key with trailing /
* @param dstKey dest key with trailing /
* @param dirMarkerTracker tracker of markers
* @return how long it took.
*/
private OperationDuration copyEmptyDirectoryMarkers(
final String srcKey,
final String dstKey,
final DirMarkerTracker dirMarkerTracker) throws IOException {
// directory marker work.
LOG.debug("Copying markers from {}", dirMarkerTracker);
final StoreContext storeContext = getStoreContext();
Map leafMarkers =
dirMarkerTracker.getLeafMarkers();
Map surplus =
dirMarkerTracker.getSurplusMarkers();
// for all leaf markers: copy the original
DurationInfo duration = new DurationInfo(LOG, false,
"copying %d leaf markers with %d surplus not copied",
leafMarkers.size(), surplus.size());
for (DirMarkerTracker.Marker entry: leafMarkers.values()) {
String key = entry.getKey();
String newDestKey =
dstKey + key.substring(srcKey.length());
Path childDestPath = storeContext.keyToPath(newDestKey);
LOG.debug("copying dir marker from {} to {}", key, newDestKey);
activeCopies.add(
initiateCopy(
entry.getStatus(),
key,
newDestKey,
childDestPath));
queueToDelete(entry);
// end of loop
endOfLoopActions();
}
duration.close();
return duration;
}
/**
* Initiate a copy operation in the executor.
* @param source status of the source object.
* @param key source key
* @param newDestKey destination key
* @param childDestPath destination path.
* @return the future.
*/
protected CompletableFuture initiateCopy(
final S3ALocatedFileStatus source,
final String key,
final String newDestKey,
final Path childDestPath) {
S3ObjectAttributes sourceAttributes =
callbacks.createObjectAttributes(
source.getPath(),
source.getEtag(),
source.getVersionId(),
source.getLen());
// queue the copy operation for execution in the thread pool
return submit(getStoreContext().getExecutor(),
callableWithinAuditSpan(getAuditSpan(), () ->
copySource(
key,
sourceAttributes,
callbacks.createReadContext(source),
childDestPath,
newDestKey)));
}
/**
* This is invoked to copy a file or directory marker.
* It may be called in its own thread.
* @param srcKey source key
* @param srcAttributes status of the source object
* @param destination destination as a qualified path.
* @param destinationKey destination key
* @return the destination path.
* @throws IOException failure
*/
@Retries.RetryTranslated
private Path copySource(
final String srcKey,
final S3ObjectAttributes srcAttributes,
final S3AReadOpContext readContext,
final Path destination,
final String destinationKey) throws IOException {
long len = srcAttributes.getLen();
try (DurationInfo ignored = new DurationInfo(LOG, false,
"Copy file from %s to %s (length=%d)", srcKey, destinationKey, len)) {
callbacks.copyFile(srcKey, destinationKey,
srcAttributes, readContext);
}
return destination;
}
/**
* Remove source objects.
* @param keys list of keys to delete
* @throws IOException failure
*/
@Retries.RetryTranslated
private void removeSourceObjects(
final List keys)
throws IOException {
// remove the keys
// list what is being deleted for the interest of anyone
// who is trying to debug why objects are no longer there.
if (LOG.isDebugEnabled()) {
LOG.debug("Initiating delete operation for {} objects", keys.size());
for (DeleteObjectsRequest.KeyVersion key : keys) {
LOG.debug(" {} {}", key.getKey(),
key.getVersion() != null ? key.getVersion() : "");
}
}
Invoker.once("rename " + sourcePath + " to " + destPath,
sourcePath.toString(), () ->
callbacks.removeKeys(
keys,
false
));
}
/**
* Turns a path (relative or otherwise) into an S3 key, adding a trailing
* "/" if the path is not the root and does not already have a "/"
* at the end.
*
* @param key s3 key or ""
* @return the with a trailing "/", or, if it is the root key, "",
*/
private String maybeAddTrailingSlash(String key) {
if (!key.isEmpty() && !key.endsWith("/")) {
return key + '/';
} else {
return key;
}
}
/**
* Convert a passed in exception (expected to be an IOE or AWS exception)
* into an IOException.
* @param ex exception caught
* @return the exception to throw in the failure handler.
*/
protected IOException convertToIOException(final Exception ex) {
if (ex instanceof IOException) {
return (IOException) ex;
} else if (ex instanceof SdkBaseException) {
return translateException("rename " + sourcePath + " to " + destPath,
sourcePath.toString(),
(SdkBaseException) ex);
} else {
// should never happen, but for completeness
return new IOException(ex);
}
}
}