All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.hadoop.fs.impl.FileSystemMultipartUploader Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.impl;

import java.io.IOException;
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.stream.Collectors;

import org.apache.hadoop.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.commons.compress.utils.IOUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.fs.BBPartHandle;
import org.apache.hadoop.fs.BBUploadHandle;
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FSDataOutputStreamBuilder;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.InternalOperations;
import org.apache.hadoop.fs.Options;
import org.apache.hadoop.fs.PartHandle;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.PathHandle;
import org.apache.hadoop.fs.UploadHandle;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.util.functional.FutureIO;

import static org.apache.hadoop.fs.Path.mergePaths;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;

/**
 * A MultipartUploader that uses the basic FileSystem commands.
 * This is done in three stages:
 * 
    *
  • Init - create a temp {@code _multipart} directory.
  • *
  • PutPart - copying the individual parts of the file to the temp * directory.
  • *
  • Complete - use {@link FileSystem#concat} to merge the files; * and then delete the temp directory.
  • *
*/ @InterfaceAudience.Private @InterfaceStability.Unstable public class FileSystemMultipartUploader extends AbstractMultipartUploader { private static final Logger LOG = LoggerFactory.getLogger( FileSystemMultipartUploader.class); private final FileSystem fs; private final FileSystemMultipartUploaderBuilder builder; private final FsPermission permission; private final long blockSize; private final Options.ChecksumOpt checksumOpt; public FileSystemMultipartUploader( final FileSystemMultipartUploaderBuilder builder, FileSystem fs) { super(builder.getPath()); this.builder = builder; this.fs = fs; blockSize = builder.getBlockSize(); checksumOpt = builder.getChecksumOpt(); permission = builder.getPermission(); } @Override public CompletableFuture startUpload(Path filePath) throws IOException { checkPath(filePath); return FutureIO.eval(() -> { Path collectorPath = createCollectorPath(filePath); fs.mkdirs(collectorPath, FsPermission.getDirDefault()); ByteBuffer byteBuffer = ByteBuffer.wrap( collectorPath.toString().getBytes(StandardCharsets.UTF_8)); return BBUploadHandle.from(byteBuffer); }); } @Override public CompletableFuture putPart(UploadHandle uploadId, int partNumber, Path filePath, InputStream inputStream, long lengthInBytes) throws IOException { checkPutArguments(filePath, inputStream, partNumber, uploadId, lengthInBytes); return FutureIO.eval(() -> innerPutPart(filePath, inputStream, partNumber, uploadId, lengthInBytes)); } private PartHandle innerPutPart(Path filePath, InputStream inputStream, int partNumber, UploadHandle uploadId, long lengthInBytes) throws IOException { byte[] uploadIdByteArray = uploadId.toByteArray(); checkUploadId(uploadIdByteArray); Path collectorPath = new Path(new String(uploadIdByteArray, 0, uploadIdByteArray.length, StandardCharsets.UTF_8)); Path partPath = mergePaths(collectorPath, mergePaths(new Path(Path.SEPARATOR), new Path(partNumber + ".part"))); final FSDataOutputStreamBuilder fileBuilder = fs.createFile(partPath); if 
(checksumOpt != null) { fileBuilder.checksumOpt(checksumOpt); } if (permission != null) { fileBuilder.permission(permission); } try (FSDataOutputStream fsDataOutputStream = fileBuilder.blockSize(blockSize).build()) { IOUtils.copy(inputStream, fsDataOutputStream, this.builder.getBufferSize()); } finally { cleanupWithLogger(LOG, inputStream); } return BBPartHandle.from(ByteBuffer.wrap( partPath.toString().getBytes(StandardCharsets.UTF_8))); } private Path createCollectorPath(Path filePath) { String uuid = UUID.randomUUID().toString(); return mergePaths(filePath.getParent(), mergePaths(new Path(filePath.getName().split("\\.")[0]), mergePaths(new Path("_multipart_" + uuid), new Path(Path.SEPARATOR)))); } private PathHandle getPathHandle(Path filePath) throws IOException { FileStatus status = fs.getFileStatus(filePath); return fs.getPathHandle(status); } private long totalPartsLen(List partHandles) throws IOException { long totalLen = 0; for (Path p : partHandles) { totalLen += fs.getFileStatus(p).getLen(); } return totalLen; } @Override public CompletableFuture complete( UploadHandle uploadId, Path filePath, Map handleMap) throws IOException { checkPath(filePath); return FutureIO.eval(() -> innerComplete(uploadId, filePath, handleMap)); } /** * The upload complete operation. 
* @param multipartUploadId the ID of the upload * @param filePath path * @param handleMap map of handles * @return the path handle * @throws IOException failure */ private PathHandle innerComplete( UploadHandle multipartUploadId, Path filePath, Map handleMap) throws IOException { checkPath(filePath); checkUploadId(multipartUploadId.toByteArray()); checkPartHandles(handleMap); List> handles = new ArrayList<>(handleMap.entrySet()); handles.sort(Comparator.comparingInt(Map.Entry::getKey)); List partHandles = handles .stream() .map(pair -> { byte[] byteArray = pair.getValue().toByteArray(); return new Path(new String(byteArray, 0, byteArray.length, StandardCharsets.UTF_8)); }) .collect(Collectors.toList()); int count = partHandles.size(); // built up to identify duplicates -if the size of this set is // below that of the number of parts, then there's a duplicate entry. Set values = new HashSet<>(count); values.addAll(partHandles); Preconditions.checkArgument(values.size() == count, "Duplicate PartHandles"); byte[] uploadIdByteArray = multipartUploadId.toByteArray(); Path collectorPath = new Path(new String(uploadIdByteArray, 0, uploadIdByteArray.length, StandardCharsets.UTF_8)); boolean emptyFile = totalPartsLen(partHandles) == 0; if (emptyFile) { fs.create(filePath).close(); } else { Path filePathInsideCollector = mergePaths(collectorPath, new Path(Path.SEPARATOR + filePath.getName())); fs.create(filePathInsideCollector).close(); fs.concat(filePathInsideCollector, partHandles.toArray(new Path[handles.size()])); new InternalOperations() .rename(fs, filePathInsideCollector, filePath, Options.Rename.OVERWRITE); } fs.delete(collectorPath, true); return getPathHandle(filePath); } @Override public CompletableFuture abort(UploadHandle uploadId, Path filePath) throws IOException { checkPath(filePath); byte[] uploadIdByteArray = uploadId.toByteArray(); checkUploadId(uploadIdByteArray); Path collectorPath = new Path(new String(uploadIdByteArray, 0, uploadIdByteArray.length, 
StandardCharsets.UTF_8)); return FutureIO.eval(() -> { // force a check for a file existing; raises FNFE if not found fs.getFileStatus(collectorPath); fs.delete(collectorPath, true); return null; }); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy