![JAR search and dependency download from the Maven repository](/logo.png)
org.apache.iceberg.ksyun.ks3.Ks3MultipartOutputStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-ksyun Show documentation
Show all versions of iceberg-ksyun Show documentation
apache iceberg support ks3
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg.ksyun.ks3;
import com.ksyun.ks3.dto.InitiateMultipartUploadResult;
import com.ksyun.ks3.dto.ObjectMetadata;
import com.ksyun.ks3.dto.PartETag;
import com.ksyun.ks3.service.Ks3;
import com.ksyun.ks3.service.request.CompleteMultipartUploadRequest;
import com.ksyun.ks3.service.request.InitiateMultipartUploadRequest;
import com.ksyun.ks3.service.request.PutObjectRequest;
import com.ksyun.ks3.service.request.UploadPartRequest;
import com.ksyun.ks3.utils.BinaryUtils;
import org.apache.commons.codec.digest.DigestUtils;
import org.apache.iceberg.io.PositionOutputStream;
import org.apache.iceberg.ksyun.KsyunProperties;
import org.apache.iceberg.relocated.com.google.common.base.Joiner;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.base.Predicates;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.relocated.com.google.common.collect.Maps;
import org.apache.iceberg.relocated.com.google.common.io.CountingOutputStream;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.MoreExecutors;
import org.apache.iceberg.relocated.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.iceberg.util.Tasks;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.io.UncheckedIOException;
import java.nio.file.Files;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.stream.Collectors;
class Ks3MultipartOutputStream extends PositionOutputStream {
private static final Logger LOG = LoggerFactory.getLogger(Ks3MultipartOutputStream.class);
private static final String digestAlgorithm = "MD5";
private static volatile ExecutorService executorService;
private final StackTraceElement[] createStack;
private final Ks3 client;
private final Ks3URI uri;
private final KsyunProperties ksyunProperties;
// private final Set writeTags;
private CountingOutputStream stream;
private final List stagingFiles = Lists.newArrayList();
private final File stagingDirectory;
private File currentStagingFile;
private String multipartUploadId;
private final Map> multiPartMap = Maps.newHashMap();
// private final Map> multiPartMap = Maps.newHashMap();
private final int multiPartSize;
private final int multiPartThresholdSize;
private final boolean isChecksumEnabled;
private final MessageDigest completeMessageDigest;
private MessageDigest currentPartMessageDigest;
private long pos = 0;
private boolean closed = false;
@SuppressWarnings("StaticAssignmentInConstructor")
Ks3MultipartOutputStream(Ks3 client, Ks3URI uri, KsyunProperties ksyunProperties)
throws IOException {
if (executorService == null) {
synchronized (Ks3MultipartOutputStream.class) {
if (executorService == null) {
executorService = MoreExecutors.getExitingExecutorService(
(ThreadPoolExecutor) Executors.newFixedThreadPool(
ksyunProperties.multipartUploadThreads(),
new ThreadFactoryBuilder()
.setDaemon(true)
.setNameFormat("iceberg-ks3fileio-upload-%d")
.build()));
}
}
}
this.client = client;
this.uri = uri;
this.ksyunProperties = ksyunProperties;
// this.writeTags = ksyunProperties.s3WriteTags();
this.createStack = Thread.currentThread().getStackTrace();
this.multiPartSize = ksyunProperties.s3FileIoMultiPartSize();
this.multiPartThresholdSize = (int) (multiPartSize * ksyunProperties.s3FileIOMultipartThresholdFactor());
this.stagingDirectory = new File(ksyunProperties.s3fileIoStagingDirectory());
this.isChecksumEnabled = ksyunProperties.isS3ChecksumEnabled();
// this.isChecksumEnabled = false;
try {
this.completeMessageDigest = isChecksumEnabled ? MessageDigest.getInstance(digestAlgorithm) : null;
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("Failed to create message digest needed for s3 checksum checks", e);
}
// System.out.println("-------- multiPartSize = " + multiPartSize +
// ", multiPartThresholdSize = " + multiPartThresholdSize +
// ", stagingDirectory = " + stagingDirectory);
newStream();
}
@Override
public long getPos() {
return pos;
}
@Override
public void flush() throws IOException {
stream.flush();
}
@Override
public void write(int b) throws IOException {
if (stream.getCount() >= multiPartSize) {
newStream();
uploadParts();
}
stream.write(b);
pos += 1;
// switch to multipart upload
if (multipartUploadId == null && pos >= multiPartThresholdSize) {
initializeMultiPartUpload();
uploadParts();
}
}
@Override
public void write(byte[] b, int off, int len) throws IOException {
int remaining = len;
int relativeOffset = off;
// Write the remainder of the part size to the staging file
// and continue to write new staging files if the write is
// larger than the part size.
while (stream.getCount() + remaining > multiPartSize) {
int writeSize = multiPartSize - (int) stream.getCount();
stream.write(b, relativeOffset, writeSize);
remaining -= writeSize;
relativeOffset += writeSize;
newStream();
uploadParts();
}
stream.write(b, relativeOffset, remaining);
pos += len;
// switch to multipart upload
if (multipartUploadId == null && pos >= multiPartThresholdSize) {
initializeMultiPartUpload();
uploadParts();
}
}
private void newStream() throws IOException {
if (stream != null) {
stream.close();
}
createStagingDirectoryIfNotExists();
currentStagingFile = File.createTempFile("ks3fileio-", ".tmp", stagingDirectory);
currentStagingFile.deleteOnExit();
try {
currentPartMessageDigest = isChecksumEnabled ? MessageDigest.getInstance(digestAlgorithm) : null;
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("Failed to create message digest needed for s3 checksum checks.", e);
}
stagingFiles.add(new FileAndDigest(currentStagingFile, currentPartMessageDigest));
int bufferSize = ksyunProperties.outputBufferSize();
if (isChecksumEnabled) {
DigestOutputStream digestOutputStream;
// if switched over to multipart threshold already, no need to update complete message digest
if (multipartUploadId != null) {
digestOutputStream = new DigestOutputStream(
new BufferedOutputStream(new FileOutputStream(currentStagingFile), bufferSize),
currentPartMessageDigest);
} else {
digestOutputStream = new DigestOutputStream(
new DigestOutputStream(
new BufferedOutputStream(new FileOutputStream(currentStagingFile), bufferSize),
currentPartMessageDigest),
completeMessageDigest);
}
stream = new CountingOutputStream(digestOutputStream);
} else {
stream = new CountingOutputStream(new BufferedOutputStream(new FileOutputStream(currentStagingFile), bufferSize));
}
}
@Override
public void close() throws IOException {
if (closed) {
return;
}
super.close();
closed = true;
try {
stream.close();
completeUploads();
} finally {
cleanUpStagingFiles();
}
}
private void initializeMultiPartUpload() {
// System.out.println("------ initializeMultiPartUpload uri.name() : " + uri.name());
InitiateMultipartUploadRequest request = new InitiateMultipartUploadRequest(uri.bucket(), uri.name());
InitiateMultipartUploadResult result = client.initiateMultipartUpload(request);
multipartUploadId = result.getUploadId();
}
@SuppressWarnings("checkstyle:LocalVariableName")
private void uploadParts() {
// exit if multipart has not been initiated
if (multipartUploadId == null) {
return;
}
stagingFiles.stream()
// do not upload the file currently being written
.filter(f -> closed || !f.file().equals(currentStagingFile))
// do not upload any files that have already been processed
.filter(Predicates.not(f -> multiPartMap.containsKey(f.file())))
.forEach(fileAndDigest -> {
File f = fileAndDigest.file();
UploadPartRequest uploadPartRequest = new UploadPartRequest(uri.bucket(), uri.name(),
multipartUploadId, stagingFiles.indexOf(fileAndDigest) + 1, f, multiPartSize, 0L);
if(fileAndDigest.hasDigest()){
uploadPartRequest.setContentMD5(BinaryUtils.toBase64(fileAndDigest.digest()));
}
CompletableFuture future = CompletableFuture.supplyAsync(
() -> {
PartETag partETag = client.uploadPart(uploadPartRequest);
return partETag;
},
executorService
).whenComplete((result, thrown) -> {
try {
Files.deleteIfExists(f.toPath());
} catch (IOException e) {
LOG.warn("Failed to delete staging file: {}", f, e);
}
if (thrown != null) {
LOG.error("Failed to upload part: {}", uploadPartRequest, thrown);
abortUpload();
}
});
multiPartMap.put(f, future);
});
}
private void completeMultiPartUpload() {
Preconditions.checkState(closed, "Complete upload called on open stream: " + uri);
List completedParts =
multiPartMap.values()
.stream()
.map(CompletableFuture::join)
.sorted(Comparator.comparing(PartETag::getPartNumber))
.collect(Collectors.toList());
CompleteMultipartUploadRequest request = new CompleteMultipartUploadRequest(uri.bucket(), uri.name(), multipartUploadId, completedParts);
Tasks.foreach(request)
.noRetry()
.onFailure((r, thrown) -> {
LOG.error("Failed to complete multipart upload request: {}", r, thrown);
abortUpload();
})
.throwFailureWhenFinished()
.run(client::completeMultipartUpload);
}
private void abortUpload() {
if (multipartUploadId != null) {
try {
client.abortMultipartUpload(uri.bucket(), uri.name(), multipartUploadId);
} finally {
cleanUpStagingFiles();
}
}
}
private void cleanUpStagingFiles() {
Tasks.foreach(stagingFiles.stream().map(FileAndDigest::file))
.suppressFailureWhenFinished()
.onFailure((file, thrown) -> LOG.warn("Failed to delete staging file: {}", file, thrown))
.run(File::delete);
}
private void completeUploads() {
if (multipartUploadId == null) {
InputStream contentStream = new BufferedInputStream(stagingFiles.stream()
.map(FileAndDigest::file)
.map(Ks3MultipartOutputStream::uncheckedInputStream)
.reduce(SequenceInputStream::new)
.orElseGet(() -> new ByteArrayInputStream(new byte[0])));
ObjectMetadata metadata = new ObjectMetadata();
if (isChecksumEnabled) {
metadata.setContentMD5(BinaryUtils.toBase64(completeMessageDigest.digest()));
}
PutObjectRequest request = new PutObjectRequest(uri.bucket(), uri.name(), contentStream, metadata);
client.putObject(request);
} else {
uploadParts();
completeMultiPartUpload();
}
}
private static InputStream uncheckedInputStream(File file) {
try {
return new FileInputStream(file);
} catch (IOException e) {
throw new UncheckedIOException(e);
}
}
private void createStagingDirectoryIfNotExists() throws IOException, SecurityException {
if (!stagingDirectory.exists()) {
LOG.info("Staging directory does not exist, trying to create one: {}",
stagingDirectory.getAbsolutePath());
boolean createdStagingDirectory = stagingDirectory.mkdirs();
if (createdStagingDirectory) {
LOG.info("Successfully created staging directory: {}", stagingDirectory.getAbsolutePath());
} else {
if (stagingDirectory.exists()) {
LOG.info("Successfully created staging directory by another process: {}",
stagingDirectory.getAbsolutePath());
} else {
throw new IOException(
"Failed to create staging directory due to some unknown reason: " + stagingDirectory
.getAbsolutePath());
}
}
}
}
@SuppressWarnings("checkstyle:NoFinalizer")
@Override
protected void finalize() throws Throwable {
super.finalize();
if (!closed) {
close(); // releasing resources is more important than printing the warning
String trace = Joiner.on("\n\t").join(
Arrays.copyOfRange(createStack, 1, createStack.length));
LOG.warn("Unclosed output stream created by:\n\t{}", trace);
}
}
private static class FileAndDigest {
private final File file;
private final MessageDigest digest;
FileAndDigest(File file, MessageDigest digest) {
this.file = file;
this.digest = digest;
}
File file() {
return file;
}
byte[] digest() {
return digest.digest();
}
public boolean hasDigest() {
return digest != null;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy