
io.github.nejckorasa.s3.unzip.strategy.NoSplitUnzipStrategy

package io.github.nejckorasa.s3.unzip.strategy;

import com.amazonaws.services.s3.AmazonS3;
import io.github.nejckorasa.s3.unzip.S3UnzipException;
import io.github.nejckorasa.s3.unzip.S3ZipFile;
import io.github.nejckorasa.s3.upload.S3MultipartUpload;
import lombok.*;
import lombok.extern.slf4j.Slf4j;

import java.io.ByteArrayOutputStream;

import static com.amazonaws.services.s3.internal.Constants.MB;

/**
 * Unzips and uploads a file without splitting (sharding) - it creates a 1:1 mapping between zipped and unzipped files.
 * <p>
 * The file is read in byte chunks. This strategy should ideally be used for smaller files.
 * <p>
 * Utilizes multipart upload - unzipping is achieved without keeping all data in memory or writing to disk.
 */
@Slf4j
@NoArgsConstructor
@AllArgsConstructor(access = AccessLevel.PRIVATE)
public class NoSplitUnzipStrategy implements UnzipStrategy {

    /**
     * Upload part size limit, i.e. 20 MB will buffer up to 20 MB of unzipped data before uploading it as a single part.
     *
     * @see S3MultipartUpload
     */
    @NonNull
    @With
    private int uploadPartBytesLimit = 20 * MB;

    /**
     * Configuration for S3 multipart upload. Configures {@link S3MultipartUpload}.
     */
    @NonNull
    private S3MultipartUpload.Config config = S3MultipartUpload.Config.DEFAULT;

    /**
     * Creates NoSplitUnzipStrategy with the provided configuration for {@link S3MultipartUpload}.
     *
     * @param config Multipart upload configuration for {@link S3MultipartUpload}
     */
    public NoSplitUnzipStrategy(@NonNull S3MultipartUpload.Config config) {
        this.config = config;
    }

    @Override
    public void unzip(S3ZipFile zipFile, AmazonS3 s3Client) {
        String filename = zipFile.filename();
        long compressedSize = zipFile.compressedSize();
        long size = zipFile.size();
        String key = zipFile.key();

        var s3MultipartUpload = new S3MultipartUpload(zipFile.getBucketName(), key, s3Client, config);
        s3MultipartUpload.initialize();

        log.info("Unzipping {}, compressed: {} bytes, extracted: {} bytes to {}", filename, compressedSize, size, key);

        try {
            int bytesRead;
            long allBytesRead = 0;
            long uploadPartBytes = 0;
            long partNumber = 0;

            byte[] data = new byte[uploadPartBytesLimit];
            var outputStream = new ByteArrayOutputStream();

            // Read the unzipped entry in chunks, buffering until the part size limit is reached
            while ((bytesRead = zipFile.getInputStream().read(data, 0, data.length)) != -1) {
                outputStream.write(data, 0, bytesRead);

                if (uploadPartBytes < uploadPartBytesLimit) {
                    uploadPartBytes += bytesRead;
                    continue;
                }

                partNumber += 1;
                allBytesRead += bytesRead;

                log.debug("Uploading part [{}] for file: {} - Read {} bytes out of {} bytes", partNumber, filename, allBytesRead, size);

                // Upload the buffered bytes as one part and reset the buffer for the next part
                s3MultipartUpload.uploadPart(outputStream.toByteArray());
                outputStream.reset();
                uploadPartBytes = 0;
            }

            // Upload whatever remains in the buffer and complete the multipart upload
            s3MultipartUpload.uploadFinalPart(outputStream.toByteArray());
            log.info("Unzipped and uploaded file: {} in {} parts", filename, partNumber);

        } catch (Throwable t) {
            // Abort the multipart upload so no incomplete parts are left behind in S3
            s3MultipartUpload.abort();
            throw new S3UnzipException("Failed to unzip " + filename, t);
        }
    }
}
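
Usage sketch (not part of the artifact source): the snippet below shows how this strategy could be constructed and invoked, using only the constructors, the Lombok-generated withUploadPartBytesLimit method, and the unzip method visible in this class. The AmazonS3ClientBuilder call comes from the AWS SDK v1; the S3ZipFile argument, the example class name, and the 50 MB value are assumptions for illustration, not part of this file.

import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3ClientBuilder;
import io.github.nejckorasa.s3.unzip.S3ZipFile;
import io.github.nejckorasa.s3.unzip.strategy.NoSplitUnzipStrategy;
import io.github.nejckorasa.s3.upload.S3MultipartUpload;

public class NoSplitUnzipStrategyExample {

    // zipFile is assumed to be supplied by the library's higher-level API;
    // its construction is not shown in this class and is not reproduced here.
    static void unzipSingleEntry(S3ZipFile zipFile) {
        AmazonS3 s3Client = AmazonS3ClientBuilder.standard().build();

        // new NoSplitUnzipStrategy() would use the defaults shown above
        // (S3MultipartUpload.Config.DEFAULT and 20 MB upload parts); here the
        // configuration and the part buffer size (50 MB) are set explicitly instead.
        NoSplitUnzipStrategy strategy = new NoSplitUnzipStrategy(S3MultipartUpload.Config.DEFAULT)
                .withUploadPartBytesLimit(50 * 1024 * 1024);

        // Streams the unzipped entry back to S3 as a single object via multipart upload
        strategy.unzip(zipFile, s3Client);
    }
}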