com.google.cloud.storage.ChunkSegmenter Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of google-cloud-storage Show documentation
Show all versions of google-cloud-storage Show documentation
Java idiomatic client for Google Cloud Storage.
/*
* Copyright 2022 Google LLC
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.google.cloud.storage;
import com.google.cloud.storage.Crc32cValue.Crc32cLengthKnown;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.math.IntMath;
import com.google.protobuf.ByteString;
import java.math.RoundingMode;
import java.nio.ByteBuffer;
import java.util.ArrayDeque;
import java.util.Deque;
import org.checkerframework.checker.nullness.qual.Nullable;
/**
* When processing a "chunk" of data to write to GCS, we must turn our logical chunk into N
* segments. Each resulting segment will then become an individual message.
*/
final class ChunkSegmenter {
private final Hasher hasher;
private final ByteStringStrategy bss;
private final int maxSegmentSize;
private final int blockSize;
ChunkSegmenter(Hasher hasher, ByteStringStrategy bss, int maxSegmentSize) {
this(hasher, bss, maxSegmentSize, ByteSizeConstants._256KiB);
}
@VisibleForTesting
ChunkSegmenter(Hasher hasher, ByteStringStrategy bss, int maxSegmentSize, int blockSize) {
int mod = maxSegmentSize % blockSize;
Preconditions.checkArgument(
mod == 0,
"maxSegmentSize % blockSize == 0 (%s % %s == %s)",
maxSegmentSize,
blockSize,
mod);
this.hasher = hasher;
this.bss = bss;
this.maxSegmentSize = maxSegmentSize;
this.blockSize = blockSize;
}
Hasher getHasher() {
return hasher;
}
ChunkSegment[] segmentBuffer(ByteBuffer bb) {
return segmentBuffers(new ByteBuffer[] {bb}, 0, 1);
}
/**
* Given {@code bbs}, yield N segments, where each segment is at most {@code maxSegmentSize}
* bytes.
*
* An example:
*
*
* Given a "chunk" consisting of two ByteBuffers, A and B, where A contains 3 MiB and B contains 6.6 MiB
* A: 3 MiB B: 6.6 MiB
* |-----------------------------|-----------------------------------------------------------------|
*
* Produce segments
* S1: 2 MiB S2: 2 MiB S3: 2 MiB S4: 2 MiB S5: 1.6 MiB
* |-------------------|-------------------|-------------------|-------------------|---------------|
*
*
* Each segment will conditionally compute a crc32c value depending upon {@code hasher}.
*
* @see #segmentBuffers(ByteBuffer[], int, int)
*/
ChunkSegment[] segmentBuffers(ByteBuffer[] bbs) {
return segmentBuffers(bbs, 0, bbs.length);
}
ChunkSegment[] segmentBuffers(ByteBuffer[] bbs, int offset, int length) {
return segmentBuffers(bbs, offset, length, true);
}
ChunkSegment[] segmentBuffers(
ByteBuffer[] bbs, int offset, int length, boolean allowUnalignedBlocks) {
// turn this into a single branch, rather than multiple that would need to be checked each
// element of the iteration
if (allowUnalignedBlocks) {
return segmentWithUnaligned(bbs, offset, length);
} else {
return segmentWithoutUnaligned(bbs, offset, length);
}
}
private ChunkSegment[] segmentWithUnaligned(ByteBuffer[] bbs, int offset, int length) {
Deque data = new ArrayDeque<>();
for (int i = offset; i < length; i++) {
ByteBuffer buffer = bbs[i];
int remaining;
while ((remaining = buffer.remaining()) > 0) {
consumeBytes(data, remaining, buffer);
}
}
return data.toArray(new ChunkSegment[0]);
}
private ChunkSegment[] segmentWithoutUnaligned(ByteBuffer[] bbs, int offset, int length) {
Deque data = new ArrayDeque<>();
final long totalRemaining = Buffers.totalRemaining(bbs, offset, length);
long consumedSoFar = 0;
int currentBlockPending = blockSize;
for (int i = offset; i < length; i++) {
ByteBuffer buffer = bbs[i];
int remaining;
while ((remaining = buffer.remaining()) > 0) {
long overallRemaining = totalRemaining - consumedSoFar;
if (overallRemaining < blockSize && currentBlockPending == blockSize) {
break;
}
int numBytesConsumable;
if (remaining >= blockSize) {
int blockCount = IntMath.divide(remaining, blockSize, RoundingMode.DOWN);
numBytesConsumable = blockCount * blockSize;
} else if (currentBlockPending < blockSize) {
numBytesConsumable = currentBlockPending;
currentBlockPending = blockSize;
} else {
numBytesConsumable = remaining;
currentBlockPending = currentBlockPending - remaining;
}
if (numBytesConsumable <= 0) {
continue;
}
consumedSoFar += consumeBytes(data, numBytesConsumable, buffer);
}
}
return data.toArray(new ChunkSegment[0]);
}
private long consumeBytes(Deque data, int numBytesConsumable, ByteBuffer buffer) {
// either no chunk or most recent chunk is full, start a new one
ChunkSegment peekLast = data.peekLast();
if (peekLast == null || peekLast.b.size() == maxSegmentSize) {
int limit = Math.min(numBytesConsumable, maxSegmentSize);
ChunkSegment datum = newSegment(buffer, limit);
data.addLast(datum);
return limit;
} else {
ChunkSegment chunkSoFar = data.pollLast();
//noinspection ConstantConditions -- covered by peekLast check above
int limit = Math.min(numBytesConsumable, maxSegmentSize - chunkSoFar.b.size());
ChunkSegment datum = newSegment(buffer, limit);
ChunkSegment plus = chunkSoFar.concat(datum);
data.addLast(plus);
return limit;
}
}
private ChunkSegment newSegment(ByteBuffer buffer, int limit) {
final ByteBuffer slice = buffer.slice();
slice.limit(limit);
Crc32cLengthKnown hash = hasher.hash(slice::duplicate);
ByteString byteString = bss.apply(slice);
Buffers.position(buffer, buffer.position() + limit);
return new ChunkSegment(byteString, hash);
}
final class ChunkSegment {
private final ByteString b;
@Nullable private final Crc32cLengthKnown crc32c;
private final boolean onlyFullBlocks;
private ChunkSegment(ByteString b, @Nullable Crc32cLengthKnown crc32c) {
this.b = b;
this.onlyFullBlocks = b.size() % blockSize == 0;
this.crc32c = crc32c;
}
public ChunkSegment concat(ChunkSegment other) {
Crc32cLengthKnown newCrc = null;
if (crc32c != null && other.crc32c != null) {
newCrc = crc32c.concat(other.crc32c);
}
ByteString concat = b.concat(other.b);
return new ChunkSegment(concat, newCrc);
}
public ByteString getB() {
return b;
}
@Nullable
public Crc32cLengthKnown getCrc32c() {
return crc32c;
}
public boolean isOnlyFullBlocks() {
return onlyFullBlocks;
}
}
}