/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package io.trino.execution.buffer;

import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.common.util.concurrent.SettableFuture;
import com.google.errorprone.annotations.Immutable;
import com.google.errorprone.annotations.ThreadSafe;
import com.google.errorprone.annotations.concurrent.GuardedBy;
import io.airlift.slice.Slice;
import io.airlift.units.DataSize;
import io.trino.execution.buffer.PipelinedOutputBuffers.OutputBufferId;
import io.trino.execution.buffer.SerializedPageReference.PagesReleasedListener;
import java.util.ArrayList;
import java.util.Collection;
import java.util.LinkedList;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;

import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.util.concurrent.Futures.immediateFuture;
import static io.trino.execution.buffer.BufferResult.emptyResults;
import static io.trino.execution.buffer.SerializedPageReference.dereferencePages;
import static java.lang.Math.toIntExact;
import static java.util.Objects.requireNonNull;
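
/**
 * Buffers serialized pages for a single output buffer client, identified by an
 * {@link OutputBufferId}. A client polls with a sequence id token: each read
 * acknowledges (drops) all pages before the token and returns pages starting at
 * it, along with the next token. A sketch of the expected client loop follows;
 * the {@code process} callback and the exception handling elided around
 * {@code get()} are illustrative, not part of this API:
 * <pre>{@code
 * long token = 0;
 * while (true) {
 *     BufferResult result = buffer.getPages(token, DataSize.of(1, MEGABYTE)).get();
 *     result.getSerializedPages().forEach(page -> process(page));
 *     token = result.getNextToken();
 *     if (result.isBufferComplete()) {
 *         buffer.destroy(); // acknowledge that the end of the buffer was observed
 *         break;
 *     }
 * }
 * }</pre>
 */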
@ThreadSafe
class ClientBuffer
{
private final String taskInstanceId;
private final OutputBufferId bufferId;
private final PagesReleasedListener onPagesReleased;
private final AtomicLong rowsAdded = new AtomicLong();
private final AtomicLong pagesAdded = new AtomicLong();
private final AtomicLong bufferedBytes = new AtomicLong();
@GuardedBy("this")
private final AtomicLong currentSequenceId = new AtomicLong();
@GuardedBy("this")
private final LinkedList<SerializedPageReference> pages = new LinkedList<>();
@GuardedBy("this")
private boolean noMorePages;
// destroyed is set when the client sends a DELETE to the buffer
// this is an acknowledgement that the client has observed the end of the buffer
@GuardedBy("this")
private final AtomicBoolean destroyed = new AtomicBoolean();
@GuardedBy("this")
private PendingRead pendingRead;

public ClientBuffer(String taskInstanceId, OutputBufferId bufferId, PagesReleasedListener onPagesReleased)
{
this.taskInstanceId = requireNonNull(taskInstanceId, "taskInstanceId is null");
this.bufferId = requireNonNull(bufferId, "bufferId is null");
this.onPagesReleased = requireNonNull(onPagesReleased, "onPagesReleased is null");
}
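
/**
 * Returns a snapshot of this buffer's counters for reporting. The snapshot is
 * taken without locking, so the individual fields may be slightly out of sync
 * with each other; if the buffer is destroyed, the buffered page count is
 * forced to zero.
 */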
public PipelinedBufferInfo getInfo()
{
//
// NOTE: this code must be lock-free so that state machine updates do not hang
//
@SuppressWarnings("FieldAccessNotGuarded")
boolean destroyed = this.destroyed.get();
@SuppressWarnings("FieldAccessNotGuarded")
long sequenceId = this.currentSequenceId.get();
// if destroyed, the buffered page count must be zero regardless of observation ordering in this lock-free code
int bufferedPages = destroyed ? 0 : Math.max(toIntExact(pagesAdded.get() - sequenceId), 0);
return new PipelinedBufferInfo(bufferId, rowsAdded.get(), pagesAdded.get(), bufferedPages, bufferedBytes.get(), sequenceId, destroyed);
}

public boolean isDestroyed()
{
//
// NOTE: this code must be lock-free so that state machine updates do not hang
//
@SuppressWarnings("FieldAccessNotGuarded")
boolean destroyed = this.destroyed.get();
return destroyed;
}
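
/**
 * Drops all buffered pages, marks this buffer destroyed, and completes any
 * pending read with an empty result. This is triggered by the client sending a
 * DELETE, acknowledging that it has observed the end of the buffer.
 */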
public void destroy()
{
List<SerializedPageReference> removedPages;
PendingRead pendingRead;
synchronized (this) {
removedPages = ImmutableList.copyOf(pages);
pages.clear();
bufferedBytes.getAndSet(0);
noMorePages = true;
destroyed.set(true);
pendingRead = this.pendingRead;
this.pendingRead = null;
}
dereferencePages(removedPages, onPagesReleased);
if (pendingRead != null) {
pendingRead.completeResultFutureWithEmpty();
}
}
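
/**
 * Adds pages to this buffer and completes any pending read with the new data.
 * Pages arriving after no-more-pages has been set are ignored, which can
 * happen with limit queries.
 */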
public void enqueuePages(Collection<SerializedPageReference> pages)
{
PendingRead pendingRead;
synchronized (this) {
// ignore pages after no more pages is set
// this can happen with limit queries
if (noMorePages) {
return;
}
addPages(pages);
pendingRead = this.pendingRead;
this.pendingRead = null;
}
// we just added a page, so process the pending read
if (pendingRead != null) {
processRead(pendingRead);
}
}

private synchronized void addPages(Collection<SerializedPageReference> pages)
{
long rowCount = 0;
long bytesAdded = 0;
int pageCount = 0;
for (SerializedPageReference page : pages) {
page.addReference();
pageCount++;
rowCount += page.getPositionCount();
bytesAdded += page.getRetainedSizeInBytes();
}
this.pages.addAll(pages);
rowsAdded.addAndGet(rowCount);
pagesAdded.addAndGet(pageCount);
bufferedBytes.addAndGet(bytesAdded);
}
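
/**
 * Reads pages starting at {@code sequenceId}, first acknowledging (dropping)
 * all pages before it. If the buffer is empty and more pages may arrive, the
 * returned future completes once data is added, no-more-pages is set, or the
 * buffer is destroyed. A new call replaces any outstanding pending read, which
 * is completed with an empty result.
 */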
public ListenableFuture<BufferResult> getPages(long sequenceId, DataSize maxSize)
{
return getPages(sequenceId, maxSize, Optional.empty());
}

public ListenableFuture<BufferResult> getPages(long sequenceId, DataSize maxSize, Optional<PagesSupplier> pagesSupplier)
{
// acknowledge pages first, outside of locks, so as not to trigger callbacks while holding the lock
acknowledgePages(sequenceId);
// attempt to load some data before processing the read
pagesSupplier.ifPresent(supplier -> loadPagesIfNecessary(supplier, maxSize));
PendingRead oldPendingRead = null;
try {
synchronized (this) {
// save off the old pending read so we can abort it outside of the lock
oldPendingRead = this.pendingRead;
this.pendingRead = null;
// Return results immediately if we have data, there will be no more data, or this is
// an out-of-order request
if (!pages.isEmpty() || noMorePages || sequenceId != currentSequenceId.get()) {
return immediateFuture(processRead(sequenceId, maxSize));
}
// otherwise, wait for more data to arrive
pendingRead = new PendingRead(taskInstanceId, sequenceId, maxSize);
return pendingRead.getResultFuture();
}
}
finally {
if (oldPendingRead != null) {
// Each buffer is private to a single client, and each client should only have one outstanding
// read. Therefore, we abort the existing read since it was most likely abandoned by the client.
oldPendingRead.completeResultFutureWithEmpty();
}
}
}
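
/**
 * Marks this buffer as complete: no further pages will be accepted, and any
 * pending read is completed immediately so the client can observe the end of
 * the buffer. Duplicate calls are ignored.
 */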
public void setNoMorePages()
{
PendingRead pendingRead;
synchronized (this) {
// ignore duplicate calls
if (noMorePages) {
return;
}
noMorePages = true;
pendingRead = this.pendingRead;
this.pendingRead = null;
}
// there will be no more pages, so process the pending read
if (pendingRead != null) {
processRead(pendingRead);
}
}
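
/**
 * If a read is pending and the buffer is empty, pulls pages from the supplier
 * (up to the pending read's max size) and then completes the pending read.
 */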
public void loadPagesIfNecessary(PagesSupplier pagesSupplier)
{
requireNonNull(pagesSupplier, "pagesSupplier is null");
// Get the max size from the current pending read, which may not be the
// same pending read instance by the time pages are loaded, but this is
// safe since the size is rechecked before returning pages.
DataSize maxSize;
synchronized (this) {
if (pendingRead == null) {
return;
}
maxSize = pendingRead.getMaxSize();
}
boolean dataAddedOrNoMorePages = loadPagesIfNecessary(pagesSupplier, maxSize);
if (dataAddedOrNoMorePages) {
PendingRead pendingRead;
synchronized (this) {
pendingRead = this.pendingRead;
}
if (pendingRead != null) {
processRead(pendingRead);
}
}
}

/**
 * If there is no data, attempt to load some from the pages supplier.
 */
private boolean loadPagesIfNecessary(PagesSupplier pagesSupplier, DataSize maxSize)
{
assertNotHoldsLock("Cannot load pages while holding a lock on this");
boolean dataAddedOrNoMorePages;
List<SerializedPageReference> pageReferences;
synchronized (this) {
if (noMorePages) {
return false;
}
if (!pages.isEmpty()) {
return false;
}
// The page supplier has incremented the page reference count, and addPages below also increments
// the reference count, so we need to drop the page supplier's reference. The call to dereferencePages
// is performed outside of the synchronized block to avoid making a callback while holding a lock.
pageReferences = pagesSupplier.getPages(maxSize);
// add the pages to this buffer, which will increase the reference count
addPages(pageReferences);
// check for no more pages
if (!pagesSupplier.mayHaveMorePages()) {
noMorePages = true;
}
dataAddedOrNoMorePages = !pageReferences.isEmpty() || noMorePages;
}
// sent pages will have an initial reference count, so drop it
dereferencePages(pageReferences, onPagesReleased);
return dataAddedOrNoMorePages;
}

private void processRead(PendingRead pendingRead)
{
assertNotHoldsLock("Cannot process pending read while holding a lock on this");
if (pendingRead.getResultFuture().isDone()) {
return;
}
BufferResult bufferResult = processRead(pendingRead.getSequenceId(), pendingRead.getMaxSize());
pendingRead.getResultFuture().set(bufferResult);
}

/**
 * @return a result with at least one page if there are pages in the buffer; an empty result otherwise
 */
private synchronized BufferResult processRead(long sequenceId, DataSize maxSize)
{
// When pages are added to the partition buffer they are effectively
// assigned an id starting from zero. When a read is processed, the
// "token" is the id of the page to start the read from, so the first
// step of the read is to acknowledge, and drop all pages up to the
// provided sequenceId. Then pages starting from the sequenceId are
// returned with the sequenceId of the next page to read.
//
// Since the buffer API is asynchronous, there are a number of problems
// that can occur with out-of-order requests (typically from retries due to
// request failures):
// - Request to read pages that have already been acknowledged.
// Simply send a result with no pages and the requested sequenceId;
// since the client has already acknowledged the pages, it will
// ignore the out-of-order response.
// - Request to read after the buffer has been destroyed. When the
// buffer is destroyed all pages are dropped, so the read sequenceId
// appears to be off the end of the queue. Normally a read past the
// end of the queue would be an error, but this specific case is
// detected and handled. The client is sent an empty response with
// the finished flag set and next token is the max acknowledged page
// when the buffer is destroyed.
//
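// As a concrete example: with pages 0, 1, and 2 buffered and a current
// sequence id of 0, a read with token 0 whose size limit admits only the
// first two pages returns pages [0, 1] with next token 2; the client's
// following read with token 2 first acknowledges and drops pages 0 and 1,
// then returns page 2 with next token 3.
//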
// if request is for pages before the current position, just return an empty result
if (sequenceId < currentSequenceId.get()) {
return emptyResults(taskInstanceId, sequenceId, false);
}
// if this buffer is finished, notify the client of this, so the client
// will destroy this buffer
if (pages.isEmpty() && noMorePages) {
return emptyResults(taskInstanceId, currentSequenceId.get(), true);
}
// if request is for pages after the current position, there is a bug somewhere:
// a read call is always preceded by acknowledging pages, which
// will advance the sequence id to at least the requested position, unless
// the buffer is destroyed, in which case the buffer will be empty with
// no more pages set, which is checked above
verify(sequenceId == currentSequenceId.get(), "Invalid sequence id");
// read the new pages
long maxBytes = maxSize.toBytes();
List<Slice> result = new ArrayList<>();
long bytes = 0;
for (SerializedPageReference page : pages) {
bytes += page.getRetainedSizeInBytes();
// break (and don't add) if this page would exceed the limit
if (!result.isEmpty() && bytes > maxBytes) {
break;
}
result.add(page.getSerializedPage());
}
return new BufferResult(taskInstanceId, sequenceId, sequenceId + result.size(), false, result);
}

/**
* Drops pages up to the specified sequence id
*/
public void acknowledgePages(long sequenceId)
{
checkArgument(sequenceId >= 0, "Invalid sequence id");
// Fast path early-return without synchronizing
if (destroyed.get() || sequenceId < currentSequenceId.get()) {
return;
}
ImmutableList.Builder<SerializedPageReference> removedPages;
synchronized (this) {
if (destroyed.get()) {
return;
}
// if pages have already been acknowledged, just ignore this
long oldCurrentSequenceId = currentSequenceId.get();
if (sequenceId < oldCurrentSequenceId) {
return;
}
int pagesToRemove = toIntExact(sequenceId - oldCurrentSequenceId);
checkArgument(pagesToRemove <= pages.size(), "Invalid sequence id");
removedPages = ImmutableList.builderWithExpectedSize(pagesToRemove);
long bytesRemoved = 0;
for (int i = 0; i < pagesToRemove; i++) {
SerializedPageReference removedPage = pages.removeFirst();
removedPages.add(removedPage);
bytesRemoved += removedPage.getRetainedSizeInBytes();
}
// update current sequence id
verify(currentSequenceId.compareAndSet(oldCurrentSequenceId, oldCurrentSequenceId + pagesToRemove));
// update memory tracking
verify(bufferedBytes.addAndGet(-bytesRemoved) >= 0);
}
// Dereference pages outside of synchronized block to trigger callbacks
dereferencePages(removedPages.build(), onPagesReleased);
}
@SuppressWarnings("checkstyle:IllegalToken")
private void assertNotHoldsLock(String message)
{
assert !Thread.holdsLock(this) : message;
}

@Override
public String toString()
{
@SuppressWarnings("FieldAccessNotGuarded")
long sequenceId = currentSequenceId.get();
@SuppressWarnings("FieldAccessNotGuarded")
boolean destroyed = this.destroyed.get();
return toStringHelper(this)
.add("bufferId", bufferId)
.add("sequenceId", sequenceId)
.add("destroyed", destroyed)
.toString();
}

@Immutable
private static class PendingRead
{
private final String taskInstanceId;
private final long sequenceId;
private final DataSize maxSize;
private final SettableFuture<BufferResult> resultFuture = SettableFuture.create();

private PendingRead(String taskInstanceId, long sequenceId, DataSize maxSize)
{
this.taskInstanceId = requireNonNull(taskInstanceId, "taskInstanceId is null");
this.sequenceId = sequenceId;
this.maxSize = maxSize;
}

public long getSequenceId()
{
return sequenceId;
}

public DataSize getMaxSize()
{
return maxSize;
}

public SettableFuture<BufferResult> getResultFuture()
{
return resultFuture;
}

public void completeResultFutureWithEmpty()
{
resultFuture.set(emptyResults(taskInstanceId, sequenceId, false));
}
}
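
/**
 * A source of pages that this buffer can pull from on demand, used by
 * {@link #loadPagesIfNecessary(PagesSupplier)} to lazily fill the buffer
 * while a read is pending.
 */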
public interface PagesSupplier
{
/**
* Gets pages up to the specified size limit or a single page that exceeds the size limit.
*/
List<SerializedPageReference> getPages(DataSize maxSize);

/**
* @return true if more pages may be produced; false otherwise
*/
boolean mayHaveMorePages();
}
}