// io.trino.plugin.thrift.ThriftIndexPageSource - Maven / Gradle / Ivy
// The newest version!
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.thrift;
import com.google.common.collect.ImmutableList;
import com.google.common.util.concurrent.ListenableFuture;
import io.airlift.drift.client.DriftClient;
import io.trino.plugin.thrift.api.TrinoThriftId;
import io.trino.plugin.thrift.api.TrinoThriftNullableToken;
import io.trino.plugin.thrift.api.TrinoThriftPageResult;
import io.trino.plugin.thrift.api.TrinoThriftSchemaTableName;
import io.trino.plugin.thrift.api.TrinoThriftService;
import io.trino.plugin.thrift.api.TrinoThriftSplit;
import io.trino.plugin.thrift.api.TrinoThriftSplitBatch;
import io.trino.plugin.thrift.api.TrinoThriftTupleDomain;
import io.trino.spi.Page;
import io.trino.spi.connector.ColumnHandle;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.RecordSet;
import io.trino.spi.type.Type;
import jakarta.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.AtomicLong;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.util.concurrent.Futures.nonCancellationPropagating;
import static com.google.common.util.concurrent.MoreExecutors.directExecutor;
import static io.airlift.concurrent.MoreFutures.getFutureValue;
import static io.airlift.concurrent.MoreFutures.toCompletableFuture;
import static io.airlift.concurrent.MoreFutures.whenAnyComplete;
import static io.trino.plugin.thrift.api.TrinoThriftPageResult.fromRecordSet;
import static io.trino.plugin.thrift.util.ThriftExceptions.catchingThriftException;
import static io.trino.plugin.thrift.util.TupleDomainConversion.tupleDomainToThriftTupleDomain;
import static java.lang.Math.min;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.joining;
public class ThriftIndexPageSource
implements ConnectorPageSource
{
private static final int MAX_SPLIT_COUNT = 10_000_000;
private final DriftClient client;
private final Map thriftHeaders;
private final TrinoThriftSchemaTableName schemaTableName;
private final List lookupColumnNames;
private final List outputColumnNames;
private final List outputColumnTypes;
private final TrinoThriftTupleDomain outputConstraint;
private final TrinoThriftPageResult keys;
private final long maxBytesPerResponse;
private final int lookupRequestsConcurrency;
private final AtomicLong readTimeNanos = new AtomicLong(0);
private long completedBytes;
private CompletableFuture> statusFuture;
private ListenableFuture splitFuture;
private ListenableFuture dataSignalFuture;
private final List splits = new ArrayList<>();
private final Queue> dataRequests = new LinkedList<>();
private final Map, RunningSplitContext> contexts;
private final ThriftConnectorStats stats;
private int splitIndex;
private boolean haveSplits;
private boolean finished;
public ThriftIndexPageSource(
DriftClient client,
Map thriftHeaders,
ThriftConnectorStats stats,
ThriftIndexHandle indexHandle,
List lookupColumns,
List outputColumns,
RecordSet keys,
long maxBytesPerResponse,
int lookupRequestsConcurrency)
{
this.client = requireNonNull(client, "client is null");
this.thriftHeaders = requireNonNull(thriftHeaders, "thriftHeaders is null");
this.stats = requireNonNull(stats, "stats is null");
requireNonNull(indexHandle, "indexHandle is null");
this.schemaTableName = new TrinoThriftSchemaTableName(indexHandle.getSchemaTableName());
this.outputConstraint = tupleDomainToThriftTupleDomain(indexHandle.getTupleDomain());
requireNonNull(lookupColumns, "lookupColumns is null");
this.lookupColumnNames = lookupColumns.stream()
.map(ThriftColumnHandle.class::cast)
.map(ThriftColumnHandle::getColumnName)
.collect(toImmutableList());
requireNonNull(outputColumns, "outputColumns is null");
ImmutableList.Builder outputColumnNames = ImmutableList.builder();
ImmutableList.Builder outputColumnTypes = ImmutableList.builder();
for (ColumnHandle columnHandle : outputColumns) {
ThriftColumnHandle thriftColumnHandle = (ThriftColumnHandle) columnHandle;
outputColumnNames.add(thriftColumnHandle.getColumnName());
outputColumnTypes.add(thriftColumnHandle.getColumnType());
}
this.outputColumnNames = outputColumnNames.build();
this.outputColumnTypes = outputColumnTypes.build();
this.keys = fromRecordSet(requireNonNull(keys, "keys is null"));
checkArgument(maxBytesPerResponse > 0, "maxBytesPerResponse is zero or negative");
this.maxBytesPerResponse = maxBytesPerResponse;
checkArgument(lookupRequestsConcurrency >= 1, "lookupRequestsConcurrency is less than one");
this.lookupRequestsConcurrency = lookupRequestsConcurrency;
this.contexts = new HashMap<>(lookupRequestsConcurrency);
}
@Override
public long getCompletedBytes()
{
return completedBytes;
}
@Override
public long getReadTimeNanos()
{
return readTimeNanos.get();
}
@Override
public long getMemoryUsage()
{
return 0;
}
@Override
public CompletableFuture> isBlocked()
{
return statusFuture == null ? NOT_BLOCKED : statusFuture;
}
@Override
public boolean isFinished()
{
return finished;
}
@Override
public Page getNextPage()
{
if (finished) {
return null;
}
if (!loadAllSplits()) {
return null;
}
// check if any data requests were started
if (dataSignalFuture == null) {
// no data requests were started, start a number of initial requests
checkState(contexts.isEmpty() && dataRequests.isEmpty(), "some splits are already started");
if (splits.isEmpty()) {
// all done: no splits
finished = true;
return null;
}
for (int i = 0; i < min(lookupRequestsConcurrency, splits.size()); i++) {
startDataFetchForNextSplit();
}
updateSignalAndStatusFutures();
}
// check if any data request is finished
if (!dataSignalFuture.isDone()) {
// not finished yet
return null;
}
// at least one of data requests completed
ListenableFuture resultFuture = getAndRemoveNextCompletedRequest();
RunningSplitContext resultContext = contexts.remove(resultFuture);
checkState(resultContext != null, "no associated context for the request");
TrinoThriftPageResult pageResult = getFutureValue(resultFuture);
Page page = pageResult.toPage(outputColumnTypes);
if (page != null) {
long pageSize = page.getSizeInBytes();
completedBytes += pageSize;
stats.addIndexPageSize(pageSize);
}
else {
stats.addIndexPageSize(0);
}
if (pageResult.getNextToken() != null) {
// can get more data
sendDataRequest(resultContext, pageResult.getNextToken());
updateSignalAndStatusFutures();
return page;
}
// are there more splits available
if (splitIndex < splits.size()) {
// can send data request for a new split
startDataFetchForNextSplit();
updateSignalAndStatusFutures();
}
else if (!dataRequests.isEmpty()) {
// no more new splits, but some requests are still in progress, wait for them
updateSignalAndStatusFutures();
}
else {
// all done: no more new splits, no requests in progress
dataSignalFuture = null;
statusFuture = null;
finished = true;
}
return page;
}
private boolean loadAllSplits()
{
if (haveSplits) {
return true;
}
// check if request for splits was sent
if (splitFuture == null) {
// didn't start fetching splits, send the first request now
splitFuture = sendSplitRequest(null);
statusFuture = toCompletableFuture(nonCancellationPropagating(splitFuture));
}
if (!splitFuture.isDone()) {
// split request is in progress
return false;
}
// split request is ready
TrinoThriftSplitBatch batch = getFutureValue(splitFuture);
splits.addAll(batch.getSplits());
// check if it's possible to request more splits
if (batch.getNextToken() != null) {
// can get more splits, send request
splitFuture = sendSplitRequest(batch.getNextToken());
statusFuture = toCompletableFuture(nonCancellationPropagating(splitFuture));
return false;
}
// no more splits
splitFuture = null;
statusFuture = null;
haveSplits = true;
return true;
}
private void updateSignalAndStatusFutures()
{
dataSignalFuture = whenAnyComplete(dataRequests);
statusFuture = toCompletableFuture(nonCancellationPropagating(dataSignalFuture));
}
private void startDataFetchForNextSplit()
{
TrinoThriftSplit split = splits.get(splitIndex);
splitIndex++;
RunningSplitContext context = new RunningSplitContext(openClient(split), split);
sendDataRequest(context, null);
}
private ListenableFuture sendSplitRequest(@Nullable TrinoThriftId nextToken)
{
long start = System.nanoTime();
ListenableFuture future = client.get(thriftHeaders).getIndexSplits(
schemaTableName,
lookupColumnNames,
outputColumnNames,
keys,
outputConstraint,
MAX_SPLIT_COUNT,
new TrinoThriftNullableToken(nextToken));
future = catchingThriftException(future);
future.addListener(() -> readTimeNanos.addAndGet(System.nanoTime() - start), directExecutor());
return future;
}
private void sendDataRequest(RunningSplitContext context, @Nullable TrinoThriftId nextToken)
{
long start = System.nanoTime();
ListenableFuture future = context.getClient().getRows(
context.getSplit().getSplitId(),
outputColumnNames,
maxBytesPerResponse,
new TrinoThriftNullableToken(nextToken));
future = catchingThriftException(future);
future.addListener(() -> readTimeNanos.addAndGet(System.nanoTime() - start), directExecutor());
dataRequests.add(future);
contexts.put(future, context);
}
private TrinoThriftService openClient(TrinoThriftSplit split)
{
if (split.getHosts().isEmpty()) {
return client.get(thriftHeaders);
}
String hosts = split.getHosts().stream()
.map(host -> host.toHostAddress().toString())
.collect(joining(","));
return client.get(Optional.of(hosts), thriftHeaders);
}
@Override
public void close()
{
// cancel futures if available
cancelQuietly(splitFuture);
dataRequests.forEach(ThriftIndexPageSource::cancelQuietly);
}
private ListenableFuture getAndRemoveNextCompletedRequest()
{
Iterator> iterator = dataRequests.iterator();
while (iterator.hasNext()) {
ListenableFuture future = iterator.next();
if (future.isDone()) {
iterator.remove();
return future;
}
}
throw new IllegalStateException("No completed splits in the queue");
}
private static void cancelQuietly(Future> future)
{
if (future != null) {
future.cancel(true);
}
}
private static final class RunningSplitContext
{
private final TrinoThriftService client;
private final TrinoThriftSplit split;
public RunningSplitContext(TrinoThriftService client, TrinoThriftSplit split)
{
this.client = client;
this.split = split;
}
public TrinoThriftService getClient()
{
return client;
}
public TrinoThriftSplit getSplit()
{
return split;
}
}
}