Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.operator.index;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.util.concurrent.ListenableFuture;
import com.google.errorprone.annotations.ThreadSafe;
import com.google.errorprone.annotations.concurrent.GuardedBy;
import io.airlift.units.DataSize;
import io.trino.annotation.NotThreadSafe;
import io.trino.execution.ScheduledSplit;
import io.trino.execution.SplitAssignment;
import io.trino.metadata.Split;
import io.trino.operator.Driver;
import io.trino.operator.DriverFactory;
import io.trino.operator.PagesIndex;
import io.trino.operator.PipelineContext;
import io.trino.operator.TaskContext;
import io.trino.operator.join.LookupSource;
import io.trino.spi.Page;
import io.trino.spi.PageBuilder;
import io.trino.spi.connector.CatalogHandle;
import io.trino.spi.connector.CatalogHandle.CatalogVersion;
import io.trino.spi.type.Type;
import io.trino.sql.gen.JoinCompiler;
import io.trino.sql.planner.plan.PlanNodeId;
import io.trino.type.BlockTypeOperators;
import io.trino.type.BlockTypeOperators.BlockPositionEqual;
import java.util.ArrayList;
import java.util.List;
import java.util.OptionalInt;
import java.util.Set;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Predicate;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.trino.spi.connector.CatalogHandle.createRootCatalogHandle;
import static java.util.Objects.requireNonNull;
@ThreadSafe
public class IndexLoader
{
private static final CatalogHandle INDEX_CATALOG_HANDLE = createRootCatalogHandle("$index", new CatalogVersion("index"));
private final BlockingQueue updateRequests = new LinkedBlockingQueue<>();
private final List outputTypes;
private final IndexBuildDriverFactoryProvider indexBuildDriverFactoryProvider;
private final int expectedPositions;
private final DataSize maxIndexMemorySize;
private final IndexJoinLookupStats stats;
private final AtomicReference taskContextReference = new AtomicReference<>();
private final Set lookupSourceInputChannels;
private final List keyOutputChannels;
private final OptionalInt keyOutputHashChannel;
private final List keyTypes;
private final List keyEqualOperators;
private final PagesIndex.Factory pagesIndexFactory;
private final JoinCompiler joinCompiler;
@GuardedBy("this")
private IndexSnapshotLoader indexSnapshotLoader; // Lazily initialized
@GuardedBy("this")
private PipelineContext pipelineContext; // Lazily initialized
@GuardedBy("this")
private final AtomicReference indexSnapshotReference;
public IndexLoader(
Set lookupSourceInputChannels,
List keyOutputChannels,
OptionalInt keyOutputHashChannel,
List outputTypes,
IndexBuildDriverFactoryProvider indexBuildDriverFactoryProvider,
int expectedPositions,
DataSize maxIndexMemorySize,
IndexJoinLookupStats stats,
PagesIndex.Factory pagesIndexFactory,
JoinCompiler joinCompiler,
BlockTypeOperators blockTypeOperators)
{
requireNonNull(lookupSourceInputChannels, "lookupSourceInputChannels is null");
checkArgument(!lookupSourceInputChannels.isEmpty(), "lookupSourceInputChannels must not be empty");
requireNonNull(keyOutputChannels, "keyOutputChannels is null");
checkArgument(!keyOutputChannels.isEmpty(), "keyOutputChannels must not be empty");
requireNonNull(keyOutputHashChannel, "keyOutputHashChannel is null");
checkArgument(lookupSourceInputChannels.size() <= keyOutputChannels.size(), "Lookup channels must supply a subset of the actual index columns");
requireNonNull(outputTypes, "outputTypes is null");
requireNonNull(indexBuildDriverFactoryProvider, "indexBuildDriverFactoryProvider is null");
requireNonNull(maxIndexMemorySize, "maxIndexMemorySize is null");
requireNonNull(stats, "stats is null");
requireNonNull(pagesIndexFactory, "pagesIndexFactory is null");
requireNonNull(joinCompiler, "joinCompiler is null");
this.lookupSourceInputChannels = ImmutableSet.copyOf(lookupSourceInputChannels);
this.keyOutputChannels = ImmutableList.copyOf(keyOutputChannels);
this.keyOutputHashChannel = keyOutputHashChannel;
this.outputTypes = ImmutableList.copyOf(outputTypes);
this.indexBuildDriverFactoryProvider = indexBuildDriverFactoryProvider;
this.expectedPositions = expectedPositions;
this.maxIndexMemorySize = maxIndexMemorySize;
this.stats = stats;
this.pagesIndexFactory = pagesIndexFactory;
this.joinCompiler = joinCompiler;
this.keyTypes = keyOutputChannels.stream()
.map(outputTypes::get)
.collect(toImmutableList());
this.keyEqualOperators = keyTypes.stream()
.map(blockTypeOperators::getEqualOperator)
.collect(toImmutableList());
// start with an empty source
this.indexSnapshotReference = new AtomicReference<>(new IndexSnapshot(new EmptyLookupSource(), new EmptyLookupSource()));
}
// This is a ghetto way to acquire a TaskContext at runtime (unavailable at planning)
public void setContext(TaskContext taskContext)
{
taskContextReference.compareAndSet(null, taskContext);
}
public List getOutputTypes()
{
return outputTypes;
}
public IndexSnapshot getIndexSnapshot()
{
return indexSnapshotReference.get();
}
public IndexedData getIndexedDataForKeys(int position, Page indexPage)
{
// Normalize the indexBlocks so that they only encompass the unloaded positions
int totalPositions = indexPage.getPositionCount();
int remainingPositions = totalPositions - position;
return getIndexedDataForKeys(indexPage.getRegion(position, remainingPositions));
}
private IndexedData getIndexedDataForKeys(Page indexPage)
{
UpdateRequest myUpdateRequest = new UpdateRequest(indexPage);
updateRequests.add(myUpdateRequest);
synchronized (this) {
if (!myUpdateRequest.isFinished()) {
stats.recordIndexJoinLookup();
initializeStateIfNecessary();
List requests = new ArrayList<>();
updateRequests.drainTo(requests);
try {
long initialCacheSizeInBytes = indexSnapshotLoader.getCacheSizeInBytes();
// TODO: add heuristic to jump to load strategy that is most likely to succeed
// Try to load all the requests
if (indexSnapshotLoader.load(requests)) {
return myUpdateRequest.getFinishedIndexSnapshot();
}
// Retry again if there was initial data (load failures will clear the cache automatically)
if (initialCacheSizeInBytes > 0 && indexSnapshotLoader.load(requests)) {
stats.recordSuccessfulIndexJoinLookupByCacheReset();
return myUpdateRequest.getFinishedIndexSnapshot();
}
}
catch (Throwable t) {
// Mark requests as failed since they will not be requeued
for (UpdateRequest request : requests) {
request.failed(t);
}
throw t;
}
// Try loading just my request
if (requests.size() > 1) {
// Add all other requests back into the queue
requests.stream()
.filter(Predicate.isEqual(myUpdateRequest).negate())
.forEach(updateRequests::add);
if (indexSnapshotLoader.load(ImmutableList.of(myUpdateRequest))) {
stats.recordSuccessfulIndexJoinLookupBySingleRequest();
return myUpdateRequest.getFinishedIndexSnapshot();
}
}
// Repeatedly decrease the number of rows to load by a factor of 10
int totalPositions = indexPage.getPositionCount();
int attemptedPositions = totalPositions / 10;
while (attemptedPositions > 1) {
myUpdateRequest = new UpdateRequest(indexPage.getRegion(0, attemptedPositions));
if (indexSnapshotLoader.load(ImmutableList.of(myUpdateRequest))) {
stats.recordSuccessfulIndexJoinLookupByLimitedRequest();
return myUpdateRequest.getFinishedIndexSnapshot();
}
attemptedPositions /= 10;
}
// Just load the single index key in a streaming fashion (no caching)
stats.recordStreamedIndexJoinLookup();
return streamIndexDataForSingleKey(myUpdateRequest);
}
}
// return the snapshot from the update request as another thread may have already flushed the request
return myUpdateRequest.getFinishedIndexSnapshot();
}
public IndexedData streamIndexDataForSingleKey(UpdateRequest updateRequest)
{
Page indexKeyTuple = updateRequest.getPage().getRegion(0, 1);
PageBuffer pageBuffer = new PageBuffer(100);
DriverFactory driverFactory = indexBuildDriverFactoryProvider.createStreaming(pageBuffer, indexKeyTuple);
Driver driver = driverFactory.createDriver(pipelineContext.addDriverContext());
PageRecordSet pageRecordSet = new PageRecordSet(keyTypes, indexKeyTuple);
PlanNodeId planNodeId = driverFactory.getSourceId().get();
ScheduledSplit split = new ScheduledSplit(0, planNodeId, new Split(INDEX_CATALOG_HANDLE, new IndexSplit(pageRecordSet)));
driver.updateSplitAssignment(new SplitAssignment(planNodeId, ImmutableSet.of(split), true));
return new StreamingIndexedData(outputTypes, keyEqualOperators, indexKeyTuple, pageBuffer, driver);
}
private synchronized void initializeStateIfNecessary()
{
if (pipelineContext == null) {
TaskContext taskContext = taskContextReference.get();
checkState(taskContext != null, "Task context must be set before index can be built");
pipelineContext = taskContext.addPipelineContext(indexBuildDriverFactoryProvider.getPipelineId(), true, true, false);
}
if (indexSnapshotLoader == null) {
indexSnapshotLoader = new IndexSnapshotLoader(
indexBuildDriverFactoryProvider,
pipelineContext,
indexSnapshotReference,
lookupSourceInputChannels,
keyTypes,
keyOutputChannels,
keyOutputHashChannel,
expectedPositions,
maxIndexMemorySize,
pagesIndexFactory,
joinCompiler);
}
}
@NotThreadSafe
private static class IndexSnapshotLoader
{
private final DriverFactory driverFactory;
private final PipelineContext pipelineContext;
private final Set lookupSourceInputChannels;
private final Set allInputChannels;
private final List indexTypes;
private final AtomicReference indexSnapshotReference;
private final JoinCompiler joinCompiler;
private final IndexSnapshotBuilder indexSnapshotBuilder;
private IndexSnapshotLoader(
IndexBuildDriverFactoryProvider indexBuildDriverFactoryProvider,
PipelineContext pipelineContext,
AtomicReference indexSnapshotReference,
Set lookupSourceInputChannels,
List indexTypes,
List keyOutputChannels,
OptionalInt keyOutputHashChannel,
int expectedPositions,
DataSize maxIndexMemorySize,
PagesIndex.Factory pagesIndexFactory,
JoinCompiler joinCompiler)
{
this.pipelineContext = pipelineContext;
this.indexSnapshotReference = indexSnapshotReference;
this.lookupSourceInputChannels = lookupSourceInputChannels;
this.indexTypes = indexTypes;
this.joinCompiler = joinCompiler;
List outputTypes = indexBuildDriverFactoryProvider.getOutputTypes();
this.indexSnapshotBuilder = new IndexSnapshotBuilder(
outputTypes,
keyOutputChannels,
keyOutputHashChannel,
pipelineContext.addDriverContext(),
maxIndexMemorySize,
expectedPositions,
pagesIndexFactory);
this.driverFactory = indexBuildDriverFactoryProvider.createSnapshot(pipelineContext.getPipelineId(), this.indexSnapshotBuilder);
ImmutableSet.Builder builder = ImmutableSet.builder();
for (int i = 0; i < indexTypes.size(); i++) {
builder.add(i);
}
this.allInputChannels = builder.build();
}
public long getCacheSizeInBytes()
{
return indexSnapshotBuilder.getMemoryInBytes();
}
public boolean load(List requests)
{
// Generate a RecordSet that only presents index keys that have not been cached and are deduped based on lookupSourceInputChannels
UnloadedIndexKeyRecordSet recordSetForLookupSource = new UnloadedIndexKeyRecordSet(pipelineContext.getSession(), indexSnapshotReference.get(), lookupSourceInputChannels, indexTypes, requests, joinCompiler);
// Drive index lookup to produce the output (landing in indexSnapshotBuilder)
try (Driver driver = driverFactory.createDriver(pipelineContext.addDriverContext())) {
PlanNodeId sourcePlanNodeId = driverFactory.getSourceId().get();
ScheduledSplit split = new ScheduledSplit(0, sourcePlanNodeId, new Split(INDEX_CATALOG_HANDLE, new IndexSplit(recordSetForLookupSource)));
driver.updateSplitAssignment(new SplitAssignment(sourcePlanNodeId, ImmutableSet.of(split), true));
while (!driver.isFinished()) {
ListenableFuture process = driver.processUntilBlocked();
checkState(process.isDone(), "Driver should never block");
}
}
if (indexSnapshotBuilder.isMemoryExceeded()) {
clearCachedData();
return false;
}
// Generate a RecordSet that presents unique index keys that have not been cached
UnloadedIndexKeyRecordSet indexKeysRecordSet = (lookupSourceInputChannels.equals(allInputChannels))
? recordSetForLookupSource
: new UnloadedIndexKeyRecordSet(pipelineContext.getSession(), indexSnapshotReference.get(), allInputChannels, indexTypes, requests, joinCompiler);
// Create lookup source with new data
IndexSnapshot newValue = indexSnapshotBuilder.createIndexSnapshot(indexKeysRecordSet);
if (newValue == null) {
clearCachedData();
return false;
}
indexSnapshotReference.set(newValue);
for (UpdateRequest request : requests) {
request.finished(newValue);
}
return true;
}
private void clearCachedData()
{
indexSnapshotReference.set(new IndexSnapshot(new EmptyLookupSource(), new EmptyLookupSource()));
indexSnapshotBuilder.reset();
}
}
private static class EmptyLookupSource
implements LookupSource
{
@Override
public boolean isEmpty()
{
return true;
}
@Override
public long getJoinPositionCount()
{
return 0;
}
@Override
public long getInMemorySizeInBytes()
{
return 0;
}
@Override
public long joinPositionWithinPartition(long joinPosition)
{
throw new UnsupportedOperationException();
}
@Override
public long getJoinPosition(int position, Page page, Page allChannelsPage, long rawHash)
{
return IndexSnapshot.UNLOADED_INDEX_KEY;
}
@Override
public long getJoinPosition(int position, Page hashChannelsPage, Page allChannelsPage)
{
return IndexSnapshot.UNLOADED_INDEX_KEY;
}
@Override
public long getNextJoinPosition(long currentJoinPosition, int probePosition, Page allProbeChannelsPage)
{
return IndexSnapshot.UNLOADED_INDEX_KEY;
}
@Override
public boolean isJoinPositionEligible(long currentJoinPosition, int probePosition, Page allProbeChannelsPage)
{
return true;
}
@Override
public void appendTo(long position, PageBuilder pageBuilder, int outputChannelOffset)
{
throw new UnsupportedOperationException();
}
@Override
public void close()
{
}
}
}