// io.trino.plugin.hudi.split.HudiBackgroundSplitLoader (Maven / Gradle / Ivy)
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hudi.split;
import com.google.common.util.concurrent.Futures;
import io.trino.plugin.hive.util.AsyncQueue;
import io.trino.plugin.hudi.HudiTableHandle;
import io.trino.plugin.hudi.partition.HudiPartitionInfoLoader;
import io.trino.plugin.hudi.query.HudiDirectoryLister;
import io.trino.spi.TrinoException;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.ConnectorSplit;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Executor;
import java.util.concurrent.Future;
import static io.trino.plugin.hudi.HudiErrorCode.HUDI_CANNOT_OPEN_SPLIT;
import static io.trino.plugin.hudi.HudiSessionProperties.getSplitGeneratorParallelism;
import static java.util.Objects.requireNonNull;
/**
 * Background task that fans Hudi split generation out over several
 * {@link HudiPartitionInfoLoader} workers and marks the shared {@link AsyncQueue}
 * as finished once every worker has completed.
 *
 * <p>{@link #run()} blocks the calling thread until all submitted workers are done.
 */
public class HudiBackgroundSplitLoader
        implements Runnable
{
    private final HudiDirectoryLister hudiDirectoryLister;
    // NOTE(review): asyncQueue and partitions are declared with raw types here. The otherwise
    // unused ConnectorSplit import suggests AsyncQueue<ConnectorSplit> was intended; the element
    // type of partitions is not visible in this file. Confirm against HudiPartitionInfoLoader
    // before parameterizing them.
    private final AsyncQueue asyncQueue;
    private final Executor splitGeneratorExecutor;
    private final int splitGeneratorNumThreads;
    private final HudiSplitFactory hudiSplitFactory;
    private final List partitions;

    /**
     * Creates a loader for the given partitions.
     *
     * @param session session from which the split generator parallelism is read
     * @param tableHandle table handle passed to the {@link HudiSplitFactory}
     * @param hudiDirectoryLister lister handed to every partition loader; must not be null
     * @param asyncQueue queue handed to every partition loader and finished by {@link #run()}; must not be null
     * @param splitGeneratorExecutor executor on which the partition loaders are submitted; must not be null
     * @param hudiSplitWeightProvider weight provider passed to the {@link HudiSplitFactory}
     * @param partitions partitions to process; must not be null
     */
    public HudiBackgroundSplitLoader(
            ConnectorSession session,
            HudiTableHandle tableHandle,
            HudiDirectoryLister hudiDirectoryLister,
            AsyncQueue asyncQueue,
            Executor splitGeneratorExecutor,
            HudiSplitWeightProvider hudiSplitWeightProvider,
            List partitions)
    {
        this.hudiDirectoryLister = requireNonNull(hudiDirectoryLister, "hudiDirectoryLister is null");
        this.asyncQueue = requireNonNull(asyncQueue, "asyncQueue is null");
        this.splitGeneratorExecutor = requireNonNull(splitGeneratorExecutor, "splitGeneratorExecutor is null");
        this.splitGeneratorNumThreads = getSplitGeneratorParallelism(session);
        this.hudiSplitFactory = new HudiSplitFactory(tableHandle, hudiSplitWeightProvider);
        this.partitions = requireNonNull(partitions, "partitions is null");
    }

    /**
     * Submits {@code splitGeneratorNumThreads} partition loaders that share one partition queue,
     * waits for all of them, and then finishes the async queue.
     *
     * @throws TrinoException with {@code HUDI_CANNOT_OPEN_SPLIT} if a loader fails or the wait is
     *         interrupted; in that case the async queue is not finished by this method, and on
     *         interruption the thread's interrupt status is restored before throwing
     */
    @Override
    public void run()
    {
        // Copy into a concurrent deque so all generators can share the same work queue.
        Deque partitionQueue = new ConcurrentLinkedDeque<>(partitions);
        List<HudiPartitionInfoLoader> splitGeneratorList = new ArrayList<>();
        List<Future<?>> splitGeneratorFutures = new ArrayList<>();

        // Start a number of partition split generators to generate the splits in parallel
        for (int i = 0; i < splitGeneratorNumThreads; i++) {
            HudiPartitionInfoLoader generator = new HudiPartitionInfoLoader(hudiDirectoryLister, hudiSplitFactory, asyncQueue, partitionQueue);
            splitGeneratorList.add(generator);
            splitGeneratorFutures.add(Futures.submit(generator, splitGeneratorExecutor));
        }

        for (HudiPartitionInfoLoader generator : splitGeneratorList) {
            // Let the split generator stop once the partition queue is empty
            generator.stopRunning();
        }

        // Wait for all split generators to finish
        for (Future<?> future : splitGeneratorFutures) {
            try {
                future.get();
            }
            catch (InterruptedException e) {
                // Restore the interrupt flag so callers and the executor can observe the interruption.
                Thread.currentThread().interrupt();
                throw new TrinoException(HUDI_CANNOT_OPEN_SPLIT, "Error generating Hudi split", e);
            }
            catch (ExecutionException e) {
                throw new TrinoException(HUDI_CANNOT_OPEN_SPLIT, "Error generating Hudi split", e);
            }
        }
        asyncQueue.finish();
    }
}