
// Web-page header left over from extraction: io.trino.execution.scheduler.faulttolerant.ArbitraryDistributionSplitAssigner Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.execution.scheduler.faulttolerant;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.ListMultimap;
import io.trino.exchange.SpoolingExchangeInput;
import io.trino.metadata.Split;
import io.trino.spi.HostAddress;
import io.trino.spi.SplitWeight;
import io.trino.spi.connector.CatalogHandle;
import io.trino.spi.exchange.ExchangeSourceHandle;
import io.trino.split.RemoteSplit;
import io.trino.sql.planner.plan.PlanNodeId;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import static com.google.common.base.MoreObjects.toStringHelper;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.base.Verify.verify;
import static io.trino.operator.ExchangeOperator.REMOTE_CATALOG_HANDLE;
import static java.lang.Math.ceil;
import static java.lang.Math.min;
import static java.lang.Math.round;
import static java.util.Objects.requireNonNull;
class ArbitraryDistributionSplitAssigner
implements SplitAssigner
{
private final Optional catalogRequirement;
private final Set partitionedSources;
private final Set replicatedSources;
private final Set allSources;
private final int adaptiveGrowthPeriod;
private final double adaptiveGrowthFactor;
private final long minTargetPartitionSizeInBytes;
private final long maxTargetPartitionSizeInBytes;
private final long standardSplitSizeInBytes;
private final int maxTaskSplitCount;
private int nextPartitionId;
private int adaptiveCounter;
private long targetPartitionSizeInBytes;
private long roundedTargetPartitionSizeInBytes;
private final List allAssignments = new ArrayList<>();
private final Map, PartitionAssignment> openAssignments = new HashMap<>();
private final Set completedSources = new HashSet<>();
private final ListMultimap replicatedSplits = ArrayListMultimap.create();
private boolean noMoreReplicatedSplits;
ArbitraryDistributionSplitAssigner(
Optional catalogRequirement,
Set partitionedSources,
Set replicatedSources,
int adaptiveGrowthPeriod,
double adaptiveGrowthFactor,
long minTargetPartitionSizeInBytes,
long maxTargetPartitionSizeInBytes,
long standardSplitSizeInBytes,
int maxTaskSplitCount)
{
this.catalogRequirement = requireNonNull(catalogRequirement, "catalogRequirement is null");
this.partitionedSources = ImmutableSet.copyOf(requireNonNull(partitionedSources, "partitionedSources is null"));
this.replicatedSources = ImmutableSet.copyOf(requireNonNull(replicatedSources, "replicatedSources is null"));
allSources = ImmutableSet.builder()
.addAll(partitionedSources)
.addAll(replicatedSources)
.build();
this.adaptiveGrowthPeriod = adaptiveGrowthPeriod;
this.adaptiveGrowthFactor = adaptiveGrowthFactor;
this.minTargetPartitionSizeInBytes = minTargetPartitionSizeInBytes;
this.maxTargetPartitionSizeInBytes = maxTargetPartitionSizeInBytes;
this.standardSplitSizeInBytes = standardSplitSizeInBytes;
this.maxTaskSplitCount = maxTaskSplitCount;
this.targetPartitionSizeInBytes = minTargetPartitionSizeInBytes;
this.roundedTargetPartitionSizeInBytes = minTargetPartitionSizeInBytes;
}
@Override
public AssignmentResult assign(PlanNodeId planNodeId, ListMultimap splits, boolean noMoreSplits)
{
for (Split split : splits.values()) {
Optional splitCatalogRequirement = Optional.of(split.getCatalogHandle())
.filter(catalog -> !catalog.getType().isInternal() && !catalog.equals(REMOTE_CATALOG_HANDLE));
checkArgument(
catalogRequirement.isEmpty() || catalogRequirement.equals(splitCatalogRequirement),
"unexpected split catalog requirement: %s",
splitCatalogRequirement);
}
if (replicatedSources.contains(planNodeId)) {
return assignReplicatedSplits(planNodeId, ImmutableList.copyOf(splits.values()), noMoreSplits);
}
return assignPartitionedSplits(planNodeId, ImmutableList.copyOf(splits.values()), noMoreSplits);
}
@Override
public AssignmentResult finish()
{
checkState(!allAssignments.isEmpty(), "allAssignments is not expected to be empty");
return AssignmentResult.builder().build();
}
private AssignmentResult assignReplicatedSplits(PlanNodeId planNodeId, List splits, boolean noMoreSplits)
{
AssignmentResult.Builder assignment = AssignmentResult.builder();
replicatedSplits.putAll(planNodeId, splits);
for (PartitionAssignment partitionAssignment : allAssignments) {
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
planNodeId,
false,
singleSourcePartition(SINGLE_SOURCE_PARTITION_ID, splits),
noMoreSplits));
}
if (noMoreSplits) {
completedSources.add(planNodeId);
if (completedSources.containsAll(replicatedSources)) {
noMoreReplicatedSplits = true;
}
}
if (noMoreReplicatedSplits) {
for (PartitionAssignment partitionAssignment : allAssignments) {
if (partitionAssignment.isFull()) {
assignment.sealPartition(partitionAssignment.getPartitionId());
}
}
}
if (completedSources.containsAll(allSources)) {
if (allAssignments.isEmpty()) {
// at least a single partition is expected to be created
allAssignments.add(new PartitionAssignment(0));
assignment.addPartition(new Partition(0, new NodeRequirements(catalogRequirement, ImmutableSet.of())));
for (PlanNodeId replicatedSourceId : replicatedSources) {
assignment.updatePartition(new PartitionUpdate(
0,
replicatedSourceId,
false,
singleSourcePartition(SINGLE_SOURCE_PARTITION_ID, replicatedSplits.get(replicatedSourceId)),
true));
}
for (PlanNodeId partitionedSourceId : partitionedSources) {
assignment.updatePartition(new PartitionUpdate(
0,
partitionedSourceId,
false,
ImmutableListMultimap.of(),
true));
}
assignment.sealPartition(0);
}
else {
for (PartitionAssignment partitionAssignment : allAssignments) {
// set noMoreSplits for partitioned sources
if (!partitionAssignment.isFull()) {
for (PlanNodeId partitionedSourceNodeId : partitionedSources) {
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
partitionedSourceNodeId,
false,
singleSourcePartition(0, ImmutableList.of()),
true));
}
// seal partition
assignment.sealPartition(partitionAssignment.getPartitionId());
}
}
}
replicatedSplits.clear();
// no more partitions will be created
assignment.setNoMorePartitions();
}
return assignment.build();
}
private ListMultimap singleSourcePartition(int sourcePartitionId, List splits)
{
ImmutableListMultimap.Builder builder = ImmutableListMultimap.builder();
builder.putAll(0, splits);
return builder.build();
}
private AssignmentResult assignPartitionedSplits(PlanNodeId planNodeId, List splits, boolean noMoreSplits)
{
AssignmentResult.Builder assignment = AssignmentResult.builder();
for (Split split : splits) {
Optional hostRequirement = getHostRequirement(split);
PartitionAssignment partitionAssignment = openAssignments.get(hostRequirement);
long splitSizeInBytes = getSplitSizeInBytes(split);
if (partitionAssignment != null && ((partitionAssignment.getAssignedDataSizeInBytes() + splitSizeInBytes > roundedTargetPartitionSizeInBytes)
|| (partitionAssignment.getAssignedSplitCount() + 1 > maxTaskSplitCount))) {
partitionAssignment.setFull(true);
for (PlanNodeId partitionedSourceNodeId : partitionedSources) {
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
partitionedSourceNodeId,
false,
ImmutableListMultimap.of(),
true));
}
if (completedSources.containsAll(replicatedSources)) {
assignment.sealPartition(partitionAssignment.getPartitionId());
}
partitionAssignment = null;
openAssignments.remove(hostRequirement);
adaptiveCounter++;
if (adaptiveCounter >= adaptiveGrowthPeriod) {
targetPartitionSizeInBytes = (long) min(maxTargetPartitionSizeInBytes, ceil(targetPartitionSizeInBytes * adaptiveGrowthFactor));
// round to a multiple of minTargetPartitionSizeInBytes so work will be evenly distributed among drivers of a task
roundedTargetPartitionSizeInBytes = round(targetPartitionSizeInBytes * 1.0 / minTargetPartitionSizeInBytes) * minTargetPartitionSizeInBytes;
verify(roundedTargetPartitionSizeInBytes > 0, "roundedTargetPartitionSizeInBytes %s not positive", roundedTargetPartitionSizeInBytes);
adaptiveCounter = 0;
}
}
if (partitionAssignment == null) {
partitionAssignment = new PartitionAssignment(nextPartitionId++);
allAssignments.add(partitionAssignment);
openAssignments.put(hostRequirement, partitionAssignment);
assignment.addPartition(new Partition(
partitionAssignment.getPartitionId(),
new NodeRequirements(catalogRequirement, hostRequirement.map(ImmutableSet::of).orElseGet(ImmutableSet::of))));
for (PlanNodeId replicatedSourceId : replicatedSources) {
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
replicatedSourceId,
false,
singleSourcePartition(SINGLE_SOURCE_PARTITION_ID, replicatedSplits.get(replicatedSourceId)),
completedSources.contains(replicatedSourceId)));
}
}
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
planNodeId,
true,
singleSourcePartition(SINGLE_SOURCE_PARTITION_ID, ImmutableList.of(split)),
false));
partitionAssignment.assignSplit(splitSizeInBytes);
}
if (noMoreSplits) {
completedSources.add(planNodeId);
}
if (completedSources.containsAll(allSources)) {
if (allAssignments.isEmpty()) {
// at least a single partition is expected to be created
allAssignments.add(new PartitionAssignment(0));
assignment.addPartition(new Partition(0, new NodeRequirements(catalogRequirement, ImmutableSet.of())));
for (PlanNodeId replicatedSourceId : replicatedSources) {
assignment.updatePartition(new PartitionUpdate(
0,
replicatedSourceId,
false,
singleSourcePartition(SINGLE_SOURCE_PARTITION_ID, replicatedSplits.get(replicatedSourceId)),
true));
}
for (PlanNodeId partitionedSourceId : partitionedSources) {
assignment.updatePartition(new PartitionUpdate(
0,
partitionedSourceId,
false,
ImmutableListMultimap.of(),
true));
}
assignment.sealPartition(0);
}
else {
for (PartitionAssignment partitionAssignment : openAssignments.values()) {
// set noMoreSplits for partitioned sources
for (PlanNodeId partitionedSourceNodeId : partitionedSources) {
assignment.updatePartition(new PartitionUpdate(
partitionAssignment.getPartitionId(),
partitionedSourceNodeId,
false,
ImmutableListMultimap.of(),
true));
}
// seal partition
assignment.sealPartition(partitionAssignment.getPartitionId());
}
openAssignments.clear();
}
replicatedSplits.clear();
// no more partitions will be created
assignment.setNoMorePartitions();
}
return assignment.build();
}
@Override
public String toString()
{
return toStringHelper(this)
.add("catalogRequirement", catalogRequirement)
.add("partitionedSources", partitionedSources)
.add("replicatedSources", replicatedSources)
.add("allSources", allSources)
.add("adaptiveGrowthPeriod", adaptiveGrowthPeriod)
.add("adaptiveGrowthFactor", adaptiveGrowthFactor)
.add("minTargetPartitionSizeInBytes", minTargetPartitionSizeInBytes)
.add("maxTargetPartitionSizeInBytes", maxTargetPartitionSizeInBytes)
.add("standardSplitSizeInBytes", standardSplitSizeInBytes)
.add("maxTaskSplitCount", maxTaskSplitCount)
.add("nextPartitionId", nextPartitionId)
.add("adaptiveCounter", adaptiveCounter)
.add("targetPartitionSizeInBytes", targetPartitionSizeInBytes)
.add("roundedTargetPartitionSizeInBytes", roundedTargetPartitionSizeInBytes)
.add("allAssignments", allAssignments)
.add("openAssignments", openAssignments)
.add("completedSources", completedSources)
.add("replicatedSplits.size()", replicatedSplits.size())
.add("noMoreReplicatedSplits", noMoreReplicatedSplits)
.toString();
}
private Optional getHostRequirement(Split split)
{
if (split.getConnectorSplit().isRemotelyAccessible()) {
return Optional.empty();
}
List addresses = split.getAddresses();
checkArgument(!addresses.isEmpty(), "split is not remotely accessible but the list of hosts is empty: %s", split);
HostAddress selectedAddress = null;
long selectedAssignmentDataSize = Long.MAX_VALUE;
for (HostAddress address : addresses) {
PartitionAssignment assignment = openAssignments.get(Optional.of(address));
if (assignment == null) {
// prioritize unused addresses
selectedAddress = address;
break;
}
if (assignment.getAssignedDataSizeInBytes() < selectedAssignmentDataSize) {
// otherwise prioritize the smallest assignment
selectedAddress = address;
selectedAssignmentDataSize = assignment.getAssignedDataSizeInBytes();
}
}
verify(selectedAddress != null, "selectedAddress is null");
return Optional.of(selectedAddress);
}
private long getSplitSizeInBytes(Split split)
{
if (split.getCatalogHandle().equals(REMOTE_CATALOG_HANDLE)) {
RemoteSplit remoteSplit = (RemoteSplit) split.getConnectorSplit();
SpoolingExchangeInput exchangeInput = (SpoolingExchangeInput) remoteSplit.getExchangeInput();
long size = 0;
for (ExchangeSourceHandle handle : exchangeInput.getExchangeSourceHandles()) {
size += handle.getDataSizeInBytes();
}
return size;
}
return round(((split.getSplitWeight().getRawValue() * 1.0) / SplitWeight.standard().getRawValue()) * standardSplitSizeInBytes);
}
private static class PartitionAssignment
{
private final int partitionId;
private long assignedDataSizeInBytes;
private int assignedSplitCount;
private boolean full;
private PartitionAssignment(int partitionId)
{
this.partitionId = partitionId;
}
public int getPartitionId()
{
return partitionId;
}
public void assignSplit(long sizeInBytes)
{
assignedDataSizeInBytes += sizeInBytes;
assignedSplitCount++;
}
public long getAssignedDataSizeInBytes()
{
return assignedDataSizeInBytes;
}
public int getAssignedSplitCount()
{
return assignedSplitCount;
}
public boolean isFull()
{
return full;
}
public void setFull(boolean full)
{
this.full = full;
}
@Override
public String toString()
{
return toStringHelper(this)
.add("partitionId", partitionId)
.add("assignedDataSizeInBytes", assignedDataSizeInBytes)
.add("assignedSplitCount", assignedSplitCount)
.add("full", full)
.toString();
}
}
}
// Web-page footer left over from extraction: © 2015 - 2025 Weber Informatics LLC | Privacy Policy