All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.operator.MarkDistinctOperator Maven / Gradle / Ivy

There is a newer version: 468
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.operator;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.Ints;
import io.trino.memory.context.LocalMemoryContext;
import io.trino.spi.Page;
import io.trino.spi.block.Block;
import io.trino.spi.type.Type;
import io.trino.sql.planner.plan.PlanNodeId;

import java.util.Collection;
import java.util.List;
import java.util.Optional;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkState;
import static io.trino.spi.type.BooleanType.BOOLEAN;
import static java.util.Objects.requireNonNull;

public class MarkDistinctOperator
        implements Operator
{
    public static class MarkDistinctOperatorFactory
            implements OperatorFactory
    {
        private final int operatorId;
        private final PlanNodeId planNodeId;
        private final Optional hashChannel;
        private final List markDistinctChannels;
        private final List types;
        private final FlatHashStrategyCompiler hashStrategyCompiler;
        private boolean closed;

        public MarkDistinctOperatorFactory(
                int operatorId,
                PlanNodeId planNodeId,
                List sourceTypes,
                Collection markDistinctChannels,
                Optional hashChannel,
                FlatHashStrategyCompiler hashStrategyCompiler)
        {
            this.operatorId = operatorId;
            this.planNodeId = requireNonNull(planNodeId, "planNodeId is null");
            this.markDistinctChannels = ImmutableList.copyOf(requireNonNull(markDistinctChannels, "markDistinctChannels is null"));
            checkArgument(!markDistinctChannels.isEmpty(), "markDistinctChannels is empty");
            this.hashChannel = requireNonNull(hashChannel, "hashChannel is null");
            this.hashStrategyCompiler = requireNonNull(hashStrategyCompiler, "hashStrategyCompiler is null");
            this.types = ImmutableList.builder()
                    .addAll(sourceTypes)
                    .add(BOOLEAN)
                    .build();
        }

        @Override
        public Operator createOperator(DriverContext driverContext)
        {
            checkState(!closed, "Factory is already closed");
            OperatorContext operatorContext = driverContext.addOperatorContext(operatorId, planNodeId, MarkDistinctOperator.class.getSimpleName());
            return new MarkDistinctOperator(operatorContext, types, markDistinctChannels, hashChannel, hashStrategyCompiler);
        }

        @Override
        public void noMoreOperators()
        {
            closed = true;
        }

        @Override
        public OperatorFactory duplicate()
        {
            return new MarkDistinctOperatorFactory(operatorId, planNodeId, types.subList(0, types.size() - 1), markDistinctChannels, hashChannel, hashStrategyCompiler);
        }
    }

    private final OperatorContext operatorContext;
    private final MarkDistinctHash markDistinctHash;
    private final LocalMemoryContext localUserMemoryContext;
    private final int[] markDistinctChannels;

    private Page inputPage;
    private boolean finishing;

    // for yield when memory is not available
    private Work unfinishedWork;

    public MarkDistinctOperator(OperatorContext operatorContext, List types, List markDistinctChannels, Optional hashChannel, FlatHashStrategyCompiler hashStrategyCompiler)
    {
        this.operatorContext = requireNonNull(operatorContext, "operatorContext is null");

        requireNonNull(hashChannel, "hashChannel is null");
        requireNonNull(markDistinctChannels, "markDistinctChannels is null");

        ImmutableList.Builder distinctTypes = ImmutableList.builder();
        for (int channel : markDistinctChannels) {
            distinctTypes.add(types.get(channel));
        }
        if (hashChannel.isPresent()) {
            this.markDistinctChannels = new int[markDistinctChannels.size() + 1];
            for (int i = 0; i < markDistinctChannels.size(); i++) {
                this.markDistinctChannels[i] = markDistinctChannels.get(i);
            }
            this.markDistinctChannels[markDistinctChannels.size()] = hashChannel.get();
        }
        else {
            this.markDistinctChannels = Ints.toArray(markDistinctChannels);
        }

        this.markDistinctHash = new MarkDistinctHash(operatorContext.getSession(), distinctTypes.build(), hashChannel.isPresent(), hashStrategyCompiler, this::updateMemoryReservation);
        this.localUserMemoryContext = operatorContext.localUserMemoryContext();
    }

    @Override
    public OperatorContext getOperatorContext()
    {
        return operatorContext;
    }

    @Override
    public void finish()
    {
        finishing = true;
    }

    @Override
    public boolean isFinished()
    {
        return finishing && !hasUnfinishedInput();
    }

    @Override
    public boolean needsInput()
    {
        return !finishing && !hasUnfinishedInput();
    }

    @Override
    public void addInput(Page page)
    {
        requireNonNull(page, "page is null");
        checkState(needsInput());

        inputPage = page;

        unfinishedWork = markDistinctHash.markDistinctRows(page.getColumns(markDistinctChannels));
        updateMemoryReservation();
    }

    @Override
    public Page getOutput()
    {
        if (unfinishedWork == null) {
            return null;
        }

        if (!unfinishedWork.process()) {
            return null;
        }

        // add the new boolean column to the page
        Page outputPage = inputPage.appendColumn(unfinishedWork.getResult());

        unfinishedWork = null;
        inputPage = null;

        updateMemoryReservation();
        return outputPage;
    }

    private boolean hasUnfinishedInput()
    {
        return inputPage != null || unfinishedWork != null;
    }

    /**
     * Update memory usage.
     *
     * @return true to if the reservation is within the limit
     */
    // TODO: update in the interface now that the new memory tracking framework is landed
    // Essentially we would love to have clean interfaces to support both pushing and pulling memory usage
    // The following implementation is a hybrid model, where the push model is going to call the pull model causing reentrancy
    private boolean updateMemoryReservation()
    {
        // Operator/driver will be blocked on memory after we call localUserMemoryContext.setBytes().
        // If memory is not available, once we return, this operator will be blocked until memory is available.
        localUserMemoryContext.setBytes(markDistinctHash.getEstimatedSize());
        // If memory is not available, inform the caller that we cannot proceed for allocation.
        return operatorContext.isWaitingForMemory().isDone();
    }

    @VisibleForTesting
    public int getCapacity()
    {
        return markDistinctHash.getCapacity();
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy