io.trino.plugin.hive.HiveUpdatablePageSource

/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.plugin.hive;

import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.plugin.hive.acid.AcidOperation;
import io.trino.plugin.hive.orc.OrcFileWriter;
import io.trino.plugin.hive.orc.OrcFileWriterFactory;
import io.trino.spi.Page;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.LongArrayBlock;
import io.trino.spi.block.RowBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.UpdatablePageSource;
import io.trino.spi.type.TypeManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.concurrent.CompletableFuture;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR;
import static io.trino.plugin.hive.PartitionAndStatementId.CODEC;
import static io.trino.spi.predicate.Utils.nativeValueToBlock;
import static io.trino.spi.type.BigintType.BIGINT;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.CompletableFuture.completedFuture;

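/**
 * A page source implementing ACID DELETE and UPDATE for Hive transactional tables.
 * Deleted row ids are appended to a delete delta file; for UPDATE, the merged row
 * versions are additionally appended to an insert delta file, and {@link #finish()}
 * returns a {@link PartitionAndStatementId} fragment describing the written deltas.
 */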
public class HiveUpdatablePageSource
        extends AbstractHiveAcidWriters
        implements UpdatablePageSource
{
    // The channel numbers of the child blocks in the RowBlock passed to deleteRows()
    public static final int ORIGINAL_TRANSACTION_CHANNEL = 0;
    public static final int ROW_ID_CHANNEL = 1;
    public static final int BUCKET_CHANNEL = 2;
    // Used for UPDATE operations
    public static final int ROW_CHANNEL = 3;
    public static final int ACID_ROW_STRUCT_COLUMN_ID = 6;

    private final String partitionName;
    private final ConnectorPageSource hivePageSource;
    private final AcidOperation updateKind;
    private final Block hiveRowTypeNullsBlock;
    private final long writeId;
    private final Optional<List<Integer>> dependencyChannels;

    private long maxWriteId;
    private long rowCount;
    private long insertRowCounter;

    private boolean closed;

    public HiveUpdatablePageSource(
            HiveTableHandle hiveTableHandle,
            String partitionName,
            int statementId,
            ConnectorPageSource hivePageSource,
            TypeManager typeManager,
            OptionalInt bucketNumber,
            Path bucketPath,
            boolean originalFile,
            OrcFileWriterFactory orcFileWriterFactory,
            Configuration configuration,
            ConnectorSession session,
            HiveType hiveRowType,
            List<HiveColumnHandle> dependencyColumns,
            AcidOperation updateKind)
    {
        super(hiveTableHandle.getTransaction(), statementId, bucketNumber, bucketPath, originalFile, orcFileWriterFactory, configuration, session, hiveRowType, updateKind);
        this.partitionName = requireNonNull(partitionName, "partitionName is null");
        this.hivePageSource = requireNonNull(hivePageSource, "hivePageSource is null");
        this.updateKind = requireNonNull(updateKind, "updateKind is null");
        this.hiveRowTypeNullsBlock = nativeValueToBlock(hiveRowType.getType(typeManager), null);
        checkArgument(hiveTableHandle.isInAcidTransaction(), "Not in a transaction; hiveTableHandle: %s", hiveTableHandle);
        this.writeId = hiveTableHandle.getWriteId();
        if (updateKind == AcidOperation.UPDATE) {
            this.dependencyChannels = Optional.of(hiveTableHandle.getUpdateProcessor()
                    .orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"))
                    .makeDependencyChannelNumbers(dependencyColumns));
        }
        else {
            this.dependencyChannels = Optional.empty();
        }
    }

    @Override
    public void deleteRows(Block rowIds)
    {
        List<Block> blocks = rowIds.getChildren();
        checkArgument(blocks.size() == 3, "The rowId block for DELETE should have 3 children, but has %s", blocks.size());
        deleteRowsInternal(rowIds);
    }

    private void deleteRowsInternal(Block rowIds)
    {
        int positionCount = rowIds.getPositionCount();
        List<Block> blocks = rowIds.getChildren();
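        // Lay out one row per deleted row id in the Hive ACID delta schema:
        // operation, originalTransaction, bucket, rowId, currentTransaction, row
        // (the row struct is written as nulls in a delete delta).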
        Block[] blockArray = {
                new RunLengthEncodedBlock(DELETE_OPERATION_BLOCK, positionCount),
                blocks.get(ORIGINAL_TRANSACTION_CHANNEL),
                blocks.get(BUCKET_CHANNEL),
                blocks.get(ROW_ID_CHANNEL),
                RunLengthEncodedBlock.create(BIGINT, writeId, positionCount),
                new RunLengthEncodedBlock(hiveRowTypeNullsBlock, positionCount),
        };
        Page deletePage = new Page(blockArray);

        Block block = blocks.get(ORIGINAL_TRANSACTION_CHANNEL);
        for (int index = 0; index < positionCount; index++) {
            maxWriteId = Math.max(maxWriteId, block.getLong(index, 0));
        }

        lazyInitializeDeleteFileWriter();
        deleteFileWriter.orElseThrow(() -> new IllegalArgumentException("deleteFileWriter not present")).appendRows(deletePage);
        rowCount += positionCount;
    }

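    /**
     * An ACID UPDATE is written as a delete of the old row version (appended to the
     * delete delta) followed by an insert of the merged row (appended to the insert delta).
     */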
    @Override
    public void updateRows(Page page, List<Integer> columnValueAndRowIdChannels)
    {
        int positionCount = page.getPositionCount();
        verify(positionCount > 0, "Unexpected empty page"); // should be filtered out by engine

        HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
        RowBlock acidRowBlock = updateProcessor.getAcidRowBlock(page, columnValueAndRowIdChannels);

        List<Block> blocks = acidRowBlock.getChildren();
        checkArgument(blocks.size() == 3 || blocks.size() == 4, "The rowId block for UPDATE should have 3 or 4 children, but has %s", blocks.size());
        deleteRowsInternal(acidRowBlock);

        Block mergedColumnsBlock = updateProcessor.createMergedColumnsBlock(page, columnValueAndRowIdChannels);

        Block currentTransactionBlock = RunLengthEncodedBlock.create(BIGINT, writeId, positionCount);
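        // The inserted row uses the same ACID column layout; for rows written by this
        // transaction, originalTransaction and currentTransaction are both the current writeId.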
        Block[] blockArray = {
                new RunLengthEncodedBlock(INSERT_OPERATION_BLOCK, positionCount),
                currentTransactionBlock,
                blocks.get(BUCKET_CHANNEL),
                createRowIdBlock(positionCount),
                currentTransactionBlock,
                mergedColumnsBlock,
        };

        Page insertPage = new Page(blockArray);
        lazyInitializeInsertFileWriter();
        insertFileWriter.orElseThrow(() -> new IllegalArgumentException("insertFileWriter not present")).appendRows(insertPage);
    }

    Block createRowIdBlock(int positionCount)
    {
        long[] rowIds = new long[positionCount];
        for (int index = 0; index < positionCount; index++) {
            rowIds[index] = insertRowCounter++;
        }
        return new LongArrayBlock(positionCount, Optional.empty(), rowIds);
    }

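    /**
     * Commits the delete delta writer (and, for UPDATE, the insert delta writer) and returns
     * a single fragment containing the JSON-encoded {@link PartitionAndStatementId}, which is
     * passed back to the connector when the statement finishes.
     */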
    @Override
    public CompletableFuture<Collection<Slice>> finish()
    {
        if (deleteFileWriter.isEmpty()) {
            return completedFuture(ImmutableList.of());
        }
        OrcFileWriter deleteWriter = (OrcFileWriter) deleteFileWriter.get();
        deleteWriter.setMaxWriteId(maxWriteId);
        deleteWriter.commit();

        Optional<String> deltaDirectoryString;
        switch (updateKind) {
            case DELETE:
                deltaDirectoryString = Optional.empty();
                break;

            case UPDATE:
                OrcFileWriter insertWriter = (OrcFileWriter) insertFileWriter.get();
                insertWriter.setMaxWriteId(maxWriteId);
                insertWriter.commit();
                checkArgument(deltaDirectory.isPresent(), "deltaDirectory not present");
                deltaDirectoryString = Optional.of(deltaDirectory.get().toString());
                break;

            default:
                throw new IllegalArgumentException("Unknown UpdateKind " + updateKind);
        }
        Slice fragment = Slices.wrappedBuffer(CODEC.toJsonBytes(new PartitionAndStatementId(
                partitionName,
                statementId,
                rowCount,
                deleteDeltaDirectory.toString(),
                deltaDirectoryString)));
        return completedFuture(ImmutableList.of(fragment));
    }

    @Override
    public long getCompletedBytes()
    {
        return hivePageSource.getCompletedBytes();
    }

    @Override
    public long getReadTimeNanos()
    {
        return hivePageSource.getReadTimeNanos();
    }

    @Override
    public boolean isFinished()
    {
        return closed;
    }

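    /**
     * For UPDATE, only the columns the update depends on are kept; the remaining columns
     * are projected out of each page read from the underlying Hive page source.
     */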
    @Override
    public Page getNextPage()
    {
        Page page = hivePageSource.getNextPage();
        if (page == null) {
            close();
            return null;
        }
        if (transaction.isUpdate()) {
            HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
            List<Integer> channels = dependencyChannels.orElseThrow(() -> new IllegalArgumentException("dependencyChannels not present"));
            return updateProcessor.removeNonDependencyColumns(page, channels);
        }
        else {
            return page;
        }
    }

    @Override
    public long getSystemMemoryUsage()
    {
        return hivePageSource.getSystemMemoryUsage();
    }

    @Override
    public void close()
    {
        if (closed) {
            return;
        }
        closed = true;

        try {
            hivePageSource.close();
        }
        catch (Exception e) {
            throw new TrinoException(HIVE_WRITER_CLOSE_ERROR, e);
        }
    }
}