/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.trino.plugin.hive;

import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import io.airlift.slice.Slices;
import io.trino.plugin.hive.acid.AcidOperation;
import io.trino.plugin.hive.orc.OrcFileWriter;
import io.trino.plugin.hive.orc.OrcFileWriterFactory;
import io.trino.spi.Page;
import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.block.LongArrayBlock;
import io.trino.spi.block.RowBlock;
import io.trino.spi.block.RunLengthEncodedBlock;
import io.trino.spi.connector.ConnectorPageSource;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.connector.UpdatablePageSource;
import io.trino.spi.type.TypeManager;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;

import java.util.Collection;
import java.util.List;
import java.util.Optional;
import java.util.OptionalInt;
import java.util.concurrent.CompletableFuture;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static io.trino.plugin.hive.HiveErrorCode.HIVE_WRITER_CLOSE_ERROR;
import static io.trino.plugin.hive.PartitionAndStatementId.CODEC;
import static io.trino.spi.predicate.Utils.nativeValueToBlock;
import static io.trino.spi.type.BigintType.BIGINT;
import static java.util.Objects.requireNonNull;
import static java.util.concurrent.CompletableFuture.completedFuture;
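
/**
 * A {@link ConnectorPageSource} for Hive ACID tables that also supports row-level
 * DELETE and UPDATE. Deletes are appended to a delete delta file; an update is
 * written as a delete of the old row id plus an insert of the merged row.
 *
 * <p>A rough sketch of the driver loop (the loop lives in the engine, not in this
 * file, so the shape below is illustrative only):
 * <pre>{@code
 * while (!pageSource.isFinished()) {
 *     Page page = pageSource.getNextPage();
 *     // the engine evaluates predicates / SET expressions on the page, then calls:
 *     pageSource.deleteRows(rowIdBlock);                    // DELETE
 *     pageSource.updateRows(page, valueAndRowIdChannels);   // UPDATE
 * }
 * Collection<Slice> fragments = pageSource.finish().join();
 * }</pre>
 */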
public class HiveUpdatablePageSource
extends AbstractHiveAcidWriters
implements UpdatablePageSource
{
// The channel numbers of the child blocks in the RowBlock passed to deleteRows()
public static final int ORIGINAL_TRANSACTION_CHANNEL = 0;
public static final int ROW_ID_CHANNEL = 1;
public static final int BUCKET_CHANNEL = 2;
// Used for UPDATE operations
public static final int ROW_CHANNEL = 3;
public static final int ACID_ROW_STRUCT_COLUMN_ID = 6;
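// An ACID delta row has six top-level columns: operation, originalTransaction, bucket,
// rowId, currentTransaction, and the row struct itself (column id 6, per the constant
// above); the delete and insert pages built below follow exactly that layout.
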
private final String partitionName;
private final ConnectorPageSource hivePageSource;
private final AcidOperation updateKind;
private final Block hiveRowTypeNullsBlock;
private final long writeId;
private final Optional<List<Integer>> dependencyChannels;
private long maxWriteId;
private long rowCount;
private long insertRowCounter;
private boolean closed;

public HiveUpdatablePageSource(
HiveTableHandle hiveTableHandle,
String partitionName,
int statementId,
ConnectorPageSource hivePageSource,
TypeManager typeManager,
OptionalInt bucketNumber,
Path bucketPath,
boolean originalFile,
OrcFileWriterFactory orcFileWriterFactory,
Configuration configuration,
ConnectorSession session,
HiveType hiveRowType,
List<HiveColumnHandle> dependencyColumns,
AcidOperation updateKind)
{
super(hiveTableHandle.getTransaction(), statementId, bucketNumber, bucketPath, originalFile, orcFileWriterFactory, configuration, session, hiveRowType, updateKind);
this.partitionName = requireNonNull(partitionName, "partitionName is null");
this.hivePageSource = requireNonNull(hivePageSource, "hivePageSource is null");
this.updateKind = requireNonNull(updateKind, "updateKind is null");
this.hiveRowTypeNullsBlock = nativeValueToBlock(hiveRowType.getType(typeManager), null);
checkArgument(hiveTableHandle.isInAcidTransaction(), "Not in a transaction; hiveTableHandle: %s", hiveTableHandle);
this.writeId = hiveTableHandle.getWriteId();
if (updateKind == AcidOperation.UPDATE) {
this.dependencyChannels = Optional.of(hiveTableHandle.getUpdateProcessor()
.orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"))
.makeDependencyChannelNumbers(dependencyColumns));
}
else {
this.dependencyChannels = Optional.empty();
}
}
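
/**
 * The rowIds block is a row (struct) block whose children are the originalTransaction,
 * rowId, and bucket blocks, indexed by the channel constants above; each position
 * identifies one row to delete.
 */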
@Override
public void deleteRows(Block rowIds)
{
List<Block> blocks = rowIds.getChildren();
checkArgument(blocks.size() == 3, "The rowId block for DELETE should have 3 children, but has %s", blocks.size());
deleteRowsInternal(rowIds);
}

private void deleteRowsInternal(Block rowIds)
{
int positionCount = rowIds.getPositionCount();
List<Block> blocks = rowIds.getChildren();
Block[] blockArray = {
new RunLengthEncodedBlock(DELETE_OPERATION_BLOCK, positionCount),
blocks.get(ORIGINAL_TRANSACTION_CHANNEL),
blocks.get(BUCKET_CHANNEL),
blocks.get(ROW_ID_CHANNEL),
RunLengthEncodedBlock.create(BIGINT, writeId, positionCount),
new RunLengthEncodedBlock(hiveRowTypeNullsBlock, positionCount),
};
Page deletePage = new Page(blockArray);
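// Track the largest originalTransaction in this batch; finish() hands it to the
// ORC writers through setMaxWriteId().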
Block block = blocks.get(ORIGINAL_TRANSACTION_CHANNEL);
for (int index = 0; index < positionCount; index++) {
maxWriteId = Math.max(maxWriteId, block.getLong(index, 0));
}
lazyInitializeDeleteFileWriter();
deleteFileWriter.orElseThrow(() -> new IllegalArgumentException("deleteFileWriter not present")).appendRows(deletePage);
rowCount += positionCount;
}
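
/**
 * An UPDATE is executed as a paired delete and insert: the old row ids go to the
 * delete delta via deleteRowsInternal(), and the merged rows (new values combined
 * with the unchanged columns) are appended to the insert delta under this writeId.
 */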
@Override
public void updateRows(Page page, List<Integer> columnValueAndRowIdChannels)
{
int positionCount = page.getPositionCount();
verify(positionCount > 0, "Unexpected empty page"); // should be filtered out by engine
HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
RowBlock acidRowBlock = updateProcessor.getAcidRowBlock(page, columnValueAndRowIdChannels);
List<Block> blocks = acidRowBlock.getChildren();
checkArgument(blocks.size() == 3 || blocks.size() == 4, "The rowId block for UPDATE should have 3 or 4 children, but has %s", blocks.size());
deleteRowsInternal(acidRowBlock);
Block mergedColumnsBlock = updateProcessor.createMergedColumnsBlock(page, columnValueAndRowIdChannels);
Block currentTransactionBlock = RunLengthEncodedBlock.create(BIGINT, writeId, positionCount);
Block[] blockArray = {
new RunLengthEncodedBlock(INSERT_OPERATION_BLOCK, positionCount),
currentTransactionBlock,
blocks.get(BUCKET_CHANNEL),
createRowIdBlock(positionCount),
currentTransactionBlock,
mergedColumnsBlock,
};
Page insertPage = new Page(blockArray);
lazyInitializeInsertFileWriter();
insertFileWriter.orElseThrow(() -> new IllegalArgumentException("insertFileWriter not present")).appendRows(insertPage);
}
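
// Row ids for inserted rows are assigned sequentially within this statement.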
Block createRowIdBlock(int positionCount)
{
long[] rowIds = new long[positionCount];
for (int index = 0; index < positionCount; index++) {
rowIds[index] = insertRowCounter++;
}
return new LongArrayBlock(positionCount, Optional.empty(), rowIds);
}
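
/**
 * Commits the delete writer (and, for UPDATE, the insert writer) and returns one
 * JSON-encoded PartitionAndStatementId fragment carrying the partition name,
 * statement id, row count, and delta directories; the coordinator is expected to
 * consume these fragments when it finalizes the ACID transaction.
 */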
@Override
public CompletableFuture<Collection<Slice>> finish()
{
if (deleteFileWriter.isEmpty()) {
return completedFuture(ImmutableList.of());
}
OrcFileWriter deleteWriter = (OrcFileWriter) deleteFileWriter.get();
deleteWriter.setMaxWriteId(maxWriteId);
deleteWriter.commit();
Optional<String> deltaDirectoryString;
switch (updateKind) {
case DELETE:
deltaDirectoryString = Optional.empty();
break;
case UPDATE:
OrcFileWriter insertWriter = (OrcFileWriter) insertFileWriter.get();
insertWriter.setMaxWriteId(maxWriteId);
insertWriter.commit();
checkArgument(deltaDirectory.isPresent(), "deltaDirectory not present");
deltaDirectoryString = Optional.of(deltaDirectory.get().toString());
break;
default:
throw new IllegalArgumentException("Unknown UpdateKind " + updateKind);
}
Slice fragment = Slices.wrappedBuffer(CODEC.toJsonBytes(new PartitionAndStatementId(
partitionName,
statementId,
rowCount,
deleteDeltaDirectory.toString(),
deltaDirectoryString)));
return completedFuture(ImmutableList.of(fragment));
}

@Override
public long getCompletedBytes()
{
return hivePageSource.getCompletedBytes();
}

@Override
public long getReadTimeNanos()
{
return hivePageSource.getReadTimeNanos();
}

@Override
public boolean isFinished()
{
return closed;
}
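
/**
 * For UPDATE queries, only the columns that the new values depend on are returned to
 * the engine; everything else is projected away using the dependency channels
 * computed in the constructor.
 */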
@Override
public Page getNextPage()
{
Page page = hivePageSource.getNextPage();
if (page == null) {
close();
return null;
}
if (transaction.isUpdate()) {
HiveUpdateProcessor updateProcessor = transaction.getUpdateProcessor().orElseThrow(() -> new IllegalArgumentException("updateProcessor not present"));
List<Integer> channels = dependencyChannels.orElseThrow(() -> new IllegalArgumentException("dependencyChannels not present"));
return updateProcessor.removeNonDependencyColumns(page, channels);
}
else {
return page;
}
}

@Override
public long getSystemMemoryUsage()
{
return hivePageSource.getSystemMemoryUsage();
}

@Override
public void close()
{
if (closed) {
return;
}
closed = true;
try {
hivePageSource.close();
}
catch (Exception e) {
throw new TrinoException(HIVE_WRITER_CLOSE_ERROR, e);
}
}
}