/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.facebook.presto.iceberg.delete;

import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.common.Page;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.RunLengthEncodedBlock;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.iceberg.CommitTaskData;
import com.facebook.presto.iceberg.FileFormat;
import com.facebook.presto.iceberg.IcebergFileWriter;
import com.facebook.presto.iceberg.IcebergFileWriterFactory;
import com.facebook.presto.iceberg.MetricsWrapper;
import com.facebook.presto.iceberg.PartitionData;
import com.facebook.presto.spi.ConnectorPageSink;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import io.airlift.slice.Slice;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.PartitionSpecParser;
import org.apache.iceberg.Schema;
import org.apache.iceberg.io.LocationProvider;

import java.util.Collection;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;

import static com.facebook.presto.common.Utils.nativeValueToBlock;
import static com.facebook.presto.hive.util.ConfigurationUtils.toJobConf;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_BAD_DATA;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_ROLLBACK_ERROR;
import static com.facebook.presto.iceberg.IcebergUtil.partitionDataFromJson;
import static io.airlift.slice.Slices.utf8Slice;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
import static java.util.concurrent.CompletableFuture.completedFuture;
import static org.apache.iceberg.MetadataColumns.DELETE_FILE_PATH;
import static org.apache.iceberg.MetadataColumns.DELETE_FILE_POS;
import static org.apache.iceberg.TableProperties.DEFAULT_WRITE_METRICS_MODE;

public class IcebergDeletePageSink
        implements ConnectorPageSink
{
    private final PartitionSpec partitionSpec;
    private final Optional<PartitionData> partitionData;
    private final IcebergFileWriterFactory fileWriterFactory;
    private final HdfsEnvironment hdfsEnvironment;
    private final HdfsContext hdfsContext;
    private final JobConf jobConf;
    private final JsonCodec<CommitTaskData> jsonCodec;
    private final ConnectorSession session;
    private final String dataFile;
    private final FileFormat fileFormat;
    private Path outputPath;

    private final IcebergPositionDeleteWriter positionDeleteWriter;

    private long writtenBytes;
    private long validationCpuNanos;
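    // Delete files are written with full column-level metrics ("write.metadata.metrics.default" = "full").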
    private static final MetricsConfig FULL_METRICS_CONFIG = MetricsConfig.fromProperties(ImmutableMap.of(DEFAULT_WRITE_METRICS_MODE, "full"));

    public IcebergDeletePageSink(
            Schema outputSchema,
            String partitionSpecAsJson,
            Optional<String> partitionDataAsJson,
            LocationProvider locationProvider,
            IcebergFileWriterFactory fileWriterFactory,
            HdfsEnvironment hdfsEnvironment,
            HdfsContext hdfsContext,
            JsonCodec<CommitTaskData> jsonCodec,
            ConnectorSession session,
            String dataFile,
            FileFormat fileFormat)
    {
        this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null");
        this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
        this.hdfsContext = requireNonNull(hdfsContext, "hdfsContext is null");
        this.jobConf = toJobConf(hdfsEnvironment.getConfiguration(hdfsContext, new Path(locationProvider.newDataLocation("delete-file"))));
        this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null");
        this.session = requireNonNull(session, "session is null");
        this.dataFile = requireNonNull(dataFile, "dataFile is null");
        this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
        this.partitionSpec = PartitionSpecParser.fromJson(outputSchema, partitionSpecAsJson);
        this.partitionData = partitionDataFromJson(partitionSpec, partitionDataAsJson);
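        // Give the delete file a unique name and resolve its location, honoring the partition
        // directory when partition data is present.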
        String fileName = fileFormat.addExtension(String.format("delete_file_%s", randomUUID().toString()));
        this.outputPath = partitionData.map(partition -> new Path(locationProvider.newDataLocation(partitionSpec, partition, fileName)))
                .orElse(new Path(locationProvider.newDataLocation(fileName)));
        this.positionDeleteWriter = new IcebergPositionDeleteWriter();
    }

    @Override
    public long getCompletedBytes()
    {
        return writtenBytes;
    }

    @Override
    public long getSystemMemoryUsage()
    {
        return positionDeleteWriter.getSystemMemoryUsage();
    }

    @Override
    public long getValidationCpuNanos()
    {
        return validationCpuNanos;
    }

    @Override
    public CompletableFuture<?> appendPage(Page page)
    {
        positionDeleteWriter.appendPage(page);
        return NOT_BLOCKED;
    }

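    // Commits the position delete file and returns its metadata as a JSON-encoded CommitTaskData
    // fragment to be applied during the Iceberg table commit.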
    @Override
    public CompletableFuture<Collection<Slice>> finish()
    {
        ImmutableList.Builder<Slice> commitTasks = ImmutableList.builder();

        IcebergFileWriter writer = positionDeleteWriter.getWriter();

        writer.commit();

        CommitTaskData task = new CommitTaskData(
                outputPath.toString(),
                writer.getFileSizeInBytes(),
                new MetricsWrapper(writer.getMetrics()),
                partitionSpec.specId(),
                partitionData.map(PartitionData::toJson),
                fileFormat,
                dataFile);

        commitTasks.add(wrappedBuffer(jsonCodec.toJsonBytes(task)));

        writtenBytes = writer.getWrittenBytes();
        validationCpuNanos = writer.getValidationCpuNanos();

        return completedFuture(commitTasks.build());
    }

    @Override
    public void abort()
    {
        try {
            positionDeleteWriter.getWriter().rollback();
        }
        catch (Throwable t) {
            throw new PrestoException(ICEBERG_ROLLBACK_ERROR, "Exception during rollback", t);
        }
    }

    private class IcebergPositionDeleteWriter
    {
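        // Position delete files have a fixed two-column schema: the path of the data file
        // and the position of the deleted row within that file.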
        private final Schema positionDeleteSchema = new Schema(DELETE_FILE_PATH, DELETE_FILE_POS);
        private final IcebergFileWriter writer;

        public IcebergPositionDeleteWriter()
        {
            this.writer = createWriter();
        }

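        /**
         * Expects a single-channel page of row positions. The data file path is prepended as a
         * run-length-encoded block so each written row is a (file_path, pos) pair.
         */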
        public void appendPage(Page page)
        {
            if (page.getChannelCount() == 1) {
                hdfsEnvironment.doAs(session.getUser(), () -> {
                    Block[] blocks = new Block[2];
                    blocks[0] = new RunLengthEncodedBlock(nativeValueToBlock(VarcharType.VARCHAR, utf8Slice(dataFile)), page.getPositionCount());
                    blocks[1] = page.getBlock(0);
                    writer.appendRows(new Page(blocks));
                });
            }
            else {
                throw new PrestoException(ICEBERG_BAD_DATA, "Expecting Page with one channel but got " + page.getChannelCount());
            }
        }

        public IcebergFileWriter getWriter()
        {
            return writer;
        }

        public long getSystemMemoryUsage()
        {
            return writer.getSystemMemoryUsage();
        }

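        // Creates the underlying file writer for the delete file, collecting full column metrics.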
        private IcebergFileWriter createWriter()
        {
            return fileWriterFactory.createFileWriter(
                    outputPath,
                    positionDeleteSchema,
                    jobConf,
                    session,
                    hdfsContext,
                    fileFormat,
                    FULL_METRICS_CONFIG);
        }
    }
}