com.facebook.presto.iceberg.IcebergPageSink
Presto - Iceberg Connector
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.iceberg;
import com.facebook.airlift.json.JsonCodec;
import com.facebook.presto.common.Page;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.function.SqlFunctionProperties;
import com.facebook.presto.common.type.BigintType;
import com.facebook.presto.common.type.BooleanType;
import com.facebook.presto.common.type.DateType;
import com.facebook.presto.common.type.DecimalType;
import com.facebook.presto.common.type.DoubleType;
import com.facebook.presto.common.type.IntegerType;
import com.facebook.presto.common.type.RealType;
import com.facebook.presto.common.type.SmallintType;
import com.facebook.presto.common.type.TimeType;
import com.facebook.presto.common.type.TimestampType;
import com.facebook.presto.common.type.TinyintType;
import com.facebook.presto.common.type.Type;
import com.facebook.presto.common.type.VarbinaryType;
import com.facebook.presto.common.type.VarcharType;
import com.facebook.presto.hive.HdfsContext;
import com.facebook.presto.hive.HdfsEnvironment;
import com.facebook.presto.iceberg.PartitionTransforms.ColumnTransform;
import com.facebook.presto.spi.ConnectorPageSink;
import com.facebook.presto.spi.ConnectorSession;
import com.facebook.presto.spi.PageIndexer;
import com.facebook.presto.spi.PageIndexerFactory;
import com.facebook.presto.spi.PrestoException;
import com.google.common.collect.ImmutableList;
import io.airlift.slice.Slice;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.mapred.JobConf;
import org.apache.iceberg.FileFormat;
import org.apache.iceberg.MetricsConfig;
import org.apache.iceberg.PartitionField;
import org.apache.iceberg.PartitionSpec;
import org.apache.iceberg.Schema;
import org.apache.iceberg.Table;
import org.apache.iceberg.io.LocationProvider;
import java.time.Instant;
import java.time.LocalDateTime;
import java.time.ZoneId;
import java.time.ZoneOffset;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.CompletableFuture;
import java.util.function.Function;
import static com.facebook.presto.common.type.Decimals.readBigDecimal;
import static com.facebook.presto.hive.util.ConfigurationUtils.toJobConf;
import static com.facebook.presto.iceberg.IcebergErrorCode.ICEBERG_TOO_MANY_OPEN_PARTITIONS;
import static com.facebook.presto.iceberg.PartitionTransforms.getColumnTransform;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static io.airlift.slice.Slices.wrappedBuffer;
import static java.lang.Float.intBitsToFloat;
import static java.lang.Math.toIntExact;
import static java.lang.String.format;
import static java.util.Objects.requireNonNull;
import static java.util.UUID.randomUUID;
import static java.util.concurrent.CompletableFuture.completedFuture;
import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
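/**
 * ConnectorPageSink that writes pages into Iceberg data files. Incoming pages are partitioned
 * according to the table's partition spec, one file writer is kept open per partition value
 * (bounded by maxOpenWriters), and finish() commits the writers and returns the serialized
 * CommitTaskData for every written file.
 */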
public class IcebergPageSink
implements ConnectorPageSink
{
private static final int MAX_PAGE_POSITIONS = 4096;
@SuppressWarnings({"FieldCanBeLocal", "FieldMayBeStatic"})
private final int maxOpenWriters;
private final Schema outputSchema;
private final PartitionSpec partitionSpec;
private final LocationProvider locationProvider;
private final IcebergFileWriterFactory fileWriterFactory;
private final HdfsEnvironment hdfsEnvironment;
private final HdfsContext hdfsContext;
private final JobConf jobConf;
private final JsonCodec<CommitTaskData> jsonCodec;
private final ConnectorSession session;
private final FileFormat fileFormat;
private final PagePartitioner pagePartitioner;
private final List<WriteContext> writers = new ArrayList<>();
private long writtenBytes;
private long systemMemoryUsage;
private long validationCpuNanos;
private Table table;
public IcebergPageSink(
Table table,
LocationProvider locationProvider,
IcebergFileWriterFactory fileWriterFactory,
PageIndexerFactory pageIndexerFactory,
HdfsEnvironment hdfsEnvironment,
HdfsContext hdfsContext,
List<IcebergColumnHandle> inputColumns,
JsonCodec<CommitTaskData> jsonCodec,
ConnectorSession session,
FileFormat fileFormat,
int maxOpenWriters)
{
requireNonNull(inputColumns, "inputColumns is null");
this.table = requireNonNull(table, "table is null");
this.outputSchema = table.schema();
this.partitionSpec = table.spec();
this.locationProvider = requireNonNull(locationProvider, "locationProvider is null");
this.fileWriterFactory = requireNonNull(fileWriterFactory, "fileWriterFactory is null");
this.hdfsEnvironment = requireNonNull(hdfsEnvironment, "hdfsEnvironment is null");
this.hdfsContext = requireNonNull(hdfsContext, "hdfsContext is null");
this.jobConf = toJobConf(hdfsEnvironment.getConfiguration(hdfsContext, new Path(locationProvider.newDataLocation("data-file"))));
this.jsonCodec = requireNonNull(jsonCodec, "jsonCodec is null");
this.session = requireNonNull(session, "session is null");
this.fileFormat = requireNonNull(fileFormat, "fileFormat is null");
this.maxOpenWriters = maxOpenWriters;
this.pagePartitioner = new PagePartitioner(pageIndexerFactory,
toPartitionColumns(inputColumns, partitionSpec),
session);
}
@Override
public long getCompletedBytes()
{
return writtenBytes;
}
@Override
public long getSystemMemoryUsage()
{
return systemMemoryUsage;
}
@Override
public long getValidationCpuNanos()
{
return validationCpuNanos;
}
@Override
public CompletableFuture<?> appendPage(Page page)
{
hdfsEnvironment.doAs(session.getUser(), () -> doAppend(page));
return NOT_BLOCKED;
}
@Override
public CompletableFuture<Collection<Slice>> finish()
{
Collection<Slice> commitTasks = new ArrayList<>();
for (WriteContext context : writers) {
context.getWriter().commit();
CommitTaskData task = new CommitTaskData(
context.getPath().toString(),
context.writer.getFileSizeInBytes(),
new MetricsWrapper(context.writer.getMetrics()),
partitionSpec.specId(),
context.getPartitionData().map(PartitionData::toJson),
fileFormat,
null);
commitTasks.add(wrappedBuffer(jsonCodec.toJsonBytes(task)));
}
writtenBytes = writers.stream()
.mapToLong(writer -> writer.getWriter().getWrittenBytes())
.sum();
validationCpuNanos = writers.stream()
.mapToLong(writer -> writer.getWriter().getValidationCpuNanos())
.sum();
return completedFuture(commitTasks);
}
@Override
public void abort()
{
RuntimeException error = null;
for (WriteContext context : writers) {
try {
if (context != null) {
context.getWriter().rollback();
}
}
catch (Throwable t) {
if (error == null) {
error = new RuntimeException("Exception during rollback");
}
error.addSuppressed(t);
}
}
if (error != null) {
throw error;
}
}
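// Splits a large page into chunks of at most MAX_PAGE_POSITIONS positions and writes each chunk.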
private void doAppend(Page page)
{
while (page.getPositionCount() > MAX_PAGE_POSITIONS) {
Page chunk = page.getRegion(0, MAX_PAGE_POSITIONS);
page = page.getRegion(MAX_PAGE_POSITIONS, page.getPositionCount() - MAX_PAGE_POSITIONS);
writePage(chunk);
}
writePage(page);
}
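// Assigns every position of the page to a partition writer, filters the page per writer when it
// spans multiple partitions, and accumulates the written-bytes and memory deltas of each append.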
private void writePage(Page page)
{
int[] writerIndexes = getWriterIndexes(page);
// position count for each writer
int[] sizes = new int[writers.size()];
for (int index : writerIndexes) {
sizes[index]++;
}
// record which positions are used by which writer
int[][] writerPositions = new int[writers.size()][];
int[] counts = new int[writers.size()];
for (int position = 0; position < page.getPositionCount(); position++) {
int index = writerIndexes[position];
int count = counts[index];
if (count == 0) {
writerPositions[index] = new int[sizes[index]];
}
writerPositions[index][count] = position;
counts[index]++;
}
// invoke the writers
for (int index = 0; index < writerPositions.length; index++) {
int[] positions = writerPositions[index];
if (positions == null) {
continue;
}
// if write is partitioned across multiple writers, filter page using dictionary blocks
Page pageForWriter = page;
if (positions.length != page.getPositionCount()) {
verify(positions.length == counts[index]);
pageForWriter = pageForWriter.getPositions(positions, 0, positions.length);
}
IcebergFileWriter writer = writers.get(index).getWriter();
long currentWritten = writer.getWrittenBytes();
long currentMemory = writer.getSystemMemoryUsage();
writer.appendRows(pageForWriter);
writtenBytes += (writer.getWrittenBytes() - currentWritten);
systemMemoryUsage += (writer.getSystemMemoryUsage() - currentMemory);
}
}
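// Computes the writer index for every position via the PagePartitioner, enforces the
// maxOpenWriters limit, and lazily creates a writer the first time a partition value is seen.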
private int[] getWriterIndexes(Page page)
{
int[] writerIndexes = pagePartitioner.partitionPage(page);
if (pagePartitioner.getMaxIndex() >= maxOpenWriters) {
throw new PrestoException(ICEBERG_TOO_MANY_OPEN_PARTITIONS, format("Exceeded limit of %s open writers for partitions", maxOpenWriters));
}
// expand writers list to new size
while (writers.size() <= pagePartitioner.getMaxIndex()) {
writers.add(null);
}
// create missing writers
Page transformedPage = pagePartitioner.getTransformedPage();
for (int position = 0; position < page.getPositionCount(); position++) {
int writerIndex = writerIndexes[position];
if (writers.get(writerIndex) != null) {
continue;
}
Optional<PartitionData> partitionData = getPartitionData(pagePartitioner.getColumns(), transformedPage, position);
WriteContext writer = createWriter(partitionData);
writers.set(writerIndex, writer);
}
verify(writers.size() == pagePartitioner.getMaxIndex() + 1);
verify(!writers.contains(null));
return writerIndexes;
}
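// Creates a file writer for a single partition (or for an unpartitioned table): a random file name
// with the format extension is resolved through the LocationProvider and the writer is built with
// the table's metrics configuration.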
private WriteContext createWriter(Optional<PartitionData> partitionData)
{
String fileName = fileFormat.addExtension(randomUUID().toString());
Path outputPath = partitionData.map(partition -> new Path(locationProvider.newDataLocation(partitionSpec, partition, fileName)))
.orElse(new Path(locationProvider.newDataLocation(fileName)));
IcebergFileWriter writer = fileWriterFactory.createFileWriter(
outputPath,
outputSchema,
jobConf,
session,
hdfsContext,
fileFormat,
MetricsConfig.forTable(table));
return new WriteContext(writer, outputPath, partitionData);
}
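// Extracts the transformed partition values for one position into a PartitionData;
// empty for unpartitioned tables.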
private Optional<PartitionData> getPartitionData(List<PartitionColumn> columns, Page transformedPage, int position)
{
if (columns.isEmpty()) {
return Optional.empty();
}
Object[] values = new Object[columns.size()];
for (int i = 0; i < columns.size(); i++) {
PartitionColumn column = columns.get(i);
Block block = transformedPage.getBlock(i);
Type type = column.getResultType();
values[i] = getIcebergValue(block, position, type);
}
return Optional.of(new PartitionData(values));
}
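// Converts a value from a Presto block into the Java representation Iceberg expects for partition
// values, e.g. integer-like and DATE types as int, REAL as float, TIMESTAMP and TIME as microseconds.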
public static Object getIcebergValue(Block block, int position, Type type)
{
if (block.isNull(position)) {
return null;
}
if (type instanceof BigintType) {
return type.getLong(block, position);
}
if (type instanceof IntegerType || type instanceof SmallintType || type instanceof TinyintType || type instanceof DateType) {
return toIntExact(type.getLong(block, position));
}
if (type instanceof BooleanType) {
return type.getBoolean(block, position);
}
if (type instanceof DecimalType) {
return readBigDecimal((DecimalType) type, block, position);
}
if (type instanceof RealType) {
return intBitsToFloat(toIntExact(type.getLong(block, position)));
}
if (type instanceof DoubleType) {
return type.getDouble(block, position);
}
if (type instanceof VarbinaryType) {
return type.getSlice(block, position).getBytes();
}
if (type instanceof VarcharType) {
return type.getSlice(block, position).toStringUtf8();
}
if (type instanceof TimestampType) {
long timestamp = type.getLong(block, position);
return ((TimestampType) type).getPrecision() == MILLISECONDS ? MILLISECONDS.toMicros(timestamp) : timestamp;
}
if (type instanceof TimeType) {
long time = type.getLong(block, position);
return MILLISECONDS.toMicros(time);
}
throw new UnsupportedOperationException("Type not supported as partition column: " + type.getDisplayName());
}
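// Under legacy timestamp semantics, re-encodes the value as the session-zone local date-time
// expressed at UTC, so non-identity partition transforms operate on the local date-time rather
// than on the raw epoch value.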
public static Object adjustTimestampForPartitionTransform(SqlFunctionProperties functionProperties, Type type, Object value)
{
if (type instanceof TimestampType && functionProperties.isLegacyTimestamp()) {
long timestampValue = (long) value;
TimestampType timestampType = (TimestampType) type;
Instant instant = Instant.ofEpochSecond(timestampType.getPrecision().toSeconds(timestampValue),
timestampType.getPrecision().toNanos(timestampValue % timestampType.getPrecision().convert(1, SECONDS)));
LocalDateTime localDateTime = instant
.atZone(ZoneId.of(functionProperties.getTimeZoneKey().getId()))
.toLocalDateTime();
return timestampType.getPrecision().convert(localDateTime.toEpochSecond(ZoneOffset.UTC), SECONDS) +
timestampType.getPrecision().convert(localDateTime.getNano(), NANOSECONDS);
}
return value;
}
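// Resolves every partition spec field to its input channel and column transform, producing the
// PartitionColumn descriptors consumed by the PagePartitioner.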
private static List<PartitionColumn> toPartitionColumns(List<IcebergColumnHandle> handles, PartitionSpec partitionSpec)
{
Map<Integer, Integer> idChannels = new HashMap<>();
for (int i = 0; i < handles.size(); i++) {
idChannels.put(handles.get(i).getId(), i);
}
return partitionSpec.fields().stream()
.map(field -> {
Integer channel = idChannels.get(field.sourceId());
checkArgument(channel != null, "partition field not found: %s", field);
Type inputType = handles.get(channel).getType();
ColumnTransform transform = getColumnTransform(field, inputType);
return new PartitionColumn(field, channel, inputType, transform.getType(), transform.getTransform());
})
.collect(toImmutableList());
}
private static class WriteContext
{
private final IcebergFileWriter writer;
private final Path path;
private final Optional<PartitionData> partitionData;
public WriteContext(IcebergFileWriter writer, Path path, Optional<PartitionData> partitionData)
{
this.writer = requireNonNull(writer, "writer is null");
this.path = requireNonNull(path, "path is null");
this.partitionData = requireNonNull(partitionData, "partitionData is null");
}
public IcebergFileWriter getWriter()
{
return writer;
}
public Path getPath()
{
return path;
}
public Optional<PartitionData> getPartitionData()
{
return partitionData;
}
}
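// Applies the partition column transforms to each page and uses a PageIndexer over the transformed
// values to assign a stable writer index to every distinct partition value.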
private static class PagePartitioner
{
private final PageIndexer pageIndexer;
private final List<PartitionColumn> columns;
private final ConnectorSession session;
private Page transformedPage;
public PagePartitioner(PageIndexerFactory pageIndexerFactory,
List<PartitionColumn> columns,
ConnectorSession session)
{
this.pageIndexer = pageIndexerFactory.createPageIndexer(columns.stream()
.map(PartitionColumn::getResultType)
.collect(toImmutableList()));
this.columns = ImmutableList.copyOf(columns);
this.session = session;
}
public int[] partitionPage(Page page)
{
Block[] blocks = new Block[columns.size()];
for (int i = 0; i < columns.size(); i++) {
PartitionColumn column = columns.get(i);
Block block = adjustBlockIfNecessary(column, page.getBlock(column.getSourceChannel()));
blocks[i] = column.getBlockTransform().apply(block);
}
this.transformedPage = new Page(page.getPositionCount(), blocks);
return pageIndexer.indexPage(transformedPage);
}
public Page getTransformedPage()
{
return this.transformedPage;
}
public int getMaxIndex()
{
return pageIndexer.getMaxIndex();
}
public List<PartitionColumn> getColumns()
{
return columns;
}
private Block adjustBlockIfNecessary(PartitionColumn column, Block block)
{
// adjust legacy timestamp values to be compatible with Iceberg's non-identity transform calculation
if (column.sourceType instanceof TimestampType && session.getSqlFunctionProperties().isLegacyTimestamp() && !column.getField().transform().isIdentity()) {
TimestampType timestampType = (TimestampType) column.sourceType;
BlockBuilder blockBuilder = timestampType.createBlockBuilder(null, block.getPositionCount());
for (int t = 0; t < block.getPositionCount(); t++) {
if (block.isNull(t)) {
blockBuilder.appendNull();
}
else {
long adjustedTimestampValue = (long) adjustTimestampForPartitionTransform(
session.getSqlFunctionProperties(),
timestampType,
timestampType.getLong(block, t));
timestampType.writeLong(blockBuilder, adjustedTimestampValue);
}
}
return blockBuilder.build();
}
return block;
}
}
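// Describes a partition field: the input channel it reads from, its source and result types, and
// the block-level transform that produces the partition value.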
private static class PartitionColumn
{
private final PartitionField field;
private final int sourceChannel;
private final Type sourceType;
private final Type resultType;
private final Function<Block, Block> blockTransform;
public PartitionColumn(PartitionField field, int sourceChannel, Type sourceType, Type resultType, Function<Block, Block> blockTransform)
{
this.field = requireNonNull(field, "field is null");
this.sourceChannel = sourceChannel;
this.sourceType = requireNonNull(sourceType, "sourceType is null");
this.resultType = requireNonNull(resultType, "resultType is null");
this.blockTransform = requireNonNull(blockTransform, "blockTransform is null");
}
public PartitionField getField()
{
return field;
}
public int getSourceChannel()
{
return sourceChannel;
}
public Type getSourceType()
{
return sourceType;
}
public Type getResultType()
{
return resultType;
}
public Function<Block, Block> getBlockTransform()
{
return blockTransform;
}
}
}
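/*
 * Illustrative usage sketch: how a ConnectorPageSink such as IcebergPageSink is driven. The
 * helper class and its inputs are hypothetical and only show the append/finish call sequence;
 * nothing here goes beyond the ConnectorPageSink contract used by the class above.
 */
class IcebergPageSinkUsageSketch
{
    private IcebergPageSinkUsageSketch() {}

    static Collection<Slice> writeAll(ConnectorPageSink sink, List<Page> pages)
    {
        for (Page page : pages) {
            // IcebergPageSink.appendPage always returns NOT_BLOCKED, so there is nothing to await here
            sink.appendPage(page);
        }
        // finish() commits every open writer and returns one serialized CommitTaskData fragment per file
        return sink.finish().join();
    }
}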