/*
* Copyright (c) 2022-2024 Snowflake Computing Inc. All rights reserved.
*/
package net.snowflake.ingest.streaming.internal;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import net.snowflake.ingest.utils.Constants;
import net.snowflake.ingest.utils.ErrorCode;
import net.snowflake.ingest.utils.Logging;
import net.snowflake.ingest.utils.Pair;
import net.snowflake.ingest.utils.SFException;
import org.apache.parquet.Preconditions;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.hadoop.SnowflakeParquetWriter;
import org.apache.parquet.schema.MessageType;
/**
* Converts {@link ChannelData} buffered in {@link RowBuffer} to the Parquet format for faster
* processing.
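*
* <p>A minimal usage sketch (illustrative only; the schema, size limit, codec and channel data are
* assumed values, and inside the SDK the flusher is normally constructed and driven by the internal
* flush path rather than called directly):
*
* <pre>{@code
* ParquetFlusher flusher =
*     new ParquetFlusher(
*         schema,                                      // Parquet MessageType of the table
*         256L * 1024 * 1024,                          // max chunk size in bytes (example value)
*         Optional.empty(),                            // no limit on row groups per file
*         Constants.BdecParquetCompression.GZIP,       // compression codec
*         ParquetProperties.WriterVersion.PARQUET_1_0, // Parquet writer version
*         false,                                       // dictionary encoding disabled
*         false);                                      // not Iceberg streaming
* SerializationResult result =
*     flusher.serialize(channelsDataPerTable, "example_blob.bdec", 0);
* }</pre>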
*/
public class ParquetFlusher implements Flusher<ParquetChunkData> {
private static final Logging logger = new Logging(ParquetFlusher.class);
private final MessageType schema;
private final long maxChunkSizeInBytes;
private final Optional<Integer> maxRowGroups;
private final Constants.BdecParquetCompression bdecParquetCompression;
private final ParquetProperties.WriterVersion parquetWriterVersion;
private final boolean enableDictionaryEncoding;
private final boolean enableIcebergStreaming;
/** Construct parquet flusher from its schema. */
public ParquetFlusher(
MessageType schema,
long maxChunkSizeInBytes,
Optional<Integer> maxRowGroups,
Constants.BdecParquetCompression bdecParquetCompression,
ParquetProperties.WriterVersion parquetWriterVersion,
boolean enableDictionaryEncoding,
boolean enableIcebergStreaming) {
this.schema = schema;
this.maxChunkSizeInBytes = maxChunkSizeInBytes;
this.maxRowGroups = maxRowGroups;
this.bdecParquetCompression = bdecParquetCompression;
this.parquetWriterVersion = parquetWriterVersion;
this.enableDictionaryEncoding = enableDictionaryEncoding;
this.enableIcebergStreaming = enableIcebergStreaming;
}
@Override
public SerializationResult serialize(
List<ChannelData<ParquetChunkData>> channelsDataPerTable,
String filePath,
long chunkStartOffset)
throws IOException {
return serializeFromJavaObjects(channelsDataPerTable, filePath, chunkStartOffset);
}
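/**
* Serializes all channel data belonging to one table into a single in-memory Parquet file: channel
* metadata and per-column EP statistics are combined across channels, every buffered row is written
* through a {@code SnowflakeParquetWriter}, and the resulting bytes are returned together with the
* combined metadata as a {@code SerializationResult}.
*/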
private SerializationResult serializeFromJavaObjects(
List<ChannelData<ParquetChunkData>> channelsDataPerTable,
String filePath,
long chunkStartOffset)
throws IOException {
List<ChannelMetadata> channelsMetadataList = new ArrayList<>();
long rowCount = 0L;
float chunkEstimatedUncompressedSize = 0f;
String firstChannelFullyQualifiedTableName = null;
Map<String, RowBufferStats> columnEpStatsMapCombined = null;
List<List<Object>> rows = null;
SnowflakeParquetWriter parquetWriter;
ByteArrayOutputStream mergedData = new ByteArrayOutputStream();
Pair<Long, Long> chunkMinMaxInsertTimeInMs = null;
for (ChannelData<ParquetChunkData> data : channelsDataPerTable) {
// Create channel metadata
ChannelMetadata channelMetadata =
ChannelMetadata.builder()
.setOwningChannelFromContext(data.getChannelContext())
.setRowSequencer(data.getRowSequencer())
.setOffsetToken(data.getEndOffsetToken())
.setStartOffsetToken(data.getStartOffsetToken())
.build();
// Add channel metadata to the metadata list
channelsMetadataList.add(channelMetadata);
logger.logDebug(
"Parquet Flusher: Start building channel={}, rowCount={}, bufferSize={} in blob={}",
data.getChannelContext().getFullyQualifiedName(),
data.getRowCount(),
data.getBufferSize(),
filePath);
if (rows == null) {
columnEpStatsMapCombined = data.getColumnEps();
rows = new ArrayList<>();
firstChannelFullyQualifiedTableName = data.getChannelContext().getFullyQualifiedTableName();
chunkMinMaxInsertTimeInMs = data.getMinMaxInsertTimeInMs();
} else {
// This method assumes that channelsDataPerTable is grouped by table. We double-check
// here and throw an error if the assumption is violated
if (!data.getChannelContext()
.getFullyQualifiedTableName()
.equals(firstChannelFullyQualifiedTableName)) {
throw new SFException(ErrorCode.INVALID_DATA_IN_CHUNK);
}
columnEpStatsMapCombined =
ChannelData.getCombinedColumnStatsMap(columnEpStatsMapCombined, data.getColumnEps());
chunkMinMaxInsertTimeInMs =
ChannelData.getCombinedMinMaxInsertTimeInMs(
chunkMinMaxInsertTimeInMs, data.getMinMaxInsertTimeInMs());
}
rows.addAll(data.getVectors().rows);
rowCount += data.getRowCount();
chunkEstimatedUncompressedSize += data.getBufferSize();
logger.logDebug(
"Parquet Flusher: Finish building channel={}, rowCount={}, bufferSize={} in blob={}",
data.getChannelContext().getFullyQualifiedName(),
data.getRowCount(),
data.getBufferSize(),
filePath);
}
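// All channels in this chunk belong to the same table (enforced above), so the Parquet key-value
// metadata captured by the first channel's buffer is used for the whole file.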
Map<String, String> metadata = channelsDataPerTable.get(0).getVectors().metadata;
addFileIdToMetadata(filePath, chunkStartOffset, metadata);
parquetWriter =
new SnowflakeParquetWriter(
mergedData,
schema,
metadata,
firstChannelFullyQualifiedTableName,
maxChunkSizeInBytes,
maxRowGroups,
bdecParquetCompression,
parquetWriterVersion,
enableDictionaryEncoding);
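// Write every buffered row, then close the writer so the Parquet footer and row counts are
// finalized before verification.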
rows.forEach(parquetWriter::writeRow);
parquetWriter.close();
this.verifyRowCounts(parquetWriter, rowCount, channelsDataPerTable, rows.size());
return new SerializationResult(
channelsMetadataList,
columnEpStatsMapCombined,
rowCount,
chunkEstimatedUncompressedSize,
mergedData,
chunkMinMaxInsertTimeInMs,
parquetWriter.getExtendedMetadataSize());
}
private void addFileIdToMetadata(
String filePath, long chunkStartOffset, Map<String, String> metadata) {
// We insert the filename in the file itself as metadata so that streams can work on replicated
// mixed tables. For a more detailed discussion on the topic see SNOW-561447 and
// http://go/streams-on-replicated-mixed-tables, and
// http://go/managed-iceberg-replication-change-tracking
// Using chunk offset as suffix ensures that for interleaved tables, the file
// id key is unique for each chunk. Each chunk is logically a separate Parquet file that happens
// to be bundled together.
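// Hypothetical example: a blob short name "abcd.bdec" written for a chunk starting at offset 3
// is tagged with PRIMARY_FILE_ID_KEY "abcd_3.bdec"; the chunk starting at offset 0 keeps the
// plain short name "abcd.bdec".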
if (chunkStartOffset == 0) {
metadata.put(
enableIcebergStreaming
? Constants.ASSIGNED_FULL_FILE_NAME_KEY
: Constants.PRIMARY_FILE_ID_KEY,
StreamingIngestUtils.getShortname(filePath));
} else {
Preconditions.checkState(
!enableIcebergStreaming, "Iceberg streaming is not supported with non-zero offsets");
String shortName = StreamingIngestUtils.getShortname(filePath);
final String[] parts = shortName.split("\\.");
Preconditions.checkState(parts.length == 2, "Invalid file name format");
metadata.put(
Constants.PRIMARY_FILE_ID_KEY,
String.format("%s_%d.%s", parts[0], chunkStartOffset, parts[1]));
}
}
/**
* Validates that the row count collected in channel metadata matches both the row count in the
* Parquet footer and the row count reported by the Parquet writer.
*
* @param writer Parquet writer writing the data
* @param totalMetadataRowCount Row count calculated during metadata collection
* @param channelsDataPerTable Channel data, used to log per-channel row counts if there is a
*     mismatch
* @param javaSerializationTotalRowCount Total row count when Java object serialization is used.
*     Used only for logging purposes if there is a mismatch.
*/
private void verifyRowCounts(
SnowflakeParquetWriter writer,
long totalMetadataRowCount,
List<ChannelData<ParquetChunkData>> channelsDataPerTable,
long javaSerializationTotalRowCount) {
long parquetTotalRowsWritten = writer.getRowsWritten();
List<Long> parquetFooterRowsPerBlock = writer.getRowCountsFromFooter();
long parquetTotalRowsInFooter = 0;
for (long perBlockCount : parquetFooterRowsPerBlock) {
parquetTotalRowsInFooter += perBlockCount;
}
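// Three independently derived counts must agree: rows recorded in channel metadata, rows the
// writer reports having written, and rows declared across the row groups in the Parquet footer.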
if (parquetTotalRowsInFooter != totalMetadataRowCount
|| parquetTotalRowsWritten != totalMetadataRowCount) {
final String perChannelRowCountsInMetadata =
channelsDataPerTable.stream()
.map(x -> String.valueOf(x.getRowCount()))
.collect(Collectors.joining(","));
final String channelNames =
channelsDataPerTable.stream()
.map(x -> String.valueOf(x.getChannelContext().getName()))
.collect(Collectors.joining(","));
final String perBlockRowCountsInFooter =
parquetFooterRowsPerBlock.stream().map(String::valueOf).collect(Collectors.joining(","));
final long channelsCountInMetadata = channelsDataPerTable.size();
throw new SFException(
ErrorCode.INTERNAL_ERROR,
String.format(
"The number of rows in Parquet does not match the number of rows in metadata. "
+ "parquetTotalRowsInFooter=%d "
+ "totalMetadataRowCount=%d "
+ "parquetTotalRowsWritten=%d "
+ "perChannelRowCountsInMetadata=%s "
+ "perBlockRowCountsInFooter=%s "
+ "channelsCountInMetadata=%d "
+ "countOfSerializedJavaObjects=%d "
+ "channelNames=%s",
parquetTotalRowsInFooter,
totalMetadataRowCount,
parquetTotalRowsWritten,
perChannelRowCountsInMetadata,
perBlockRowCountsInFooter,
channelsCountInMetadata,
javaSerializationTotalRowCount,
channelNames));
}
}
}