All downloads are free. Search and download functionality uses the official Maven repository.

net.snowflake.ingest.streaming.internal.ParquetChunkData Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Copyright (c) 2022 Snowflake Computing Inc. All rights reserved.
 */

package net.snowflake.ingest.streaming.internal;

import java.io.ByteArrayOutputStream;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.parquet.hadoop.BdecParquetWriter;

/** Parquet data holder to buffer rows. */
public class ParquetChunkData {
  // buffered rows serialized into Java objects. Needed for the Parquet w/o memory optimization.
  final List> rows;

  final BdecParquetWriter parquetWriter;
  final ByteArrayOutputStream output;
  final Map metadata;

  /**
   * Construct parquet data chunk.
   *
   * @param rows buffered row data as a list
   * @param parquetWriter buffered parquet row data
   * @param output byte array file output
   * @param metadata chunk metadata
   */
  public ParquetChunkData(
      List> rows,
      BdecParquetWriter parquetWriter,
      ByteArrayOutputStream output,
      Map metadata) {
    this.rows = rows;
    this.parquetWriter = parquetWriter;
    this.output = output;
    // create a defensive copy of the parameter map because the argument map passed here
    // may currently be shared across multiple threads.
    this.metadata = createDefensiveCopy(metadata);
  }

  private Map createDefensiveCopy(final Map metadata) {
    final Map copy = new HashMap<>(metadata);
    for (String k : metadata.keySet()) {
      copy.put(k, metadata.get(k));
    }
    return copy;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy