All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.scio.parquet.avro.ParquetAvroFileBasedSink Maven / Gradle / Ivy

/*
 * Copyright 2017 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.spotify.scio.parquet.avro;

import com.spotify.scio.parquet.BeamOutputFile;
import com.spotify.scio.parquet.WriterUtils;
import java.nio.channels.WritableByteChannel;
import org.apache.avro.Schema;
import org.apache.beam.sdk.io.FileBasedSink;
import org.apache.beam.sdk.io.fs.ResourceId;
import org.apache.beam.sdk.io.hadoop.SerializableConfiguration;
import org.apache.beam.sdk.options.ValueProvider;
import org.apache.beam.sdk.util.MimeTypes;
import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.avro.AvroParquetWriter;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;

/**
 * A Beam {@link FileBasedSink} that writes elements of type {@code T} to Parquet files through
 * {@link AvroParquetWriter}.
 *
 * <p>The Avro schema is held as its string form and the Hadoop configuration is wrapped in a
 * {@link SerializableConfiguration}; the schema is re-parsed each time a writer is created.
 * (Presumably this keeps the sink and its write operation serializable for distribution to
 * workers -- confirm against the Avro/Hadoop versions in use.)
 *
 * @param <T> type of the records written to the Parquet files
 */
public class ParquetAvroFileBasedSink<T> extends FileBasedSink<T, Void, T> {

  private final String schemaString;
  private final SerializableConfiguration conf;
  private final CompressionCodecName compression;

  /**
   * Creates a sink for the given output location and Avro schema.
   *
   * @param baseOutputFileName base output resource passed through to {@link FileBasedSink}
   * @param dynamicDestinations destination/naming policy passed through to {@link FileBasedSink}
   * @param schema Avro schema handed to the Parquet writer; stored as {@code schema.toString()}
   * @param conf Hadoop configuration forwarded to the Parquet writer
   * @param compression compression codec forwarded to the Parquet writer
   */
  public ParquetAvroFileBasedSink(
      ValueProvider<ResourceId> baseOutputFileName,
      FileBasedSink.DynamicDestinations<T, Void, T> dynamicDestinations,
      Schema schema,
      Configuration conf,
      CompressionCodecName compression) {
    super(baseOutputFileName, dynamicDestinations);
    this.schemaString = schema.toString();
    this.conf = new SerializableConfiguration(conf);
    this.compression = compression;
  }

  /** Returns a write operation carrying this sink's schema, configuration and codec. */
  @Override
  public FileBasedSink.WriteOperation<Void, T> createWriteOperation() {
    return new ParquetAvroWriteOperation<>(this, schemaString, conf, compression);
  }

  // =======================================================================
  // WriteOperation
  // =======================================================================

  /**
   * Write operation that produces {@link ParquetAvroWriter} instances.
   *
   * @param <T> type of the records written
   */
  static class ParquetAvroWriteOperation<T> extends WriteOperation<Void, T> {

    private final String schemaString;
    private final SerializableConfiguration conf;
    private final CompressionCodecName compression;

    /**
     * @param sink the sink this operation belongs to
     * @param schemaString string form of the Avro schema, parsed in {@link #createWriter()}
     * @param conf Hadoop configuration forwarded to each writer
     * @param compression compression codec forwarded to each writer
     */
    public ParquetAvroWriteOperation(
        FileBasedSink<T, Void, T> sink,
        String schemaString,
        SerializableConfiguration conf,
        CompressionCodecName compression) {
      super(sink);
      this.schemaString = schemaString;
      this.conf = conf;
      this.compression = compression;
    }

    /** Parses the stored schema string and returns a new writer bound to this operation. */
    @Override
    public Writer<Void, T> createWriter() throws Exception {
      // A fresh Schema.Parser is used for every writer; the schema is parsed from its string form.
      return new ParquetAvroWriter<>(
          this, new Schema.Parser().parse(schemaString), conf, compression);
    }
  }

  // =======================================================================
  // Writer
  // =======================================================================

  /**
   * Writer that forwards each element to a {@link ParquetWriter} built over the output channel.
   *
   * @param <T> type of the records written
   */
  static class ParquetAvroWriter<T> extends FileBasedSink.Writer<Void, T> {

    private final Schema schema;
    private final SerializableConfiguration conf;
    private final CompressionCodecName compression;
    // Assigned in prepareWrite; used by write and finishWrite.
    private ParquetWriter<T> writer;

    /**
     * @param writeOperation the operation that created this writer
     * @param schema Avro schema passed to the Parquet writer builder
     * @param conf Hadoop configuration passed to the Parquet writer
     * @param compression compression codec passed to the Parquet writer
     */
    public ParquetAvroWriter(
        WriteOperation<Void, T> writeOperation,
        Schema schema,
        SerializableConfiguration conf,
        CompressionCodecName compression) {
      super(writeOperation, MimeTypes.BINARY);
      this.schema = schema;
      this.conf = conf;
      this.compression = compression;
    }

    /** Builds the underlying Parquet writer on top of the supplied channel. */
    @Override
    protected void prepareWrite(WritableByteChannel channel) throws Exception {
      BeamOutputFile outputFile = BeamOutputFile.of(channel);
      Configuration configuration = conf.get();
      AvroParquetWriter.Builder<T> builder =
          AvroParquetWriter.<T>builder(outputFile).withSchema(schema);
      writer = WriterUtils.build(builder, configuration, compression);
    }

    /** Writes a single element through the underlying Parquet writer. */
    @Override
    public void write(T value) throws Exception {
      writer.write(value);
    }

    /** Closes the underlying Parquet writer. */
    @Override
    protected void finishWrite() throws Exception {
      writer.close();
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy