All downloads are free. Search and download functionality uses the official Maven repository.

com.spotify.parquet.tensorflow.TensorflowExampleParquetWriter Maven / Gradle / Ivy

/*
 * Copyright 2023 Spotify AB
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.spotify.parquet.tensorflow;

import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.column.ParquetProperties;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import org.apache.parquet.io.OutputFile;
import org.tensorflow.metadata.v0.Schema;
import org.tensorflow.proto.example.Example;

/**
 * A {@link ParquetWriter} specialised for TensorFlow {@link Example} records.
 *
 * <p>Instances are normally created through {@link #builder(OutputFile)}, supplying the TensorFlow
 * metadata {@link Schema} via {@link Builder#withSchema(Schema)}.
 */
public class TensorflowExampleParquetWriter extends ParquetWriter<Example> {

  /**
   * Creates a new {@link Builder} targeting the given output file.
   *
   * @param file the output file handed to the underlying {@link ParquetWriter.Builder}
   * @return a fresh builder; call {@link Builder#withSchema(Schema)} before building
   */
  public static Builder builder(OutputFile file) {
    return new Builder(file);
  }

  /**
   * Package-private constructor that forwards every argument to the {@link ParquetWriter}
   * constructor.
   *
   * @param file destination path
   * @param writeSupport write support used to serialise {@link Example} records
   * @param compressionCodecName compression codec
   * @param blockSize block (row group) size passed to the superclass
   * @param pageSize page size; also used as the dictionary page size (see below)
   * @param enableDictionary whether dictionary encoding is enabled
   * @param enableValidation whether validation is enabled
   * @param writerVersion Parquet writer version
   * @param conf Hadoop configuration
   * @throws IOException if the superclass constructor fails
   */
  TensorflowExampleParquetWriter(
      Path file,
      WriteSupport<Example> writeSupport,
      CompressionCodecName compressionCodecName,
      int blockSize,
      int pageSize,
      boolean enableDictionary,
      boolean enableValidation,
      ParquetProperties.WriterVersion writerVersion,
      Configuration conf)
      throws IOException {
    // pageSize is intentionally passed twice: the superclass constructor takes the data page
    // size and the dictionary page size as separate arguments, and the same value serves both.
    super(
        file,
        writeSupport,
        compressionCodecName,
        blockSize,
        pageSize,
        pageSize,
        enableDictionary,
        enableValidation,
        writerVersion,
        conf);
  }

  /**
   * Builds the write support for the given schema: the schema is converted with a
   * {@code TensorflowExampleSchemaConverter} configured from {@code conf}, and both the converted
   * and the original schema are handed to {@code TensorflowExampleWriteSupport}.
   */
  private static WriteSupport<Example> writeSupport(Configuration conf, Schema schema) {
    return new TensorflowExampleWriteSupport(
        new TensorflowExampleSchemaConverter(conf).convert(schema), schema);
  }

  /**
   * Fluent builder for writers of {@link Example} records.
   *
   * <p>The builder is parameterised with itself as the {@code SELF} type so that the setters
   * inherited from {@link ParquetWriter.Builder} return {@code Builder} and can be chained with
   * {@link #withSchema(Schema)} in any order.
   */
  public static class Builder extends ParquetWriter.Builder<Example, Builder> {
    private Schema schema;

    protected Builder(OutputFile file) {
      super(file);
    }

    @Override
    protected Builder self() {
      return this;
    }

    /**
     * Sets the TensorFlow metadata schema from which the write support is derived.
     *
     * <p>NOTE(review): no null check is performed here; whatever value is set is passed to the
     * schema converter unchanged when the write support is created.
     *
     * @param schema the TensorFlow metadata schema
     * @return this builder
     */
    public Builder withSchema(Schema schema) {
      this.schema = schema;
      return this;
    }

    @Override
    protected WriteSupport<Example> getWriteSupport(Configuration conf) {
      return TensorflowExampleParquetWriter.writeSupport(conf, schema);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy