All Downloads are FREE. Search and download functionalities are using the official Maven repository.

me.lyh.parquet.tensorflow.ExampleParquetWriter Maven / Gradle / Ivy

The newest version!
package me.lyh.parquet.tensorflow;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.parquet.hadoop.ParquetWriter;
import org.apache.parquet.hadoop.api.WriteSupport;
import org.apache.parquet.io.OutputFile;
import org.apache.parquet.schema.MessageType;
import org.apache.parquet.schema.MessageTypeParser;
import org.tensorflow.proto.example.Example;

public class ExampleParquetWriter {
  private ExampleParquetWriter() {}

  public static Builder builder(Path path) {
    return new Builder(path);
  }

  public static Builder builder(OutputFile file) {
    return new Builder(file);
  }

  public static class Builder extends ParquetWriter.Builder {
    private Schema schema;

    protected Builder(Path path) {
      super(path);
    }

    protected Builder(OutputFile file) {
      super(file);
    }

    @Override
    protected Builder self() {
      return this;
    }

    public Builder withSchema(Schema schema) {
      this.schema = schema;
      return this;
    }

    @Override
    protected WriteSupport getWriteSupport(Configuration conf) {
      if (schema == null) {
        String schemaString = conf.get(ExampleParquetOutputFormat.SCHEMA_KEY);
        MessageType parquet = MessageTypeParser.parseMessageType(schemaString);
        schema = Schema.fromParquet(parquet);
      }
      return new ExampleWriteSupport(schema);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy