
com.netease.arctic.spark.writer.UnkeyedUpsertSparkWriter

package com.netease.arctic.spark.writer;

import com.netease.arctic.data.ChangeAction;
import com.netease.arctic.io.writer.OutputFileFactory;
import com.netease.arctic.io.writer.SortedPosDeleteWriter;
import com.netease.arctic.spark.SparkInternalRowCastWrapper;
import com.netease.arctic.spark.SparkInternalRowWrapper;
import com.netease.arctic.spark.io.ArcticSparkBaseTaskWriter;
import com.netease.arctic.table.ArcticTable;
import com.netease.arctic.shade.org.apache.iceberg.DataFile;
import com.netease.arctic.shade.org.apache.iceberg.DeleteFile;
import com.netease.arctic.shade.org.apache.iceberg.FileFormat;
import com.netease.arctic.shade.org.apache.iceberg.PartitionKey;
import com.netease.arctic.shade.org.apache.iceberg.Schema;
import com.netease.arctic.shade.org.apache.iceberg.StructLike;
import com.netease.arctic.shade.org.apache.iceberg.io.FileAppenderFactory;
import com.netease.arctic.shade.org.apache.iceberg.io.TaskWriter;
import com.netease.arctic.shade.org.apache.iceberg.io.WriteResult;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;
import com.netease.arctic.shade.org.apache.iceberg.spark.SparkSchemaUtil;
import com.netease.arctic.shade.org.apache.spark.sql.catalyst.InternalRow;

import java.io.IOException;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static org.apache.spark.sql.types.DataTypes.IntegerType;
import static org.apache.spark.sql.types.DataTypes.StringType;

/**
 * A {@link TaskWriter} for unkeyed tables handling upsert input: DELETE rows are
 * written as Iceberg position deletes, all other rows go through the base data writer.
 */
public class UnkeyedUpsertSparkWriter<T> implements TaskWriter<T> {

  private final List<DeleteFile> completedDeleteFiles = Lists.newArrayList();
  private final List<DataFile> completedDataFiles = Lists.newArrayList();

  private final FileAppenderFactory<InternalRow> appenderFactory;
  private final OutputFileFactory fileFactory;
  private final FileFormat format;
  private final Schema schema;
  private final ArcticTable table;
  private final ArcticSparkBaseTaskWriter writer;
  private final Map<PartitionKey, SortedPosDeleteWriter<InternalRow>> writerMap = new HashMap<>();
  private boolean closed = false;

  public UnkeyedUpsertSparkWriter(ArcticTable table,
                                  FileAppenderFactory<InternalRow> appenderFactory,
                                  OutputFileFactory fileFactory,
                                  FileFormat format, Schema schema,
                                  ArcticSparkBaseTaskWriter writer) {
    this.table = table;
    this.appenderFactory = appenderFactory;
    this.fileFactory = fileFactory;
    this.format = format;
    this.schema = schema;
    this.writer = writer;
  }

  @Override
  public void write(T row) throws IOException {
    if (closed) {
      throw new IllegalStateException("Pos-delete writer for table " + table.id().toString() + " already closed");
    }

    SparkInternalRowCastWrapper internalRow = (SparkInternalRowCastWrapper) row;
    StructLike structLike = new SparkInternalRowWrapper(SparkSchemaUtil.convert(schema)).wrap(internalRow.getRow());
    PartitionKey partitionKey = new PartitionKey(table.spec(), schema);
    partitionKey.partition(structLike);
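    // Lazily create one position-delete writer per partition.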
    if (writerMap.get(partitionKey) == null) {
      SortedPosDeleteWriter<InternalRow> writer = new SortedPosDeleteWriter<>(appenderFactory,
          fileFactory, table.io(),
          format, partitionKey);
      writerMap.putIfAbsent(partitionKey, writer);
    }
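    // DELETE rows carry the target file path and row position in their last two
    // fields; record them as a position delete instead of writing a new data row.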
    if (internalRow.getChangeAction() == ChangeAction.DELETE) {
      SortedPosDeleteWriter<InternalRow> deleteWriter = writerMap.get(partitionKey);
      int numFields = internalRow.getRow().numFields();
      Object file = internalRow.getRow().get(numFields - 2, StringType);
      Object pos = internalRow.getRow().get(numFields - 1, IntegerType);
      deleteWriter.delete(file.toString(), Long.parseLong(pos.toString()), null);
    } else {
      this.writer.write(internalRow.getRow());
    }
  }

  @Override
  public void abort() throws IOException {
  }

  @Override
  public WriteResult complete() throws IOException {
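    // Flush every per-partition position-delete writer, then the data writer, and
    // report both file sets in a single WriteResult.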
    for (Map.Entry<PartitionKey, SortedPosDeleteWriter<InternalRow>> entry : writerMap.entrySet()) {
      completedDeleteFiles.addAll(entry.getValue().complete());
    }
    close();
    completedDataFiles.addAll(Arrays.asList(writer.complete().dataFiles()));
    return WriteResult.builder()
        .addDeleteFiles(completedDeleteFiles)
        .addDataFiles(completedDataFiles).build();
  }

  @Override
  public void close() throws IOException {
    this.closed = true;
  }
}
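For reference, a minimal usage sketch. It assumes the surrounding Arctic Spark write path has already built the table handle, appender and file factories, schema, and base task writer, and that incoming rows are already wrapped in SparkInternalRowCastWrapper (as the write method above expects). The UpsertWriteExample class and its writeAll method below are illustrative only, not part of the library.

package com.netease.arctic.spark.writer;

import com.netease.arctic.io.writer.OutputFileFactory;
import com.netease.arctic.spark.SparkInternalRowCastWrapper;
import com.netease.arctic.spark.io.ArcticSparkBaseTaskWriter;
import com.netease.arctic.table.ArcticTable;
import com.netease.arctic.shade.org.apache.iceberg.FileFormat;
import com.netease.arctic.shade.org.apache.iceberg.Schema;
import com.netease.arctic.shade.org.apache.iceberg.io.FileAppenderFactory;
import com.netease.arctic.shade.org.apache.iceberg.io.WriteResult;
import com.netease.arctic.shade.org.apache.spark.sql.catalyst.InternalRow;

import java.io.IOException;

public class UpsertWriteExample {

  // Hypothetical helper: drives one upsert write task end to end.
  static WriteResult writeAll(ArcticTable table,
                              FileAppenderFactory<InternalRow> appenderFactory,
                              OutputFileFactory fileFactory,
                              FileFormat format,
                              Schema schema,
                              ArcticSparkBaseTaskWriter baseWriter,
                              Iterable<SparkInternalRowCastWrapper> rows) throws IOException {
    UnkeyedUpsertSparkWriter<SparkInternalRowCastWrapper> taskWriter =
        new UnkeyedUpsertSparkWriter<>(table, appenderFactory, fileFactory, format, schema, baseWriter);
    try {
      for (SparkInternalRowCastWrapper row : rows) {
        // DELETE rows become position deletes, everything else is appended as data.
        taskWriter.write(row);
      }
      // Flushes all per-partition delete writers and the data writer.
      return taskWriter.complete();
    } catch (IOException | RuntimeException e) {
      taskWriter.abort();
      throw e;
    }
  }
}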



