All downloads are free. Search and download functionality uses the official Maven repository.

com.netease.arctic.io.reader.BaseIcebergPosDeleteReader Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.netease.arctic.io.reader;

import com.netease.arctic.io.ArcticFileIO;
import com.netease.arctic.shade.org.apache.iceberg.Accessor;
import com.netease.arctic.shade.org.apache.iceberg.DeleteFile;
import com.netease.arctic.shade.org.apache.iceberg.MetadataColumns;
import com.netease.arctic.shade.org.apache.iceberg.Schema;
import com.netease.arctic.shade.org.apache.iceberg.StructLike;
import com.netease.arctic.shade.org.apache.iceberg.data.Record;
import com.netease.arctic.shade.org.apache.iceberg.data.parquet.GenericParquetReaders;
import com.netease.arctic.shade.org.apache.iceberg.io.CloseableIterable;
import com.netease.arctic.shade.org.apache.iceberg.io.InputFile;
import com.netease.arctic.shade.org.apache.iceberg.parquet.Parquet;
import com.netease.arctic.shade.org.apache.iceberg.relocated.com.google.common.collect.Lists;

import java.util.List;

/**
 * Reader for positional delete files.
 *
 * <p>Each delete file is opened through the supplied {@link ArcticFileIO} and projected onto a
 * two-column schema made of {@link MetadataColumns#DELETE_FILE_PATH} and
 * {@link MetadataColumns#DELETE_FILE_POS}. Use {@link #readPath(Record)} and
 * {@link #readPos(Record)} to pull those two values out of a record returned by
 * {@link #readDeletes()}.
 */
public class BaseIcebergPosDeleteReader {

  /** Projection applied to every positional delete file: (file path, row position). */
  private static final Schema POS_DELETE_SCHEMA = new Schema(
      MetadataColumns.DELETE_FILE_PATH,
      MetadataColumns.DELETE_FILE_POS);
  /** Extracts the data-file path column from a record laid out as {@link #POS_DELETE_SCHEMA}. */
  private static final Accessor<StructLike> FILENAME_ACCESSOR = POS_DELETE_SCHEMA
      .accessorForField(MetadataColumns.DELETE_FILE_PATH.fieldId());
  /** Extracts the row-position column from a record laid out as {@link #POS_DELETE_SCHEMA}. */
  private static final Accessor<StructLike> POSITION_ACCESSOR = POS_DELETE_SCHEMA
      .accessorForField(MetadataColumns.DELETE_FILE_POS.fieldId());

  /** File IO used both to open the delete files and to wrap reader construction. */
  protected final ArcticFileIO fileIO;
  /** Positional delete files this reader iterates over, in the order given. */
  protected final List<DeleteFile> posDeleteFiles;

  /**
   * Creates a reader over the given positional delete files.
   *
   * @param fileIO file IO used to open each delete file
   * @param posDeleteFiles positional delete files to read
   */
  public BaseIcebergPosDeleteReader(ArcticFileIO fileIO, List<DeleteFile> posDeleteFiles) {
    this.fileIO = fileIO;
    this.posDeleteFiles = posDeleteFiles;
  }

  /**
   * Returns the records of all configured delete files as one concatenated iterable.
   *
   * @return records projected onto the (file path, position) schema
   */
  public CloseableIterable<Record> readDeletes() {
    // NOTE(review): Guava's Lists.transform is documented as a lazy view, so readDelete is
    // presumably invoked when the concatenated iterable is consumed rather than here - confirm.
    List<CloseableIterable<Record>> deletes = Lists.transform(posDeleteFiles, this::readDelete);
    return CloseableIterable.concat(deletes);
  }

  /**
   * Reads the data-file path column of a positional delete record.
   *
   * @param record a record produced by {@link #readDeletes()}
   * @return the value of the file path column, cast to {@code String}
   */
  public String readPath(Record record) {
    return (String) FILENAME_ACCESSOR.get(record);
  }

  /**
   * Reads the row-position column of a positional delete record.
   *
   * @param record a record produced by {@link #readDeletes()}
   * @return the value of the position column, cast to {@code Long}
   */
  public Long readPos(Record record) {
    return (Long) POSITION_ACCESSOR.get(record);
  }

  /**
   * Opens a single delete file and returns its records.
   *
   * @param deleteFile the delete file to open
   * @return records of the file projected onto the (file path, position) schema
   * @throws UnsupportedOperationException if the file format is anything other than Parquet
   */
  private CloseableIterable<Record> readDelete(DeleteFile deleteFile) {
    InputFile input = fileIO.newInputFile(deleteFile.path().toString());
    switch (deleteFile.format()) {
      case PARQUET:
        // NOTE(review): reuseContainers() presumably lets the reader recycle the Record instance
        // between iterations, so callers should copy values out before advancing - confirm.
        Parquet.ReadBuilder builder = Parquet.read(input)
            .project(POS_DELETE_SCHEMA)
            .reuseContainers()
            .createReaderFunc(fileSchema -> GenericParquetReaders.buildReader(POS_DELETE_SCHEMA, fileSchema));

        // Reader construction is delegated to fileIO.doAs rather than called directly.
        return fileIO.doAs(builder::build);
      default:
        throw new UnsupportedOperationException(String.format(
            "Cannot read deletes, %s is not a supported format: %s", deleteFile.format().name(), deleteFile.path()));
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy