All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netease.arctic.shade.org.apache.iceberg.V1Metadata Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package com.netease.arctic.shade.org.apache.iceberg;

import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import com.netease.arctic.shade.org.apache.avro.generic.IndexedRecord;
import com.netease.arctic.shade.org.apache.iceberg.avro.AvroSchemaUtil;
import com.netease.arctic.shade.org.apache.iceberg.types.Types;

import static com.netease.arctic.shade.org.apache.iceberg.types.Types.NestedField.required;

class V1Metadata {
  private V1Metadata() {
  }

  static final Schema MANIFEST_LIST_SCHEMA = new Schema(
      ManifestFile.PATH, ManifestFile.LENGTH, ManifestFile.SPEC_ID, ManifestFile.SNAPSHOT_ID,
      ManifestFile.ADDED_FILES_COUNT, ManifestFile.EXISTING_FILES_COUNT, ManifestFile.DELETED_FILES_COUNT,
      ManifestFile.PARTITION_SUMMARIES,
      ManifestFile.ADDED_ROWS_COUNT, ManifestFile.EXISTING_ROWS_COUNT, ManifestFile.DELETED_ROWS_COUNT);

  /**
   * A wrapper class to write any ManifestFile implementation to Avro using the v1 schema.
   *
   * This is used to maintain compatibility with v1 by writing manifest list files with the old schema, instead of
   * writing a sequence number into metadata files in v1 tables.
   */
  static class IndexedManifestFile implements ManifestFile, IndexedRecord {
    private static final com.netease.arctic.shade.org.apache.avro.Schema AVRO_SCHEMA =
        AvroSchemaUtil.convert(MANIFEST_LIST_SCHEMA, "manifest_file");

    private ManifestFile wrapped = null;

    public ManifestFile wrap(ManifestFile file) {
      this.wrapped = file;
      return this;
    }

    @Override
    public com.netease.arctic.shade.org.apache.avro.Schema getSchema() {
      return AVRO_SCHEMA;
    }

    @Override
    public void put(int i, Object v) {
      throw new UnsupportedOperationException("Cannot modify IndexedManifestFile wrapper via put");
    }

    @Override
    public Object get(int pos) {
      switch (pos) {
        case 0:
          return path();
        case 1:
          return length();
        case 2:
          return partitionSpecId();
        case 3:
          return snapshotId();
        case 4:
          return addedFilesCount();
        case 5:
          return existingFilesCount();
        case 6:
          return deletedFilesCount();
        case 7:
          return partitions();
        case 8:
          return addedRowsCount();
        case 9:
          return existingRowsCount();
        case 10:
          return deletedRowsCount();
        default:
          throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
      }
    }

    @Override
    public String path() {
      return wrapped.path();
    }

    @Override
    public long length() {
      return wrapped.length();
    }

    @Override
    public int partitionSpecId() {
      return wrapped.partitionSpecId();
    }

    @Override
    public ManifestContent content() {
      return wrapped.content();
    }

    @Override
    public long sequenceNumber() {
      return wrapped.sequenceNumber();
    }

    @Override
    public long minSequenceNumber() {
      return wrapped.minSequenceNumber();
    }

    @Override
    public Long snapshotId() {
      return wrapped.snapshotId();
    }

    @Override
    public boolean hasAddedFiles() {
      return wrapped.hasAddedFiles();
    }

    @Override
    public Integer addedFilesCount() {
      return wrapped.addedFilesCount();
    }

    @Override
    public Long addedRowsCount() {
      return wrapped.addedRowsCount();
    }

    @Override
    public boolean hasExistingFiles() {
      return wrapped.hasExistingFiles();
    }

    @Override
    public Integer existingFilesCount() {
      return wrapped.existingFilesCount();
    }

    @Override
    public Long existingRowsCount() {
      return wrapped.existingRowsCount();
    }

    @Override
    public boolean hasDeletedFiles() {
      return wrapped.hasDeletedFiles();
    }

    @Override
    public Integer deletedFilesCount() {
      return wrapped.deletedFilesCount();
    }

    @Override
    public Long deletedRowsCount() {
      return wrapped.deletedRowsCount();
    }

    @Override
    public List partitions() {
      return wrapped.partitions();
    }

    @Override
    public ManifestFile copy() {
      return wrapped.copy();
    }
  }

  static Schema entrySchema(Types.StructType partitionType) {
    return wrapFileSchema(dataFileSchema(partitionType));
  }

  static Schema wrapFileSchema(Types.StructType fileSchema) {
    // this is used to build projection schemas
    return new Schema(
        ManifestEntry.STATUS, ManifestEntry.SNAPSHOT_ID,
        required(ManifestEntry.DATA_FILE_ID, "data_file", fileSchema));
  }

  private static final Types.NestedField BLOCK_SIZE = required(105, "block_size_in_bytes", Types.LongType.get());

  static Types.StructType dataFileSchema(Types.StructType partitionType) {
    return Types.StructType.of(
        DataFile.FILE_PATH,
        DataFile.FILE_FORMAT,
        required(DataFile.PARTITION_ID, DataFile.PARTITION_NAME, partitionType),
        DataFile.RECORD_COUNT,
        DataFile.FILE_SIZE,
        BLOCK_SIZE,
        DataFile.COLUMN_SIZES,
        DataFile.VALUE_COUNTS,
        DataFile.NULL_VALUE_COUNTS,
        DataFile.NAN_VALUE_COUNTS,
        DataFile.LOWER_BOUNDS,
        DataFile.UPPER_BOUNDS,
        DataFile.KEY_METADATA,
        DataFile.SPLIT_OFFSETS,
        DataFile.SORT_ORDER_ID
    );
  }

  /**
   * Wrapper used to write a ManifestEntry to v1 metadata.
   */
  static class IndexedManifestEntry implements ManifestEntry, IndexedRecord {
    private final com.netease.arctic.shade.org.apache.avro.Schema avroSchema;
    private final IndexedDataFile fileWrapper;
    private ManifestEntry wrapped = null;

    IndexedManifestEntry(Types.StructType partitionType) {
      this.avroSchema = AvroSchemaUtil.convert(entrySchema(partitionType), "manifest_entry");
      this.fileWrapper = new IndexedDataFile(avroSchema.getField("data_file").schema());
    }

    public IndexedManifestEntry wrap(ManifestEntry entry) {
      this.wrapped = entry;
      return this;
    }

    @Override
    public com.netease.arctic.shade.org.apache.avro.Schema getSchema() {
      return avroSchema;
    }

    @Override
    public void put(int i, Object v) {
      throw new UnsupportedOperationException("Cannot modify IndexedManifestEntry wrapper via put");
    }

    @Override
    public Object get(int i) {
      switch (i) {
        case 0:
          return wrapped.status().id();
        case 1:
          return wrapped.snapshotId();
        case 2:
          DataFile file = wrapped.file();
          if (file != null) {
            return fileWrapper.wrap(file);
          }
          return null;
        default:
          throw new UnsupportedOperationException("Unknown field ordinal: " + i);
      }
    }

    @Override
    public Status status() {
      return wrapped.status();
    }

    @Override
    public Long snapshotId() {
      return wrapped.snapshotId();
    }

    @Override
    public void setSnapshotId(long snapshotId) {
      wrapped.setSnapshotId(snapshotId);
    }

    @Override
    public Long sequenceNumber() {
      return wrapped.sequenceNumber();
    }

    @Override
    public void setSequenceNumber(long sequenceNumber) {
      wrapped.setSequenceNumber(sequenceNumber);
    }

    @Override
    public DataFile file() {
      return wrapped.file();
    }

    @Override
    public ManifestEntry copy() {
      return wrapped.copy();
    }

    @Override
    public ManifestEntry copyWithoutStats() {
      return wrapped.copyWithoutStats();
    }
  }

  static class IndexedDataFile implements DataFile, IndexedRecord {
    private static final long DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;

    private final com.netease.arctic.shade.org.apache.avro.Schema avroSchema;
    private final IndexedStructLike partitionWrapper;
    private DataFile wrapped = null;

    IndexedDataFile(com.netease.arctic.shade.org.apache.avro.Schema avroSchema) {
      this.avroSchema = avroSchema;
      this.partitionWrapper = new IndexedStructLike(avroSchema.getField("partition").schema());
    }

    IndexedDataFile wrap(DataFile file) {
      this.wrapped = file;
      return this;
    }

    @Override
    public Object get(int pos) {
      switch (pos) {
        case 0:
          return wrapped.path().toString();
        case 1:
          return wrapped.format() != null ? wrapped.format().toString() : null;
        case 2:
          return partitionWrapper.wrap(wrapped.partition());
        case 3:
          return wrapped.recordCount();
        case 4:
          return wrapped.fileSizeInBytes();
        case 5:
          return DEFAULT_BLOCK_SIZE;
        case 6:
          return wrapped.columnSizes();
        case 7:
          return wrapped.valueCounts();
        case 8:
          return wrapped.nullValueCounts();
        case 9:
          return wrapped.nanValueCounts();
        case 10:
          return wrapped.lowerBounds();
        case 11:
          return wrapped.upperBounds();
        case 12:
          return wrapped.keyMetadata();
        case 13:
          return wrapped.splitOffsets();
        case 14:
          return wrapped.sortOrderId();
      }
      throw new IllegalArgumentException("Unknown field ordinal: " + pos);
    }

    @Override
    public void put(int i, Object v) {
      throw new UnsupportedOperationException("Cannot modify IndexedDataFile wrapper via put");
    }

    @Override
    public com.netease.arctic.shade.org.apache.avro.Schema getSchema() {
      return avroSchema;
    }

    @Override
    public Long pos() {
      return null;
    }

    @Override
    public int specId() {
      return wrapped.specId();
    }

    @Override
    public FileContent content() {
      return wrapped.content();
    }

    @Override
    public CharSequence path() {
      return wrapped.path();
    }

    @Override
    public FileFormat format() {
      return wrapped.format();
    }

    @Override
    public StructLike partition() {
      return wrapped.partition();
    }

    @Override
    public long recordCount() {
      return wrapped.recordCount();
    }

    @Override
    public long fileSizeInBytes() {
      return wrapped.fileSizeInBytes();
    }

    @Override
    public Map columnSizes() {
      return wrapped.columnSizes();
    }

    @Override
    public Map valueCounts() {
      return wrapped.valueCounts();
    }

    @Override
    public Map nullValueCounts() {
      return wrapped.nullValueCounts();
    }

    @Override
    public Map nanValueCounts() {
      return wrapped.nanValueCounts();
    }

    @Override
    public Map lowerBounds() {
      return wrapped.lowerBounds();
    }

    @Override
    public Map upperBounds() {
      return wrapped.upperBounds();
    }

    @Override
    public ByteBuffer keyMetadata() {
      return wrapped.keyMetadata();
    }

    @Override
    public List splitOffsets() {
      return wrapped.splitOffsets();
    }

    @Override
    public Integer sortOrderId() {
      return wrapped.sortOrderId();
    }

    @Override
    public DataFile copy() {
      return wrapped.copy();
    }

    @Override
    public DataFile copyWithoutStats() {
      return wrapped.copyWithoutStats();
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy