org.apache.iceberg.V1Metadata Maven / Gradle / Ivy
Show all versions of iceberg-core Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import static org.apache.iceberg.types.Types.NestedField.required;
import java.nio.ByteBuffer;
import java.util.List;
import java.util.Map;
import org.apache.avro.generic.IndexedRecord;
import org.apache.iceberg.avro.AvroSchemaUtil;
import org.apache.iceberg.types.Types;
class V1Metadata {
private V1Metadata() {}
static final Schema MANIFEST_LIST_SCHEMA =
new Schema(
ManifestFile.PATH,
ManifestFile.LENGTH,
ManifestFile.SPEC_ID,
ManifestFile.SNAPSHOT_ID,
ManifestFile.ADDED_FILES_COUNT,
ManifestFile.EXISTING_FILES_COUNT,
ManifestFile.DELETED_FILES_COUNT,
ManifestFile.PARTITION_SUMMARIES,
ManifestFile.ADDED_ROWS_COUNT,
ManifestFile.EXISTING_ROWS_COUNT,
ManifestFile.DELETED_ROWS_COUNT);
/**
* A wrapper class to write any ManifestFile implementation to Avro using the v1 schema.
*
* This is used to maintain compatibility with v1 by writing manifest list files with the old
* schema, instead of writing a sequence number into metadata files in v1 tables.
*/
static class IndexedManifestFile implements ManifestFile, IndexedRecord {
private static final org.apache.avro.Schema AVRO_SCHEMA =
AvroSchemaUtil.convert(MANIFEST_LIST_SCHEMA, "manifest_file");
private ManifestFile wrapped = null;
public ManifestFile wrap(ManifestFile file) {
this.wrapped = file;
return this;
}
@Override
public org.apache.avro.Schema getSchema() {
return AVRO_SCHEMA;
}
@Override
public void put(int i, Object v) {
throw new UnsupportedOperationException("Cannot modify IndexedManifestFile wrapper via put");
}
@Override
public Object get(int pos) {
switch (pos) {
case 0:
return path();
case 1:
return length();
case 2:
return partitionSpecId();
case 3:
return snapshotId();
case 4:
return addedFilesCount();
case 5:
return existingFilesCount();
case 6:
return deletedFilesCount();
case 7:
return partitions();
case 8:
return addedRowsCount();
case 9:
return existingRowsCount();
case 10:
return deletedRowsCount();
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + pos);
}
}
@Override
public String path() {
return wrapped.path();
}
@Override
public long length() {
return wrapped.length();
}
@Override
public int partitionSpecId() {
return wrapped.partitionSpecId();
}
@Override
public ManifestContent content() {
return wrapped.content();
}
@Override
public long sequenceNumber() {
return wrapped.sequenceNumber();
}
@Override
public long minSequenceNumber() {
return wrapped.minSequenceNumber();
}
@Override
public Long snapshotId() {
return wrapped.snapshotId();
}
@Override
public boolean hasAddedFiles() {
return wrapped.hasAddedFiles();
}
@Override
public Integer addedFilesCount() {
return wrapped.addedFilesCount();
}
@Override
public Long addedRowsCount() {
return wrapped.addedRowsCount();
}
@Override
public boolean hasExistingFiles() {
return wrapped.hasExistingFiles();
}
@Override
public Integer existingFilesCount() {
return wrapped.existingFilesCount();
}
@Override
public Long existingRowsCount() {
return wrapped.existingRowsCount();
}
@Override
public boolean hasDeletedFiles() {
return wrapped.hasDeletedFiles();
}
@Override
public Integer deletedFilesCount() {
return wrapped.deletedFilesCount();
}
@Override
public Long deletedRowsCount() {
return wrapped.deletedRowsCount();
}
@Override
public List partitions() {
return wrapped.partitions();
}
@Override
public ManifestFile copy() {
return wrapped.copy();
}
}
static Schema entrySchema(Types.StructType partitionType) {
return wrapFileSchema(dataFileSchema(partitionType));
}
static Schema wrapFileSchema(Types.StructType fileSchema) {
// this is used to build projection schemas
return new Schema(
ManifestEntry.STATUS,
ManifestEntry.SNAPSHOT_ID,
required(ManifestEntry.DATA_FILE_ID, "data_file", fileSchema));
}
private static final Types.NestedField BLOCK_SIZE =
required(105, "block_size_in_bytes", Types.LongType.get());
static Types.StructType dataFileSchema(Types.StructType partitionType) {
return Types.StructType.of(
DataFile.FILE_PATH,
DataFile.FILE_FORMAT,
required(DataFile.PARTITION_ID, DataFile.PARTITION_NAME, partitionType),
DataFile.RECORD_COUNT,
DataFile.FILE_SIZE,
BLOCK_SIZE,
DataFile.COLUMN_SIZES,
DataFile.VALUE_COUNTS,
DataFile.NULL_VALUE_COUNTS,
DataFile.NAN_VALUE_COUNTS,
DataFile.LOWER_BOUNDS,
DataFile.UPPER_BOUNDS,
DataFile.KEY_METADATA,
DataFile.SPLIT_OFFSETS,
DataFile.SORT_ORDER_ID);
}
/** Wrapper used to write a ManifestEntry to v1 metadata. */
static class IndexedManifestEntry implements ManifestEntry, IndexedRecord {
private final org.apache.avro.Schema avroSchema;
private final IndexedDataFile fileWrapper;
private ManifestEntry wrapped = null;
IndexedManifestEntry(Types.StructType partitionType) {
this.avroSchema = AvroSchemaUtil.convert(entrySchema(partitionType), "manifest_entry");
this.fileWrapper = new IndexedDataFile(avroSchema.getField("data_file").schema());
}
public IndexedManifestEntry wrap(ManifestEntry entry) {
this.wrapped = entry;
return this;
}
@Override
public org.apache.avro.Schema getSchema() {
return avroSchema;
}
@Override
public void put(int i, Object v) {
throw new UnsupportedOperationException("Cannot modify IndexedManifestEntry wrapper via put");
}
@Override
public Object get(int i) {
switch (i) {
case 0:
return wrapped.status().id();
case 1:
return wrapped.snapshotId();
case 2:
DataFile file = wrapped.file();
if (file != null) {
return fileWrapper.wrap(file);
}
return null;
default:
throw new UnsupportedOperationException("Unknown field ordinal: " + i);
}
}
@Override
public Status status() {
return wrapped.status();
}
@Override
public Long snapshotId() {
return wrapped.snapshotId();
}
@Override
public void setSnapshotId(long snapshotId) {
wrapped.setSnapshotId(snapshotId);
}
@Override
public Long dataSequenceNumber() {
return wrapped.dataSequenceNumber();
}
@Override
public void setDataSequenceNumber(long dataSequenceNumber) {
wrapped.setDataSequenceNumber(dataSequenceNumber);
}
@Override
public Long fileSequenceNumber() {
return wrapped.fileSequenceNumber();
}
@Override
public void setFileSequenceNumber(long fileSequenceNumber) {
wrapped.setFileSequenceNumber(fileSequenceNumber);
}
@Override
public DataFile file() {
return wrapped.file();
}
@Override
public ManifestEntry copy() {
return wrapped.copy();
}
@Override
public ManifestEntry copyWithoutStats() {
return wrapped.copyWithoutStats();
}
}
static class IndexedDataFile implements DataFile, IndexedRecord {
private static final long DEFAULT_BLOCK_SIZE = 64 * 1024 * 1024;
private final org.apache.avro.Schema avroSchema;
private final IndexedStructLike partitionWrapper;
private DataFile wrapped = null;
IndexedDataFile(org.apache.avro.Schema avroSchema) {
this.avroSchema = avroSchema;
this.partitionWrapper = new IndexedStructLike(avroSchema.getField("partition").schema());
}
IndexedDataFile wrap(DataFile file) {
this.wrapped = file;
return this;
}
@Override
public Object get(int pos) {
switch (pos) {
case 0:
return wrapped.path().toString();
case 1:
return wrapped.format() != null ? wrapped.format().toString() : null;
case 2:
return partitionWrapper.wrap(wrapped.partition());
case 3:
return wrapped.recordCount();
case 4:
return wrapped.fileSizeInBytes();
case 5:
return DEFAULT_BLOCK_SIZE;
case 6:
return wrapped.columnSizes();
case 7:
return wrapped.valueCounts();
case 8:
return wrapped.nullValueCounts();
case 9:
return wrapped.nanValueCounts();
case 10:
return wrapped.lowerBounds();
case 11:
return wrapped.upperBounds();
case 12:
return wrapped.keyMetadata();
case 13:
return wrapped.splitOffsets();
case 14:
return wrapped.sortOrderId();
}
throw new IllegalArgumentException("Unknown field ordinal: " + pos);
}
@Override
public void put(int i, Object v) {
throw new UnsupportedOperationException("Cannot modify IndexedDataFile wrapper via put");
}
@Override
public org.apache.avro.Schema getSchema() {
return avroSchema;
}
@Override
public Long pos() {
return null;
}
@Override
public int specId() {
return wrapped.specId();
}
@Override
public FileContent content() {
return wrapped.content();
}
@Override
public CharSequence path() {
return wrapped.path();
}
@Override
public FileFormat format() {
return wrapped.format();
}
@Override
public StructLike partition() {
return wrapped.partition();
}
@Override
public long recordCount() {
return wrapped.recordCount();
}
@Override
public long fileSizeInBytes() {
return wrapped.fileSizeInBytes();
}
@Override
public Map columnSizes() {
return wrapped.columnSizes();
}
@Override
public Map valueCounts() {
return wrapped.valueCounts();
}
@Override
public Map nullValueCounts() {
return wrapped.nullValueCounts();
}
@Override
public Map nanValueCounts() {
return wrapped.nanValueCounts();
}
@Override
public Map lowerBounds() {
return wrapped.lowerBounds();
}
@Override
public Map upperBounds() {
return wrapped.upperBounds();
}
@Override
public ByteBuffer keyMetadata() {
return wrapped.keyMetadata();
}
@Override
public List splitOffsets() {
return wrapped.splitOffsets();
}
@Override
public Integer sortOrderId() {
return wrapped.sortOrderId();
}
@Override
public Long dataSequenceNumber() {
return wrapped.dataSequenceNumber();
}
@Override
public Long fileSequenceNumber() {
return wrapped.fileSequenceNumber();
}
@Override
public DataFile copy() {
return wrapped.copy();
}
@Override
public DataFile copyWithoutStats() {
return wrapped.copyWithoutStats();
}
}
}