org.apache.iceberg.DataFile Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-api Show documentation
Show all versions of iceberg-api Show documentation
A table format for huge analytic datasets
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import java.util.List;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;
import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;
/**
* Interface for data files listed in a table manifest.
*/
public interface DataFile extends ContentFile {
// fields for adding delete data files
Types.NestedField CONTENT = optional(134, "content", IntegerType.get(),
"Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
Types.NestedField FILE_PATH = required(100, "file_path", StringType.get(), "Location URI with FS scheme");
Types.NestedField FILE_FORMAT = required(101, "file_format", StringType.get(),
"File format name: avro, orc, or parquet");
Types.NestedField RECORD_COUNT = required(103, "record_count", LongType.get(), "Number of records in the file");
Types.NestedField FILE_SIZE = required(104, "file_size_in_bytes", LongType.get(), "Total file size in bytes");
Types.NestedField COLUMN_SIZES = optional(108, "column_sizes", MapType.ofRequired(117, 118,
IntegerType.get(), LongType.get()), "Map of column id to total size on disk");
Types.NestedField VALUE_COUNTS = optional(109, "value_counts", MapType.ofRequired(119, 120,
IntegerType.get(), LongType.get()), "Map of column id to total count, including null and NaN");
Types.NestedField NULL_VALUE_COUNTS = optional(110, "null_value_counts", MapType.ofRequired(121, 122,
IntegerType.get(), LongType.get()), "Map of column id to null value count");
Types.NestedField NAN_VALUE_COUNTS = optional(137, "nan_value_counts", MapType.ofRequired(138, 139,
IntegerType.get(), LongType.get()), "Map of column id to number of NaN values in the column");
Types.NestedField LOWER_BOUNDS = optional(125, "lower_bounds", MapType.ofRequired(126, 127,
IntegerType.get(), BinaryType.get()), "Map of column id to lower bound");
Types.NestedField UPPER_BOUNDS = optional(128, "upper_bounds", MapType.ofRequired(129, 130,
IntegerType.get(), BinaryType.get()), "Map of column id to upper bound");
Types.NestedField KEY_METADATA = optional(131, "key_metadata", BinaryType.get(), "Encryption key metadata blob");
Types.NestedField SPLIT_OFFSETS = optional(132, "split_offsets", ListType.ofRequired(133, LongType.get()),
"Splittable offsets");
Types.NestedField EQUALITY_IDS = optional(135, "equality_ids", ListType.ofRequired(136, IntegerType.get()),
"Equality comparison field IDs");
Types.NestedField SORT_ORDER_ID = optional(140, "sort_order_id", IntegerType.get(), "Sort order ID");
Types.NestedField SPEC_ID = optional(141, "spec_id", IntegerType.get(), "Partition spec ID");
int PARTITION_ID = 102;
String PARTITION_NAME = "partition";
String PARTITION_DOC = "Partition data tuple, schema based on the partition spec";
// NEXT ID TO ASSIGN: 142
static StructType getType(StructType partitionType) {
// IDs start at 100 to leave room for changes to ManifestEntry
return StructType.of(
CONTENT,
FILE_PATH,
FILE_FORMAT,
SPEC_ID,
required(PARTITION_ID, PARTITION_NAME, partitionType, PARTITION_DOC),
RECORD_COUNT,
FILE_SIZE,
COLUMN_SIZES,
VALUE_COUNTS,
NULL_VALUE_COUNTS,
NAN_VALUE_COUNTS,
LOWER_BOUNDS,
UPPER_BOUNDS,
KEY_METADATA,
SPLIT_OFFSETS,
EQUALITY_IDS,
SORT_ORDER_ID
);
}
/**
* @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
*/
@Override
default FileContent content() {
return FileContent.DATA;
}
@Override
default List equalityFieldIds() {
return null;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy