All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.DataFile Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import java.util.List;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.types.Types.BinaryType;
import org.apache.iceberg.types.Types.IntegerType;
import org.apache.iceberg.types.Types.ListType;
import org.apache.iceberg.types.Types.LongType;
import org.apache.iceberg.types.Types.MapType;
import org.apache.iceberg.types.Types.StringType;
import org.apache.iceberg.types.Types.StructType;

import static org.apache.iceberg.types.Types.NestedField.optional;
import static org.apache.iceberg.types.Types.NestedField.required;

/**
 * Interface for data files listed in a table manifest.
 */
public interface DataFile extends ContentFile {
  // fields for adding delete data files
  Types.NestedField CONTENT = optional(134, "content", IntegerType.get(),
      "Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
  Types.NestedField FILE_PATH = required(100, "file_path", StringType.get(), "Location URI with FS scheme");
  Types.NestedField FILE_FORMAT = required(101, "file_format", StringType.get(),
      "File format name: avro, orc, or parquet");
  Types.NestedField RECORD_COUNT = required(103, "record_count", LongType.get(), "Number of records in the file");
  Types.NestedField FILE_SIZE = required(104, "file_size_in_bytes", LongType.get(), "Total file size in bytes");
  Types.NestedField COLUMN_SIZES = optional(108, "column_sizes", MapType.ofRequired(117, 118,
      IntegerType.get(), LongType.get()), "Map of column id to total size on disk");
  Types.NestedField VALUE_COUNTS = optional(109, "value_counts", MapType.ofRequired(119, 120,
      IntegerType.get(), LongType.get()), "Map of column id to total count, including null and NaN");
  Types.NestedField NULL_VALUE_COUNTS = optional(110, "null_value_counts", MapType.ofRequired(121, 122,
      IntegerType.get(), LongType.get()), "Map of column id to null value count");
  Types.NestedField NAN_VALUE_COUNTS = optional(137, "nan_value_counts", MapType.ofRequired(138, 139,
      IntegerType.get(), LongType.get()), "Map of column id to number of NaN values in the column");
  Types.NestedField LOWER_BOUNDS = optional(125, "lower_bounds", MapType.ofRequired(126, 127,
      IntegerType.get(), BinaryType.get()), "Map of column id to lower bound");
  Types.NestedField UPPER_BOUNDS = optional(128, "upper_bounds", MapType.ofRequired(129, 130,
      IntegerType.get(), BinaryType.get()), "Map of column id to upper bound");
  Types.NestedField KEY_METADATA = optional(131, "key_metadata", BinaryType.get(), "Encryption key metadata blob");
  Types.NestedField SPLIT_OFFSETS = optional(132, "split_offsets", ListType.ofRequired(133, LongType.get()),
      "Splittable offsets");
  Types.NestedField EQUALITY_IDS = optional(135, "equality_ids", ListType.ofRequired(136, IntegerType.get()),
      "Equality comparison field IDs");
  Types.NestedField SORT_ORDER_ID = optional(140, "sort_order_id", IntegerType.get(), "Sort order ID");
  Types.NestedField SPEC_ID = optional(141, "spec_id", IntegerType.get(), "Partition spec ID");

  int PARTITION_ID = 102;
  String PARTITION_NAME = "partition";
  String PARTITION_DOC = "Partition data tuple, schema based on the partition spec";
  // NEXT ID TO ASSIGN: 142

  static StructType getType(StructType partitionType) {
    // IDs start at 100 to leave room for changes to ManifestEntry
    return StructType.of(
        CONTENT,
        FILE_PATH,
        FILE_FORMAT,
        SPEC_ID,
        required(PARTITION_ID, PARTITION_NAME, partitionType, PARTITION_DOC),
        RECORD_COUNT,
        FILE_SIZE,
        COLUMN_SIZES,
        VALUE_COUNTS,
        NULL_VALUE_COUNTS,
        NAN_VALUE_COUNTS,
        LOWER_BOUNDS,
        UPPER_BOUNDS,
        KEY_METADATA,
        SPLIT_OFFSETS,
        EQUALITY_IDS,
        SORT_ORDER_ID
    );
  }

  /**
   * @return the content stored in the file; one of DATA, POSITION_DELETES, or EQUALITY_DELETES
   */
  @Override
  default FileContent content() {
    return FileContent.DATA;
  }

  @Override
  default List equalityFieldIds() {
    return null;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy