org.apache.iceberg.spark.SparkSQLProperties Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-spark-3.5_2.13 Show documentation
A table format for huge analytic datasets
The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.spark;

import java.time.Duration;

public class SparkSQLProperties {

  // ---- Reads and planning ----

  // Toggle for vectorized reads
  public static final String VECTORIZATION_ENABLED = "spark.sql.iceberg.vectorization.enabled";

  // Whether split planning keeps the existing grouping of data (off by default)
  public static final String PRESERVE_DATA_GROUPING =
      "spark.sql.iceberg.planning.preserve-data-grouping";
  public static final boolean PRESERVE_DATA_GROUPING_DEFAULT = false;

  // Whether MAX/MIN/COUNT aggregates are pushed down to Iceberg (on by default)
  public static final String AGGREGATE_PUSH_DOWN_ENABLED =
      "spark.sql.iceberg.aggregate-push-down.enabled";
  public static final boolean AGGREGATE_PUSH_DOWN_ENABLED_DEFAULT = true;

  // Override for the data planning mode
  public static final String DATA_PLANNING_MODE = "spark.sql.iceberg.data-planning-mode";

  // Override for the delete planning mode
  public static final String DELETE_PLANNING_MODE = "spark.sql.iceberg.delete-planning-mode";

  // Override for the advisory partition size
  public static final String ADVISORY_PARTITION_SIZE = "spark.sql.iceberg.advisory-partition-size";

  // Whether locality information is reported to Spark while allocating input partitions
  public static final String LOCALITY = "spark.sql.iceberg.locality.enabled";

  // Whether available column statistics are reported to Spark for query optimization
  // (on by default)
  public static final String REPORT_COLUMN_STATS = "spark.sql.iceberg.report-column-stats";
  public static final boolean REPORT_COLUMN_STATS_DEFAULT = true;

  // ---- Writes ----

  // Whether the nullability check runs during writes (on by default)
  public static final String CHECK_NULLABILITY = "spark.sql.iceberg.check-nullability";
  public static final boolean CHECK_NULLABILITY_DEFAULT = true;

  // Whether the order of fields is checked during writes (on by default)
  public static final String CHECK_ORDERING = "spark.sql.iceberg.check-ordering";
  public static final boolean CHECK_ORDERING_DEFAULT = true;

  // Write distribution mode
  public static final String DISTRIBUTION_MODE = "spark.sql.iceberg.distribution-mode";

  // Whether the schema is merged during a write operation (off by default)
  public static final String MERGE_SCHEMA = "spark.sql.iceberg.merge-schema";
  public static final boolean MERGE_SCHEMA_DEFAULT = false;

  // Compression options applied to writes
  public static final String COMPRESSION_CODEC = "spark.sql.iceberg.compression-codec";
  public static final String COMPRESSION_LEVEL = "spark.sql.iceberg.compression-level";
  public static final String COMPRESSION_STRATEGY = "spark.sql.iceberg.compression-strategy";

  // ---- Write-audit-publish (WAP) ----

  // WAP ID for the write-audit-publish workflow.
  // If present, new snapshots are staged with this ID in the snapshot summary.
  public static final String WAP_ID = "spark.wap.id";

  // WAP branch for the write-audit-publish workflow.
  // If present, new snapshots are committed to this branch.
  public static final String WAP_BRANCH = "spark.wap.branch";

  // ---- Executor cache ----
  // NOTE(review): the keys below carry no description in this file; their exact semantics
  // should be confirmed against the code that reads them.

  // Enable flag for the executor cache (on by default)
  public static final String EXECUTOR_CACHE_ENABLED = "spark.sql.iceberg.executor-cache.enabled";
  public static final boolean EXECUTOR_CACHE_ENABLED_DEFAULT = true;

  // Enable flag for executor cache locality (off by default)
  public static final String EXECUTOR_CACHE_LOCALITY_ENABLED =
      "spark.sql.iceberg.executor-cache.locality.enabled";
  public static final boolean EXECUTOR_CACHE_LOCALITY_ENABLED_DEFAULT = false;

  // Executor cache timeout; defaults to 10 minutes
  public static final String EXECUTOR_CACHE_TIMEOUT = "spark.sql.iceberg.executor-cache.timeout";
  public static final Duration EXECUTOR_CACHE_TIMEOUT_DEFAULT = Duration.ofMinutes(10);

  // Executor cache max entry size; defaults to 64 MB
  public static final String EXECUTOR_CACHE_MAX_ENTRY_SIZE =
      "spark.sql.iceberg.executor-cache.max-entry-size";
  public static final long EXECUTOR_CACHE_MAX_ENTRY_SIZE_DEFAULT = 64L * 1024L * 1024L;

  // Executor cache max total size; defaults to 128 MB
  public static final String EXECUTOR_CACHE_MAX_TOTAL_SIZE =
      "spark.sql.iceberg.executor-cache.max-total-size";
  public static final long EXECUTOR_CACHE_MAX_TOTAL_SIZE_DEFAULT = 128L * 1024L * 1024L;

  // Constants holder only; never instantiated
  private SparkSQLProperties() {}
}