/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/

package org.apache.iceberg.spark.source;

import java.util.Map;
import java.util.Set;
import org.apache.iceberg.Table;
import org.apache.iceberg.TableProperties;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.Expression;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableMap;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableSet;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.iceberg.spark.Spark3Util;
import org.apache.iceberg.spark.SparkFilters;
import org.apache.iceberg.spark.SparkSchemaUtil;
import org.apache.spark.sql.SparkSession;
import org.apache.spark.sql.connector.catalog.SupportsDelete;
import org.apache.spark.sql.connector.catalog.SupportsRead;
import org.apache.spark.sql.connector.catalog.SupportsWrite;
import org.apache.spark.sql.connector.catalog.TableCapability;
import org.apache.spark.sql.connector.expressions.Transform;
import org.apache.spark.sql.connector.read.ScanBuilder;
import org.apache.spark.sql.connector.write.LogicalWriteInfo;
import org.apache.spark.sql.connector.write.WriteBuilder;
import org.apache.spark.sql.sources.Filter;
import org.apache.spark.sql.types.StructType;
import org.apache.spark.sql.util.CaseInsensitiveStringMap;
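
/**
 * A Spark DataSourceV2 {@code Table} that adapts an Iceberg {@link Table} to Spark 3.
 * It reports batch-read, batch-write, streaming-write, and filter/dynamic-overwrite
 * capabilities, exposes the Iceberg schema and partition transforms to Spark, and routes
 * scans, writes, and {@code DELETE FROM} filters to the corresponding Iceberg operations.
 */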
public class SparkTable implements org.apache.spark.sql.connector.catalog.Table,
    SupportsRead, SupportsWrite, SupportsDelete {

  private static final Set<String> RESERVED_PROPERTIES = Sets.newHashSet("provider", "format", "current-snapshot-id");
  private static final Set<TableCapability> CAPABILITIES = ImmutableSet.of(
      TableCapability.BATCH_READ,
      TableCapability.BATCH_WRITE,
      TableCapability.STREAMING_WRITE,
      TableCapability.OVERWRITE_BY_FILTER,
      TableCapability.OVERWRITE_DYNAMIC);

  private final Table icebergTable;
  private final StructType requestedSchema;
  private StructType lazyTableSchema = null;
  private SparkSession lazySpark = null;

  public SparkTable(Table icebergTable) {
    this(icebergTable, null);
  }

  public SparkTable(Table icebergTable, StructType requestedSchema) {
    this.icebergTable = icebergTable;
    this.requestedSchema = requestedSchema;

    if (requestedSchema != null) {
      // convert the requested schema to throw an exception if any requested fields are unknown
      SparkSchemaUtil.convert(icebergTable.schema(), requestedSchema);
    }
  }

  private SparkSession sparkSession() {
    if (lazySpark == null) {
      this.lazySpark = SparkSession.active();
    }

    return lazySpark;
  }

  public Table table() {
    return icebergTable;
  }

  @Override
  public String name() {
    return icebergTable.toString();
  }

  @Override
  public StructType schema() {
    if (lazyTableSchema == null) {
      if (requestedSchema != null) {
        this.lazyTableSchema = SparkSchemaUtil.convert(SparkSchemaUtil.prune(icebergTable.schema(), requestedSchema));
      } else {
        this.lazyTableSchema = SparkSchemaUtil.convert(icebergTable.schema());
      }
    }

    return lazyTableSchema;
  }

  @Override
  public Transform[] partitioning() {
    return Spark3Util.toTransforms(icebergTable.spec());
  }
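
  // Surfaces the Iceberg table's properties to Spark, plus three synthesized entries:
  // "provider" and "format" identify the source, and "current-snapshot-id" reports the
  // table's current snapshot (or "none"). Matching keys stored on the Iceberg table are
  // skipped via RESERVED_PROPERTIES so the synthesized values win.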
  @Override
  public Map<String, String> properties() {
    ImmutableMap.Builder<String, String> propsBuilder = ImmutableMap.builder();

    String fileFormat = icebergTable.properties()
        .getOrDefault(TableProperties.DEFAULT_FILE_FORMAT, TableProperties.DEFAULT_FILE_FORMAT_DEFAULT);
    propsBuilder.put("format", "iceberg/" + fileFormat);
    propsBuilder.put("provider", "iceberg");

    String currentSnapshotId = icebergTable.currentSnapshot() != null ?
        String.valueOf(icebergTable.currentSnapshot().snapshotId()) : "none";
    propsBuilder.put("current-snapshot-id", currentSnapshotId);

    icebergTable.properties().entrySet().stream()
        .filter(entry -> !RESERVED_PROPERTIES.contains(entry.getKey()))
        .forEach(propsBuilder::put);

    return propsBuilder.build();
  }

  @Override
  public Set<TableCapability> capabilities() {
    return CAPABILITIES;
  }

  @Override
  public ScanBuilder newScanBuilder(CaseInsensitiveStringMap options) {
    SparkScanBuilder scanBuilder = new SparkScanBuilder(sparkSession(), icebergTable, options);

    if (requestedSchema != null) {
      scanBuilder.pruneColumns(requestedSchema);
    }

    return scanBuilder;
  }

  @Override
  public WriteBuilder newWriteBuilder(LogicalWriteInfo info) {
    return new SparkWriteBuilder(sparkSession(), icebergTable, info);
  }
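
  // Converts Spark's pushed-down Filter[] into a single Iceberg row-filter Expression and
  // commits a metadata-only DeleteFiles operation, tagging the snapshot with the Spark
  // application id. This drops whole data files whose rows all match the filter; Iceberg
  // raises ValidationException when the filter cannot be applied cleanly at file
  // granularity, which is rethrown as IllegalArgumentException for Spark.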
  @Override
  public void deleteWhere(Filter[] filters) {
    Expression deleteExpr = SparkFilters.convert(filters);

    try {
      icebergTable.newDelete()
          .set("spark.app.id", sparkSession().sparkContext().applicationId())
          .deleteFromRowFilter(deleteExpr)
          .commit();
    } catch (ValidationException e) {
      throw new IllegalArgumentException("Failed to cleanly delete data files matching: " + deleteExpr, e);
    }
  }

  @Override
  public String toString() {
    return icebergTable.toString();
  }
}
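
// Usage sketch (illustrative, not part of the upstream file): SparkTable is normally
// obtained through an Iceberg catalog rather than constructed directly. Assuming a
// hypothetical catalog named "demo" backed by a Hadoop warehouse, something like:
//
//   SparkSession spark = SparkSession.builder()
//       .master("local[*]")
//       .config("spark.sql.catalog.demo", "org.apache.iceberg.spark.SparkCatalog")
//       .config("spark.sql.catalog.demo.type", "hadoop")
//       .config("spark.sql.catalog.demo.warehouse", "file:///tmp/warehouse")
//       .getOrCreate();
//
//   spark.sql("CREATE TABLE demo.db.events (id BIGINT, level STRING) USING iceberg");
//   spark.sql("INSERT INTO demo.db.events VALUES (1, 'INFO')");      // newWriteBuilder(...)
//   spark.sql("SELECT * FROM demo.db.events").show();                // newScanBuilder(...)
//   spark.sql("DELETE FROM demo.db.events WHERE level = 'INFO'");    // deleteWhere(...)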