org.apache.iceberg.TableMetadata Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of iceberg-core Show documentation
Show all versions of iceberg-core Show documentation
A table format for huge analytic datasets
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.iceberg;
import com.google.common.base.MoreObjects;
import com.google.common.base.Objects;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.function.Predicate;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.io.InputFile;
import org.apache.iceberg.types.TypeUtil;
import org.apache.iceberg.util.PropertyUtil;
/**
* Metadata for a table.
*/
public class TableMetadata {
static final int TABLE_FORMAT_VERSION = 1;
static final int INITIAL_SPEC_ID = 0;
public static TableMetadata newTableMetadata(TableOperations ops,
Schema schema,
PartitionSpec spec,
String location) {
return newTableMetadata(ops, schema, spec, location, ImmutableMap.of());
}
public static TableMetadata newTableMetadata(TableOperations ops,
Schema schema,
PartitionSpec spec,
String location,
Map properties) {
// reassign all column ids to ensure consistency
AtomicInteger lastColumnId = new AtomicInteger(0);
Schema freshSchema = TypeUtil.assignFreshIds(schema, lastColumnId::incrementAndGet);
// rebuild the partition spec using the new column ids
PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(freshSchema)
.withSpecId(INITIAL_SPEC_ID);
for (PartitionField field : spec.fields()) {
// look up the name of the source field in the old schema to get the new schema's id
String sourceName = schema.findColumnName(field.sourceId());
specBuilder.add(
freshSchema.findField(sourceName).fieldId(),
field.name(),
field.transform().toString());
}
PartitionSpec freshSpec = specBuilder.build();
return new TableMetadata(ops, null, UUID.randomUUID().toString(), location,
System.currentTimeMillis(),
lastColumnId.get(), freshSchema, INITIAL_SPEC_ID, ImmutableList.of(freshSpec),
ImmutableMap.copyOf(properties), -1, ImmutableList.of(), ImmutableList.of());
}
public static class SnapshotLogEntry implements HistoryEntry {
private final long timestampMillis;
private final long snapshotId;
SnapshotLogEntry(long timestampMillis, long snapshotId) {
this.timestampMillis = timestampMillis;
this.snapshotId = snapshotId;
}
@Override
public long timestampMillis() {
return timestampMillis;
}
@Override
public long snapshotId() {
return snapshotId;
}
@Override
public boolean equals(Object other) {
if (this == other) {
return true;
}
if (other == null || getClass() != other.getClass()) {
return false;
}
SnapshotLogEntry that = (SnapshotLogEntry) other;
return timestampMillis == that.timestampMillis && snapshotId == that.snapshotId;
}
@Override
public int hashCode() {
return Objects.hashCode(timestampMillis, snapshotId);
}
@Override
public String toString() {
return MoreObjects.toStringHelper(this)
.add("timestampMillis", timestampMillis)
.add("snapshotId", snapshotId)
.toString();
}
}
private final TableOperations ops;
private final InputFile file;
// stored metadata
private final String uuid;
private final String location;
private final long lastUpdatedMillis;
private final int lastColumnId;
private final Schema schema;
private final int defaultSpecId;
private final List specs;
private final Map properties;
private final long currentSnapshotId;
private final List snapshots;
private final Map snapshotsById;
private final Map specsById;
private final List snapshotLog;
TableMetadata(TableOperations ops,
InputFile file,
String uuid,
String location,
long lastUpdatedMillis,
int lastColumnId,
Schema schema,
int defaultSpecId,
List specs,
Map properties,
long currentSnapshotId,
List snapshots,
List snapshotLog) {
this.ops = ops;
this.file = file;
this.uuid = uuid;
this.location = location;
this.lastUpdatedMillis = lastUpdatedMillis;
this.lastColumnId = lastColumnId;
this.schema = schema;
this.specs = specs;
this.defaultSpecId = defaultSpecId;
this.properties = properties;
this.currentSnapshotId = currentSnapshotId;
this.snapshots = snapshots;
this.snapshotLog = snapshotLog;
this.snapshotsById = indexSnapshots(snapshots);
this.specsById = indexSpecs(specs);
HistoryEntry last = null;
for (HistoryEntry logEntry : snapshotLog) {
if (last != null) {
Preconditions.checkArgument(
(logEntry.timestampMillis() - last.timestampMillis()) >= 0,
"[BUG] Expected sorted snapshot log entries.");
}
last = logEntry;
}
Preconditions.checkArgument(
currentSnapshotId < 0 || snapshotsById.containsKey(currentSnapshotId),
"Invalid table metadata: Cannot find current version");
}
public InputFile file() {
return file;
}
public String uuid() {
return uuid;
}
public long lastUpdatedMillis() {
return lastUpdatedMillis;
}
public int lastColumnId() {
return lastColumnId;
}
public Schema schema() {
return schema;
}
public PartitionSpec spec() {
return specsById.get(defaultSpecId);
}
public PartitionSpec spec(int id) {
return specsById.get(id);
}
public List specs() {
return specs;
}
public Map specsById() {
return specsById;
}
public int defaultSpecId() {
return defaultSpecId;
}
public String location() {
return location;
}
public Map properties() {
return properties;
}
public String property(String property, String defaultValue) {
return properties.getOrDefault(property, defaultValue);
}
public boolean propertyAsBoolean(String property, boolean defaultValue) {
return PropertyUtil.propertyAsBoolean(properties, property, defaultValue);
}
public int propertyAsInt(String property, int defaultValue) {
return PropertyUtil.propertyAsInt(properties, property, defaultValue);
}
public long propertyAsLong(String property, long defaultValue) {
return PropertyUtil.propertyAsLong(properties, property, defaultValue);
}
public Snapshot snapshot(long snapshotId) {
return snapshotsById.get(snapshotId);
}
public Snapshot currentSnapshot() {
return snapshotsById.get(currentSnapshotId);
}
public List snapshots() {
return snapshots;
}
public List snapshotLog() {
return snapshotLog;
}
public TableMetadata withUUID() {
if (uuid != null) {
return this;
} else {
return new TableMetadata(ops, null, UUID.randomUUID().toString(), location,
lastUpdatedMillis, lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, snapshots, snapshotLog);
}
}
public TableMetadata updateTableLocation(String newLocation) {
return new TableMetadata(ops, null, uuid, newLocation,
System.currentTimeMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, snapshots, snapshotLog);
}
public TableMetadata updateSchema(Schema newSchema, int newLastColumnId) {
PartitionSpec.checkCompatibility(spec(), newSchema);
// rebuild all of the partition specs for the new current schema
List updatedSpecs = Lists.transform(specs,
spec -> updateSpecSchema(newSchema, spec));
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), newLastColumnId, newSchema, defaultSpecId, updatedSpecs, properties,
currentSnapshotId, snapshots, snapshotLog);
}
public TableMetadata updatePartitionSpec(PartitionSpec newPartitionSpec) {
PartitionSpec.checkCompatibility(newPartitionSpec, schema);
// if the spec already exists, use the same ID. otherwise, use 1 more than the highest ID.
int newDefaultSpecId = INITIAL_SPEC_ID;
for (PartitionSpec spec : specs) {
if (newPartitionSpec.compatibleWith(spec)) {
newDefaultSpecId = spec.specId();
break;
} else if (newDefaultSpecId <= spec.specId()) {
newDefaultSpecId = spec.specId() + 1;
}
}
Preconditions.checkArgument(defaultSpecId != newDefaultSpecId,
"Cannot set default partition spec to the current default");
ImmutableList.Builder builder = ImmutableList.builder()
.addAll(specs);
if (!specsById.containsKey(newDefaultSpecId)) {
// get a fresh spec to ensure the spec ID is set to the new default
builder.add(freshSpec(newDefaultSpecId, schema, newPartitionSpec));
}
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), lastColumnId, schema, newDefaultSpecId,
builder.build(), properties,
currentSnapshotId, snapshots, snapshotLog);
}
public TableMetadata addStagedSnapshot(Snapshot snapshot) {
List newSnapshots = ImmutableList.builder()
.addAll(snapshots)
.add(snapshot)
.build();
return new TableMetadata(ops, null, uuid, location,
snapshot.timestampMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, newSnapshots, snapshotLog);
}
public TableMetadata replaceCurrentSnapshot(Snapshot snapshot) {
List newSnapshots = ImmutableList.builder()
.addAll(snapshots)
.add(snapshot)
.build();
List newSnapshotLog = ImmutableList.builder()
.addAll(snapshotLog)
.add(new SnapshotLogEntry(snapshot.timestampMillis(), snapshot.snapshotId()))
.build();
return new TableMetadata(ops, null, uuid, location,
snapshot.timestampMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
snapshot.snapshotId(), newSnapshots, newSnapshotLog);
}
public TableMetadata removeSnapshotsIf(Predicate removeIf) {
List filtered = Lists.newArrayListWithExpectedSize(snapshots.size());
for (Snapshot snapshot : snapshots) {
// keep the current snapshot and any snapshots that do not match the removeIf condition
if (snapshot.snapshotId() == currentSnapshotId || !removeIf.test(snapshot)) {
filtered.add(snapshot);
}
}
// update the snapshot log
Set validIds = Sets.newHashSet(Iterables.transform(filtered, Snapshot::snapshotId));
List newSnapshotLog = Lists.newArrayList();
for (HistoryEntry logEntry : snapshotLog) {
if (validIds.contains(logEntry.snapshotId())) {
// copy the log entries that are still valid
newSnapshotLog.add(logEntry);
} else {
// any invalid entry causes the history before it to be removed. otherwise, there could be
// history gaps that cause time-travel queries to produce incorrect results. for example,
// if history is [(t1, s1), (t2, s2), (t3, s3)] and s2 is removed, the history cannot be
// [(t1, s1), (t3, s3)] because it appears that s3 was current during the time between t2
// and t3 when in fact s2 was the current snapshot.
newSnapshotLog.clear();
}
}
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, filtered, ImmutableList.copyOf(newSnapshotLog));
}
public TableMetadata rollbackTo(Snapshot snapshot) {
ValidationException.check(snapshotsById.containsKey(snapshot.snapshotId()),
"Cannot set current snapshot to unknown: %s", snapshot.snapshotId());
long nowMillis = System.currentTimeMillis();
List newSnapshotLog = ImmutableList.builder()
.addAll(snapshotLog)
.add(new SnapshotLogEntry(nowMillis, snapshot.snapshotId()))
.build();
return new TableMetadata(ops, null, uuid, location,
nowMillis, lastColumnId, schema, defaultSpecId, specs, properties,
snapshot.snapshotId(), snapshots, newSnapshotLog);
}
public TableMetadata replaceProperties(Map newProperties) {
ValidationException.check(newProperties != null, "Cannot set properties to null");
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), lastColumnId, schema, defaultSpecId, specs, newProperties,
currentSnapshotId, snapshots, snapshotLog);
}
public TableMetadata removeSnapshotLogEntries(Set snapshotIds) {
List newSnapshotLog = Lists.newArrayList();
for (HistoryEntry logEntry : snapshotLog) {
if (!snapshotIds.contains(logEntry.snapshotId())) {
// copy the log entries that are still valid
newSnapshotLog.add(logEntry);
}
}
ValidationException.check(currentSnapshotId < 0 || // not set
Iterables.getLast(newSnapshotLog).snapshotId() == currentSnapshotId,
"Cannot set invalid snapshot log: latest entry is not the current snapshot");
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, snapshots, newSnapshotLog);
}
public TableMetadata buildReplacement(Schema updatedSchema, PartitionSpec updatedPartitionSpec,
Map updatedProperties) {
AtomicInteger nextLastColumnId = new AtomicInteger(0);
Schema freshSchema = TypeUtil.assignFreshIds(updatedSchema, nextLastColumnId::incrementAndGet);
int nextSpecId = TableMetadata.INITIAL_SPEC_ID;
for (Integer specId : specsById.keySet()) {
if (nextSpecId <= specId) {
nextSpecId = specId + 1;
}
}
// rebuild the partition spec using the new column ids
PartitionSpec freshSpec = freshSpec(nextSpecId, freshSchema, updatedPartitionSpec);
// if the spec already exists, use the same ID. otherwise, use 1 more than the highest ID.
int specId = nextSpecId;
for (PartitionSpec spec : specs) {
if (freshSpec.compatibleWith(spec)) {
specId = spec.specId();
break;
}
}
ImmutableList.Builder builder = ImmutableList.builder()
.addAll(specs);
if (!specsById.containsKey(specId)) {
builder.add(freshSpec);
}
Map newProperties = Maps.newHashMap();
newProperties.putAll(this.properties);
newProperties.putAll(updatedProperties);
return new TableMetadata(ops, null, uuid, location,
System.currentTimeMillis(), nextLastColumnId.get(), freshSchema,
specId, builder.build(), ImmutableMap.copyOf(newProperties),
-1, snapshots, ImmutableList.of());
}
public TableMetadata updateLocation(String newLocation) {
return new TableMetadata(ops, null, uuid, newLocation,
System.currentTimeMillis(), lastColumnId, schema, defaultSpecId, specs, properties,
currentSnapshotId, snapshots, snapshotLog);
}
private static PartitionSpec updateSpecSchema(Schema schema, PartitionSpec partitionSpec) {
PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema)
.withSpecId(partitionSpec.specId());
// add all of the fields to the builder. IDs should not change.
for (PartitionField field : partitionSpec.fields()) {
specBuilder.add(field.sourceId(), field.name(), field.transform().toString());
}
return specBuilder.build();
}
private static PartitionSpec freshSpec(int specId, Schema schema, PartitionSpec partitionSpec) {
PartitionSpec.Builder specBuilder = PartitionSpec.builderFor(schema)
.withSpecId(specId);
for (PartitionField field : partitionSpec.fields()) {
// look up the name of the source field in the old schema to get the new schema's id
String sourceName = partitionSpec.schema().findColumnName(field.sourceId());
specBuilder.add(
schema.findField(sourceName).fieldId(),
field.name(),
field.transform().toString());
}
return specBuilder.build();
}
private static Map indexSnapshots(List snapshots) {
ImmutableMap.Builder builder = ImmutableMap.builder();
for (Snapshot version : snapshots) {
builder.put(version.snapshotId(), version);
}
return builder.build();
}
private static Map indexSpecs(List specs) {
ImmutableMap.Builder builder = ImmutableMap.builder();
for (PartitionSpec spec : specs) {
builder.put(spec.specId(), spec);
}
return builder.build();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy