All downloads are free. The search and download features use the official Maven repository.

org.apache.iceberg.BaseSnapshot Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg;

import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import org.apache.iceberg.exceptions.RuntimeIOException;
import org.apache.iceberg.io.CloseableIterable;
import org.apache.iceberg.io.FileIO;
import org.apache.iceberg.relocated.com.google.common.base.MoreObjects;
import org.apache.iceberg.relocated.com.google.common.base.Objects;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;

class BaseSnapshot implements Snapshot {
  private final long snapshotId;
  private final Long parentId;
  private final long sequenceNumber;
  private final long timestampMillis;
  private final String manifestListLocation;
  private final String operation;
  private final Map summary;
  private final Integer schemaId;
  private final String[] v1ManifestLocations;

  // lazily initialized
  private transient List allManifests = null;
  private transient List dataManifests = null;
  private transient List deleteManifests = null;
  private transient List addedDataFiles = null;
  private transient List removedDataFiles = null;
  private transient List addedDeleteFiles = null;
  private transient List removedDeleteFiles = null;

  BaseSnapshot(
      long sequenceNumber,
      long snapshotId,
      Long parentId,
      long timestampMillis,
      String operation,
      Map summary,
      Integer schemaId,
      String manifestList) {
    this.sequenceNumber = sequenceNumber;
    this.snapshotId = snapshotId;
    this.parentId = parentId;
    this.timestampMillis = timestampMillis;
    this.operation = operation;
    this.summary = summary;
    this.schemaId = schemaId;
    this.manifestListLocation = manifestList;
    this.v1ManifestLocations = null;
  }

  BaseSnapshot(
      long sequenceNumber,
      long snapshotId,
      Long parentId,
      long timestampMillis,
      String operation,
      Map summary,
      Integer schemaId,
      String[] v1ManifestLocations) {
    this.sequenceNumber = sequenceNumber;
    this.snapshotId = snapshotId;
    this.parentId = parentId;
    this.timestampMillis = timestampMillis;
    this.operation = operation;
    this.summary = summary;
    this.schemaId = schemaId;
    this.manifestListLocation = null;
    this.v1ManifestLocations = v1ManifestLocations;
  }

  @Override
  public long sequenceNumber() {
    return sequenceNumber;
  }

  @Override
  public long snapshotId() {
    return snapshotId;
  }

  @Override
  public Long parentId() {
    return parentId;
  }

  @Override
  public long timestampMillis() {
    return timestampMillis;
  }

  @Override
  public String operation() {
    return operation;
  }

  @Override
  public Map summary() {
    return summary;
  }

  @Override
  public Integer schemaId() {
    return schemaId;
  }

  private void cacheManifests(FileIO fileIO) {
    if (fileIO == null) {
      throw new IllegalArgumentException("Cannot cache changes: FileIO is null");
    }

    if (allManifests == null && v1ManifestLocations != null) {
      // if we have a collection of manifest locations, then we need to load them here
      allManifests =
          Lists.transform(
              Arrays.asList(v1ManifestLocations),
              location -> new GenericManifestFile(fileIO.newInputFile(location), 0));
    }

    if (allManifests == null) {
      // if manifests isn't set, then the snapshotFile is set and should be read to get the list
      this.allManifests = ManifestLists.read(fileIO.newInputFile(manifestListLocation));
    }

    if (dataManifests == null || deleteManifests == null) {
      this.dataManifests =
          ImmutableList.copyOf(
              Iterables.filter(
                  allManifests, manifest -> manifest.content() == ManifestContent.DATA));
      this.deleteManifests =
          ImmutableList.copyOf(
              Iterables.filter(
                  allManifests, manifest -> manifest.content() == ManifestContent.DELETES));
    }
  }

  @Override
  public List allManifests(FileIO fileIO) {
    if (allManifests == null) {
      cacheManifests(fileIO);
    }
    return allManifests;
  }

  @Override
  public List dataManifests(FileIO fileIO) {
    if (dataManifests == null) {
      cacheManifests(fileIO);
    }
    return dataManifests;
  }

  @Override
  public List deleteManifests(FileIO fileIO) {
    if (deleteManifests == null) {
      cacheManifests(fileIO);
    }
    return deleteManifests;
  }

  @Override
  public List addedDataFiles(FileIO fileIO) {
    if (addedDataFiles == null) {
      cacheDataFileChanges(fileIO);
    }
    return addedDataFiles;
  }

  @Override
  public List removedDataFiles(FileIO fileIO) {
    if (removedDataFiles == null) {
      cacheDataFileChanges(fileIO);
    }
    return removedDataFiles;
  }

  @Override
  public Iterable addedDeleteFiles(FileIO fileIO) {
    if (addedDeleteFiles == null) {
      cacheDeleteFileChanges(fileIO);
    }
    return addedDeleteFiles;
  }

  @Override
  public Iterable removedDeleteFiles(FileIO fileIO) {
    if (removedDeleteFiles == null) {
      cacheDeleteFileChanges(fileIO);
    }
    return removedDeleteFiles;
  }

  @Override
  public String manifestListLocation() {
    return manifestListLocation;
  }

  private void cacheDeleteFileChanges(FileIO fileIO) {
    Preconditions.checkArgument(fileIO != null, "Cannot cache delete file changes: FileIO is null");

    ImmutableList.Builder adds = ImmutableList.builder();
    ImmutableList.Builder deletes = ImmutableList.builder();

    Iterable changedManifests =
        Iterables.filter(
            deleteManifests(fileIO), manifest -> Objects.equal(manifest.snapshotId(), snapshotId));

    for (ManifestFile manifest : changedManifests) {
      try (ManifestReader reader =
          ManifestFiles.readDeleteManifest(manifest, fileIO, null)) {
        for (ManifestEntry entry : reader.entries()) {
          switch (entry.status()) {
            case ADDED:
              adds.add(entry.file().copy());
              break;
            case DELETED:
              deletes.add(entry.file().copyWithoutStats());
              break;
            default:
              // ignore existing
          }
        }
      } catch (IOException e) {
        throw new UncheckedIOException("Failed to close manifest reader", e);
      }
    }

    this.addedDeleteFiles = adds.build();
    this.removedDeleteFiles = deletes.build();
  }

  private void cacheDataFileChanges(FileIO fileIO) {
    Preconditions.checkArgument(fileIO != null, "Cannot cache data file changes: FileIO is null");

    ImmutableList.Builder adds = ImmutableList.builder();
    ImmutableList.Builder deletes = ImmutableList.builder();

    // read only manifests that were created by this snapshot
    Iterable changedManifests =
        Iterables.filter(
            dataManifests(fileIO), manifest -> Objects.equal(manifest.snapshotId(), snapshotId));
    try (CloseableIterable> entries =
        new ManifestGroup(fileIO, changedManifests).ignoreExisting().entries()) {
      for (ManifestEntry entry : entries) {
        switch (entry.status()) {
          case ADDED:
            adds.add(entry.file().copy());
            break;
          case DELETED:
            deletes.add(entry.file().copyWithoutStats());
            break;
          default:
            throw new IllegalStateException(
                "Unexpected entry status, not added or deleted: " + entry);
        }
      }
    } catch (IOException e) {
      throw new RuntimeIOException(e, "Failed to close entries while caching changes");
    }

    this.addedDataFiles = adds.build();
    this.removedDataFiles = deletes.build();
  }

  @Override
  public boolean equals(Object o) {
    if (this == o) {
      return true;
    }

    if (o instanceof BaseSnapshot) {
      BaseSnapshot other = (BaseSnapshot) o;
      return this.snapshotId == other.snapshotId()
          && Objects.equal(this.parentId, other.parentId())
          && this.sequenceNumber == other.sequenceNumber()
          && this.timestampMillis == other.timestampMillis()
          && Objects.equal(this.schemaId, other.schemaId());
    }

    return false;
  }

  @Override
  public int hashCode() {
    return Objects.hashCode(
        this.snapshotId, this.parentId, this.sequenceNumber, this.timestampMillis, this.schemaId);
  }

  @Override
  public String toString() {
    return MoreObjects.toStringHelper(this)
        .add("id", snapshotId)
        .add("timestamp_ms", timestampMillis)
        .add("operation", operation)
        .add("summary", summary)
        .add("manifest-list", manifestListLocation)
        .add("schema-id", schemaId)
        .toString();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy