All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.RollingManifestWriter Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg;

import java.io.Closeable;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.List;
import java.util.function.Supplier;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;

/** As opposed to {@link ManifestWriter}, a rolling writer could produce multiple manifest files. */
public class RollingManifestWriter> implements Closeable {
  private static final int ROWS_DIVISOR = 250;

  private final Supplier> manifestWriterSupplier;
  private final long targetFileSizeInBytes;
  private final List manifestFiles;

  private long currentFileRows = 0;
  private ManifestWriter currentWriter = null;

  private boolean closed = false;

  public RollingManifestWriter(
      Supplier> manifestWriterSupplier, long targetFileSizeInBytes) {
    this.manifestWriterSupplier = manifestWriterSupplier;
    this.targetFileSizeInBytes = targetFileSizeInBytes;
    this.manifestFiles = Lists.newArrayList();
  }

  /**
   * Add an added entry for a file.
   *
   * 

The entry's snapshot ID will be this manifest's snapshot ID. The data and file sequence * numbers will be assigned at commit. * * @param addedFile a data file */ public void add(F addedFile) { currentWriter().add(addedFile); currentFileRows++; } /** * Add an added entry for a file with a specific sequence number. * *

The entry's snapshot ID will be this manifest's snapshot ID. The entry's data sequence * number will be the provided data sequence number. The entry's file sequence number will be * assigned at commit. * * @param addedFile a data file * @param dataSequenceNumber a data sequence number for the file */ public void add(F addedFile, long dataSequenceNumber) { currentWriter().add(addedFile, dataSequenceNumber); currentFileRows++; } /** * Add an existing entry for a file. * *

The original data and file sequence numbers, snapshot ID, which were assigned at commit, * must be preserved when adding an existing entry. * * @param existingFile a file * @param fileSnapshotId snapshot ID when the data file was added to the table * @param dataSequenceNumber a data sequence number of the file (assigned when the file was added) * @param fileSequenceNumber a file sequence number (assigned when the file was added) */ public void existing( F existingFile, long fileSnapshotId, long dataSequenceNumber, Long fileSequenceNumber) { currentWriter().existing(existingFile, fileSnapshotId, dataSequenceNumber, fileSequenceNumber); currentFileRows++; } /** * Add a delete entry for a file. * *

The entry's snapshot ID will be this manifest's snapshot ID. However, the original data and * file sequence numbers of the file must be preserved when the file is marked as deleted. * * @param deletedFile a file * @param dataSequenceNumber a data sequence number of the file (assigned when the file was added) * @param fileSequenceNumber a file sequence number (assigned when the file was added) */ public void delete(F deletedFile, long dataSequenceNumber, Long fileSequenceNumber) { currentWriter().delete(deletedFile, dataSequenceNumber, fileSequenceNumber); currentFileRows++; } private ManifestWriter currentWriter() { if (currentWriter == null) { this.currentWriter = manifestWriterSupplier.get(); } else if (shouldRollToNewFile()) { closeCurrentWriter(); this.currentWriter = manifestWriterSupplier.get(); } return currentWriter; } private boolean shouldRollToNewFile() { return currentFileRows % ROWS_DIVISOR == 0 && currentWriter.length() >= targetFileSizeInBytes; } private void closeCurrentWriter() { if (currentWriter != null) { try { currentWriter.close(); ManifestFile currentFile = currentWriter.toManifestFile(); manifestFiles.add(currentFile); this.currentWriter = null; this.currentFileRows = 0; } catch (IOException e) { throw new UncheckedIOException("Failed to close current writer", e); } } } @Override public void close() throws IOException { if (!closed) { closeCurrentWriter(); this.closed = true; } } public List toManifestFiles() { Preconditions.checkState(closed, "Cannot get ManifestFile list from unclosed writer"); return manifestFiles; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy