All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.util.collection.LazyFileIterable Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hudi.common.util.collection;

import org.apache.hudi.common.util.BufferedRandomAccessFile;
import org.apache.hudi.exception.HoodieException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.util.Iterator;
import java.util.Map;
import java.util.function.Consumer;
import java.util.stream.Collectors;

/**
 * Iterable to lazily fetch values spilled to disk. This class uses BufferedRandomAccessFile to randomly access the position of
 * the latest value for a key spilled to disk and returns the result.
 */
public class LazyFileIterable implements Iterable {
  private static final Logger LOG = LoggerFactory.getLogger(LazyFileIterable.class);

  // Used to access the value written at a specific position in the file
  private final String filePath;
  // Stores the key and corresponding value's latest metadata spilled to disk
  private final Map inMemoryMetadataOfSpilledData;
  // Was compressions enabled for the values when inserted into the file/ map
  private final boolean isCompressionEnabled;

  private transient Thread shutdownThread = null;

  public LazyFileIterable(String filePath, Map map) {
    this(filePath, map, false);
  }

  public LazyFileIterable(String filePath, Map map, boolean isCompressionEnabled) {
    this.filePath = filePath;
    this.inMemoryMetadataOfSpilledData = map;
    this.isCompressionEnabled = isCompressionEnabled;
  }

  @Override
  public ClosableIterator iterator() {
    try {
      return new LazyFileIterator<>(filePath, inMemoryMetadataOfSpilledData);
    } catch (IOException io) {
      throw new HoodieException("Unable to initialize iterator for file on disk", io);
    }
  }

  /**
   * Iterator implementation for the iterable defined above.
   */
  public class LazyFileIterator implements ClosableIterator {

    private final String filePath;
    private BufferedRandomAccessFile readOnlyFileHandle;
    private final Iterator> metadataIterator;

    public LazyFileIterator(String filePath, Map map) throws IOException {
      this.filePath = filePath;
      this.readOnlyFileHandle = new BufferedRandomAccessFile(filePath, "r", BitCaskDiskMap.BUFFER_SIZE);
      readOnlyFileHandle.seek(0);

      // sort the map in increasing order of offset of value so disk seek is only in one(forward) direction
      this.metadataIterator = map.entrySet().stream()
          .sorted((Map.Entry o1, Map.Entry o2) -> o1
              .getValue().getOffsetOfValue().compareTo(o2.getValue().getOffsetOfValue()))
          .collect(Collectors.toList()).iterator();
      this.addShutdownHook();
    }

    @Override
    public boolean hasNext() {
      boolean available = this.metadataIterator.hasNext();
      if (!available) {
        close();
      }
      return available;
    }

    @Override
    public R next() {
      if (!hasNext()) {
        throw new IllegalStateException("next() called on EOF'ed stream. File :" + filePath);
      }
      Map.Entry entry = this.metadataIterator.next();
      return BitCaskDiskMap.get(entry.getValue(), readOnlyFileHandle, isCompressionEnabled);
    }

    @Override
    public void remove() {
      this.metadataIterator.remove();
    }

    @Override
    public void forEachRemaining(Consumer action) {
      action.accept(next());
    }

    public void close() {
      closeHandle();
      Runtime.getRuntime().removeShutdownHook(shutdownThread);
    }

    private void closeHandle() {
      if (readOnlyFileHandle != null) {
        try {
          readOnlyFileHandle.close();
          readOnlyFileHandle = null;
        } catch (IOException e) {
          throw new RuntimeException(e);
        }
      }
    }

    private void addShutdownHook() {
      shutdownThread = new Thread(() -> {
        LOG.warn("Failed to properly close LazyFileIterable in application.");
        this.closeHandle();
      });
      Runtime.getRuntime().addShutdownHook(shutdownThread);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy