/*
 * Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH under
 * one or more contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright ownership.
 * Licensed under the Camunda License 1.0. You may not use this file
 * except in compliance with the Camunda License 1.0.
 */
package io.camunda.zeebe.journal.file;

import static com.google.common.base.Preconditions.checkNotNull;

import io.camunda.zeebe.journal.CorruptedJournalException;
import io.camunda.zeebe.journal.JournalException;
import io.camunda.zeebe.journal.JournalMetaStore;
import io.camunda.zeebe.util.logging.ThrottledLogger;
import java.io.File;
import java.io.IOException;
import java.nio.file.DirectoryStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.SortedMap;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ConcurrentSkipListMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

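// A minimal usage sketch (illustrative, not part of the original source): the collaborators
// (journalIndex, segmentLoader, journalMetrics, metaStore) and the directory are assumed to be
// constructed elsewhere, and "journal" is a hypothetical journal name.
//
//   final SegmentsManager segmentsManager =
//       new SegmentsManager(
//           journalIndex, maxSegmentSize, directory, "journal",
//           segmentLoader, journalMetrics, metaStore);
//   segmentsManager.open(); // loads existing segments from disk, or creates the first one
//   final Segment segment = segmentsManager.getCurrentSegment();
//   segmentsManager.close();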
/** Creates new segments, loads existing segments from disk, and keeps track of all segments. */
final class SegmentsManager implements AutoCloseable {

  private static final long FIRST_SEGMENT_ID = 1;
  private static final long INITIAL_INDEX = 1;
  private static final long INITIAL_ASQN = SegmentedJournal.ASQN_IGNORE;

  private static final Logger LOG = LoggerFactory.getLogger(SegmentsManager.class);
  private static final Logger THROTTLED_LOG = new ThrottledLogger(LOG, Duration.ofSeconds(5));

  private final NavigableMap<Long, Segment> segments = new ConcurrentSkipListMap<>();
  private CompletableFuture<UninitializedSegment> nextSegment = null;

  private final JournalMetrics journalMetrics;
  private final JournalIndex journalIndex;
  private final int maxSegmentSize;
  private final File directory;
  private final SegmentLoader segmentLoader;
  private final String name;
  private final JournalMetaStore metaStore;

  private volatile Segment currentSegment;

  SegmentsManager(
      final JournalIndex journalIndex,
      final int maxSegmentSize,
      final File directory,
      final String name,
      final SegmentLoader segmentLoader,
      final JournalMetrics journalMetrics,
      final JournalMetaStore metaStore) {
    this.name = checkNotNull(name, "name cannot be null");
    this.journalIndex = journalIndex;
    this.maxSegmentSize = maxSegmentSize;
    this.directory = directory;
    this.segmentLoader = segmentLoader;
    this.journalMetrics = journalMetrics;
    this.metaStore = metaStore;
  }

  @Override
  public void close() {
    segments
        .values()
        .forEach(
            segment -> {
              LOG.debug("Closing segment: {}", segment);
              segment.close();
            });

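    // Wait for any in-flight, asynchronous segment preparation to complete, so that closing does
    // not race with the pre-allocation of the next segment.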
    if (nextSegment != null) {
      try {
        nextSegment.join();
      } catch (final Exception e) {
        LOG.warn(
            "Next segment preparation failed during close, ignoring and proceeding to close", e);
      }
      nextSegment = null;
    }

    currentSegment = null;
  }

  Segment getCurrentSegment() {
    return currentSegment;
  }

  Segment getFirstSegment() {
    final Map.Entry<Long, Segment> segment = segments.firstEntry();
    return segment != null ? segment.getValue() : null;
  }

  Segment getLastSegment() {
    final Map.Entry<Long, Segment> segment = segments.lastEntry();
    return segment != null ? segment.getValue() : null;
  }

  /**
   * Creates and returns the next segment.
   *
   * @return The next segment.
   * @throws IllegalStateException if the segment manager is not open
   */
  Segment getNextSegment() {

    final Segment lastSegment = getLastSegment();
    final var lastWrittenAsqn = lastSegment != null ? lastSegment.lastAsqn() : INITIAL_ASQN;
    final var nextSegmentIndex = currentSegment.lastIndex() + 1;
    final SegmentDescriptor descriptor =
        SegmentDescriptor.builder()
            .withId(lastSegment != null ? lastSegment.descriptor().id() + 1 : 1)
            .withIndex(nextSegmentIndex)
            .withMaxSegmentSize(maxSegmentSize)
            .build();
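    // Prefer the segment that was pre-allocated asynchronously (see prepareNextSegment) to avoid
    // blocking on file creation; if pre-allocation failed, fall back to creating it synchronously.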
    if (nextSegment != null) {
      try {
        currentSegment =
            nextSegment.join().initializeForUse(nextSegmentIndex, lastWrittenAsqn, journalMetrics);
      } catch (final CompletionException e) {
        LOG.error("Failed to acquire next segment, retrying synchronously now.", e);
        nextSegment = null;
        currentSegment = createSegment(descriptor, lastWrittenAsqn);
      }
    } else {
      currentSegment = createSegment(descriptor, lastWrittenAsqn);
    }
    prepareNextSegment();

    segments.put(descriptor.index(), currentSegment);
    journalMetrics.incSegmentCount();
    return currentSegment;
  }

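  /**
   * Returns the first segment whose first index is strictly greater than the given index, or
   * {@code null} if no such segment exists.
   */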
  Segment getNextSegment(final long index) {
    final Map.Entry<Long, Segment> nextSegment = segments.higherEntry(index);
    return nextSegment != null ? nextSegment.getValue() : null;
  }

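  /** Returns the segment expected to contain the given index. */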
  Segment getSegment(final long index) {
    // Check if the current segment contains the given index first in order to prevent an
    // unnecessary map lookup.
    if (currentSegment != null && index > currentSegment.index()) {
      return currentSegment;
    }

    // If the index is in another segment, get the entry with the next lowest first index.
    final Map.Entry<Long, Segment> segment = segments.floorEntry(index);
    if (segment != null) {
      return segment.getValue();
    }
    return getFirstSegment();
  }

  private long getFirstIndex() {
    final var firstSegment = getFirstSegment();
    return firstSegment != null ? firstSegment.index() : 0;
  }

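  /**
   * Deletes all segments that lie entirely before the given index; the segment containing the
   * index itself is kept.
   *
   * @param index the index up to which preceding segments may be deleted
   * @return true if at least one segment was deleted
   */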
  boolean deleteUntil(final long index) {
    final Map.Entry<Long, Segment> segmentEntry = segments.floorEntry(index);
    if (segmentEntry == null) {
      return false;
    }

    final SortedMap<Long, Segment> compactSegments =
        segments.headMap(segmentEntry.getValue().index());
    if (compactSegments.isEmpty()) {
      THROTTLED_LOG.debug(
          "No segments can be deleted with index < {} (first log index: {})",
          index,
          getFirstIndex());
      return false;
    }

    LOG.debug(
        "{} - Deleting log up from {} up to {} (removing {} segments)",
        name,
        getFirstIndex(),
        compactSegments.get(compactSegments.lastKey()).index(),
        compactSegments.size());
    for (final Segment segment : compactSegments.values()) {
      LOG.trace("{} - Deleting segment: {}", name, segment);
      segment.delete();
      journalMetrics.decSegmentCount();
    }

    // removes them from the segment map
    compactSegments.clear();

    journalIndex.deleteUntil(index);

    return true;
  }

  /**
   * Resets and returns the first segment in the journal.
   *
   * @param index the starting index of the journal
   * @return the first segment
   */
  Segment resetSegments(final long index) {
    // reset the last flushed index before deleting data to avoid data corruption on start up in
    // case of node crash
    // setting the last flushed index to a semantic-null value will let us know on start up that
    // there is "nothing" written, even if we cannot read the descriptor (e.g. if we crash after
    // creating the segment but before writing its descriptor)
    metaStore.resetLastFlushedIndex();

    // delete the segments in reverse order, such that if the operation is interrupted (e.g. crash)
    // in the middle, there are no gaps in the log (or between the log and snapshot)
    final Iterator<Segment> it = segments.descendingMap().values().iterator();
    while (it.hasNext()) {
      // we explicitly do not want to close the segment, as we may be only soft deleting it here to
      // allow readers to finish what they're doing and avoid a race condition with unmapping the
      // underlying buffer
      //noinspection resource
      final var segment = it.next();
      segment.delete();
      it.remove();
      journalMetrics.decSegmentCount();
    }

    final SegmentDescriptor descriptor =
        SegmentDescriptor.builder()
            .withId(1)
            .withIndex(index)
            .withMaxSegmentSize(maxSegmentSize)
            .build();
    currentSegment = createSegment(descriptor, INITIAL_ASQN);
    segments.put(index, currentSegment);
    journalMetrics.incSegmentCount();
    return currentSegment;
  }

  /**
   * Removes a segment.
   *
   * @param segment The segment to remove.
   */
  void removeSegment(final Segment segment) {
    //noinspection resource
    segments.remove(segment.index());
    journalMetrics.decSegmentCount();
    segment.delete();
    resetCurrentSegment();
  }

  /** Resets the current segment, creating a new segment if necessary. */
  private void resetCurrentSegment() {
    final Segment lastSegment = getLastSegment();
    if (lastSegment != null) {
      currentSegment = lastSegment;
    } else {
      final SegmentDescriptor descriptor =
          SegmentDescriptor.builder()
              .withId(FIRST_SEGMENT_ID)
              .withIndex(INITIAL_INDEX)
              .withMaxSegmentSize(maxSegmentSize)
              .build();

      currentSegment = createSegment(descriptor, INITIAL_ASQN);

      segments.put(1L, currentSegment);
      journalMetrics.incSegmentCount();
    }
  }

  /** Loads existing segments from the disk. */
  void open() {
    final var openDurationTimer = journalMetrics.startJournalOpenDurationTimer();
    // Load existing log segments from disk.
    for (final Segment segment : loadSegments()) {
      segments.put(segment.descriptor().index(), segment);
      journalMetrics.incSegmentCount();
    }

    // If a segment doesn't already exist, create an initial segment starting at index 1.
    if (!segments.isEmpty()) {
      currentSegment = segments.lastEntry().getValue();
    } else {
      final SegmentDescriptor descriptor =
          SegmentDescriptor.builder()
              .withId(FIRST_SEGMENT_ID)
              .withIndex(INITIAL_INDEX)
              .withMaxSegmentSize(maxSegmentSize)
              .build();

      currentSegment = createSegment(descriptor, INITIAL_ASQN);

      segments.put(1L, currentSegment);
      journalMetrics.incSegmentCount();
    }
    // observe the journal open duration
    openDurationTimer.close();

    // Delete files that were previously marked for deletion but did not get deleted because the
    // node was stopped. It is safe to delete it now since there are no readers opened for these
    // segments.
    deleteDeferredFiles();
  }

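  /**
   * Asynchronously pre-allocates the next segment so that {@link #getNextSegment()} does not have
   * to block on file creation. The descriptor's index is only a placeholder here; the actual first
   * index and asqn are set via initializeForUse when the segment is taken into use.
   */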
  private void prepareNextSegment() {
    final var descriptor =
        SegmentDescriptor.builder()
            .withId(currentSegment.id() + 1)
            .withIndex(INITIAL_INDEX)
            .withMaxSegmentSize(maxSegmentSize)
            .build();
    nextSegment = CompletableFuture.supplyAsync(() -> createUninitializedSegment(descriptor));
  }

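  /** Returns all segments from the one containing the given index (inclusive) to the last. */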
  Collection<Segment> getTailSegments(final long index) {
    final var segment = getSegment(index);
    if (segment == null) {
      return Collections.emptySet();
    }

    return Collections.unmodifiableSortedMap(segments.tailMap(segment.index(), true)).values();
  }

  private UninitializedSegment createUninitializedSegment(final SegmentDescriptor descriptor) {
    final var segmentFile = SegmentFile.createSegmentFile(name, directory, descriptor.id());
    return segmentLoader.createUninitializedSegment(segmentFile.toPath(), descriptor, journalIndex);
  }

  private Segment createSegment(final SegmentDescriptor descriptor, final long lastWrittenAsqn) {
    final var segmentFile = SegmentFile.createSegmentFile(name, directory, descriptor.id());
    return segmentLoader.createSegment(
        segmentFile.toPath(), descriptor, lastWrittenAsqn, journalIndex);
  }

  /**
   * Loads all segments from disk.
   *
   * @return A collection of segments for the log.
   */
  private Collection<Segment> loadSegments() {
    final var lastFlushedIndex = metaStore.loadLastFlushedIndex();

    // Ensure log directories are created.
    //noinspection ResultOfMethodCallIgnored
    directory.mkdirs();
    final List<Segment> segments = new ArrayList<>();

    final List<File> files = getSortedLogSegments();
    Segment previousSegment = null;
    for (int i = 0; i < files.size(); i++) {
      final File file = files.get(i);

      try {
        LOG.debug("Found segment file: {}", file.getName());
        final Segment segment =
            segmentLoader.loadExistingSegment(
                file.toPath(),
                previousSegment != null ? previousSegment.lastAsqn() : INITIAL_ASQN,
                journalIndex);

        if (i > 0) {
          // throws CorruptedJournalException if there is a gap
          checkForIndexGaps(segments.get(i - 1), segment);
        }

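        // everything up to the last flushed index must be readable from disk; if the last segment
        // ends before it, previously flushed data has been lost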
        final boolean isLastSegment = i == files.size() - 1;
        if (isLastSegment && segment.lastIndex() < lastFlushedIndex) {
          throw new CorruptedJournalException(
              "Expected to find records until index %d, but last index is %d"
                  .formatted(lastFlushedIndex, segment.lastIndex()));
        }

        segments.add(segment);
        previousSegment = segment;
      } catch (final CorruptedJournalException e) {
        if (handleSegmentCorruption(files, segments, i, lastFlushedIndex)) {
          return segments;
        }

        throw e;
      }
    }

    return segments;
  }

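  /**
   * Throws a {@link CorruptedJournalException} if the given segment does not start directly after
   * the previous segment's last index.
   */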
  private void checkForIndexGaps(final Segment prevSegment, final Segment segment) {
    if (prevSegment.lastIndex() != segment.index() - 1) {
      throw new CorruptedJournalException(
          String.format(
              "Log segment %s is not aligned with previous segment %s (last index: %d).",
              segment, prevSegment, prevSegment.lastIndex()));
    }
  }

  /** Returns true if the segments after the corrupted segment were deleted; false otherwise. */
  private boolean handleSegmentCorruption(
      final List<File> files,
      final List<Segment> segments,
      final int failedIndex,
      final long lastFlushedIndex) {
    // if we've never flushed anything, then we can simply go ahead and delete the segment;
    // otherwise fail if we've already flushed the failing index
    if (metaStore.hasLastFlushedIndex()) {
      long lastSegmentIndex = 0;

      if (!segments.isEmpty()) {
        final Segment previousSegment = segments.get(segments.size() - 1);
        lastSegmentIndex = previousSegment.lastIndex();
      }

      if (lastFlushedIndex > lastSegmentIndex) {
        return false;
      }
    }

    deleteUnflushedSegments(files, failedIndex, lastFlushedIndex);
    return true;
  }

  private void deleteUnflushedSegments(
      final List<File> files, final int failedIndex, final long lastFlushedIndex) {
    LOG.debug(
        "Found corrupted segment after last ack'ed index {}. Deleting segments {} - {}",
        lastFlushedIndex,
        files.get(failedIndex).getName(),
        files.get(files.size() - 1).getName());

    for (int i = failedIndex; i < files.size(); i++) {
      final File file = files.get(i);
      try {
        Files.delete(file.toPath());
      } catch (final IOException e) {
        throw new JournalException(
            String.format(
                "Failed to delete log segment '%s' when handling corruption.", file.getName()),
            e);
      }
    }
  }

  /** Returns the log segment files, sorted by segment id; the list may be empty but never null. */
  private List<File> getSortedLogSegments() {
    final File[] files =
        directory.listFiles(file -> file.isFile() && SegmentFile.isSegmentFile(name, file));

    if (files == null) {
      throw new IllegalStateException(
          String.format(
              "Could not list files in directory '%s'. Either the path doesn't point to a directory or an I/O error occurred.",
              directory));
    }

    Arrays.sort(files, Comparator.comparingInt(f -> SegmentFile.getSegmentIdFromPath(f.getName())));

    return Arrays.asList(files);
  }

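  /**
   * Deletes segment files that were previously marked for deletion but not yet removed, e.g.
   * because the node was stopped before the deferred deletion could run.
   */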
  private void deleteDeferredFiles() {
    try (final DirectoryStream<Path> segmentsToDelete =
        Files.newDirectoryStream(
            directory.toPath(),
            path -> SegmentFile.isDeletedSegmentFile(name, path.getFileName().toString()))) {
      segmentsToDelete.forEach(this::deleteDeferredFile);
    } catch (final IOException e) {
      LOG.warn(
          "Could not delete segment files marked for deletion in {}. This can result in unnecessary disk usage.",
          directory.toPath(),
          e);
    }
  }

  private void deleteDeferredFile(final Path segmentFileToDelete) {
    try {
      Files.deleteIfExists(segmentFileToDelete);
    } catch (final IOException e) {
      LOG.warn(
          "Could not delete file {} which is marked for deletion. This can result in unnecessary disk usage.",
          segmentFileToDelete,
          e);
    }
  }
}