// Source listing: io.atomix.copycat.server.storage.Segment, from the copycat-server artifact (Maven / Gradle / Ivy).
// Note: a newer version of this artifact is available: 1.2.8.
/*
 * Copyright 2015 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.atomix.copycat.server.storage;

import io.atomix.catalyst.buffer.Buffer;
import io.atomix.catalyst.buffer.FileBuffer;
import io.atomix.catalyst.buffer.MappedBuffer;
import io.atomix.catalyst.buffer.SlicedBuffer;
import io.atomix.catalyst.serializer.Serializer;
import io.atomix.catalyst.util.Assert;
import io.atomix.copycat.server.storage.entry.Entry;

import java.util.function.Predicate;

/**
 * Stores a set of sequential entries in a single file or memory {@link Buffer}.
 * <p>
 * Segments are individual file or memory based groups of sequential entries. Each segment has a fixed capacity
 * in terms of either number of entries or size in bytes.
 * <p>
 * The {@link SegmentDescriptor} describes the metadata for the segment, including the starting {@code index} of
 * the segment and various configuration options. The descriptor is persisted in {@link SegmentDescriptor#BYTES}
 * bytes at the head of the segment.
 * <p>
 * Internally, each segment maintains an in-memory index of entries. The index stores the offset and position of
 * each entry within the segment's internal {@link io.atomix.catalyst.buffer.Buffer}. For entries that are appended
 * to the log sequentially, the index has an O(1) lookup time. For instances where entries in a segment have been
 * skipped (due to log compaction), the lookup time is O(log n) due to binary search. However, due to the nature of
 * the Raft consensus algorithm, readers should typically benefit from O(1) lookups.
 * <p>
 * When a segment is constructed, the segment will attempt to rebuild its index from the underlying segment
 * {@link Buffer}. This is done by reading a 16-bit unsigned length and 64-bit offset for each entry. Once the
 * segment has been built, new entries will be {@link #append(Entry) appended} at the end of the segment.
 * <p>
 * Additionally, segments are responsible for keeping track of entries that have been {@link #clean(long) cleaned}.
 * Cleaned entries are tracked in an internal {@link io.atomix.catalyst.buffer.util.BitArray} with a size equal
 * to the segment's entry {@link #count()}.
 *
 * @author Jordan Halterman
 */
public class Segment implements AutoCloseable {

  /**
   * Number of bytes that precede every serialized entry in the segment buffer: an unsigned 16-bit entry length
   * followed by the entry's 64-bit offset relative to the segment's base index.
   */
  private static final int ENTRY_HEADER_BYTES = Short.BYTES + Long.BYTES;

  private final SegmentDescriptor descriptor;
  private final Serializer serializer;
  private final Buffer buffer;
  private final OffsetIndex offsetIndex;
  private final OffsetCleaner cleaner;
  private final SegmentManager manager;
  // Number of indexes skipped past the last entry that was physically written to the segment.
  private long skip = 0;
  private boolean open = true;

  /**
   * Creates a segment over the given buffer and rebuilds the offset index by scanning the entries already
   * present in the buffer.
   *
   * @param buffer The buffer holding the segment's entries, positioned at the first entry record.
   * @param descriptor The descriptor holding the segment's metadata.
   * @param offsetIndex The index into which each entry's offset and buffer position are recorded.
   * @param cleaner The tracker of cleaned offsets.
   * @param serializer The serializer used to write and read entries.
   * @param manager The segment manager, consulted for the commit index when truncating.
   * @throws NullPointerException if any argument is null
   */
  Segment(Buffer buffer, SegmentDescriptor descriptor, OffsetIndex offsetIndex, OffsetCleaner cleaner, Serializer serializer, SegmentManager manager) {
    this.serializer = Assert.notNull(serializer, "serializer");
    this.buffer = Assert.notNull(buffer, "buffer");
    this.descriptor = Assert.notNull(descriptor, "descriptor");
    this.offsetIndex = Assert.notNull(offsetIndex, "offsetIndex");
    this.cleaner = Assert.notNull(cleaner, "cleaner");
    this.manager = Assert.notNull(manager, "manager");

    // Rebuild the index from the segment data. Each record is laid out as
    // [unsigned short length][long offset][length bytes of entry]; a zero length marks the end of the data.
    long position = buffer.mark().position();
    int length = buffer.readUnsignedShort();
    while (length != 0) {
      long offset = buffer.readLong();
      offsetIndex.index(offset, position);
      position = buffer.skip(length).position();
      // Mark the start of the next record before reading its length so the final reset() below can rewind to it.
      length = buffer.mark().readUnsignedShort();
    }
    // Rewind to the start of the first empty record, which is where the next append will write.
    buffer.reset();
  }

  /**
   * Returns the {@link SegmentDescriptor} for the segment.
   * <p>
   * The segment descriptor is stored in {@link SegmentDescriptor#BYTES} bytes at the head of the segment. The descriptor
   * defines essential information about the segment, including its position in the complete {@link Log} and its {@code version}.
   *
   * @return The segment descriptor stored at the head of the segment.
   */
  public SegmentDescriptor descriptor() {
    return descriptor;
  }

  /**
   * Returns a boolean value indicating whether the segment is open.
   *
   * @return Indicates whether the segment is open.
   */
  public boolean isOpen() {
    return open;
  }

  /**
   * Returns a boolean value indicating whether the segment is empty.
   * <p>
   * The segment is considered empty if no entries have been written to the segment and no indexes in the
   * segment have been {@link #skip(long) skipped}.
   *
   * @return Indicates whether the segment is empty.
   */
  public boolean isEmpty() {
    return offsetIndex.size() > 0 ? offsetIndex.lastOffset() + 1 + skip == 0 : skip == 0;
  }

  /**
   * Returns a boolean value indicating whether the segment has been compacted.
   * <p>
   * The segment is considered compacted if its {@link SegmentDescriptor#version()} is greater than {@code 1}.
   *
   * @return Indicates whether the segment has been compacted.
   */
  public boolean isCompacted() {
    return descriptor.version() > 1;
  }

  /**
   * Returns a boolean value indicating whether the segment is full.
   * <p>
   * The segment is considered full if one of the following conditions is met:
   * <ul>
   *   <li>{@link #size()} is greater than or equal to {@link SegmentDescriptor#maxSegmentSize()}</li>
   *   <li>{@link #count()} is greater than or equal to {@link SegmentDescriptor#maxEntries()}</li>
   * </ul>
   *
   * @return Indicates whether the segment is full.
   */
  public boolean isFull() {
    return size() >= descriptor.maxSegmentSize()
      || offsetIndex.size() >= descriptor.maxEntries();
  }

  /**
   * Returns the total size of the segment in bytes.
   * <p>
   * The size is computed as the buffer's offset plus its current position.
   *
   * @return The size of the segment in bytes.
   */
  public long size() {
    return buffer.offset() + buffer.position();
  }

  /**
   * Returns the current range of the segment.
   * <p>
   * The length includes entries that may have been {@link #skip(long) skipped} at the end of the segment.
   *
   * @return The current range of the segment, or {@code 0} if the segment is empty.
   */
  public long length() {
    return !isEmpty() ? offsetIndex.lastOffset() + 1 + skip : 0;
  }

  /**
   * Returns the count of all entries in the segment.
   * <p>
   * The count includes only entries that are physically present in the segment. Entries that have been compacted
   * out of the segment are not counted towards the count, nor are {@link #skip(long) skipped} entries.
   *
   * @return The count of all entries in the segment.
   */
  public int count() {
    return offsetIndex.size();
  }

  /**
   * Returns the base index of the segment.
   * <p>
   * The base index is equivalent to the segment's {@link #firstIndex()} if the segment is not {@link #isEmpty() empty}.
   *
   * @return The base index of the segment.
   */
  long index() {
    return descriptor.index();
  }

  /**
   * Returns the index of the first entry in the segment.
   * <p>
   * If the segment is empty, {@code 0} will be returned regardless of the segment's base index.
   *
   * @return The index of the first entry in the segment or {@code 0} if the segment is empty.
   * @throws IllegalStateException if the segment is not open
   */
  public long firstIndex() {
    assertSegmentOpen();
    return !isEmpty() ? descriptor.index() : 0;
  }

  /**
   * Returns the index of the last entry in the segment.
   * <p>
   * The last index accounts for indexes {@link #skip(long) skipped} at the end of the segment.
   *
   * @return The index of the last entry in the segment, or the segment's base index minus one if the segment
   *         is empty.
   * @throws IllegalStateException if the segment is not open
   */
  public long lastIndex() {
    assertSegmentOpen();
    return !isEmpty() ? offsetIndex.lastOffset() + descriptor.index() + skip : descriptor.index() - 1;
  }

  /**
   * Returns the next index in the segment.
   *
   * @return The next index in the segment.
   * @throws IllegalStateException if the segment is non-empty and not open
   */
  public long nextIndex() {
    return !isEmpty() ? lastIndex() + 1 : descriptor.index() + skip;
  }

  /**
   * Returns the offset of the given index within the segment.
   * <p>
   * The offset reflects the zero-based offset of the given {@code index} in the segment when missing/compacted
   * entries are taken into account. For instance, if a segment contains entries at indexes {@code {1, 3}}, the
   * {@code offset} of index {@code 1} will be {@code 0} and index {@code 3} will be {@code 1}.
   *
   * @param index The index to check.
   * @return The offset of the given index.
   */
  public long offset(long index) {
    return offsetIndex.find(relativeOffset(index));
  }

  /**
   * Returns the offset of the given index relative to the segment's base index.
   *
   * @param index The absolute log index.
   * @return The distance between {@code index} and the segment's base index.
   */
  private long relativeOffset(long index) {
    return index - descriptor.index();
  }

  /**
   * Checks the range of the given index.
   *
   * @param index The index to check.
   * @throws IndexOutOfBoundsException if the {@code index} is invalid for the segment
   * @throws IllegalStateException if the segment is not open
   */
  private void checkRange(long index) {
    Assert.indexNot(isEmpty(), "segment is empty");
    Assert.indexNot(index < firstIndex(), index + " is less than the first index in the segment");
    Assert.indexNot(index > lastIndex(), index + " is greater than the last index in the segment");
  }

  /**
   * Commits an entry to the segment.
   * <p>
   * The entry is written at the current buffer position as an unsigned 16-bit length, the 64-bit offset of the
   * entry relative to the segment's base index, and the serialized entry bytes. The entry's size is set to the
   * serialized length, and any pending {@link #skip(long) skip} count is reset.
   *
   * @param entry The entry to append. Its index must equal {@link #nextIndex()}.
   * @return The index at which the entry was written.
   * @throws NullPointerException if {@code entry} is null
   * @throws IllegalStateException if the segment is full
   * @throws IndexOutOfBoundsException if the {@code entry} index does not match the next index
   */
  public long append(Entry<?> entry) {
    Assert.notNull(entry, "entry");
    Assert.stateNot(isFull(), "segment is full");
    long index = nextIndex();
    Assert.index(index == entry.getIndex(), "inconsistent index: %s", entry.getIndex());

    // Calculate the offset of the entry.
    long offset = relativeOffset(index);

    // Mark the starting position of the record and record the starting position of the new entry.
    long position = buffer.mark().position();

    // Serialize the object into the segment buffer, leaving room for the record header. The header is written
    // afterwards because the serialized length is not known until the entry has been written.
    serializer.writeObject(entry, buffer.skip(ENTRY_HEADER_BYTES));

    // Calculate the length of the serialized bytes based on the resulting buffer position and the starting position.
    // NOTE(review): the length is stored as an unsigned short; nothing here rejects a serialized entry larger
    // than 65535 bytes. Confirm that an upstream limit guarantees this.
    int length = (int) (buffer.position() - (position + ENTRY_HEADER_BYTES));

    // Set the entry size.
    entry.setSize(length);

    // Rewind to the marked record start, write the header, then skip back over the already-written entry bytes.
    buffer.reset().writeUnsignedShort(length).writeLong(offset).skip(length);

    // Index the offset and position.
    offsetIndex.index(offset, position);

    // Reset skip to zero since we wrote a new entry.
    skip = 0;

    return index;
  }

  /**
   * Reads the entry at the given index.
   *
   * @param index The index from which to read the entry.
   * @param <T> The entry type.
   * @return The entry at the given index, or {@code null} if the index is within the segment's range but the
   *         offset index holds no position for it.
   * @throws IllegalStateException if the segment is not open or {@code index} is inconsistent with the entry
   * @throws IndexOutOfBoundsException if the segment is empty or {@code index} is outside the segment's range
   */
  public synchronized <T extends Entry<T>> T get(long index) {
    assertSegmentOpen();
    checkRange(index);

    // Get the offset of the index within this segment.
    long offset = relativeOffset(index);

    // Get the start position of the entry from the memory index.
    long position = offsetIndex.position(offset);

    // If the index contained the entry, read the entry from the buffer.
    if (position != -1) {

      // Read the length of the entry.
      int length = buffer.readUnsignedShort(position);

      // Verify that the entry at the given offset matches.
      long entryOffset = buffer.readLong(position + Short.BYTES);
      Assert.state(entryOffset == offset, "inconsistent index: %s", index);

      // Read the entry buffer and deserialize the entry.
      try (Buffer value = buffer.slice(position + ENTRY_HEADER_BYTES, length)) {
        T entry = serializer.readObject(value);
        entry.setIndex(index).setSize(length);
        return entry;
      }
    }
    return null;
  }

  /**
   * Returns a boolean value indicating whether the given index is within the range of the segment.
   *
   * @param index The index to check.
   * @return Indicates whether the given index is within the range of the segment.
   * @throws IllegalStateException if the segment is not open
   */
  boolean validIndex(long index) {
    assertSegmentOpen();
    return !isEmpty() && index >= firstIndex() && index <= lastIndex();
  }

  /**
   * Returns a boolean value indicating whether the entry at the given index is active.
   *
   * @param index The index to check.
   * @return Indicates whether the entry at the given index is active.
   * @throws IllegalStateException if the segment is not open
   */
  public boolean contains(long index) {
    assertSegmentOpen();

    // Reject indexes outside the segment's range before consulting the memory index.
    return validIndex(index) && offsetIndex.contains(relativeOffset(index));
  }

  /**
   * Cleans an entry from the segment.
   *
   * @param index The index of the entry to clean.
   * @return Indicates whether the entry was newly cleaned from the segment. Returns {@code false} if the offset
   *         index has no entry for {@code index}.
   * @throws IllegalStateException if the segment is not open
   */
  public boolean clean(long index) {
    assertSegmentOpen();
    long offset = offsetIndex.find(relativeOffset(index));
    return offset != -1 && cleaner.clean(offset);
  }

  /**
   * Returns a boolean value indicating whether the given index was cleaned from the segment.
   *
   * @param index The index of the entry to check.
   * @return Indicates whether the given entry was cleaned from the segment.
   * @throws IllegalStateException if the segment is not open
   */
  public boolean isClean(long index) {
    assertSegmentOpen();
    // NOTE(review): unlike clean(long), a missing entry (find() == -1) is passed straight to the cleaner;
    // confirm the cleaner handles -1.
    return cleaner.isClean(offsetIndex.find(relativeOffset(index)));
  }

  /**
   * Returns the number of entries in the segment that have been cleaned.
   *
   * @return The number of entries in the segment that have been cleaned.
   * @throws IllegalStateException if the segment is not open
   */
  public long cleanCount() {
    assertSegmentOpen();
    return cleaner.count();
  }

  /**
   * Returns a predicate for cleaned offsets in the segment.
   * <p>
   * The predicate is backed by a copy of the cleaner's bits taken at the time of the call.
   *
   * @return A predicate for cleaned offsets in the segment.
   */
  public Predicate<Long> cleanPredicate() {
    return new OffsetCleaner(cleaner.bits().copy());
  }

  /**
   * Skips a number of entries in the segment.
   *
   * @param entries The number of entries to skip.
   * @return The segment.
   * @throws IllegalStateException if the segment is not open
   */
  public Segment skip(long entries) {
    assertSegmentOpen();
    this.skip += entries;
    return this;
  }

  /**
   * Truncates entries after the given index.
   *
   * @param index The index after which to remove entries.
   * @return The segment.
   * @throws IllegalStateException if the segment is not open
   * @throws IndexOutOfBoundsException if {@code index} is less than the manager's commit index
   */
  public Segment truncate(long index) {
    assertSegmentOpen();
    Assert.index(index >= manager.commitIndex(), "cannot truncate committed index");

    long offset = relativeOffset(index);
    long lastOffset = offsetIndex.lastOffset();

    // Reduce the skip count by the distance between the last indexed offset and the truncation offset.
    // NOTE(review): the absolute distance is used whether the target lies before or after the last indexed
    // offset; confirm this yields the intended lastIndex() when the target is inside the skipped range.
    long diff = Math.abs(lastOffset - offset);
    skip = Math.max(skip - diff, 0);

    // Only rewrite the buffer when entries physically present in the segment are being removed.
    if (offset < lastOffset) {
      long position = offsetIndex.truncate(offset);
      buffer.position(position)
        .zero(position)
        .flush();
    }
    return this;
  }

  /**
   * Flushes the segment buffers to disk.
   *
   * @return The segment.
   */
  public Segment flush() {
    buffer.flush();
    offsetIndex.flush();
    return this;
  }

  /**
   * Closes the segment's buffer, offset index, and descriptor, and marks the segment as no longer open.
   */
  @Override
  public void close() {
    buffer.close();
    offsetIndex.close();
    descriptor.close();
    open = false;
  }

  /**
   * Deletes the segment.
   * <p>
   * If the segment's buffer (or the root of a sliced buffer) is file-backed or memory-mapped, it is deleted;
   * the offset index is always deleted.
   */
  public void delete() {
    // A sliced buffer is a view; deletion must target the root buffer that owns the underlying storage.
    Buffer buffer = this.buffer instanceof SlicedBuffer ? ((SlicedBuffer) this.buffer).root() : this.buffer;
    if (buffer instanceof FileBuffer) {
      ((FileBuffer) buffer).delete();
    } else if (buffer instanceof MappedBuffer) {
      ((MappedBuffer) buffer).delete();
    }

    offsetIndex.delete();
  }

  @Override
  public String toString() {
    // Compute the first index directly rather than through firstIndex(): that method asserts the segment is
    // open, and toString() should not throw IllegalStateException merely because the segment was closed.
    long first = !isEmpty() ? descriptor.index() : 0;
    return String.format("Segment[id=%d, version=%d, index=%d, length=%d]", descriptor.id(), descriptor.version(), first, length());
  }

  /**
   * Asserts that the segment is open.
   *
   * @throws IllegalStateException if the segment is not open
   */
  private void assertSegmentOpen() {
    Assert.state(isOpen(), "segment not open");
  }
}