All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.kuujo.copycat.state.internal.DefaultStateLog Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2014 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.kuujo.copycat.state.internal;

import net.kuujo.copycat.CopycatException;
import net.kuujo.copycat.protocol.Consistency;
import net.kuujo.copycat.resource.internal.AbstractResource;
import net.kuujo.copycat.resource.internal.ResourceManager;
import net.kuujo.copycat.state.StateLog;
import net.kuujo.copycat.state.StateLogConfig;
import net.kuujo.copycat.util.concurrent.Futures;
import net.kuujo.copycat.util.function.TriConsumer;
import net.kuujo.copycat.util.internal.Assert;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.collect.Sets;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.UUID;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Supplier;

/**
 * Default state log partition implementation.
 *
 * @author Jordan Halterman
 */
@SuppressWarnings("rawtypes")
public class DefaultStateLog extends AbstractResource> implements StateLog {
  private static final Logger LOGGER = LoggerFactory.getLogger(DefaultStateLog.class);
  private static final int SNAPSHOT_ENTRY = 0;
  private static final int COMMAND_ENTRY = 1;
  private static final int SNAPSHOT_CHUNK_SIZE = 1024 * 1024;
  private static final int SNAPSHOT_INFO = 0;
  private static final int SNAPSHOT_CHUNK = 1;
  private final Map operations = new ConcurrentHashMap<>(128);
  private final Set> watchers = Sets.newCopyOnWriteArraySet();
  private final Consistency defaultConsistency;
  private final SnapshottableLogManager log;
  private Supplier snapshotter;
  private Consumer installer;
  private SnapshotInfo snapshotInfo;
  private List snapshotChunks;

  public DefaultStateLog(ResourceManager context) {
    super(context);
    this.log = (SnapshottableLogManager) context.log();
    defaultConsistency = context.config()
      .getResourceConfig()
      .getDefaultConsistency();
    context.consumer(this::consume);
  }

  @Override
  public  StateLog registerCommand(String name, Function command) {
    Assert.state(isClosed(), "Cannot register command on open state log");
    operations.put(name.hashCode(), new OperationInfo<>(name, command, false));
    LOGGER.debug("{} - Registered state log command {}", context.name(), name);
    return this;
  }

  @Override
  public StateLog unregisterCommand(String name) {
    Assert.state(isClosed(), "Cannot unregister command on open state log");
    OperationInfo info = operations.remove(name.hashCode());
    if (info != null) {
      LOGGER.debug("{} - Unregistered state log command {}", context.name(), name);
    }
    return this;
  }

  @Override
  public StateLog registerWatcher(TriConsumer watcher) {
    Assert.state(isClosed(), "Cannot register watcher on open state log");
    watchers.add(watcher);
    return this;
  }

  @Override
  public StateLog unregisterWatcher(TriConsumer watcher) {
    watchers.remove(watcher);
    return this;
  }

  @Override
  public  StateLog registerQuery(String name, Function query) {
    Assert.state(isClosed(), "Cannot register command on open state log");
    Assert.isNotNull(name, "name");
    Assert.isNotNull(query, "query");
    operations.put(name.hashCode(), new OperationInfo<>(name, query, true, defaultConsistency));
    LOGGER.debug("{} - Registered state log query {} with default consistency", context.name(), name);
    return this;
  }

  @Override
  public  StateLog registerQuery(String name, Function query, Consistency consistency) {
    Assert.state(isClosed(), "Cannot register command on open state log");
    Assert.isNotNull(name, "name");
    Assert.isNotNull(query, "query");
    operations.put(name.hashCode(), new OperationInfo<>(name, query, true, consistency == null || consistency == Consistency.DEFAULT ? defaultConsistency : consistency));
    LOGGER.debug("{} - Registered state log query {} with consistency {}", context.name(), name, consistency);
    return this;
  }

  @Override
  public StateLog unregisterQuery(String name) {
    Assert.state(isClosed(), "Cannot unregister command on open state log");
    OperationInfo info = operations.remove(name.hashCode());
    if (info != null) {
      LOGGER.debug("{} - Unregistered state log query {}", context.name(), name);
    }
    return this;
  }

  @Override
  public StateLog unregister(String name) {
    Assert.state(isClosed(), "Cannot unregister command on open state log");
    OperationInfo info = operations.remove(name.hashCode());
    if (info != null) {
      LOGGER.debug("{} - Unregistered state log operation {}", context.name(), name);
    }
    return this;
  }

  @Override
  public  StateLog snapshotWith(Supplier snapshotter) {
    Assert.state(isClosed(), "Cannot modify state log once opened");
    this.snapshotter = snapshotter;
    LOGGER.debug("{} - Registered state log snapshot handler", context.name());
    return this;
  }

  @Override
  public  StateLog installWith(Consumer installer) {
    Assert.state(isClosed(), "Cannot modify state log once opened");
    this.installer = installer;
    LOGGER.debug("{} - Registered state log install handler", context.name());
    return this;
  }

  @Override
  @SuppressWarnings("unchecked")
  public  CompletableFuture submit(String command, T entry) {
    Assert.state(isOpen(), "State log not open");
    OperationInfo operationInfo = operations.get(command.hashCode());
    if (operationInfo == null) {
      return Futures.exceptionalFutureAsync(new CopycatException(String.format("Invalid state log command %s", command)), executor);
    }

    // If this is a read-only command, check if the command is consistent. For consistent operations,
    // queries are forwarded to the current cluster leader for evaluation. Otherwise, it's safe to
    // read stale data from the local node.
    ByteBuffer buffer = serializer.writeObject(entry);
    ByteBuffer commandEntry = ByteBuffer.allocate(8 + buffer.capacity());
    commandEntry.putInt(COMMAND_ENTRY); // Entry type
    commandEntry.putInt(command.hashCode());
    commandEntry.put(buffer);
    commandEntry.rewind();
    if (operationInfo.readOnly) {
      LOGGER.debug("{} - Submitting state log query {} with entry {}", context.name(), command, entry);
      return context.query(commandEntry, operationInfo.consistency).thenApplyAsync(serializer::readObject, executor);
    } else {
      LOGGER.debug("{} - Submitting state log command {} with entry {}", context.name(), command, entry);
      return context.commit(commandEntry).thenApplyAsync(serializer::readObject, executor);
    }
  }

  @Override
  public synchronized CompletableFuture> open() {
    return runStartupTasks()
      .thenComposeAsync(v -> context.open(), executor)
      .thenApply(v -> this);
  }

  @Override
  public synchronized CompletableFuture close() {
    return context.close()
      .thenComposeAsync(v -> runShutdownTasks(), executor);
  }

  /**
   * Consumes a log entry.
   *
   * @param index The entry index.
   * @param entry The log entry.
   * @return The entry output.
   */
  @SuppressWarnings({"unchecked"})
  private ByteBuffer consume(long term, Long index, ByteBuffer entry) {
    int entryType = entry.getInt();
    switch (entryType) {
      case SNAPSHOT_ENTRY: // Snapshot entry
        if (installSnapshot(entry.slice())) {
          LOGGER.info("Installed snapshot upto index {}", index);
          try {
            log.split(index);
          } catch (IOException e) {
            LOGGER.warn("Split failed", e);
          }
        }
        return ByteBuffer.allocate(0);
      case COMMAND_ENTRY: // Command entry
        int commandCode = entry.getInt();
        OperationInfo operationInfo = operations.get(commandCode);
        if (operationInfo != null) {
          Object input = serializer.readObject(entry.slice());
          Object output = operationInfo.execute(term, index, input);
          // Notify watchers if this is a state changing operation.
          if (!operationInfo.readOnly) {
              watchers.forEach(w -> w.accept(operationInfo.name, input, output));
          }
          return serializer.writeObject(output);
        }
        throw new IllegalStateException("Invalid state log operation");
      default:
        throw new IllegalArgumentException("Invalid entry type");
    }
  }

  /**
   * Checks whether to take a snapshot.
   */
  private void checkSnapshot(long term, long index) {
    // If the given index is the last index of a lot segment and the segment is not the last segment in the log
    // then the index is considered snapshottable.
    if (log.isSnapshottable(index)) {
      takeSnapshot(term, index);
    }
  }

  /**
   * Takes a snapshot and compacts the log.
   */
  private void takeSnapshot(long term, long index) {
    String id = UUID.randomUUID().toString();
    LOGGER.info("{} - Taking snapshot {} upto index {}", context.name(), id, index);

    Object snapshot = snapshotter != null ? snapshotter.get() : null;
    ByteBuffer snapshotBuffer = snapshot != null ? serializer.writeObject(snapshot) : ByteBuffer.allocate(0);

    // Create a unique snapshot ID and calculate the number of chunks for the snapshot.
    LOGGER.debug("{} - Calculating snapshot chunk size for snapshot {}", context.name(), id);
    byte[] snapshotId = id.getBytes();
    int numChunks = (int) Math.ceil(snapshotBuffer.limit() / (double) SNAPSHOT_CHUNK_SIZE);

    LOGGER.debug("{} - Creating {} chunks for snapshot {}", context.name(), numChunks, id);
    List chunks = new ArrayList<>(numChunks + 1);

    // The first entry in the snapshot is snapshot metadata.
    ByteBuffer info = ByteBuffer.allocate(28 + snapshotId.length);
    info.putLong(term);
    info.putInt(SNAPSHOT_ENTRY);
    info.putInt(SNAPSHOT_INFO);
    info.putInt(snapshotId.length);
    info.put(snapshotId);
    info.putInt(snapshotBuffer.limit());
    info.putInt(numChunks);
    chunks.add(info);

    // Now we append a list of snapshot chunks. This ensures that snapshots can be easily replicated in chunks.
    int i = 0;
    int position = 0;
    while (position < snapshotBuffer.limit()) {
      byte[] bytes = new byte[Math.min(snapshotBuffer.limit() - position, SNAPSHOT_CHUNK_SIZE)];
      snapshotBuffer.get(bytes);
      ByteBuffer chunk = ByteBuffer.allocate(24 + bytes.length);
      chunk.putLong(term);
      chunk.putInt(SNAPSHOT_ENTRY); // Indicates the entry is a snapshot entry.
      chunk.putInt(SNAPSHOT_CHUNK);
      chunk.putInt(i++); // The position of the chunk in the snapshot.
      chunk.putInt(bytes.length); // The snapshot chunk length.
      chunk.put(bytes); // The snapshot chunk bytes.
      chunk.flip();
      chunks.add(chunk);
      position += bytes.length;
    }

    try {
      LOGGER.debug("{} - Appending {} chunks for snapshot {} at index {}", context.name(), chunks.size(), id, index);
      log.appendSnapshot(index, chunks);
    } catch (IOException e) {
      throw new CopycatException("Failed to compact state log", e);
    }
  }

  /**
   * Installs a snapshot.
   *
   * This method operates on the assumption that snapshots will always be replicated with an initial metadata entry.
   * The metadata entry specifies a set of chunks that complete the entire snapshot.
   */
  @SuppressWarnings("unchecked")
  private boolean installSnapshot(ByteBuffer snapshotChunk) {
    boolean installedSnapshot = false;
    // Get the snapshot entry type.
    int type = snapshotChunk.getInt();
    if (type == SNAPSHOT_INFO) {
      // The snapshot info defines the snapshot ID and number of chunks. When a snapshot info entry is processed,
      // reset the current snapshot chunks and store the snapshot info. The next entries to be applied should be the
      // snapshot chunks themselves.
      int idLength = snapshotChunk.getInt();
      byte[] idBytes = new byte[idLength];
      snapshotChunk.get(idBytes);
      String id = new String(idBytes);
      int size = snapshotChunk.getInt();
      int numChunks = snapshotChunk.getInt();
      if (snapshotInfo == null || !snapshotInfo.id.equals(id)) {
        LOGGER.debug("{} - Processing snapshot metadata for snapshot {}", context.name(), id);
        snapshotInfo = new SnapshotInfo(id, size, numChunks);
        snapshotChunks = new ArrayList<>(numChunks);
      }
    } else if (type == SNAPSHOT_CHUNK && snapshotInfo != null) {
      // When a chunk is received, use the chunk's position in the snapshot to ensure consistency. Extract the chunk
      // bytes and only append the the chunk if it matches the expected position in the local chunks list.
      int index = snapshotChunk.getInt();
      int chunkLength = snapshotChunk.getInt();
      byte[] chunkBytes = new byte[chunkLength];
      snapshotChunk.get(chunkBytes);
      if (snapshotChunks.size() == index) {
        LOGGER.debug("{} - Processing snapshot chunk {} for snapshot {}", context.name(), index, snapshotInfo.id);
        snapshotChunks.add(ByteBuffer.wrap(chunkBytes));

        // Once the number of chunks has grown to the complete expected chunk count, combine and install the snapshot.
        if (snapshotChunks.size() == snapshotInfo.chunks) {
          LOGGER.debug("{} - Completed assembly of snapshot {} from log", context.name(), snapshotInfo.id);
          if (installer != null) {
            // Calculate the total aggregated size of the snapshot.
            int size = 0;
            for (ByteBuffer chunk : snapshotChunks) {
              size += chunk.limit();
            }

            // Make sure the combined snapshot size is equal to the expected snapshot size.
            Assert.state(size == snapshotInfo.size, "Received inconsistent snapshot");

            LOGGER.debug("{} - Assembled snapshot size: {} bytes", context.name(), size);

            if (size > 0) {
              // Create a complete view of the snapshot by appending all chunks to each other.
              ByteBuffer completeSnapshot = ByteBuffer.allocate(size);
              snapshotChunks.forEach(completeSnapshot::put);
              completeSnapshot.flip();

              // Once a view of the snapshot has been created, deserialize and install the snapshot.
              LOGGER.info("{} - Installing snapshot {}", context.name(), snapshotInfo.id);

              try {
                installer.accept(serializer.readObject(completeSnapshot));
                installedSnapshot = true;
              } catch (Exception e) {
                LOGGER.warn("{} - Failed to install snapshot: {}", context.name(), e.getMessage());
              }
            }
          }
          snapshotInfo = null;
          snapshotChunks = null;
        }
      }
    }
    return installedSnapshot;
  }

  /**
   * Current snapshot info.
   */
  private static class SnapshotInfo {
    private final String id;
    private final int size;
    private final int chunks;

    private SnapshotInfo(String id, int size, int chunks) {
      this.id = id;
      this.size = size;
      this.chunks = chunks;
    }
  }

  /**
   * Snapshot chunk.
   */
  private static class SnapshotChunk {
    private final int index;
    private final ByteBuffer chunk;

    private SnapshotChunk(int index, ByteBuffer chunk) {
      this.index = index;
      this.chunk = chunk;
    }
  }

  @Override
  public String toString() {
    return String.format("%s[name=%s]", getClass().getSimpleName(), context.name());
  }

  /**
   * State command info.
   */
  private class OperationInfo {
    private final String name;
    private final Function function;
    private final boolean readOnly;
    private final Consistency consistency;

    private OperationInfo(String name, Function function, boolean readOnly) {
      this(name, function, readOnly, Consistency.DEFAULT);
    }

    private OperationInfo(String name, Function function, boolean readOnly, Consistency consistency) {
      this.name = name;
      this.function = function;
      this.readOnly = readOnly;
      this.consistency = consistency;
    }

    private U execute(long term, Long index, TT entry) {
      if (index != null)
        checkSnapshot(term, index);
      return function.apply(entry);
    }
  }
}