All downloads are free. The search and download features use the official Maven repository.

com.scalar.db.transaction.consensuscommit.Coordinator Maven / Gradle / Ivy

Go to download

A universal transaction manager that achieves database-agnostic transactions and distributed transactions that span multiple databases

The newest version!
package com.scalar.db.transaction.consensuscommit;

import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Joiner;
import com.google.common.base.Splitter;
import com.google.common.util.concurrent.Uninterruptibles;
import com.scalar.db.api.Consistency;
import com.scalar.db.api.DistributedStorage;
import com.scalar.db.api.Get;
import com.scalar.db.api.Put;
import com.scalar.db.api.PutIfNotExists;
import com.scalar.db.api.Result;
import com.scalar.db.api.TableMetadata;
import com.scalar.db.api.TransactionState;
import com.scalar.db.exception.storage.ExecutionException;
import com.scalar.db.exception.storage.NoMutationException;
import com.scalar.db.io.DataType;
import com.scalar.db.io.Key;
import com.scalar.db.io.Value;
import com.scalar.db.transaction.consensuscommit.CoordinatorGroupCommitter.CoordinatorGroupCommitKeyManipulator;
import com.scalar.db.util.groupcommit.KeyManipulator.Keys;
import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.concurrent.TimeUnit;
import javax.annotation.Nonnull;
import javax.annotation.concurrent.ThreadSafe;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

@ThreadSafe
public class Coordinator {
  /** Default namespace of the coordinator table; used when no namespace is configured. */
  public static final String NAMESPACE = "coordinator";

  /** Name of the table that the reads and writes built by this class target. */
  public static final String TABLE = "state";

  /**
   * Table schema without the child-IDs column: ID (TEXT, partition key), STATE (INT) and
   * CREATED_AT (BIGINT). As the name indicates, this is the layout for group commit disabled.
   */
  public static final TableMetadata TABLE_METADATA_WITH_GROUP_COMMIT_DISABLED =
      TableMetadata.newBuilder()
          .addColumn(Attribute.ID, DataType.TEXT)
          .addColumn(Attribute.STATE, DataType.INT)
          .addColumn(Attribute.CREATED_AT, DataType.BIGINT)
          .addPartitionKey(Attribute.ID)
          .build();

  /**
   * Same schema as above plus a CHILD_IDS (TEXT) column. As the name indicates, this is the
   * layout for group commit enabled.
   */
  public static final TableMetadata TABLE_METADATA_WITH_GROUP_COMMIT_ENABLED =
      TableMetadata.newBuilder()
          .addColumn(Attribute.ID, DataType.TEXT)
          .addColumn(Attribute.CHILD_IDS, DataType.TEXT)
          .addColumn(Attribute.STATE, DataType.INT)
          .addColumn(Attribute.CREATED_AT, DataType.BIGINT)
          .addPartitionKey(Attribute.ID)
          .build();

  // Upper bound on the number of attempts made by the get/put retry loops of this class.
  private static final int MAX_RETRY_COUNT = 5;
  // Base delay of the exponential backoff; it is multiplied by 2^counter between attempts.
  private static final long SLEEP_BASE_MILLIS = 50;
  private static final Logger logger = LoggerFactory.getLogger(Coordinator.class);
  // Storage that all coordinator reads and writes are issued against.
  private final DistributedStorage storage;
  // Namespace the coordinator table is looked up in.
  private final String coordinatorNamespace;
  // Tells full (group-commit) IDs apart from other IDs and splits them into parent/child keys.
  private final CoordinatorGroupCommitKeyManipulator keyManipulator;

  /**
   * Creates a coordinator that keeps its state table in the default {@link #NAMESPACE}.
   *
   * @param storage a storage
   * @deprecated As of release 3.3.0. Will be removed in release 5.0.0
   */
  @Deprecated
  @SuppressFBWarnings("EI_EXPOSE_REP2")
  public Coordinator(DistributedStorage storage) {
    this.keyManipulator = new CoordinatorGroupCommitKeyManipulator();
    this.coordinatorNamespace = NAMESPACE;
    this.storage = storage;
  }

  /**
   * Creates a coordinator whose namespace is taken from the given configuration.
   *
   * @param storage the storage used to read and write coordinator states
   * @param config the configuration; when it carries no coordinator namespace, {@link #NAMESPACE}
   *     is used instead
   */
  @SuppressFBWarnings("EI_EXPOSE_REP2")
  public Coordinator(DistributedStorage storage, ConsensusCommitConfig config) {
    this.storage = storage;
    this.keyManipulator = new CoordinatorGroupCommitKeyManipulator();
    this.coordinatorNamespace = config.getCoordinatorNamespace().orElse(NAMESPACE);
  }

  /**
   * Returns the coordinator state recorded for the given transaction ID.
   *
   * <p>IDs that the key manipulator recognizes as full (group-commit) keys are resolved through
   * {@link #getStateForGroupCommit(String)}; any other ID is looked up directly by that ID.
   *
   * @param id a transaction ID
   * @return the recorded state, or an empty {@code Optional} if no matching record was found
   * @throws CoordinatorException if the coordinator table can't be read
   */
  public Optional<Coordinator.State> getState(String id) throws CoordinatorException {
    if (keyManipulator.isFullKey(id)) {
      return getStateForGroupCommit(id);
    }

    return get(createGetWith(id));
  }

  /**
   * Returns the coordinator state of a transaction identified by a full (group-commit) ID.
   *
   * <p>The record keyed by the full ID is tried first. If it doesn't exist, the record keyed by
   * the parent ID is read and returned only when its child IDs contain this transaction's child
   * ID.
   *
   * @param fullId a full transaction ID that can be split into a parent key and a child key
   * @return the matching state, or an empty {@code Optional} if neither lookup finds the
   *     transaction
   * @throws CoordinatorException if the coordinator table can't be read
   */
  @VisibleForTesting
  Optional<Coordinator.State> getStateForGroupCommit(String fullId) throws CoordinatorException {
    // Reading a coordinator state is likely to occur during lazy recovery, as follows:
    // 1. Transaction T1 starts and creates PREPARED state records but hasn't committed or aborted
    //    yet.
    // 2. Transaction T2 starts and reads the PREPARED state records created by T1.
    // 3. T2 reads the coordinator table record for T1 to decide whether to roll back or roll
    //    forward.
    //
    // The likelihood of step 2 would increase if T1 is delayed.
    //
    // With the group commit feature enabled, delayed transactions are isolated from a normal group
    // that is looked up by a parent ID into a delayed group that is looked up by a full ID.
    // Therefore, looking up with the full transaction ID should be tried first to minimize read
    // operations as much as possible.

    // Look up with the full ID for a delayed group that contains only a single transaction.
    // The normal lookup logic can be used as is.
    Optional<Coordinator.State> stateOfDelayedTxn = get(createGetWith(fullId));
    if (stateOfDelayedTxn.isPresent()) {
      return stateOfDelayedTxn;
    }

    // Look up with the parent ID for a normal group that contains multiple transactions.
    Keys<String, String, String> idForGroupCommit = keyManipulator.keysFromFullKey(fullId);
    String parentId = idForGroupCommit.parentKey;
    String childId = idForGroupCommit.childKey;

    // The group's record only counts for this transaction if it lists the transaction's child ID.
    Optional<Coordinator.State> state = get(createGetWith(parentId));
    return state.filter(s -> s.getChildIds().contains(childId));
  }

  /**
   * Writes the given state to the coordinator table using the put built by {@link
   * #createPutWith(Coordinator.State)}.
   *
   * @param state the state to store
   * @throws CoordinatorException if the write fails; a {@code CoordinatorConflictException} is
   *     raised when the storage reports that no mutation was applied
   */
  public void putState(Coordinator.State state) throws CoordinatorException {
    put(createPutWith(state));
  }

  /**
   * Writes one coordinator record for a normal group commit: the record is keyed by the parent ID
   * and lists the child ID extracted from each of the given full IDs.
   *
   * @param parentId the parent ID used as the record key; must not be a full key
   * @param fullIds the full IDs of the transactions in the group; may be empty
   * @param transactionState the state to record for the group
   * @param createdAt the creation timestamp to record
   * @throws CoordinatorException if the write fails; a {@code CoordinatorConflictException} is
   *     raised when the storage reports that no mutation was applied
   * @throws AssertionError if {@code parentId} is a full key
   */
  void putStateForGroupCommit(
      String parentId, List<String> fullIds, TransactionState transactionState, long createdAt)
      throws CoordinatorException {

    if (keyManipulator.isFullKey(parentId)) {
      throw new AssertionError(
          "This method is only for normal group commits that use a parent ID as the key");
    }

    // Put the state that contains a parent ID as the key and multiple child transaction IDs.
    List<String> childIds = new ArrayList<>(fullIds.size());
    for (String fullId : fullIds) {
      Keys<String, String, String> keys = keyManipulator.keysFromFullKey(fullId);
      childIds.add(keys.childKey);
    }
    State state = new State(parentId, childIds, transactionState, createdAt);

    put(createPutWith(state));
  }

  /**
   * Records the given transaction as ABORTED on behalf of a lazy recovery.
   *
   * <p>An ID that is not a full (group-commit) key gets a single ABORTED record keyed by that ID.
   * A full key is delegated to the group-commit-aware variant.
   *
   * @param id the ID of the transaction to abort
   * @throws CoordinatorException if a write fails or conflicts with an existing record
   */
  public void putStateForLazyRecoveryRollback(String id) throws CoordinatorException {
    if (!keyManipulator.isFullKey(id)) {
      putState(new Coordinator.State(id, TransactionState.ABORTED));
      return;
    }

    putStateForLazyRecoveryRollbackForGroupCommit(id);
  }

  /**
   * Aborts a transaction identified by a full (group-commit) ID on behalf of a lazy recovery.
   *
   * <p>Two records are inserted in order: an ABORTED record keyed by the parent ID with no child
   * IDs, then an ABORTED record keyed by the full ID. If the parent-ID record already exists, the
   * existing record decides the outcome when it lists this transaction's child ID.
   *
   * @param id a full transaction ID
   * @throws CoordinatorException if a write or read fails; a {@code CoordinatorConflictException}
   *     is raised when an existing record shows the transaction in a state other than ABORTED, or
   *     when the full-ID record already exists
   */
  private void putStateForLazyRecoveryRollbackForGroupCommit(String id)
      throws CoordinatorException {
    // A lazy recovery doesn't know which key the transaction that created the PREPARED record
    // uses as the `tx_id` partition key: a parent ID or a full ID.
    //
    // Case a) If a transaction becomes "ready for commit" in time, it'll be committed in a group
    // with `tx_id: {parent ID}`.
    // Case b) If a transaction is delayed, it'll be committed in an isolated group with a full ID
    // as `tx_id: {full ID}`.
    //
    // If lazy recoveries only insert a record with `tx_id: {full ID}` to abort the transaction,
    // it will not conflict with the group commit using `tx_id: {parent ID}` in case #a.
    // Therefore, lazy recoveries first need to insert a record with `tx_id: {parent ID}` and
    // empty `tx_child_ids` to the Coordinator table. We'll call this insertion
    // `lazy-recovery-abort-with-parent-id`. This record is intended to conflict with a potential
    // group commit considering case #a, even though it doesn't help in finding the coordinator
    // state since `tx_child_ids` is empty.
    //
    // Once the record insertion with `tx_id: {parent ID}` succeeds, the lazy recovery will
    // insert another record with `tx_id: {full ID}`. We'll call this insertion
    // `lazy-recovery-abort-with-full-id`. This record insertion is needed to conflict with a
    // potential delayed group commit that has `tx_id: {full ID}` in case #b, and indicates the
    // transaction is aborted.
    //
    // Let's walk through all the cases.
    //
    // A. The original commit with `tx_id: {parent ID}` succeeds in case #a, and then lazy
    // recovery happens
    // - The original commit with `tx_id: {parent ID}` succeeds
    // - `lazy-recovery-abort-with-parent-id` fails
    // - The transaction is treated as committed since the commit's `tx_child_ids` contains the
    // transaction child ID
    //
    // B. The original commit with `tx_id: {parent ID}` is in-progress in case #a, and lazy
    // recovery happens first
    // - `lazy-recovery-abort-with-parent-id` succeeds
    // - The original commit with `tx_id: {parent ID}` fails
    // - (If the lazy recovery crashes here, another lazy recovery will insert the below
    // `lazy-recovery-abort-with-full-id` later)
    // - `lazy-recovery-abort-with-full-id` succeeds
    // - The transaction is treated as aborted because of `lazy-recovery-abort-with-full-id`
    //
    // C. The original commit with `tx_id: {full ID}` is done in case #b, and then lazy recovery
    // happens
    // - The original commit with `tx_id: {full ID}` succeeds
    // - `lazy-recovery-abort-with-parent-id` succeeds
    // - `lazy-recovery-abort-with-full-id` fails
    // - The transaction is treated as committed since the commit `tx_id` is the transaction full
    // ID
    //
    // D. The original commit with `tx_id: {full ID}` is in-progress in case #b, and lazy
    // recovery happens first
    // - `lazy-recovery-abort-with-parent-id` succeeds
    // - (If the lazy recovery crashes here and the original commit happens, the situation will be
    // the same as C)
    // - `lazy-recovery-abort-with-full-id` succeeds
    // - The original commit with `tx_id: {full ID}` fails
    // - The transaction is treated as aborted because of `lazy-recovery-abort-with-full-id`
    Keys<String, String, String> keys = keyManipulator.keysFromFullKey(id);
    try {
      // This record is to prevent a group commit that has the same parent ID considering case #a
      // regardless if the transaction is actually in a group commit (case #a) or a delayed commit
      // (case #b).
      putStateForGroupCommit(
          keys.parentKey,
          Collections.emptyList(),
          TransactionState.ABORTED,
          System.currentTimeMillis());
    } catch (CoordinatorConflictException e) {
      // A record with the parent ID exists already, although there may be ongoing delayed groups.

      // If that record contains the transaction, follow its state.
      // Otherwise, continue to insert a record with the full ID.
      Optional<Coordinator.State> optState = getState(keys.parentKey);
      if (!optState.isPresent()) {
        // The insertion just conflicted with this record, so it is expected to be readable.
        throw new AssertionError(
            "The coordinator state for the parent ID must exist after a conflict. ID: "
                + keys.parentKey);
      }
      State state = optState.get();
      if (state.getChildIds().contains(keys.childKey)) {
        if (state.getState() == TransactionState.ABORTED) {
          // Already aborted as part of the group; nothing more to record.
          return;
        } else {
          // Conflicted.
          throw e;
        }
      }
    }
    // This record indicates that the transaction is aborted.
    putState(new Coordinator.State(id, TransactionState.ABORTED));
  }

  /**
   * Builds the read of the coordinator record whose partition key is the given ID, with
   * linearizable consistency, against this coordinator's namespace and {@link #TABLE}.
   *
   * @param id the ID used as the partition key
   * @return the configured {@code Get}
   */
  private Get createGetWith(String id) {
    Key partitionKey = new Key(Attribute.toIdValue(id));
    Get lookup = new Get(partitionKey);
    return lookup
        .withConsistency(Consistency.LINEARIZABLE)
        .forNamespace(coordinatorNamespace)
        .forTable(TABLE);
  }

  /**
   * Executes the given read against the storage, retrying with exponential backoff when the
   * storage throws an {@code ExecutionException}.
   *
   * <p>At most {@code MAX_RETRY_COUNT} attempts are made. Backoff happens only between attempts,
   * so the final failure is reported without an extra sleep.
   *
   * @param get the read to execute
   * @return the state built from the record, or an empty {@code Optional} if no record exists
   * @throws CoordinatorException if every attempt fails, or if the record read is missing a
   *     required column
   */
  private Optional<Coordinator.State> get(Get get) throws CoordinatorException {
    for (int attempt = 0; attempt < MAX_RETRY_COUNT; attempt++) {
      try {
        Optional<Result> result = storage.get(get);
        if (result.isPresent()) {
          return Optional.of(new State(result.get()));
        }
        return Optional.empty();
      } catch (ExecutionException e) {
        logger.warn("Can't get coordinator state", e);
      }
      // No point in sleeping when there is no further attempt.
      if (attempt < MAX_RETRY_COUNT - 1) {
        exponentialBackoff(attempt);
      }
    }
    throw new CoordinatorException("Can't get coordinator state");
  }

  /**
   * Builds the write for the given state: keyed by the state's ID, carrying the state and
   * created-at values (plus the joined child IDs when there are any), with linearizable
   * consistency and a put-if-not-exists condition.
   *
   * @param state the state to write
   * @return the configured {@code Put}
   */
  @VisibleForTesting
  Put createPutWith(Coordinator.State state) {
    Key partitionKey = new Key(Attribute.toIdValue(state.getId()));
    Put put = new Put(partitionKey);

    // The child-IDs column is written only when the state actually has child IDs.
    String joinedChildIds = state.getChildIdsAsString();
    boolean hasChildIds = !joinedChildIds.isEmpty();
    if (hasChildIds) {
      put.withValue(Attribute.toChildIdsValue(joinedChildIds));
    }

    return put.withValue(Attribute.toStateValue(state.getState()))
        .withValue(Attribute.toCreatedAtValue(state.getCreatedAt()))
        .withConsistency(Consistency.LINEARIZABLE)
        .withCondition(new PutIfNotExists())
        .forNamespace(coordinatorNamespace)
        .forTable(TABLE);
  }

  /**
   * Executes the given write against the storage, retrying with exponential backoff when the
   * storage throws an {@code ExecutionException}.
   *
   * <p>At most {@code MAX_RETRY_COUNT} attempts are made. Backoff happens only between attempts,
   * so the final failure is reported without an extra sleep. A {@code NoMutationException} is
   * never retried.
   *
   * @param put the write to execute
   * @throws CoordinatorException if every attempt fails; a {@code CoordinatorConflictException}
   *     is raised when the storage reports that no mutation was applied
   */
  private void put(Put put) throws CoordinatorException {
    for (int attempt = 0; attempt < MAX_RETRY_COUNT; attempt++) {
      try {
        storage.put(put);
        return;
      } catch (NoMutationException e) {
        throw new CoordinatorConflictException("Mutation seems applied already", e);
      } catch (ExecutionException e) {
        logger.warn("Putting state in coordinator failed", e);
      }
      // No point in sleeping when there is no further attempt.
      if (attempt < MAX_RETRY_COUNT - 1) {
        exponentialBackoff(attempt);
      }
    }
    throw new CoordinatorException("Couldn't put coordinator state");
  }

  /**
   * Sleeps, ignoring interruptions until the time has elapsed, for {@code 2^counter} times the
   * base delay.
   *
   * @param counter the exponent applied to the base of two
   */
  private void exponentialBackoff(int counter) {
    long multiplier = (long) Math.pow(2, counter);
    long sleepMillis = multiplier * SLEEP_BASE_MILLIS;
    Uninterruptibles.sleepUninterruptibly(sleepMillis, TimeUnit.MILLISECONDS);
  }

  @ThreadSafe
  public static class State {
    private static final List EMPTY_CHILD_IDS = Collections.emptyList();
    private static final String CHILD_IDS_DELIMITER = ",";
    private final String id;
    private final TransactionState state;
    private final long createdAt;
    private final List childIds;

    public State(Result result) throws CoordinatorException {
      checkNotMissingRequired(result);
      id = result.getValue(Attribute.ID).get().getAsString().get();
      state = TransactionState.getInstance(result.getValue(Attribute.STATE).get().getAsInt());
      createdAt = result.getValue(Attribute.CREATED_AT).get().getAsLong();
      Optional> childIdsOpt = result.getValue(Attribute.CHILD_IDS);
      Optional childIdsStrOpt;
      if (childIdsOpt.isPresent()) {
        childIdsStrOpt = childIdsOpt.get().getAsString();
      } else {
        childIdsStrOpt = Optional.empty();
      }
      childIds =
          childIdsStrOpt
              .map(s -> Splitter.on(CHILD_IDS_DELIMITER).omitEmptyStrings().splitToList(s))
              .orElse(EMPTY_CHILD_IDS);
    }

    public State(String id, TransactionState state) {
      this(id, state, System.currentTimeMillis());
    }

    // For the SpotBugs warning CT_CONSTRUCTOR_THROW
    @Override
    protected final void finalize() {}

    @VisibleForTesting
    State(String id, List childIds, TransactionState state, long createdAt) {
      this.id = checkNotNull(id);
      for (String childId : childIds) {
        if (childId.contains(CHILD_IDS_DELIMITER)) {
          throw new IllegalArgumentException(
              String.format(
                  "This child transaction ID itself contains the delimiter. ChildTransactionID: %s, Delimiter: %s",
                  childId, CHILD_IDS_DELIMITER));
        }
      }
      this.childIds = childIds;
      this.state = checkNotNull(state);
      this.createdAt = createdAt;
    }

    @VisibleForTesting
    State(String id, TransactionState state, long createdAt) {
      this(id, EMPTY_CHILD_IDS, state, createdAt);
    }

    @Nonnull
    public String getId() {
      return id;
    }

    @Nonnull
    public TransactionState getState() {
      return state;
    }

    public long getCreatedAt() {
      return createdAt;
    }

    @VisibleForTesting
    List getChildIds() {
      return childIds;
    }

    @VisibleForTesting
    String getChildIdsAsString() {
      return Joiner.on(CHILD_IDS_DELIMITER).join(childIds);
    }

    @Override
    public boolean equals(Object o) {
      if (o == this) {
        return true;
      }
      if (!(o instanceof State)) {
        return false;
      }
      State other = (State) o;
      // NOTICE: createdAt is not taken into account
      return Objects.equals(id, other.id)
          && state == other.state
          && Objects.equals(childIds, other.childIds);
    }

    @Override
    public int hashCode() {
      // NOTICE: createdAt is not taken into account
      return Objects.hash(id, state, childIds);
    }

    private void checkNotMissingRequired(Result result) throws CoordinatorException {
      if (!result.getValue(Attribute.ID).isPresent()
          || !result.getValue(Attribute.ID).get().getAsString().isPresent()) {
        throw new CoordinatorException("id is missing in the coordinator state");
      }
      if (!result.getValue(Attribute.STATE).isPresent()
          || result.getValue(Attribute.STATE).get().getAsInt() == 0) {
        throw new CoordinatorException("state is missing in the coordinator state");
      }
      if (!result.getValue(Attribute.CREATED_AT).isPresent()
          || result.getValue(Attribute.CREATED_AT).get().getAsLong() == 0) {
        throw new CoordinatorException("created_at is missing in the coordinator state");
      }
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy