org.projectnessie.versioned.persist.adapter.DatabaseAdapter Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of nessie-versioned-persist-adapter Show documentation
There is a newer version: 0.59.0
Show newest version
/*
 * Copyright (C) 2020 Dremio
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.projectnessie.versioned.persist.adapter;

import com.google.common.annotations.VisibleForTesting;
import com.google.errorprone.annotations.MustBeClosed;
import com.google.protobuf.ByteString;
import java.util.Collection;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import java.util.stream.Stream;
import javax.annotation.Nonnull;
import org.projectnessie.versioned.ContentAttachment;
import org.projectnessie.versioned.ContentAttachmentKey;
import org.projectnessie.versioned.Diff;
import org.projectnessie.versioned.GetNamedRefsParams;
import org.projectnessie.versioned.Hash;
import org.projectnessie.versioned.Key;
import org.projectnessie.versioned.MergeResult;
import org.projectnessie.versioned.NamedRef;
import org.projectnessie.versioned.RefLogNotFoundException;
import org.projectnessie.versioned.ReferenceAlreadyExistsException;
import org.projectnessie.versioned.ReferenceConflictException;
import org.projectnessie.versioned.ReferenceInfo;
import org.projectnessie.versioned.ReferenceNotFoundException;

/**
 * Database-Adapter interface that encapsulates all database related logic, an abstraction between a
 * {@link org.projectnessie.versioned.VersionStore} implementation and a variety of different
 * databases that share common core implementations for example for the commit/merge/transplant
 * operations.
 *
 * One or more adapter instances may use the same storage (database instance / schema). In this
 * case adapter instances usually differ by their {@link DatabaseAdapterConfig#getRepositoryId()
 * repository ID} configuration parameters.
 *
 * 
Database-adapters treat the actual "Nessie content" and "Nessie commit metadata" as an opaque
 * value ("BLOB") without interpreting the content. Database-adapter must persist serialized values
 * for commit-metadata and content as is and must return those in the exact same representation on
 * read.
 *
 * 
Actual implementation usually extend either {@code
 * org.projectnessie.versioned.persist.nontx.NonTxDatabaseAdapter} (NoSQL databases) or {@code
 * org.projectnessie.versioned.persist.tx.TxDatabaseAdapter} (JDBC/transactional). Both in turn
 * extend {@link org.projectnessie.versioned.persist.adapter.spi.AbstractDatabaseAdapter}.
 *
 * 
All returned {@link Stream}s must be closed.
 */
public interface DatabaseAdapter {

  DatabaseAdapterConfig getConfig();

  /** Ensures that mandatory data is present in the repository, does not change an existing repo. */
  void initializeRepo(String defaultBranchName);

  /**
   * Forces all repository data managed by this adapter instance to be deleted.
   *
   * 
This includes all data for the configured {@link DatabaseAdapterConfig#getRepositoryId()
   * repository ID}.
   *
   * 
After erasing a repository {@link #initializeRepo(String)} may be called to reinitialize the
   * minimal required data structures for the same repository ID.
   */
  void eraseRepo();

  /** Get the {@link Hash} for "beginning of time". */
  Hash noAncestorHash();

  /**
   * Verifies that the given {@code namedReference} exists and that {@code hashOnReference}, if
   * present, is reachable via that reference.
   *
   * @return verified {@code hashOnReference} or, if {@code hashOnReference} is not present, the
   *     current HEAD of {@code namedReference}
   * @throws ReferenceNotFoundException if {@code namedReference} does not exist or {@code
   *     hashOnReference}, if present, is not reachable from that reference
   */
  Hash hashOnReference(NamedRef namedReference, Optional hashOnReference)
      throws ReferenceNotFoundException;

  /**
   * Retrieve the reference-local and global state for the given keys for the specified commit.
   *
   * @param commit commit to retrieve the values for.
   * @param keys keys to retrieve the values (reference-local and global) for
   * @param keyFilter predicate to optionally skip specific keys in the result and return those as
   *     {@link Optional#empty() "not present"}, for example to implement a security policy.
   * @return Ordered stream
   * @throws ReferenceNotFoundException if {@code commit} does not exist.
   */
  Map values(Hash commit, Collection keys, KeyFilterPredicate keyFilter)
      throws ReferenceNotFoundException;

  /**
   * Retrieve the commit-log starting at the commit referenced by {@code offset}.
   *
   * @param offset hash to start at
   * @return stream of {@link CommitLogEntry}s
   * @throws ReferenceNotFoundException if {@code offset} does not exist.
   */
  @MustBeClosed
  Stream commitLog(Hash offset) throws ReferenceNotFoundException;

  /**
   * Loads commit log entries.
   *
   * @return the loaded {@link CommitLogEntry}s, non-existing entries will not be returned.
   */
  @MustBeClosed
  Stream fetchCommitLogEntries(Stream hashes);

  /**
   * Retrieve the content-keys that are "present" for the specified commit.
   *
   * @param commit commit to retrieve the values for.
   * @param keyFilter predicate to optionally skip specific keys in the result and return those as
   *     {@link Optional#empty() "not present"}, for example to implement a security policy.
   * @return Ordered stream with content-keys, content-ids and content-types
   * @throws ReferenceNotFoundException if {@code commit} does not exist.
   */
  @MustBeClosed
  Stream keys(Hash commit, KeyFilterPredicate keyFilter)
      throws ReferenceNotFoundException;

  /**
   * Commit operation, see {@link CommitParams} for a description of the parameters.
   *
   * @param commitParams parameters for the commit
   * @return optimistically written commit-log-entry
   * @throws ReferenceNotFoundException if either the named reference in {@link
   *     CommitParams#getToBranch()} or the commit on that reference, if specified, does not exist.
   * @throws ReferenceConflictException if any of the commits could not be committed onto the target
   *     branch due to a conflicting change or if the expected hash in {@link
   *     CommitParams#getToBranch()}is not its expected hEAD
   */
  Hash commit(CommitParams commitParams)
      throws ReferenceConflictException, ReferenceNotFoundException;

  /**
   * Cherry-pick the commits with the hashes {@code sequenceToTransplant} from named reference
   * {@code source} onto the reference {@code targetBranch}.
   *
   * @return the hash of the last cherry-picked commit, in other words the new HEAD of the target
   *     branch
   * @throws ReferenceNotFoundException if either the named reference in {@code commitOnReference}
   *     or the commit on that reference, if specified, does not exist.
   * @throws ReferenceConflictException if any of the commits could not be committed onto the target
   *     branch due to a conflicting change or if the expected hash of {@code toBranch} is not its
   *     expected hEAD
   */
  MergeResult transplant(TransplantParams transplantParams)
      throws ReferenceNotFoundException, ReferenceConflictException;

  /**
   * Merge all commits on {@code from} since the common ancestor of {@code from} and {@code to} and
   * commit those onto {@code to}.
   *
   * 
The implementation first identifies the common-ancestor (the most-recent commit that is both
   * reachable via {@code from} and {@code to}).
   *
   * @return the hash of the last cherry-picked commit, in other words the new HEAD of the target
   *     branch
   * @throws ReferenceNotFoundException if either the named reference in {@code toBranch} or the
   *     commit on that reference, if specified, does not exist.
   * @throws ReferenceConflictException if any of the commits could not be committed onto the target
   *     branch due to a conflicting change or if the expected hash of {@code toBranch} is not its
   *     expected hEAD
   */
  MergeResult merge(MergeParams mergeParams)
      throws ReferenceNotFoundException, ReferenceConflictException;

  /**
   * Resolve the current HEAD of the given named-reference and optionally additional information.
   *
   * 
This is actually a convenience for {@link #hashOnReference(NamedRef, Optional)
   * hashOnReference(ref, Optional.empty()}.
   *
   * @param ref named reference to resolve
   * @param params options that control which information shall be returned in {@link
   *     ReferenceInfo}, see {@link GetNamedRefsParams} for details.
   * @return current HEAD of {@code ref}
   * @throws ReferenceNotFoundException if the named reference {@code ref} does not exist.
   */
  ReferenceInfo namedRef(String ref, GetNamedRefsParams params)
      throws ReferenceNotFoundException;

  /**
   * Get all named references including their current HEAD.
   *
   * @param params options that control which information shall be returned in each {@link
   *     ReferenceInfo}, see {@link ReferenceInfo} for details.
   * @return stream with all named references.
   */
  @MustBeClosed
  Stream> namedRefs(GetNamedRefsParams params)
      throws ReferenceNotFoundException;

  /**
   * Create a new named reference.
   *
   * @param ref Named reference to create - either a {@link org.projectnessie.versioned.BranchName}
   *     or {@link org.projectnessie.versioned.TagName}.
   * @param target The already existing named reference with an optional hash on that branch. This
   *     parameter can be {@code null} for the edge case when the default branch is re-created after
   *     it has been dropped.
   * @return the current HEAD of the created branch or tag
   * @throws ReferenceAlreadyExistsException if the reference {@code ref} already exists.
   * @throws ReferenceNotFoundException if {@code target} does not exist.
   */
  Hash create(NamedRef ref, Hash target)
      throws ReferenceAlreadyExistsException, ReferenceNotFoundException;

  /**
   * Delete the given reference.
   *
   * @param reference named-reference to delete. If a value for the hash is specified, it must be
   *     equal to the current HEAD.
   * @param expectedHead if present, {@code reference}'s current HEAD must be equal to this value
   * @throws ReferenceNotFoundException if the named reference in {@code reference} does not exist.
   * @throws ReferenceConflictException if the named reference's HEAD is not equal to the expected
   *     HEAD
   */
  void delete(NamedRef reference, Optional expectedHead)
      throws ReferenceNotFoundException, ReferenceConflictException;

  /**
   * Updates {@code assignee}'s HEAD to {@code assignTo}.
   *
   * @param assignee named reference to re-assign
   * @param expectedHead if present, {@code assignee}'s current HEAD must be equal to this value
   * @param assignTo commit to update {@code assignee}'s HEAD to
   * @throws ReferenceNotFoundException if either the named reference in {@code assignTo} or the
   *     commit on that reference, if specified, does not exist or if the named reference specified
   *     in {@code assignee} does not exist.
   * @throws ReferenceConflictException if the HEAD of the named reference {@code assignee} is not
   *     equal to the expected HEAD
   */
  void assign(NamedRef assignee, Optional expectedHead, Hash assignTo)
      throws ReferenceNotFoundException, ReferenceConflictException;

  /**
   * Compute the difference of the content for the two commits identified by {@code from} and {@code
   * to}.
   *
   * @param from {@link Diff#getFromValue() "From"} side of the diff
   * @param to {@link Diff#getToValue() "To" side} of the diff
   * @param keyFilter predicate to optionally skip specific keys in the diff result and not return
   *     those, for example to implement a security policy.
   * @return stream containing the difference of the content, excluding both equal values and values
   *     that were excluded via {@code keyFilter}
   * @throws ReferenceNotFoundException if {@code from} or {@code to} does not exist.
   */
  @MustBeClosed
  Stream diff(Hash from, Hash to, KeyFilterPredicate keyFilter)
      throws ReferenceNotFoundException;

  /** Fetches the current version and descriptive attributes of the repository. */
  RepoDescription fetchRepositoryDescription();

  /**
   * Updates the repository description. Takes a function that receives the current repository
   * description and returns the updated description.
   *
   * @param updater updater function, the input argument is never {@code null}, if {@code updater}
   *     return {@code null}, the update will be aborted
   * @throws ReferenceConflictException thrown if the repository description could not be updated
   *     due to other concurrent updates
   */
  void updateRepositoryDescription(Function updater)
      throws ReferenceConflictException;

  /**
   * Retrieves the global content for the given contents-id.
   *
   * @param contentId contents-id to retrieve the global content for
   * @return global content, if present or an empty optional, never {@code null}.
   */
  Optional globalContent(ContentId contentId);

  Map> repoMaintenance(RepoMaintenanceParams repoMaintenanceParams);

  /**
   * Retrieve the refLog starting at the refLog referenced by {@code offset}.
   *
   * @return stream of {@link RefLog}s
   * @param offset initial reflog id to read from
   */
  @MustBeClosed
  Stream refLog(Hash offset) throws RefLogNotFoundException;

  /**
   * Scan all commit log entries, no guarantees about order nor about the behavior when commits
   * happen while the returned {@link Stream} is consumed.
   */
  @MustBeClosed
  Stream scanAllCommitLogEntries();

  /**
   * Retrieve the known attachment keys for a content ID.
   *
   * 
Implementations may return keys that do not or no longer exist. Especially non-transactional
   * database adapter implementations allow this as a compromise. See {@link
   * #mapToAttachment(Stream)}, which only returns existing content attachments.
   */
  @MustBeClosed
  Stream getAttachmentKeys(String contentId);

  /**
   * Retrieve the content attachments identified via {@code keys}. Attachments that do not exist are
   * not returned.
   *
   * 
Whether the input stream is "terminated" or "fluently" mapped, can vary between
   * implementations.
   */
  @MustBeClosed
  Stream mapToAttachment(Stream keys);

  /**
   * Consistent put-attachment operation.
   *
   * 
Either a "put-if-absent", if {@code expectedVersion} is empty or a compare-and-swap based on
   * the value of {@code expectedVersion}.
   *
   * 
Note: this method uses conditional put operations, unlike {@link #putAttachments(Stream)}.
   * Users should not use this method and {@link #putAttachments(Stream)} for the same keys.
   *
   * @param attachment the attachment to write
   * @param expectedVersion indicator for put-if-absent or the expected value on an existing item
   */
  boolean consistentPutAttachment(ContentAttachment attachment, Optional expectedVersion);

  /**
   * Bulk-write the given content attachments. The values of attachments written with this method
   * should be immutable and deterministic for the respective attachment keys.
   *
   * 
The behavior of the implementation whether an already existing attachment will be
   * overwritten or not is undefined.
   *
   * 
The outcome of this method is undefined when an error occurred.
   *
   * 
Note: this method uses unconditional put operations, unlike {@link
   * #consistentPutAttachment(ContentAttachment, Optional)}. Should not use this method and {@link
   * #consistentPutAttachment(ContentAttachment, Optional)} for the same keys.
   */
  void putAttachments(Stream attachments);

  /** Unconditionally delete the content attachments identified via {@code keys}. */
  void deleteAttachments(Stream keys);

  @VisibleForTesting
  void assertCleanStateForTests();

  /**
   * Write multiple new commit-entries, the given commit entries are to be persisted as is. All
   * values of the given {@link CommitLogEntry} can be considered valid and consistent.
   *
   * 
Callers must call {@link #updateMultipleCommits(List)} for already existing {@link
   * CommitLogEntry}s and {@link #writeMultipleCommits(List)} for new {@link CommitLogEntry}s.
   * Implementations can rely on this assumption (think: SQL {@code INSERT} + {@code UPDATE}
   * compared to a "simple put" for NoSQL databases).
   *
   * 
Implementations however can enforce strict consistency checks/guarantees, like a best-effort
   * approach to prevent hash-collisions but without any other consistency checks/guarantees.
   */
  void writeMultipleCommits(List commitLogEntries)
      throws ReferenceConflictException;

  /**
   * Updates multiple commit-entries, the given commit entries are to be persisted as is. All values
   * of the given {@link CommitLogEntry} can be considered valid and consistent.
   *
   * 
Callers must call {@link #updateMultipleCommits(List)} for already existing {@link
   * CommitLogEntry}s and {@link #writeMultipleCommits(List)} for new {@link CommitLogEntry}s.
   * Implementations can rely on this assumption (think: SQL {@code INSERT} + {@code UPDATE}
   * compared to a "simple put" for NoSQL databases).
   *
   * Implementations however can enforce strict consistency checks/guarantees.
   */
  void updateMultipleCommits(List commitLogEntries)
      throws ReferenceNotFoundException;

  /**
   * Populates the aggregated key-list for the given {@code entry} and returns it.
   *
   * @param entry the {@link CommitLogEntry} to build the aggregated key list for
   * @param inMemoryCommits function to retrieve not-yet-written commit-log-entries
   * @return commit-log-entry with the aggregated key-list. The returned {@link CommitLogEntry} has
   *     not been persisted.
   */
  CommitLogEntry rebuildKeyList(
      CommitLogEntry entry, @Nonnull Function inMemoryCommits)
      throws ReferenceNotFoundException;
}