/*
* Copyright (C) 2020 Dremio
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.projectnessie.versioned.persist.adapter.spi;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashKey;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.newHasher;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.randomHash;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.referenceNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilExcludeLast;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilIncludeLast;
import com.google.common.hash.Hasher;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.Spliterators.AbstractSpliterator;
import java.util.concurrent.TimeUnit;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.projectnessie.versioned.BranchName;
import org.projectnessie.versioned.Diff;
import org.projectnessie.versioned.GetNamedRefsParams;
import org.projectnessie.versioned.GetNamedRefsParams.RetrieveOptions;
import org.projectnessie.versioned.Hash;
import org.projectnessie.versioned.ImmutableReferenceInfo;
import org.projectnessie.versioned.Key;
import org.projectnessie.versioned.NamedRef;
import org.projectnessie.versioned.ReferenceConflictException;
import org.projectnessie.versioned.ReferenceInfo;
import org.projectnessie.versioned.ReferenceInfo.CommitsAheadBehind;
import org.projectnessie.versioned.ReferenceNotFoundException;
import org.projectnessie.versioned.TagName;
import org.projectnessie.versioned.persist.adapter.CommitAttempt;
import org.projectnessie.versioned.persist.adapter.CommitLogEntry;
import org.projectnessie.versioned.persist.adapter.ContentAndState;
import org.projectnessie.versioned.persist.adapter.ContentId;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapter;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapterConfig;
import org.projectnessie.versioned.persist.adapter.Difference;
import org.projectnessie.versioned.persist.adapter.ImmutableCommitLogEntry;
import org.projectnessie.versioned.persist.adapter.ImmutableKeyList;
import org.projectnessie.versioned.persist.adapter.KeyFilterPredicate;
import org.projectnessie.versioned.persist.adapter.KeyList;
import org.projectnessie.versioned.persist.adapter.KeyListEntity;
import org.projectnessie.versioned.persist.adapter.KeyWithBytes;
import org.projectnessie.versioned.persist.adapter.KeyWithType;
/**
* Contains all the database-independent logic for a Database-adapter.
*
* <p>This class does not implement everything from {@link
* org.projectnessie.versioned.persist.adapter.DatabaseAdapter}.
*
* <p>Implementations must consider that production environments may use instances of this class in
* JAX-RS {@code RequestScope}, which means that it must be very cheap to create new instances of
* the implementations.
*
* <p>Managed resources like a connection-pool must be managed outside of {@link
* AbstractDatabaseAdapter} implementations. The recommended way to "inject" such managed resources
* into short-lived {@link AbstractDatabaseAdapter} implementations is via a special configuration
* attribute.
*
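* <p>A minimal sketch of such a configuration attribute, assuming a hypothetical {@code
* ConnectionPool} type and accessor that are not part of this API:
*
* <pre>{@code
* interface MyAdapterConfig extends DatabaseAdapterConfig {
*   // created, shared and closed by the application; the adapter only borrows it
*   ConnectionPool getConnectionPool();
* }
*
* // cheap per-request construction: the adapter merely holds a reference to the config
* DatabaseAdapter adapter = new MyDatabaseAdapter(config);
* }</pre>
*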
* @param <OP_CONTEXT> context for each operation, so for each operation in {@link
* org.projectnessie.versioned.persist.adapter.DatabaseAdapter} that requires database access.
* For example, used to have one "borrowed" database connection per database-adapter operation.
* @param <CONFIG> configuration interface type for the concrete implementation
*/
public abstract class AbstractDatabaseAdapter<OP_CONTEXT, CONFIG extends DatabaseAdapterConfig>
implements DatabaseAdapter {
protected final CONFIG config;
@SuppressWarnings("UnstableApiUsage")
public static final Hash NO_ANCESTOR =
Hash.of(
UnsafeByteOperations.unsafeWrap(
newHasher().putString("empty", StandardCharsets.UTF_8).hash().asBytes()));
protected static final long COMMIT_LOG_HASH_SEED = 946928273206945677L;
protected AbstractDatabaseAdapter(CONFIG config) {
Objects.requireNonNull(config, "config parameter must not be null");
this.config = config;
}
@Override
public Hash noAncestorHash() {
return NO_ANCESTOR;
}
// /////////////////////////////////////////////////////////////////////////////////////////////
// DatabaseAdapter subclass API (protected)
// /////////////////////////////////////////////////////////////////////////////////////////////
/** Returns the current time in microseconds since epoch. */
protected long commitTimeInMicros() {
Instant instant = config.getClock().instant();
long time = instant.getEpochSecond();
long nano = instant.getNano();
return TimeUnit.SECONDS.toMicros(time) + TimeUnit.NANOSECONDS.toMicros(nano);
}
/**
* Logic implementation of a commit-attempt.
*
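* <p>For illustration: with {@code parentsPerCommit == 3} and the current HEAD having parents
* {@code [A, B]}, the new commit's parent list becomes {@code [branchHead, A, B]}, truncated to
* at most {@code parentsPerCommit} entries, and its {@code commitSeq} is the HEAD's
* {@code commitSeq + 1}.
*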
* @param ctx technical operation-context
* @param commitAttempt commit parameters
* @param branchHead current HEAD of {@code branch}
* @param newKeyLists consumer for optimistically written {@link KeyListEntity}s
* @return optimistically written commit-log-entry
*/
protected CommitLogEntry commitAttempt(
OP_CONTEXT ctx,
long timeInMicros,
Hash branchHead,
CommitAttempt commitAttempt,
Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException, ReferenceConflictException {
List<String> mismatches = new ArrayList<>();
// verify expected global-states
checkExpectedGlobalStates(ctx, commitAttempt, mismatches::add);
checkForModifiedKeysBetweenExpectedAndCurrentCommit(ctx, commitAttempt, branchHead, mismatches);
if (!mismatches.isEmpty()) {
throw new ReferenceConflictException(String.join("\n", mismatches));
}
CommitLogEntry currentBranchEntry = fetchFromCommitLog(ctx, branchHead);
int parentsPerCommit = config.getParentsPerCommit();
List<Hash> newParents = new ArrayList<>(parentsPerCommit);
newParents.add(branchHead);
long commitSeq;
if (currentBranchEntry != null) {
List<Hash> p = currentBranchEntry.getParents();
newParents.addAll(p.subList(0, Math.min(p.size(), parentsPerCommit - 1)));
commitSeq = currentBranchEntry.getCommitSeq() + 1;
} else {
commitSeq = 1;
}
CommitLogEntry newBranchCommit =
buildIndividualCommit(
ctx,
timeInMicros,
newParents,
commitSeq,
commitAttempt.getCommitMetaSerialized(),
commitAttempt.getPuts(),
commitAttempt.getDeletes(),
currentBranchEntry != null ? currentBranchEntry.getKeyListDistance() : 0,
newKeyLists);
writeIndividualCommit(ctx, newBranchCommit);
return newBranchCommit;
}
/**
* Logic implementation of a merge-attempt.
*
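* <p>For illustration, merging {@code from = Y} with common ancestor {@code C} into
* {@code toBranch} at {@code toHead = E}:
*
* <pre>{@code
* toBranch:  C---D---E             (toHead = E)
*             \
* from:        X---Y               (from = Y)
*
* result:    C---D---E---X'---Y'   (X and Y rewritten onto E)
* }</pre>
*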
* @param ctx technical operation context
* @param from merge-from commit
* @param toBranch merge-into reference with expected hash of HEAD
* @param expectedHead if present, {@code toBranch}'s current HEAD must be equal to this value
* @param toHead current HEAD of {@code toBranch}
* @param branchCommits consumer for the individual commits to merge
* @param newKeyLists consumer for optimistically written {@link KeyListEntity}s
* @return hash of the last commit-log-entry written to {@code toBranch}
*/
protected Hash mergeAttempt(
OP_CONTEXT ctx,
long timeInMicros,
Hash from,
BranchName toBranch,
Optional<Hash> expectedHead,
Hash toHead,
Consumer<Hash> branchCommits,
Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException, ReferenceConflictException {
validateHashExists(ctx, from);
// 1. ensure 'expectedHash' is a parent of HEAD-of-'toBranch'
hashOnRef(ctx, toBranch, expectedHead, toHead);
// 2. find nearest common-ancestor between 'from' and the current HEAD of 'toBranch'
Hash commonAncestor = findCommonAncestor(ctx, from, toBranch, toHead);
// 3. Collect commit-log-entries
List<CommitLogEntry> toEntriesReverseChronological =
takeUntilExcludeLast(
readCommitLogStream(ctx, toHead), e -> e.getHash().equals(commonAncestor))
.collect(Collectors.toList());
Collections.reverse(toEntriesReverseChronological);
List<CommitLogEntry> commitsToMergeChronological =
takeUntilExcludeLast(
readCommitLogStream(ctx, from), e -> e.getHash().equals(commonAncestor))
.collect(Collectors.toList());
if (commitsToMergeChronological.isEmpty()) {
// Nothing to merge, shortcut
throw new IllegalArgumentException(
String.format(
"No hashes to merge from '%s' onto '%s' @ '%s'.",
from.asString(), toBranch.getName(), toHead));
}
// 4. Collect modified keys.
Set<Key> keysTouchedOnTarget = collectModifiedKeys(toEntriesReverseChronological);
// 5. check for key-collisions
checkForKeyCollisions(keysTouchedOnTarget, commitsToMergeChronological);
// (no need to verify the global states during a merge)
// 6. re-apply the commits to merge onto 'toBranch'
toHead = copyCommits(ctx, timeInMicros, toHead, commitsToMergeChronological, newKeyLists);
// 7. Write commits
commitsToMergeChronological.stream().map(CommitLogEntry::getHash).forEach(branchCommits);
writeMultipleCommits(ctx, commitsToMergeChronological);
return toHead;
}
/**
* Logic implementation of a transplant-attempt.
*
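* <p>The commits in {@code sequenceToTransplant} must form a contiguous parent chain, oldest
* first, e.g. {@code [X, Y, Z]} where {@code X} is the parent of {@code Y} and {@code Y} the
* parent of {@code Z}; a non-contiguous sequence is rejected with an
* {@link IllegalArgumentException}.
*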
* @param ctx technical operation context
* @param targetBranch target reference with expected HEAD
* @param expectedHead if present, {@code targetBranch}'s current HEAD must be equal to this value
* @param targetHead current HEAD of {@code targetBranch}
* @param sequenceToTransplant sequential list of commits to transplant from {@code source}
* @param branchCommits consumer for the individual commits to merge
* @param newKeyLists consumer for optimistically written {@link KeyListEntity}s
* @return hash of the last commit-log-entry written to {@code targetBranch}
*/
protected Hash transplantAttempt(
OP_CONTEXT ctx,
long timeInMicros,
BranchName targetBranch,
Optional<Hash> expectedHead,
Hash targetHead,
List<Hash> sequenceToTransplant,
Consumer<Hash> branchCommits,
Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException, ReferenceConflictException {
if (sequenceToTransplant.isEmpty()) {
throw new IllegalArgumentException("No hashes to transplant given.");
}
// 1. ensure 'expectedHash' is a parent of HEAD-of-'targetBranch' & collect keys
List<CommitLogEntry> targetEntriesReverseChronological = new ArrayList<>();
hashOnRef(ctx, targetHead, targetBranch, expectedHead, targetEntriesReverseChronological::add);
// Exclude the expected-hash on the target-branch from key-collisions check
if (!targetEntriesReverseChronological.isEmpty()
&& expectedHead.isPresent()
&& targetEntriesReverseChronological.get(0).getHash().equals(expectedHead.get())) {
targetEntriesReverseChronological.remove(0);
}
Collections.reverse(targetEntriesReverseChronological);
// 2. Collect modified keys.
Set<Key> keysTouchedOnTarget = collectModifiedKeys(targetEntriesReverseChronological);
// 4. ensure 'sequenceToTransplant' is sequential
int[] index = new int[] {sequenceToTransplant.size() - 1};
Hash lastHash = sequenceToTransplant.get(sequenceToTransplant.size() - 1);
List<CommitLogEntry> commitsToTransplantChronological =
takeUntilExcludeLast(
readCommitLogStream(ctx, lastHash),
e -> {
int i = index[0]--;
if (i == -1) {
return true;
}
if (!e.getHash().equals(sequenceToTransplant.get(i))) {
throw new IllegalArgumentException("Sequence of hashes is not contiguous.");
}
return false;
})
.collect(Collectors.toList());
// 5. check for key-collisions
checkForKeyCollisions(keysTouchedOnTarget, commitsToTransplantChronological);
// (no need to verify the global states during a transplant)
// 6. re-apply commits in 'sequenceToTransplant' onto 'targetBranch'
targetHead =
copyCommits(ctx, timeInMicros, targetHead, commitsToTransplantChronological, newKeyLists);
// 7. Write commits
commitsToTransplantChronological.stream().map(CommitLogEntry::getHash).forEach(branchCommits);
writeMultipleCommits(ctx, commitsToTransplantChronological);
return targetHead;
}
/**
* Compute the diff between two references.
*
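* <p>For illustration: if key {@code k} resolves to value {@code v1} on {@code from} and to
* {@code v2} on {@code to}, the resulting stream contains a {@link Difference} for {@code k}
* carrying both values; keys that resolve to equal values on both sides are omitted.
*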
* @param ctx technical operation context
* @param from "from" reference to compute the difference from, appears on the "from" side in
* {@link Diff} with hash in {@code from} to compute the diff for, must exist in {@code from}
* @param to "to" reference to compute the difference from, appears on the "to" side in {@link
* Diff} with hash in {@code to} to compute the diff for, must exist in {@code to}
* @param keyFilter optional filter on key + content-id + content-type
* @return computed difference
*/
protected Stream<Difference> buildDiff(
OP_CONTEXT ctx, Hash from, Hash to, KeyFilterPredicate keyFilter)
throws ReferenceNotFoundException {
// TODO this implementation works, but is definitely not the most efficient one.
Set<Key> allKeys = new HashSet<>();
try (Stream<Key> s = keysForCommitEntry(ctx, from, keyFilter).map(KeyWithType::getKey)) {
s.forEach(allKeys::add);
}
try (Stream<Key> s = keysForCommitEntry(ctx, to, keyFilter).map(KeyWithType::getKey)) {
s.forEach(allKeys::add);
}
if (allKeys.isEmpty()) {
// no keys, shortcut
return Stream.empty();
}
List<Key> allKeysList = new ArrayList<>(allKeys);
Map<Key, ContentAndState<ByteString>> fromValues =
fetchValues(ctx, from, allKeysList, keyFilter);
Map<Key, ContentAndState<ByteString>> toValues = fetchValues(ctx, to, allKeysList, keyFilter);
Function<ContentAndState<ByteString>, Optional<ByteString>> valToContent =
cs -> cs != null ? Optional.of(cs.getRefState()) : Optional.empty();
return IntStream.range(0, allKeys.size())
.mapToObj(allKeysList::get)
.map(
k -> {
ContentAndState<ByteString> fromVal = fromValues.get(k);
ContentAndState<ByteString> toVal = toValues.get(k);
Optional<ByteString> f = valToContent.apply(fromVal);
Optional<ByteString> t = valToContent.apply(toVal);
if (f.equals(t)) {
return null;
}
Optional<ByteString> g =
Optional.ofNullable(
fromVal != null
? fromVal.getGlobalState()
: (toVal != null ? toVal.getGlobalState() : null));
return Difference.of(k, g, f, t);
})
.filter(Objects::nonNull);
}
/**
* Common functionality to filter and enhance based on the given {@link GetNamedRefsParams}.
*
* @param ctx database-adapter context
* @param params defines which kind of references and which additional information shall be
* retrieved
* @param defaultBranchHead prerequisite, the hash of the default branch's HEAD commit (depending
* on the database-adapter implementation). If {@code null}, {@link
* #namedRefsWithDefaultBranchRelatedInfo(Object, GetNamedRefsParams, Stream, Hash)} will not
* add additional default-branch related information (common ancestor and commits
* behind/ahead).
* @param refs current {@link Stream} of {@link ReferenceInfo} to be enhanced.
* @return filtered/enhanced stream based on {@code refs}.
*/
protected Stream<ReferenceInfo<ByteString>> namedRefsFilterAndEnhance(
OP_CONTEXT ctx,
GetNamedRefsParams params,
Hash defaultBranchHead,
Stream<ReferenceInfo<ByteString>> refs) {
refs = namedRefsMaybeFilter(params, refs);
refs = namedRefsWithDefaultBranchRelatedInfo(ctx, params, refs, defaultBranchHead);
refs = namedReferenceWithCommitMeta(ctx, params, refs);
return refs;
}
/** Applies the reference type filter (tags or branches) to the Java stream. */
protected static Stream<ReferenceInfo<ByteString>> namedRefsMaybeFilter(
GetNamedRefsParams params, Stream<ReferenceInfo<ByteString>> refs) {
if (params.getBranchRetrieveOptions().isRetrieve()
&& params.getTagRetrieveOptions().isRetrieve()) {
// No filtering necessary, if all named-reference types (tags and branches) are being fetched.
return refs;
}
return refs.filter(ref -> namedRefsRetrieveOptionsForReference(params, ref).isRetrieve());
}
protected static boolean namedRefsRequiresBaseReference(GetNamedRefsParams params) {
return namedRefsRequiresBaseReference(params.getBranchRetrieveOptions())
|| namedRefsRequiresBaseReference(params.getTagRetrieveOptions());
}
protected static boolean namedRefsRequiresBaseReference(
GetNamedRefsParams.RetrieveOptions retrieveOptions) {
return retrieveOptions.isComputeAheadBehind() || retrieveOptions.isComputeCommonAncestor();
}
protected static boolean namedRefsAnyRetrieves(GetNamedRefsParams params) {
return params.getBranchRetrieveOptions().isRetrieve()
|| params.getTagRetrieveOptions().isRetrieve();
}
protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference(
GetNamedRefsParams params, ReferenceInfo<ByteString> ref) {
return namedRefsRetrieveOptionsForReference(params, ref.getNamedRef());
}
protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference(
GetNamedRefsParams params, NamedRef ref) {
if (ref instanceof BranchName) {
return params.getBranchRetrieveOptions();
}
if (ref instanceof TagName) {
return params.getTagRetrieveOptions();
}
throw new IllegalArgumentException("ref must be either BranchName or TagName, but is " + ref);
}
/**
* Returns an updated {@link ReferenceInfo} with the commit-meta of the reference's HEAD commit.
*/
protected Stream<ReferenceInfo<ByteString>> namedReferenceWithCommitMeta(
OP_CONTEXT ctx, GetNamedRefsParams params, Stream<ReferenceInfo<ByteString>> refs) {
return refs.map(
ref -> {
if (!namedRefsRetrieveOptionsForReference(params, ref).isRetrieveCommitMetaForHead()) {
return ref;
}
CommitLogEntry logEntry = fetchFromCommitLog(ctx, ref.getHash());
if (logEntry == null) {
return ref;
}
return ImmutableReferenceInfo.<ByteString>builder()
.from(ref)
.headCommitMeta(logEntry.getMetadata())
.commitSeq(logEntry.getCommitSeq())
.build();
});
}
/**
* If necessary based on the given {@link GetNamedRefsParams}, updates the returned {@link
* ReferenceInfo}s with the common-ancestor of the named reference and the default branch and the
* number of commits behind/ahead compared to the default branch.
*
* <p>The common ancestor and/or information of commits behind/ahead is meaningless ({@code null})
* for the default branch. Both fields are also {@code null} if the named reference points to the
* {@link #noAncestorHash()} (beginning of time).
*/
protected Stream<ReferenceInfo<ByteString>> namedRefsWithDefaultBranchRelatedInfo(
OP_CONTEXT ctx,
GetNamedRefsParams params,
Stream<ReferenceInfo<ByteString>> refs,
Hash defaultBranchHead) {
if (defaultBranchHead == null) {
// No enhancement of common ancestor and/or commits behind/ahead.
return refs;
}
CommonAncestorState commonAncestorState =
new CommonAncestorState(
ctx,
defaultBranchHead,
params.getBranchRetrieveOptions().isComputeAheadBehind()
|| params.getTagRetrieveOptions().isComputeAheadBehind());
return refs.map(
ref -> {
if (ref.getNamedRef().equals(params.getBaseReference())) {
return ref;
}
RetrieveOptions retrieveOptions = namedRefsRetrieveOptionsForReference(params, ref);
ReferenceInfo<ByteString> updated =
namedRefsRequiresBaseReference(retrieveOptions)
? findCommonAncestor(
ctx,
ref.getHash(),
commonAncestorState,
(diffOnFrom, hash) -> {
ReferenceInfo<ByteString> newRef = ref;
if (retrieveOptions.isComputeCommonAncestor()) {
newRef = newRef.withCommonAncestor(hash);
}
if (retrieveOptions.isComputeAheadBehind()) {
int behind = commonAncestorState.indexOf(hash);
CommitsAheadBehind aheadBehind =
CommitsAheadBehind.of(diffOnFrom, behind);
newRef = newRef.withAheadBehind(aheadBehind);
}
return newRef;
})
: null;
return updated != null ? updated : ref;
});
}
/**
* Convenience for {@link #hashOnRef(Object, Hash, NamedRef, Optional, Consumer) hashOnRef(ctx,
* knownHead, reference, hashOnRef, null)}.
*/
protected Hash hashOnRef(
OP_CONTEXT ctx, NamedRef reference, Optional<Hash> hashOnRef, Hash knownHead)
throws ReferenceNotFoundException {
return hashOnRef(ctx, knownHead, reference, hashOnRef, null);
}
/**
* Ensures that {@code ref} exists and that the hash in {@code hashOnRef} exists in that
* reference.
*
* @param ctx technical operation context
* @param ref reference that must contain {@code hashOnRef}
* @param knownHead current HEAD of {@code ref}
* @param hashOnRef hash to verify whether it exists in {@code ref}
* @param commitLogVisitor optional consumer that will receive all visited {@link
* CommitLogEntry}s, can be {@code null}.
* @return value of {@code hashOnRef} or, if {@code hashOnRef} is empty, {@code knownHead}
* @throws ReferenceNotFoundException if either {@code ref} does not exist or {@code hashOnRef}
* does not exist on {@code ref}
*/
protected Hash hashOnRef(
OP_CONTEXT ctx,
Hash knownHead,
NamedRef ref,
Optional<Hash> hashOnRef,
Consumer<CommitLogEntry> commitLogVisitor)
throws ReferenceNotFoundException {
if (hashOnRef.isPresent()) {
Hash suspect = hashOnRef.get();
// If the client requests 'NO_ANCESTOR' (== beginning of time), skip the existence-check.
if (suspect.equals(NO_ANCESTOR)) {
if (commitLogVisitor != null) {
readCommitLogStream(ctx, knownHead).forEach(commitLogVisitor);
}
return suspect;
}
Stream<Hash> hashes;
if (commitLogVisitor != null) {
hashes =
readCommitLogStream(ctx, knownHead).peek(commitLogVisitor).map(CommitLogEntry::getHash);
} else {
hashes = readCommitLogHashesStream(ctx, knownHead);
}
if (hashes.noneMatch(suspect::equals)) {
throw hashNotFound(ref, suspect);
}
return suspect;
} else {
return knownHead;
}
}
protected void validateHashExists(OP_CONTEXT ctx, Hash hash) throws ReferenceNotFoundException {
if (!NO_ANCESTOR.equals(hash) && fetchFromCommitLog(ctx, hash) == null) {
throw referenceNotFound(hash);
}
}
/** Load the commit-log entry for the given hash, returning {@code null} if not found. */
protected abstract CommitLogEntry fetchFromCommitLog(OP_CONTEXT ctx, Hash hash);
/**
* Fetch multiple {@link CommitLogEntry commit-log-entries} from the commit-log. The returned list
* must have exactly as many elements as in the parameter {@code hashes}. Non-existing hashes are
* returned as {@code null}.
*/
protected abstract List<CommitLogEntry> fetchPageFromCommitLog(OP_CONTEXT ctx, List<Hash> hashes);
/** Reads from the commit-log starting at the given commit-log-hash. */
protected Stream<CommitLogEntry> readCommitLogStream(OP_CONTEXT ctx, Hash initialHash)
throws ReferenceNotFoundException {
Spliterator<CommitLogEntry> split = readCommitLog(ctx, initialHash);
return StreamSupport.stream(split, false);
}
protected Spliterator<CommitLogEntry> readCommitLog(OP_CONTEXT ctx, Hash initialHash)
throws ReferenceNotFoundException {
if (NO_ANCESTOR.equals(initialHash)) {
return Spliterators.emptySpliterator();
}
CommitLogEntry initial = fetchFromCommitLog(ctx, initialHash);
if (initial == null) {
throw referenceNotFound(initialHash);
}
return logFetcher(ctx, initial, this::fetchPageFromCommitLog, CommitLogEntry::getParents);
}
/**
* Like {@link #readCommitLogStream(Object, Hash)}, but only returns the {@link Hash
* commit-log-entry hashes}, which can be taken from {@link CommitLogEntry#getParents()}, thus no
* need to perform a read-operation against every hash.
*/
protected Stream<Hash> readCommitLogHashesStream(OP_CONTEXT ctx, Hash initialHash) {
Spliterator<Hash> split = readCommitLogHashes(ctx, initialHash);
return StreamSupport.stream(split, false);
}
protected Spliterator<Hash> readCommitLogHashes(OP_CONTEXT ctx, Hash initialHash) {
return logFetcher(
ctx,
initialHash,
(c, hashes) -> hashes,
hash -> {
CommitLogEntry entry = fetchFromCommitLog(ctx, hash);
if (entry == null) {
return Collections.emptyList();
}
return entry.getParents();
});
}
/**
* Constructs a {@link Stream} of entries for either the global-state-log or a commit-log. Use
* {@link #readCommitLogStream(Object, Hash)} or the similar implementation for the global-log for
* non-transactional adapters.
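*
* <p>For example, {@link #readCommitLog(Object, Hash)} calls this with the initial {@link
* CommitLogEntry}, a page-fetcher backed by {@link #fetchPageFromCommitLog(Object, List)} and
* {@link CommitLogEntry#getParents()} as the next-page function.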
*/
protected <T> Spliterator<T> logFetcher(
OP_CONTEXT ctx,
T initial,
BiFunction<OP_CONTEXT, List<Hash>, List<T>> fetcher,
Function<T, List<Hash>> nextPage) {
return new AbstractSpliterator<T>(Long.MAX_VALUE, 0) {
private Iterator<T> currentBatch;
private boolean eof;
private T previous;
@Override
public boolean tryAdvance(Consumer<? super T> consumer) {
if (eof) {
return false;
} else if (currentBatch == null) {
currentBatch = Collections.singletonList(initial).iterator();
} else if (!currentBatch.hasNext()) {
if (previous == null) {
eof = true;
return false;
}
List<Hash> page = nextPage.apply(previous);
previous = null;
if (!page.isEmpty()) {
currentBatch = fetcher.apply(ctx, page).iterator();
} else {
eof = true;
return false;
}
}
T v = currentBatch.next();
if (v != null) {
consumer.accept(v);
previous = v;
}
return true;
}
};
}
/**
* Builds a {@link CommitLogEntry} using the given values. This function also includes a {@link
* KeyList}, if triggered by the values of {@code currentKeyListDistance} and {@link
* DatabaseAdapterConfig#getKeyListDistance()}, so read operations may happen.
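*
* <p>For illustration: with {@code getKeyListDistance() == 20}, every 20th commit carries a
* complete key-list and resets its {@code keyListDistance} to 0, while the 19 commits in between
* only record their own puts and deletes.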
*/
protected CommitLogEntry buildIndividualCommit(
OP_CONTEXT ctx,
long timeInMicros,
List<Hash> parentHashes,
long commitSeq,
ByteString commitMeta,
List<KeyWithBytes> puts,
List<Key> deletes,
int currentKeyListDistance,
Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException {
Hash commitHash = individualCommitHash(parentHashes, commitMeta, puts, deletes);
int keyListDistance = currentKeyListDistance + 1;
CommitLogEntry entry =
CommitLogEntry.of(
timeInMicros,
commitHash,
commitSeq,
parentHashes,
commitMeta,
puts,
deletes,
keyListDistance,
null,
Collections.emptyList());
if (keyListDistance >= config.getKeyListDistance()) {
entry = buildKeyList(ctx, entry, newKeyLists);
}
return entry;
}
/** Calculate the hash for the content of a {@link CommitLogEntry}. */
@SuppressWarnings("UnstableApiUsage")
protected Hash individualCommitHash(
List<Hash> parentHashes, ByteString commitMeta, List<KeyWithBytes> puts, List<Key> deletes) {
Hasher hasher = newHasher();
hasher.putLong(COMMIT_LOG_HASH_SEED);
parentHashes.forEach(h -> hasher.putBytes(h.asBytes().asReadOnlyByteBuffer()));
hasher.putBytes(commitMeta.asReadOnlyByteBuffer());
puts.forEach(
e -> {
hashKey(hasher, e.getKey());
hasher.putString(e.getContentId().getId(), StandardCharsets.UTF_8);
hasher.putBytes(e.getValue().asReadOnlyByteBuffer());
});
deletes.forEach(e -> hashKey(hasher, e));
return Hash.of(UnsafeByteOperations.unsafeWrap(hasher.hash().asBytes()));
}
/** Helper object for {@link #buildKeyList(Object, CommitLogEntry, Consumer)}. */
private static class KeyListBuildState {
final ImmutableCommitLogEntry.Builder newCommitEntry;
/** Builder for {@link CommitLogEntry#getKeyList()}. */
ImmutableKeyList.Builder embeddedBuilder = ImmutableKeyList.builder();
/** Builder for {@link KeyListEntity}. */
ImmutableKeyList.Builder currentKeyList;
/** Already built {@link KeyListEntity}s. */
List<KeyListEntity> newKeyListEntities = new ArrayList<>();
/** Flag whether {@link CommitLogEntry#getKeyList()} is being filled. */
boolean embedded = true;
/** Current size of either the {@link CommitLogEntry} or current {@link KeyListEntity}. */
int currentSize;
KeyListBuildState(int initialSize, ImmutableCommitLogEntry.Builder newCommitEntry) {
this.currentSize = initialSize;
this.newCommitEntry = newCommitEntry;
}
void finishKeyListEntity() {
Hash id = randomHash();
newKeyListEntities.add(KeyListEntity.of(id, currentKeyList.build()));
newCommitEntry.addKeyListsIds(id);
}
void newKeyListEntity() {
currentSize = 0;
currentKeyList = ImmutableKeyList.builder();
}
void addToKeyListEntity(KeyWithType keyWithType, int keyTypeSize) {
currentSize += keyTypeSize;
currentKeyList.addKeys(keyWithType);
}
void addToEmbedded(KeyWithType keyWithType, int keyTypeSize) {
currentSize += keyTypeSize;
embeddedBuilder.addKeys(keyWithType);
}
}
/**
* Adds a complete key-list to the given {@link CommitLogEntry}, will read from the database.
*
* <p>The implementation fills {@link CommitLogEntry#getKeyList()} with the most recently updated
* {@link Key}s.
*
* <p>If the calculated size of the database-object/row gets larger than {@link
* DatabaseAdapterConfig#getMaxKeyListSize()}, the next {@link Key}s will be added to new {@link
* KeyListEntity}s, each with a maximum size of {@link DatabaseAdapterConfig#getMaxKeyListSize()}.
*
* <p>The current implementation fetches all keys and "blindly" populates {@link
* CommitLogEntry#getKeyList()} and nested {@link KeyListEntity} via {@link
* CommitLogEntry#getKeyListsIds()}. So this implementation does not yet reuse previous {@link
* KeyListEntity}s. A follow-up improvement should check if already existing {@link
* KeyListEntity}s contain the same keys. This proposed optimization should be accompanied by an
* optimized read of the keys: for example, if the set of changed keys only affects {@link
* CommitLogEntry#getKeyList()} but not the keys via {@link KeyListEntity}, it is just unnecessary
* to both read and re-write those rows for {@link KeyListEntity}.
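*
* <p>For illustration, assuming {@code getMaxKeyListSize() == 250_000} bytes: keys are appended
* to the embedded {@link CommitLogEntry#getKeyList()} while the projected entity size stays
* below that limit; subsequent keys spill over into as many {@link KeyListEntity}s as necessary,
* each itself capped at the same limit.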
*/
protected CommitLogEntry buildKeyList(
OP_CONTEXT ctx, CommitLogEntry unwrittenEntry, Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException {
// Read commit-log until the previous persisted key-list
Hash startHash = unwrittenEntry.getParents().get(0);
// Builder for the new commit-log-entry that will carry the complete key-list
ImmutableCommitLogEntry.Builder newCommitEntry =
ImmutableCommitLogEntry.builder().from(unwrittenEntry).keyListDistance(0);
KeyListBuildState buildState =
new KeyListBuildState(entitySize(unwrittenEntry), newCommitEntry);
keysForCommitEntry(ctx, startHash)
.forEach(
keyWithType -> {
int keyTypeSize = entitySize(keyWithType);
if (buildState.embedded) {
// filling the embedded key-list in CommitLogEntry
if (buildState.currentSize + keyTypeSize < config.getMaxKeyListSize()) {
// CommitLogEntry.keyList still has room
buildState.addToEmbedded(keyWithType, keyTypeSize);
} else {
// CommitLogEntry.keyList is "full", switch to the first KeyListEntity
buildState.embedded = false;
buildState.newKeyListEntity();
buildState.addToKeyListEntity(keyWithType, keyTypeSize);
}
} else {
// filling linked key-lists via CommitLogEntry.keyListIds
if (buildState.currentSize + keyTypeSize > config.getMaxKeyListSize()) {
// current KeyListEntity is "full", switch to a new one
buildState.finishKeyListEntity();
buildState.newKeyListEntity();
}
buildState.addToKeyListEntity(keyWithType, keyTypeSize);
}
});
// If there's an "unfinished" KeyListEntity, build it.
if (buildState.currentKeyList != null) {
buildState.finishKeyListEntity();
}
// Inform the (CAS)-op-loop about the IDs of the KeyListEntities being optimistically written.
buildState.newKeyListEntities.stream().map(KeyListEntity::getId).forEach(newKeyLists);
// Write the new KeyListEntities
if (!buildState.newKeyListEntities.isEmpty()) {
writeKeyListEntities(ctx, buildState.newKeyListEntities);
}
// Return the new commit-log-entry with the complete-key-list
return newCommitEntry.keyList(buildState.embeddedBuilder.build()).build();
}
/** Calculate the expected size of the given {@link CommitLogEntry} in the database. */
protected abstract int entitySize(CommitLogEntry entry);
/** Calculate the expected size of the given {@link KeyWithType} in the database. */
protected abstract int entitySize(KeyWithType entry);
/**
* If the current HEAD of the target branch for a commit/transplant/merge is not equal to the
* expected/reference HEAD, verify that there is no conflict, i.e. that none of the keys touched
* by the new commit(s) are also touched by the commits in the range 'expectedHead (excluding)
* .. currentHead (including)'.
*/
protected void checkForModifiedKeysBetweenExpectedAndCurrentCommit(
OP_CONTEXT ctx, CommitAttempt commitAttempt, Hash branchHead, List<String> mismatches)
throws ReferenceNotFoundException {
if (commitAttempt.getExpectedHead().isPresent()) {
Hash expectedHead = commitAttempt.getExpectedHead().get();
if (!expectedHead.equals(branchHead)) {
Set<Key> operationKeys = new HashSet<>();
operationKeys.addAll(commitAttempt.getDeletes());
operationKeys.addAll(commitAttempt.getUnchanged());
commitAttempt.getPuts().stream().map(KeyWithBytes::getKey).forEach(operationKeys::add);
boolean sinceSeen =
checkConflictingKeysForCommit(
ctx, branchHead, expectedHead, operationKeys, mismatches::add);
// If the expectedHead is the special value NO_ANCESTOR, which is not persisted,
// ignore the fact that it has not been seen. Otherwise, raise a
// ReferenceNotFoundException that the expected-hash does not exist on the target
// branch.
if (!sinceSeen && !expectedHead.equals(NO_ANCESTOR)) {
throw hashNotFound(commitAttempt.getCommitToBranch(), expectedHead);
}
}
}
}
/** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */
protected Stream<KeyWithType> keysForCommitEntry(
OP_CONTEXT ctx, Hash hash, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException {
return keysForCommitEntry(ctx, hash)
.filter(kt -> keyFilter.check(kt.getKey(), kt.getContentId(), kt.getType()));
}
/** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */
protected Stream<KeyWithType> keysForCommitEntry(OP_CONTEXT ctx, Hash hash)
throws ReferenceNotFoundException {
// walk the commit-log backwards, stopping at (and including) the last persisted key-list
Set<Key> seen = new HashSet<>();
Stream<CommitLogEntry> log = readCommitLogStream(ctx, hash);
log = takeUntilIncludeLast(log, e -> e.getKeyList() != null);
return log.flatMap(
e -> {
// Add CommitLogEntry.deletes to "seen" so these keys won't be returned
seen.addAll(e.getDeletes());
// Return from CommitLogEntry.puts first
Stream<KeyWithType> stream =
e.getPuts().stream()
.filter(kt -> seen.add(kt.getKey()))
.map(KeyWithBytes::asKeyWithType);
if (e.getKeyList() != null) {
// Return from CommitLogEntry.keyList after the keys in CommitLogEntry.puts
Stream<KeyWithType> embedded =
e.getKeyList().getKeys().stream().filter(kt -> seen.add(kt.getKey()));
stream = Stream.concat(stream, embedded);
if (!e.getKeyListsIds().isEmpty()) {
// If there are nested key-lists, retrieve those and add the keys from these
stream =
Stream.concat(
stream,
// "lazily" fetch key-lists
Stream.of(e.getKeyListsIds())
.flatMap(ids -> fetchKeyLists(ctx, ids))
.map(KeyListEntity::getKeys)
.flatMap(k -> k.getKeys().stream())
.filter(kt -> seen.add(kt.getKey())));
}
}
return stream;
});
}
/**
* Fetch the global-state and per-ref content for the given {@link Key}s and {@link Hash
* commitSha}. Non-existing keys must not be present in the returned map.
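*
* <p>The commit-log is walked from {@code refHead} towards older commits and the most recent
* operation per key wins: for example, if commit {@code C2} deletes a key {@code k} that the
* older commit {@code C1} put, a fetch at {@code C2} does not return {@code k}.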
*/
protected Map<Key, ContentAndState<ByteString>> fetchValues(
OP_CONTEXT ctx, Hash refHead, Collection<Key> keys, KeyFilterPredicate keyFilter)
throws ReferenceNotFoundException {
Set<Key> remainingKeys = new HashSet<>(keys);
Map<Key, ByteString> nonGlobal = new HashMap<>();
Map<Key, ContentId> keyToContentIds = new HashMap<>();
Set<ContentId> contentIds = new HashSet<>();
try (Stream<CommitLogEntry> log =
takeUntilExcludeLast(readCommitLogStream(ctx, refHead), e -> remainingKeys.isEmpty())) {
log.peek(entry -> entry.getDeletes().forEach(remainingKeys::remove))
.flatMap(entry -> entry.getPuts().stream())
.filter(put -> remainingKeys.remove(put.getKey()))
.filter(put -> keyFilter.check(put.getKey(), put.getContentId(), put.getType()))
.forEach(
put -> {
nonGlobal.put(put.getKey(), put.getValue());
keyToContentIds.put(put.getKey(), put.getContentId());
contentIds.add(put.getContentId());
});
}
Map<ContentId, ByteString> globals = fetchGlobalStates(ctx, contentIds);
return nonGlobal.entrySet().stream()
.collect(
Collectors.toMap(
Entry::getKey,
e ->
ContentAndState.of(
e.getValue(), globals.get(keyToContentIds.get(e.getKey())))));
}
/**
* Fetches the global-state information for the given content-ids.
*
* @param ctx technical context
* @param contentIds the content-ids to fetch
* @return map of content-id to state
*/
protected abstract Map<ContentId, ByteString> fetchGlobalStates(
OP_CONTEXT ctx, Set<ContentId> contentIds) throws ReferenceNotFoundException;
protected abstract Stream<KeyListEntity> fetchKeyLists(OP_CONTEXT ctx, List<Hash> keyListsIds);
/**
* Write a new commit-entry; the given commit entry is to be persisted as is. All values of the
* given {@link CommitLogEntry} can be considered valid and consistent.
*
* <p>Implementations may, however, apply their own consistency checks, such as a best-effort
* approach to prevent hash-collisions, without giving any further consistency guarantees.
*/
protected abstract void writeIndividualCommit(OP_CONTEXT ctx, CommitLogEntry entry)
throws ReferenceConflictException;
/**
* Write multiple new commit-entries; the given commit entries are to be persisted as is. All
* values of the given {@link CommitLogEntry} can be considered valid and consistent.
*
* <p>Implementations may, however, apply their own consistency checks, such as a best-effort
* approach to prevent hash-collisions, without giving any further consistency guarantees.
*/
protected abstract void writeMultipleCommits(OP_CONTEXT ctx, List<CommitLogEntry> entries)
throws ReferenceConflictException;
protected abstract void writeKeyListEntities(
OP_CONTEXT ctx, List<KeyListEntity> newKeyListEntities);
/**
* Check whether the commits in the range {@code (sinceCommitExcluding .. upToCommitIncluding]}
* contain any of the given {@link Key}s.
*
* <p>Conflicts are reported via {@code mismatches}.
*/
protected boolean checkConflictingKeysForCommit(
OP_CONTEXT ctx,
Hash upToCommitIncluding,
Hash sinceCommitExcluding,
Set<Key> keys,
Consumer<String> mismatches)
throws ReferenceNotFoundException {
boolean[] sinceSeen = new boolean[1];
Stream<CommitLogEntry> log = readCommitLogStream(ctx, upToCommitIncluding);
log =
takeUntilExcludeLast(
log,
e -> {
if (e.getHash().equals(sinceCommitExcluding)) {
sinceSeen[0] = true;
return true;
}
return false;
});
Set<Key> handled = new HashSet<>();
log.forEach(
e -> {
e.getPuts()
.forEach(
a -> {
if (keys.contains(a.getKey()) && handled.add(a.getKey())) {
mismatches.accept(
String.format(
"Key '%s' has conflicting put-operation from another commit.",
a.getKey()));
}
});
e.getDeletes()
.forEach(
a -> {
if (keys.contains(a) && handled.add(a)) {
mismatches.accept(
String.format(
"Key '%s' has conflicting delete-operation from another commit.", a));
}
});
});
return sinceSeen[0];
}
protected final class CommonAncestorState {
final Iterator<Hash> toLog;
final List<Hash> toCommitHashesList;
final Set<Hash> toCommitHashes = new HashSet<>();
public CommonAncestorState(OP_CONTEXT ctx, Hash toHead, boolean trackCount) {
this.toLog = Spliterators.iterator(readCommitLogHashes(ctx, toHead));
this.toCommitHashesList = trackCount ? new ArrayList<>() : null;
}
boolean fetchNext() {
if (toLog.hasNext()) {
Hash hash = toLog.next();
toCommitHashes.add(hash);
if (toCommitHashesList != null) {
toCommitHashesList.add(hash);
}
return true;
}
return false;
}
public boolean contains(Hash candidate) {
return toCommitHashes.contains(candidate);
}
public int indexOf(Hash hash) {
return toCommitHashesList.indexOf(hash);
}
}
/**
* Finds the common-ancestor of two commit-log-entries. If no common-ancestor is found, throws a
* {@link ReferenceConflictException}. Otherwise this method returns the hash of the
* common-ancestor.
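*
* <p>The search alternates between the two commit-logs, fetching up to
* {@code getParentsPerCommit()} hashes from each side per round, and returns as soon as a hash
* from the "from"-side log is found among the "to"-side hashes seen so far.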
*/
protected Hash findCommonAncestor(OP_CONTEXT ctx, Hash from, NamedRef toBranch, Hash toHead)
throws ReferenceConflictException {
// TODO this implementation requires guardrails:
// max number of "to"-commits to fetch, max number of "from"-commits to fetch,
// both impact the cost (CPU, memory, I/O) of a merge operation.
CommonAncestorState commonAncestorState = new CommonAncestorState(ctx, toHead, false);
Hash commonAncestorHash =
findCommonAncestor(ctx, from, commonAncestorState, (dist, hash) -> hash);
if (commonAncestorHash == null) {
throw new ReferenceConflictException(
String.format(
"No common ancestor found for merge of '%s' into branch '%s'",
from, toBranch.getName()));
}
return commonAncestorHash;
}
protected <R> R findCommonAncestor(
OP_CONTEXT ctx, Hash from, CommonAncestorState state, BiFunction<Integer, Hash, R> result) {
Iterator<Hash> fromLog = Spliterators.iterator(readCommitLogHashes(ctx, from));
List<Hash> fromCommitHashes = new ArrayList<>();
while (true) {
boolean anyFetched = false;
for (int i = 0; i < config.getParentsPerCommit(); i++) {
if (state.fetchNext()) {
anyFetched = true;
}
if (fromLog.hasNext()) {
fromCommitHashes.add(fromLog.next());
anyFetched = true;
}
}
if (!anyFetched) {
return null;
}
for (int diffOnFrom = 0; diffOnFrom < fromCommitHashes.size(); diffOnFrom++) {
Hash f = fromCommitHashes.get(diffOnFrom);
if (state.contains(f)) {
return result.apply(diffOnFrom, f);
}
}
}
}
/**
* For merge/transplant, verifies that the given commits do not touch any of the given keys.
*
* @param commitsChronological list of commit-log-entries, in order of commit-operations,
* chronological order
*/
protected void checkForKeyCollisions(
Set<Key> keysTouchedOnTarget, List<CommitLogEntry> commitsChronological)
throws ReferenceConflictException {
Set<Key> keyCollisions = new HashSet<>();
for (int i = commitsChronological.size() - 1; i >= 0; i--) {
CommitLogEntry sourceCommit = commitsChronological.get(i);
Stream.concat(
sourceCommit.getPuts().stream().map(KeyWithBytes::getKey),
sourceCommit.getDeletes().stream())
.filter(keysTouchedOnTarget::contains)
.forEach(keyCollisions::add);
}
if (!keyCollisions.isEmpty()) {
throw new ReferenceConflictException(
String.format(
"The following keys have been changed in conflict: %s",
keyCollisions.stream()
.map(k -> String.format("'%s'", k.toString()))
.collect(Collectors.joining(", "))));
}
}
/**
* For merge/transplant, collect the content-keys that were modified in the given list of entries.
*
* @param commitsReverseChronological list of commit-log-entries, in reverse order of
* commit-operations, reverse chronological order
*/
protected Set<Key> collectModifiedKeys(List<CommitLogEntry> commitsReverseChronological) {
Set<Key> keysTouchedOnTarget = new HashSet<>();
commitsReverseChronological.forEach(
e -> {
e.getPuts().stream().map(KeyWithBytes::getKey).forEach(keysTouchedOnTarget::add);
e.getDeletes().forEach(keysTouchedOnTarget::remove);
});
return keysTouchedOnTarget;
}
/** For merge/transplant, applies the given commits onto the target-hash. */
protected Hash copyCommits(
OP_CONTEXT ctx,
long timeInMicros,
Hash targetHead,
List<CommitLogEntry> commitsChronological,
Consumer<Hash> newKeyLists)
throws ReferenceNotFoundException {
int parentsPerCommit = config.getParentsPerCommit();
List<Hash> parents = new ArrayList<>(parentsPerCommit);
CommitLogEntry targetHeadCommit = fetchFromCommitLog(ctx, targetHead);
long commitSeq;
if (targetHeadCommit != null) {
parents.addAll(targetHeadCommit.getParents());
commitSeq = targetHeadCommit.getCommitSeq() + 1;
} else {
commitSeq = 1L;
}
int keyListDistance = targetHeadCommit != null ? targetHeadCommit.getKeyListDistance() : 0;
// Rewrite the commits to apply and store the rewritten entries back into 'commitsChronological'
for (int i = commitsChronological.size() - 1; i >= 0; i--, commitSeq++) {
CommitLogEntry sourceCommit = commitsChronological.get(i);
while (parents.size() > parentsPerCommit - 1) {
parents.remove(parentsPerCommit - 1);
}
if (parents.isEmpty()) {
parents.add(targetHead);
} else {
parents.add(0, targetHead);
}
CommitLogEntry newEntry =
buildIndividualCommit(
ctx,
timeInMicros,
parents,
commitSeq,
sourceCommit.getMetadata(),
sourceCommit.getPuts(),
sourceCommit.getDeletes(),
keyListDistance,
newKeyLists);
keyListDistance = newEntry.getKeyListDistance();
if (!newEntry.getHash().equals(sourceCommit.getHash())) {
commitsChronological.set(i, newEntry);
} else {
// Newly built CommitLogEntry is equal to the CommitLogEntry to transplant.
// This can happen, if the commit to transplant has NO_ANCESTOR as its parent.
commitsChronological.remove(i);
}
targetHead = newEntry.getHash();
}
return targetHead;
}
/**
* Verifies that the current global-states match the {@code expectedStates} and produces
* human-readable messages for the violations.
*/
protected void checkExpectedGlobalStates(
OP_CONTEXT ctx, CommitAttempt commitAttempt, Consumer<String> mismatches)
throws ReferenceNotFoundException {
Map<ContentId, ByteString> globalStates =
fetchGlobalStates(ctx, commitAttempt.getExpectedStates().keySet());
for (Entry<ContentId, Optional<ByteString>> expectedState :
commitAttempt.getExpectedStates().entrySet()) {
ByteString currentState = globalStates.get(expectedState.getKey());
if (currentState == null) {
if (expectedState.getValue().isPresent()) {
mismatches.accept(
String.format(
"No current global-state for content-id '%s'.", expectedState.getKey()));
}
} else {
if (!expectedState.getValue().isPresent()) {
// This happens when a table is being created on a branch, but that table already exists.
mismatches.accept(
String.format(
"Global-state for content-id '%s' already exists.", expectedState.getKey()));
} else if (!currentState.equals(expectedState.getValue().get())) {
mismatches.accept(
String.format(
"Mismatch in global-state for content-id '%s'.", expectedState.getKey()));
}
}
}
}
}