All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.projectnessie.versioned.persist.adapter.spi.AbstractDatabaseAdapter Maven / Gradle / Ivy

/*
 * Copyright (C) 2020 Dremio
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.projectnessie.versioned.persist.adapter.spi;

import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static org.projectnessie.versioned.persist.adapter.KeyFilterPredicate.ALLOW_ALL;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterMetrics.tryLoopFinished;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashKey;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.newHasher;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.randomHash;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.referenceNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilExcludeLast;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilIncludeLast;
import static org.projectnessie.versioned.persist.adapter.spi.Traced.trace;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import java.nio.charset.StandardCharsets;
import java.time.Instant;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.Spliterators.AbstractSpliterator;
import java.util.concurrent.Callable;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.annotation.Nonnull;
import org.projectnessie.versioned.BranchName;
import org.projectnessie.versioned.Diff;
import org.projectnessie.versioned.GetNamedRefsParams;
import org.projectnessie.versioned.GetNamedRefsParams.RetrieveOptions;
import org.projectnessie.versioned.Hash;
import org.projectnessie.versioned.ImmutableReferenceInfo;
import org.projectnessie.versioned.Key;
import org.projectnessie.versioned.MetadataRewriter;
import org.projectnessie.versioned.NamedRef;
import org.projectnessie.versioned.RefLogNotFoundException;
import org.projectnessie.versioned.ReferenceConflictException;
import org.projectnessie.versioned.ReferenceInfo;
import org.projectnessie.versioned.ReferenceInfo.CommitsAheadBehind;
import org.projectnessie.versioned.ReferenceNotFoundException;
import org.projectnessie.versioned.StoreWorker;
import org.projectnessie.versioned.TagName;
import org.projectnessie.versioned.persist.adapter.CommitLogEntry;
import org.projectnessie.versioned.persist.adapter.CommitParams;
import org.projectnessie.versioned.persist.adapter.ContentAndState;
import org.projectnessie.versioned.persist.adapter.ContentId;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapter;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapterConfig;
import org.projectnessie.versioned.persist.adapter.Difference;
import org.projectnessie.versioned.persist.adapter.ImmutableCommitLogEntry;
import org.projectnessie.versioned.persist.adapter.ImmutableKeyList;
import org.projectnessie.versioned.persist.adapter.KeyFilterPredicate;
import org.projectnessie.versioned.persist.adapter.KeyList;
import org.projectnessie.versioned.persist.adapter.KeyListEntity;
import org.projectnessie.versioned.persist.adapter.KeyListEntry;
import org.projectnessie.versioned.persist.adapter.KeyWithBytes;
import org.projectnessie.versioned.persist.adapter.MergeParams;
import org.projectnessie.versioned.persist.adapter.MetadataRewriteParams;
import org.projectnessie.versioned.persist.adapter.RefLog;
import org.projectnessie.versioned.persist.adapter.TransplantParams;

/**
 * Contains all the database-independent logic for a Database-adapter.
 *
 * 

This class does not implement everything from {@link * org.projectnessie.versioned.persist.adapter.DatabaseAdapter}. * *

Implementations must consider that production environments may use instances of this class in * JAX-RS {@code RequestScope}, which means that it must be very cheap to create new instances of * the implementations. * *

Managed resources like a connection-pool must be managed outside of {@link * AbstractDatabaseAdapter} implementations. The recommended way to "inject" such managed resources * into short-lived {@link AbstractDatabaseAdapter} implementations is via a special configuration * attribute. * * @param context for each operation, so for each operation in {@link * org.projectnessie.versioned.persist.adapter.DatabaseAdapter} that requires database access. * For example, used to have one "borrowed" database connection per database-adapter operation. * @param configuration interface type for the concrete implementation */ public abstract class AbstractDatabaseAdapter< OP_CONTEXT extends AutoCloseable, CONFIG extends DatabaseAdapterConfig> implements DatabaseAdapter { protected static final String TAG_HASH = "hash"; protected static final String TAG_COUNT = "count"; protected final CONFIG config; protected final StoreWorker storeWorker; @SuppressWarnings("UnstableApiUsage") public static final Hash NO_ANCESTOR = Hash.of( UnsafeByteOperations.unsafeWrap( newHasher().putString("empty", StandardCharsets.UTF_8).hash().asBytes())); protected static long COMMIT_LOG_HASH_SEED = 946928273206945677L; protected AbstractDatabaseAdapter(CONFIG config, StoreWorker storeWorker) { Objects.requireNonNull(config, "config parameter must not be null"); this.config = config; this.storeWorker = storeWorker; } @VisibleForTesting public CONFIG getConfig() { return config; } @VisibleForTesting public abstract OP_CONTEXT borrowConnection(); @Override public Hash noAncestorHash() { return NO_ANCESTOR; } // ///////////////////////////////////////////////////////////////////////////////////////////// // DatabaseAdapter subclass API (protected) // ///////////////////////////////////////////////////////////////////////////////////////////// /** Returns the current time in microseconds since epoch. */ protected long commitTimeInMicros() { Instant instant = config.getClock().instant(); long time = instant.getEpochSecond(); long nano = instant.getNano(); return TimeUnit.SECONDS.toMicros(time) + NANOSECONDS.toMicros(nano); } /** * Logic implementation of a commit-attempt. * * @param ctx technical operation-context * @param commitParams commit parameters * @param branchHead current HEAD of {@code branch} * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return optimistically written commit-log-entry */ protected CommitLogEntry commitAttempt( OP_CONTEXT ctx, long timeInMicros, Hash branchHead, CommitParams commitParams, Consumer newKeyLists) throws ReferenceNotFoundException, ReferenceConflictException { List mismatches = new ArrayList<>(); Callable validator = commitParams.getValidator(); if (validator != null) { try { validator.call(); } catch (RuntimeException e) { // just propagate the RuntimeException up throw e; } catch (Exception e) { throw new RuntimeException(e); } } checkContentKeysUnique(commitParams); // verify expected global-states checkExpectedGlobalStates(ctx, commitParams, mismatches::add); CommitLogEntry currentBranchEntry = checkForModifiedKeysBetweenExpectedAndCurrentCommit( ctx, commitParams, branchHead, mismatches); if (!mismatches.isEmpty()) { throw new ReferenceConflictException(String.join("\n", mismatches)); } int parentsPerCommit = config.getParentsPerCommit(); List newParents = new ArrayList<>(parentsPerCommit); newParents.add(branchHead); long commitSeq; if (currentBranchEntry != null) { List p = currentBranchEntry.getParents(); newParents.addAll(p.subList(0, Math.min(p.size(), parentsPerCommit - 1))); commitSeq = currentBranchEntry.getCommitSeq() + 1; } else { commitSeq = 1; } // Helps when building a new key-list to not fetch the current commit again. Function currentCommit = h -> h.equals(branchHead) ? currentBranchEntry : null; CommitLogEntry newBranchCommit = buildIndividualCommit( ctx, timeInMicros, newParents, commitSeq, commitParams.getCommitMetaSerialized(), commitParams.getPuts(), commitParams.getDeletes(), currentBranchEntry != null ? currentBranchEntry.getKeyListDistance() : 0, newKeyLists, currentCommit); writeIndividualCommit(ctx, newBranchCommit); return newBranchCommit; } private static void checkContentKeysUnique(CommitParams commitParams) { Set keys = new HashSet<>(); Set duplicates = new HashSet<>(); Stream.concat( Stream.concat( commitParams.getDeletes().stream(), commitParams.getPuts().stream().map(KeyWithBytes::getKey)), commitParams.getUnchanged().stream()) .forEach( key -> { if (!keys.add(key)) { duplicates.add(key); } }); if (!duplicates.isEmpty()) { throw new IllegalArgumentException( String.format( "Duplicate keys are not allowed in a commit: %s", duplicates.stream().map(Key::toString).collect(Collectors.joining(", ")))); } } /** * Logic implementation of a merge-attempt. * * @param ctx technical operation context * @param toHead current HEAD of {@code toBranch} * @param branchCommits consumer for the individual commits to merge * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return hash of the last commit-log-entry written to {@code toBranch} */ protected Hash mergeAttempt( OP_CONTEXT ctx, long timeInMicros, Hash toHead, Consumer branchCommits, Consumer newKeyLists, MergeParams mergeParams) throws ReferenceNotFoundException, ReferenceConflictException { validateHashExists(ctx, mergeParams.getMergeFromHash()); // 1. ensure 'expectedHash' is a parent of HEAD-of-'toBranch' hashOnRef(ctx, mergeParams.getToBranch(), mergeParams.getExpectedHead(), toHead); // 2. find nearest common-ancestor between 'from' + 'fromHash' Hash commonAncestor = findCommonAncestor(ctx, mergeParams.getMergeFromHash(), mergeParams.getToBranch(), toHead); // 3. Collect commit-log-entries List toEntriesReverseChronological = takeUntilExcludeLast( readCommitLogStream(ctx, toHead), e -> e.getHash().equals(commonAncestor)) .collect(Collectors.toList()); List commitsToMergeChronological = takeUntilExcludeLast( readCommitLogStream(ctx, mergeParams.getMergeFromHash()), e -> e.getHash().equals(commonAncestor)) .collect(Collectors.toList()); if (commitsToMergeChronological.isEmpty()) { // Nothing to merge, shortcut throw new IllegalArgumentException( String.format( "No hashes to merge from '%s' onto '%s' @ '%s'.", mergeParams.getMergeFromHash().asString(), mergeParams.getToBranch().getName(), toHead)); } return mergeTransplantCommon( ctx, timeInMicros, toHead, branchCommits, newKeyLists, commitsToMergeChronological, toEntriesReverseChronological, mergeParams); } /** * Logic implementation of a transplant-attempt. * * @param ctx technical operation context * @param targetHead current HEAD of {@code targetBranch} * @param branchCommits consumer for the individual commits to merge * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return hash of the last commit-log-entry written to {@code targetBranch} */ protected Hash transplantAttempt( OP_CONTEXT ctx, long timeInMicros, Hash targetHead, Consumer branchCommits, Consumer newKeyLists, TransplantParams transplantParams) throws ReferenceNotFoundException, ReferenceConflictException { if (transplantParams.getSequenceToTransplant().isEmpty()) { throw new IllegalArgumentException("No hashes to transplant given."); } // 1. ensure 'expectedHash' is a parent of HEAD-of-'targetBranch' & collect keys List targetEntriesReverseChronological = new ArrayList<>(); hashOnRef( ctx, targetHead, transplantParams.getToBranch(), transplantParams.getExpectedHead(), targetEntriesReverseChronological::add); // Exclude the expected-hash on the target-branch from key-collisions check if (!targetEntriesReverseChronological.isEmpty() && transplantParams.getExpectedHead().isPresent() && targetEntriesReverseChronological .get(0) .getHash() .equals(transplantParams.getExpectedHead().get())) { targetEntriesReverseChronological.remove(0); } // 4. ensure 'sequenceToTransplant' is sequential int[] index = new int[] {transplantParams.getSequenceToTransplant().size() - 1}; Hash lastHash = transplantParams .getSequenceToTransplant() .get(transplantParams.getSequenceToTransplant().size() - 1); List commitsToTransplantChronological = takeUntilExcludeLast( readCommitLogStream(ctx, lastHash), e -> { int i = index[0]--; if (i == -1) { return true; } if (!e.getHash().equals(transplantParams.getSequenceToTransplant().get(i))) { throw new IllegalArgumentException("Sequence of hashes is not contiguous."); } return false; }) .collect(Collectors.toList()); // 5. check for key-collisions return mergeTransplantCommon( ctx, timeInMicros, targetHead, branchCommits, newKeyLists, commitsToTransplantChronological, targetEntriesReverseChronological, transplantParams); } protected Hash mergeTransplantCommon( OP_CONTEXT ctx, long timeInMicros, Hash toHead, Consumer branchCommits, Consumer newKeyLists, List commitsToMergeChronological, List toEntriesReverseChronological, MetadataRewriteParams params) throws ReferenceConflictException, ReferenceNotFoundException { // Collect modified keys. Collections.reverse(toEntriesReverseChronological); Set keysTouchedOnTarget = collectModifiedKeys(toEntriesReverseChronological); Predicate skipCheckPredicate = k -> params.getMergeTypes().getOrDefault(k, params.getDefaultMergeType()).isSkipCheck(); Predicate mergePredicate = k -> params.getMergeTypes().getOrDefault(k, params.getDefaultMergeType()).isMerge(); // Ignore keys in collision-check that will not be merged or collision-checked keysTouchedOnTarget.removeIf(skipCheckPredicate); checkForKeyCollisions(ctx, toHead, keysTouchedOnTarget, commitsToMergeChronological); if (params.keepIndividualCommits() || commitsToMergeChronological.size() == 1) { // (no need to verify the global states during a merge) // re-apply commits in 'sequenceToTransplant' onto 'targetBranch' toHead = copyCommits( ctx, timeInMicros, toHead, commitsToMergeChronological, newKeyLists, params.getUpdateCommitMetadata(), mergePredicate); // Write commits commitsToMergeChronological.stream().map(CommitLogEntry::getHash).forEach(branchCommits); writeMultipleCommits(ctx, commitsToMergeChronological); } else { toHead = squashCommits( ctx, timeInMicros, toHead, commitsToMergeChronological, newKeyLists, params.getUpdateCommitMetadata(), mergePredicate); } return toHead; } /** * Compute the diff between two references. * * @param ctx technical operation context * @param from "from" reference to compute the difference from, appears on the "from" side in * {@link Diff} with hash in {@code from} to compute the diff for, must exist in {@code from} * @param to "to" reference to compute the difference from, appears on the "to" side in {@link * Diff} with hash in {@code to} to compute the diff for, must exist in {@code to} * @param keyFilter optional filter on key + content-id + content-type * @return computed difference */ protected Stream buildDiff( OP_CONTEXT ctx, Hash from, Hash to, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { // TODO this implementation works, but is definitely not the most efficient one. Set allKeys = new HashSet<>(); try (Stream s = keysForCommitEntry(ctx, from, keyFilter).map(KeyListEntry::getKey)) { s.forEach(allKeys::add); } try (Stream s = keysForCommitEntry(ctx, to, keyFilter).map(KeyListEntry::getKey)) { s.forEach(allKeys::add); } if (allKeys.isEmpty()) { // no keys, shortcut return Stream.empty(); } List allKeysList = new ArrayList<>(allKeys); Map> fromValues = fetchValues(ctx, from, allKeysList, keyFilter); Map> toValues = fetchValues(ctx, to, allKeysList, keyFilter); Function, Optional> valToContent = cs -> cs != null ? Optional.of(cs.getRefState()) : Optional.empty(); return IntStream.range(0, allKeys.size()) .mapToObj(allKeysList::get) .map( k -> { ContentAndState fromVal = fromValues.get(k); ContentAndState toVal = toValues.get(k); Optional f = valToContent.apply(fromVal); Optional t = valToContent.apply(toVal); if (f.equals(t)) { return null; } Optional g = Optional.ofNullable( fromVal != null ? fromVal.getGlobalState() : (toVal != null ? toVal.getGlobalState() : null)); return Difference.of(k, g, f, t); }) .filter(Objects::nonNull); } /** * Common functionality to filter and enhance based on the given {@link GetNamedRefsParams}. * * @param ctx database-adapter context * @param params defines which kind of references and which additional information shall be * retrieved * @param defaultBranchHead prerequisite, the hash of the default branch's HEAD commit (depending * on the database-adapter implementation). If {@code null}, {@link * #namedRefsWithDefaultBranchRelatedInfo(AutoCloseable, GetNamedRefsParams, Stream, Hash)} * will not add additional default-branch related information (common ancestor and commits * behind/ahead). * @param refs current {@link Stream} of {@link ReferenceInfo} to be enhanced. * @return filtered/enhanced stream based on {@code refs}. */ protected Stream> namedRefsFilterAndEnhance( OP_CONTEXT ctx, GetNamedRefsParams params, Hash defaultBranchHead, Stream> refs) { refs = namedRefsMaybeFilter(params, refs); refs = namedRefsWithDefaultBranchRelatedInfo(ctx, params, refs, defaultBranchHead); refs = namedReferenceWithCommitMeta(ctx, params, refs); return refs; } /** Applies the reference type filter (tags or branches) to the Java stream. */ protected static Stream> namedRefsMaybeFilter( GetNamedRefsParams params, Stream> refs) { if (params.getBranchRetrieveOptions().isRetrieve() && params.getTagRetrieveOptions().isRetrieve()) { // No filtering necessary, if all named-reference types (tags and branches) are being fetched. return refs; } return refs.filter(ref -> namedRefsRetrieveOptionsForReference(params, ref).isRetrieve()); } protected static boolean namedRefsRequiresBaseReference(GetNamedRefsParams params) { return namedRefsRequiresBaseReference(params.getBranchRetrieveOptions()) || namedRefsRequiresBaseReference(params.getTagRetrieveOptions()); } protected static boolean namedRefsRequiresBaseReference( GetNamedRefsParams.RetrieveOptions retrieveOptions) { return retrieveOptions.isComputeAheadBehind() || retrieveOptions.isComputeCommonAncestor(); } protected static boolean namedRefsAnyRetrieves(GetNamedRefsParams params) { return params.getBranchRetrieveOptions().isRetrieve() || params.getTagRetrieveOptions().isRetrieve(); } protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference( GetNamedRefsParams params, ReferenceInfo ref) { return namedRefsRetrieveOptionsForReference(params, ref.getNamedRef()); } protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference( GetNamedRefsParams params, NamedRef ref) { if (ref instanceof BranchName) { return params.getBranchRetrieveOptions(); } if (ref instanceof TagName) { return params.getTagRetrieveOptions(); } throw new IllegalArgumentException("ref must be either BranchName or TabName, but is " + ref); } /** * Returns an updated {@link ReferenceInfo} with the commit-meta of the reference's HEAD commit. */ protected Stream> namedReferenceWithCommitMeta( OP_CONTEXT ctx, GetNamedRefsParams params, Stream> refs) { return refs.map( ref -> { if (!namedRefsRetrieveOptionsForReference(params, ref).isRetrieveCommitMetaForHead()) { return ref; } CommitLogEntry logEntry = fetchFromCommitLog(ctx, ref.getHash()); if (logEntry == null) { return ref; } return ImmutableReferenceInfo.builder() .from(ref) .headCommitMeta(logEntry.getMetadata()) .commitSeq(logEntry.getCommitSeq()) .build(); }); } /** * If necessary based on the given {@link GetNamedRefsParams}, updates the returned {@link * ReferenceInfo}s with the common-ancestor of the named reference and the default branch and the * number of commits behind/ahead compared to the default branch. * *

The common ancestor and/or information of commits behind/ahead is meaningless ({@code null}) * for the default branch. Both fields are also {@code null} if the named reference points to the * {@link #noAncestorHash()} (beginning of time). */ protected Stream> namedRefsWithDefaultBranchRelatedInfo( OP_CONTEXT ctx, GetNamedRefsParams params, Stream> refs, Hash defaultBranchHead) { if (defaultBranchHead == null) { // No enhancement of common ancestor and/or commits behind/ahead. return refs; } CommonAncestorState commonAncestorState = new CommonAncestorState( ctx, defaultBranchHead, params.getBranchRetrieveOptions().isComputeAheadBehind() || params.getTagRetrieveOptions().isComputeAheadBehind()); return refs.map( ref -> { if (ref.getNamedRef().equals(params.getBaseReference())) { return ref; } RetrieveOptions retrieveOptions = namedRefsRetrieveOptionsForReference(params, ref); ReferenceInfo updated = namedRefsRequiresBaseReference(retrieveOptions) ? findCommonAncestor( ctx, ref.getHash(), commonAncestorState, (diffOnFrom, hash) -> { ReferenceInfo newRef = ref; if (retrieveOptions.isComputeCommonAncestor()) { newRef = newRef.withCommonAncestor(hash); } if (retrieveOptions.isComputeAheadBehind()) { int behind = commonAncestorState.indexOf(hash); CommitsAheadBehind aheadBehind = CommitsAheadBehind.of(diffOnFrom, behind); newRef = newRef.withAheadBehind(aheadBehind); } return newRef; }) : null; return updated != null ? updated : ref; }); } /** * Convenience for {@link #hashOnRef(AutoCloseable, Hash, NamedRef, Optional, Consumer) * hashOnRef(ctx, knownHead, ref.getReference(), ref.getHashOnReference(), null)}. */ protected Hash hashOnRef( OP_CONTEXT ctx, NamedRef reference, Optional hashOnRef, Hash knownHead) throws ReferenceNotFoundException { return hashOnRef(ctx, knownHead, reference, hashOnRef, null); } /** * Ensures that {@code ref} exists and that the hash in {@code hashOnRef} exists in that * reference. * * @param ctx technical operation context * @param ref reference that must contain {@code hashOnRef} * @param knownHead current HEAD of {@code ref} * @param hashOnRef hash to verify whether it exists in {@code ref} * @param commitLogVisitor optional consumer that will receive all visited {@link * CommitLogEntry}s, can be {@code null}. * @return value of {@code hashOnRef} or, if {@code hashOnRef} is empty, {@code knownHead} * @throws ReferenceNotFoundException if either {@code ref} does not exist or {@code hashOnRef} * does not exist on {@code ref} */ protected Hash hashOnRef( OP_CONTEXT ctx, Hash knownHead, NamedRef ref, Optional hashOnRef, Consumer commitLogVisitor) throws ReferenceNotFoundException { if (hashOnRef.isPresent()) { Hash suspect = hashOnRef.get(); // If the client requests 'NO_ANCESTOR' (== beginning of time), skip the existence-check. if (suspect.equals(NO_ANCESTOR)) { if (commitLogVisitor != null) { readCommitLogStream(ctx, knownHead).forEach(commitLogVisitor); } return suspect; } Stream hashes; if (commitLogVisitor != null) { hashes = readCommitLogStream(ctx, knownHead).peek(commitLogVisitor).map(CommitLogEntry::getHash); } else { hashes = readCommitLogHashesStream(ctx, knownHead); } if (hashes.noneMatch(suspect::equals)) { throw hashNotFound(ref, suspect); } return suspect; } else { return knownHead; } } protected void validateHashExists(OP_CONTEXT ctx, Hash hash) throws ReferenceNotFoundException { if (!NO_ANCESTOR.equals(hash) && fetchFromCommitLog(ctx, hash) == null) { throw referenceNotFound(hash); } } /** Load the commit-log entry for the given hash, return {@code null}, if not found. */ @VisibleForTesting public final CommitLogEntry fetchFromCommitLog(OP_CONTEXT ctx, Hash hash) { if (hash.equals(NO_ANCESTOR)) { // Do not try to fetch NO_ANCESTOR - it won't exist. return null; } try (Traced ignore = trace("fetchFromCommitLog").tag(TAG_HASH, hash.asString())) { return doFetchFromCommitLog(ctx, hash); } } protected abstract CommitLogEntry doFetchFromCommitLog(OP_CONTEXT ctx, Hash hash); /** * Fetch multiple {@link CommitLogEntry commit-log-entries} from the commit-log. The returned list * must have exactly as many elements as in the parameter {@code hashes}. Non-existing hashes are * returned as {@code null}. */ private List fetchMultipleFromCommitLog( OP_CONTEXT ctx, List hashes, @Nonnull Function inMemoryCommits) { List result = new ArrayList<>(hashes.size()); BitSet remainingHashes = null; // Prefetch commits already available in memory. Record indexes for the missing commits to // enable placing them in the correct positions later, when they are fetched from storage. for (int i = 0; i < hashes.size(); i++) { Hash hash = hashes.get(i); if (NO_ANCESTOR.equals(hash)) { result.add(null); continue; } CommitLogEntry found = inMemoryCommits.apply(hash); if (found != null) { result.add(found); } else { if (remainingHashes == null) { remainingHashes = new BitSet(); } result.add(null); // to be replaced with storage result below remainingHashes.set(i); } } if (remainingHashes != null) { List fromStorage; try (Traced ignore = trace("fetchPageFromCommitLog") .tag(TAG_HASH, hashes.get(0).asString()) .tag(TAG_COUNT, hashes.size())) { fromStorage = doFetchMultipleFromCommitLog( ctx, remainingHashes.stream().mapToObj(hashes::get).collect(Collectors.toList())); } // Fill the gaps in the final result list. Note that fetchPageFromCommitLog must return the // list of the same size as its `remainingHashes` parameter. Iterator iter = fromStorage.iterator(); remainingHashes.stream() .forEach( i -> { result.set(i, iter.next()); }); } return result; } protected abstract List doFetchMultipleFromCommitLog( OP_CONTEXT ctx, List hashes); /** Reads from the commit-log starting at the given commit-log-hash. */ protected Stream readCommitLogStream(OP_CONTEXT ctx, Hash initialHash) throws ReferenceNotFoundException { Spliterator split = readCommitLog(ctx, initialHash, h -> null); return StreamSupport.stream(split, false); } protected Stream readCommitLogStream( OP_CONTEXT ctx, Hash initialHash, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { Spliterator split = readCommitLog(ctx, initialHash, inMemoryCommits); return StreamSupport.stream(split, false); } protected Spliterator readCommitLog( OP_CONTEXT ctx, Hash initialHash, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { Preconditions.checkNotNull(inMemoryCommits, "in-memory commits cannot be null"); if (NO_ANCESTOR.equals(initialHash)) { return Spliterators.emptySpliterator(); } CommitLogEntry initial = inMemoryCommits.apply(initialHash); if (initial == null) { initial = fetchFromCommitLog(ctx, initialHash); } if (initial == null) { throw referenceNotFound(initialHash); } BiFunction, List> fetcher = (c, hashes) -> fetchMultipleFromCommitLog(c, hashes, inMemoryCommits); return logFetcher(ctx, initial, fetcher, CommitLogEntry::getParents); } /** * Like {@link #readCommitLogStream(AutoCloseable, Hash)}, but only returns the {@link Hash * commit-log-entry hashes}, which can be taken from {@link CommitLogEntry#getParents()}, thus no * need to perform a read-operation against every hash. */ protected Stream readCommitLogHashesStream(OP_CONTEXT ctx, Hash initialHash) { Spliterator split = readCommitLogHashes(ctx, initialHash); return StreamSupport.stream(split, false); } protected Spliterator readCommitLogHashes(OP_CONTEXT ctx, Hash initialHash) { return logFetcher( ctx, initialHash, (c, hashes) -> hashes, hash -> { CommitLogEntry entry = fetchFromCommitLog(ctx, hash); if (entry == null) { return Collections.emptyList(); } return entry.getParents(); }); } /** * Constructs a {@link Stream} of entries for either the global-state-log or a commit-log or a * reflog entry. Use {@link #readCommitLogStream(AutoCloseable, Hash)} or the similar * implementation for the global-log or reflog entry for non-transactional adapters. */ protected Spliterator logFetcher( OP_CONTEXT ctx, T initial, BiFunction, List> fetcher, Function> nextPage) { return logFetcherCommon(ctx, Collections.singletonList(initial), fetcher, nextPage); } protected Spliterator logFetcherWithPage( OP_CONTEXT ctx, List initialPage, BiFunction, List> fetcher, Function> nextPage) { return logFetcherCommon(ctx, fetcher.apply(ctx, initialPage), fetcher, nextPage); } private Spliterator logFetcherCommon( OP_CONTEXT ctx, List initial, BiFunction, List> fetcher, Function> nextPage) { return new AbstractSpliterator(Long.MAX_VALUE, 0) { private Iterator currentBatch; private boolean eof; private T previous; @Override public boolean tryAdvance(Consumer consumer) { if (eof) { return false; } else if (currentBatch == null) { currentBatch = initial.iterator(); } else if (!currentBatch.hasNext()) { if (previous == null) { eof = true; return false; } List page = nextPage.apply(previous); previous = null; if (!page.isEmpty()) { currentBatch = fetcher.apply(ctx, page).iterator(); if (!currentBatch.hasNext()) { eof = true; return false; } } else { eof = true; return false; } } T v = currentBatch.next(); if (v != null) { consumer.accept(v); previous = v; } return true; } }; } /** * Builds a {@link CommitLogEntry} using the given values. This function also includes a {@link * KeyList}, if triggered by the values of {@code currentKeyListDistance} and {@link * DatabaseAdapterConfig#getKeyListDistance()}, so read operations may happen. */ protected CommitLogEntry buildIndividualCommit( OP_CONTEXT ctx, long timeInMicros, List parentHashes, long commitSeq, ByteString commitMeta, Iterable puts, Iterable deletes, int currentKeyListDistance, Consumer newKeyLists, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { Hash commitHash = individualCommitHash(parentHashes, commitMeta, puts, deletes); int keyListDistance = currentKeyListDistance + 1; CommitLogEntry entry = CommitLogEntry.of( timeInMicros, commitHash, commitSeq, parentHashes, commitMeta, puts, deletes, keyListDistance, null, Collections.emptyList()); if (keyListDistance >= config.getKeyListDistance()) { entry = buildKeyList(ctx, entry, newKeyLists, inMemoryCommits); } return entry; } /** Calculate the hash for the content of a {@link CommitLogEntry}. */ @SuppressWarnings("UnstableApiUsage") protected Hash individualCommitHash( List parentHashes, ByteString commitMeta, Iterable puts, Iterable deletes) { Hasher hasher = newHasher(); hasher.putLong(COMMIT_LOG_HASH_SEED); parentHashes.forEach(h -> hasher.putBytes(h.asBytes().asReadOnlyByteBuffer())); hasher.putBytes(commitMeta.asReadOnlyByteBuffer()); puts.forEach( e -> { hashKey(hasher, e.getKey()); hasher.putString(e.getContentId().getId(), StandardCharsets.UTF_8); hasher.putBytes(e.getValue().asReadOnlyByteBuffer()); }); deletes.forEach(e -> hashKey(hasher, e)); return Hash.of(UnsafeByteOperations.unsafeWrap(hasher.hash().asBytes())); } /** Helper object for {@link #buildKeyList(AutoCloseable, CommitLogEntry, Consumer, Function)}. */ private static class KeyListBuildState { final ImmutableCommitLogEntry.Builder newCommitEntry; /** Builder for {@link CommitLogEntry#getKeyList()}. */ ImmutableKeyList.Builder embeddedBuilder = ImmutableKeyList.builder(); /** Builder for {@link KeyListEntity}. */ ImmutableKeyList.Builder currentKeyList; /** Already built {@link KeyListEntity}s. */ List newKeyListEntities = new ArrayList<>(); /** Flag whether {@link CommitLogEntry#getKeyList()} is being filled. */ boolean embedded = true; /** Current size of either the {@link CommitLogEntry} or current {@link KeyListEntity}. */ int currentSize; KeyListBuildState(int initialSize, ImmutableCommitLogEntry.Builder newCommitEntry) { this.currentSize = initialSize; this.newCommitEntry = newCommitEntry; } void finishKeyListEntity() { Hash id = randomHash(); newKeyListEntities.add(KeyListEntity.of(id, currentKeyList.build())); newCommitEntry.addKeyListsIds(id); } void newKeyListEntity() { currentSize = 0; currentKeyList = ImmutableKeyList.builder(); } void addToKeyListEntity(KeyListEntry keyListEntry, int keyTypeSize) { currentSize += keyTypeSize; currentKeyList.addKeys(keyListEntry); } void addToEmbedded(KeyListEntry keyListEntry, int keyTypeSize) { currentSize += keyTypeSize; embeddedBuilder.addKeys(keyListEntry); } } /** * Adds a complete key-list to the given {@link CommitLogEntry}, will read from the database. * *

The implementation fills {@link CommitLogEntry#getKeyList()} with the most recently updated * {@link Key}s. * *

If the calculated size of the database-object/row gets larger than {@link * DatabaseAdapterConfig#getMaxKeyListSize()}, the next {@link Key}s will be added to new {@link * KeyListEntity}s, each with a maximum size of {@link DatabaseAdapterConfig#getMaxKeyListSize()}. * *

The current implementation fetches all keys and "blindly" populated {@link * CommitLogEntry#getKeyList()} and nested {@link KeyListEntity} via {@link * CommitLogEntry#getKeyListsIds()}. So this implementation does not yet reuse previous {@link * KeyListEntity}s. A follow-up improvement should check if already existing {@link * KeyListEntity}s contain the same keys. This proposed optimization should be accompanied by an * optimized read of the keys: for example, if the set of changed keys only affects {@link * CommitLogEntry#getKeyList()} but not the keys via {@link KeyListEntity}, it is just unnecessary * to both read and re-write those rows for {@link KeyListEntity}. */ protected CommitLogEntry buildKeyList( OP_CONTEXT ctx, CommitLogEntry unwrittenEntry, Consumer newKeyLists, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { // Read commit-log until the previous persisted key-list Hash startHash = unwrittenEntry.getParents().get(0); // Return the new commit-log-entry with the complete-key-list ImmutableCommitLogEntry.Builder newCommitEntry = ImmutableCommitLogEntry.builder().from(unwrittenEntry).keyListDistance(0); KeyListBuildState buildState = new KeyListBuildState(entitySize(unwrittenEntry), newCommitEntry); Set keysToEnhanceWithCommitId = new HashSet<>(); Consumer addKeyListEntry = keyListEntry -> { int keyTypeSize = entitySize(keyListEntry); if (buildState.embedded) { // filling the embedded key-list in CommitLogEntry if (buildState.currentSize + keyTypeSize < maxEntitySize(config.getMaxKeyListSize())) { // CommitLogEntry.keyList still has room buildState.addToEmbedded(keyListEntry, keyTypeSize); } else { // CommitLogEntry.keyList is "full", switch to the first KeyListEntity buildState.embedded = false; buildState.newKeyListEntity(); buildState.addToKeyListEntity(keyListEntry, keyTypeSize); } } else { // filling linked key-lists via CommitLogEntry.keyListIds if (buildState.currentSize + keyTypeSize > maxEntitySize(config.getMaxKeyListEntitySize())) { // current KeyListEntity is "full", switch to a new one buildState.finishKeyListEntity(); buildState.newKeyListEntity(); } buildState.addToKeyListEntity(keyListEntry, keyTypeSize); } }; keysForCommitEntry(ctx, startHash, null, inMemoryCommits) .forEach( keyListEntry -> { if (keyListEntry.getCommitId() == null) { keysToEnhanceWithCommitId.add(keyListEntry.getKey()); } else { addKeyListEntry.accept(keyListEntry); } }); if (!keysToEnhanceWithCommitId.isEmpty()) { // Found KeyListEntry w/o commitId, need to add that information. Spliterator clSplit = readCommitLog(ctx, startHash, inMemoryCommits); while (true) { boolean more = clSplit.tryAdvance( e -> { e.getDeletes().forEach(keysToEnhanceWithCommitId::remove); for (KeyWithBytes put : e.getPuts()) { if (keysToEnhanceWithCommitId.remove(put.getKey())) { KeyListEntry entry = KeyListEntry.of( put.getKey(), put.getContentId(), put.getType(), e.getHash()); addKeyListEntry.accept(entry); } } }); if (!more || keysToEnhanceWithCommitId.isEmpty()) { break; } } } // If there's an "unfinished" KeyListEntity, build it. if (buildState.currentKeyList != null) { buildState.finishKeyListEntity(); } // Inform the (CAS)-op-loop about the IDs of the KeyListEntities being optimistically written. buildState.newKeyListEntities.stream().map(KeyListEntity::getId).forEach(newKeyLists); // Write the new KeyListEntities if (!buildState.newKeyListEntities.isEmpty()) { writeKeyListEntities(ctx, buildState.newKeyListEntities); } // Return the new commit-log-entry with the complete-key-list return newCommitEntry.keyList(buildState.embeddedBuilder.build()).build(); } protected int maxEntitySize(int value) { return value; } /** Calculate the expected size of the given {@link CommitLogEntry} in the database. */ protected abstract int entitySize(CommitLogEntry entry); /** Calculate the expected size of the given {@link CommitLogEntry} in the database. */ protected abstract int entitySize(KeyListEntry entry); /** * If the current HEAD of the target branch for a commit/transplant/merge is not equal to the * expected/reference HEAD, verify that there is no conflict, like keys in the operations of the * commit(s) contained in keys of the commits 'expectedHead (excluding) .. currentHead * (including)'. * * @return commit log entry at {@code branchHead} */ protected CommitLogEntry checkForModifiedKeysBetweenExpectedAndCurrentCommit( OP_CONTEXT ctx, CommitParams commitParams, Hash branchHead, List mismatches) throws ReferenceNotFoundException { CommitLogEntry commitAtHead = null; if (commitParams.getExpectedHead().isPresent()) { Hash expectedHead = commitParams.getExpectedHead().get(); if (!expectedHead.equals(branchHead)) { Set operationKeys = Sets.newHashSetWithExpectedSize( commitParams.getDeletes().size() + commitParams.getUnchanged().size() + commitParams.getPuts().size()); operationKeys.addAll(commitParams.getDeletes()); operationKeys.addAll(commitParams.getUnchanged()); commitParams.getPuts().stream().map(KeyWithBytes::getKey).forEach(operationKeys::add); ConflictingKeyCheckResult conflictingKeyCheckResult = checkConflictingKeysForCommit( ctx, branchHead, expectedHead, operationKeys, mismatches::add); // If the expectedHead is the special value NO_ANCESTOR, which is not persisted, // ignore the fact that it has not been seen. Otherwise, raise a // ReferenceNotFoundException that the expected-hash does not exist on the target // branch. if (!conflictingKeyCheckResult.sinceSeen && !expectedHead.equals(NO_ANCESTOR)) { throw hashNotFound(commitParams.getToBranch(), expectedHead); } commitAtHead = conflictingKeyCheckResult.headCommit; } } if (commitAtHead == null) { commitAtHead = fetchFromCommitLog(ctx, branchHead); } return commitAtHead; } /** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */ protected Stream keysForCommitEntry( OP_CONTEXT ctx, Hash hash, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { return keysForCommitEntry(ctx, hash, keyFilter, h -> null); } /** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */ protected Stream keysForCommitEntry( OP_CONTEXT ctx, Hash hash, KeyFilterPredicate keyFilter, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { // walk the commit-logs in reverse order - starting with the last persisted key-list Set seen = new HashSet<>(); Predicate predicate = keyListEntry -> seen.add(keyListEntry.getKey()); if (keyFilter != null) { predicate = predicate.and(kt -> keyFilter.check(kt.getKey(), kt.getContentId(), kt.getType())); } Predicate keyPredicate = predicate; Stream log = readCommitLogStream(ctx, hash, inMemoryCommits); log = takeUntilIncludeLast(log, e -> e.getKeyList() != null); return log.flatMap( e -> { // Add CommitLogEntry.deletes to "seen" so these keys won't be returned seen.addAll(e.getDeletes()); // Return from CommitLogEntry.puts first Stream stream = e.getPuts().stream() .map( put -> KeyListEntry.of( put.getKey(), put.getContentId(), put.getType(), e.getHash())) .filter(keyPredicate); if (e.getKeyList() != null) { // Return from CommitLogEntry.keyList after the keys in CommitLogEntry.puts Stream embedded = e.getKeyList().getKeys().stream().filter(keyPredicate); stream = Stream.concat(stream, embedded); if (!e.getKeyListsIds().isEmpty()) { // If there are nested key-lists, retrieve those lazily and add the keys from these Stream entities = Stream.of(e.getKeyListsIds()) .flatMap(ids -> fetchKeyLists(ctx, ids)) .map(KeyListEntity::getKeys) .map(KeyList::getKeys) .flatMap(Collection::stream) .filter(keyPredicate); stream = Stream.concat(stream, entities); } } return stream; }); } /** * Fetch the global-state and per-ref content for the given {@link Key}s and {@link Hash * commitSha}. Non-existing keys must not be present in the returned map. */ protected Map> fetchValues( OP_CONTEXT ctx, Hash refHead, Collection keys, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { Set remainingKeys = new HashSet<>(keys); Map nonGlobal = new HashMap<>(); Map keyToContentIds = new HashMap<>(); Set contentIdsForGlobal = new HashSet<>(); Consumer commitLogEntryHandler = entry -> { // remove deleted keys from keys to look for entry.getDeletes().forEach(remainingKeys::remove); // handle put operations for (KeyWithBytes put : entry.getPuts()) { if (!remainingKeys.remove(put.getKey())) { continue; } if (!keyFilter.check(put.getKey(), put.getContentId(), put.getType())) { continue; } nonGlobal.put(put.getKey(), put.getValue()); keyToContentIds.put(put.getKey(), put.getContentId()); if (storeWorker.requiresGlobalState(put.getValue())) { contentIdsForGlobal.add(put.getContentId()); } } }; // The algorithm is a bit complex, but not horribly complex. // // The commit-log `Stream` in the following try-with-resources fetches the commit-log until // all requested keys have been seen. // // When a commit-log-entry with key-lists is encountered, use the key-lists to determine if // and how the remaining keys need to be retrieved. // - If any of the requested keys is not in the key-lists, ignore it - it doesn't exist. // - If the `KeyListEntry` for a requested key contains the commit-ID, use the commit-ID to // directly access the commit that contains the `Put` operation for that key. // // Both the "outer" `Stream` and the result of the latter case (list of commit-log-entries) // share common functionality implemented in the function `commitLogEntryHandler`, which // handles the 'Put` operations. AtomicBoolean keyListProcessed = new AtomicBoolean(); try (Stream log = takeUntilExcludeLast(readCommitLogStream(ctx, refHead), e -> remainingKeys.isEmpty())) { log.forEach( entry -> { commitLogEntryHandler.accept(entry); if (entry.getKeyList() != null && keyListProcessed.compareAndSet(false, true)) { // CommitLogEntry has a KeyList. // All keys in 'remainingKeys', that are _not_ in the KeyList(s), can be removed, // because at this point we know that these do not exist. // // But do only process the "newest" key-list - older key lists are irrelevant. // KeyListEntry written before Nessie 0.22.0 do not have the commit-ID field set, // which means the remaining commit-log needs to be searched for the commit that // added the key - but processing older key-lists "on the way" makes no sense, // because those will not have the key either. Set remainingInKeyList = new HashSet<>(); try (Stream keyLists = Stream.concat( Stream.of(entry.getKeyList()), fetchKeyLists(ctx, entry.getKeyListsIds()).map(KeyListEntity::getKeys))) { keyLists .flatMap(keyList -> keyList.getKeys().stream()) .filter(keyListEntry -> remainingKeys.contains(keyListEntry.getKey())) .forEach(remainingInKeyList::add); } if (!remainingInKeyList.isEmpty()) { List commitLogEntries = fetchMultipleFromCommitLog( ctx, remainingInKeyList.stream() .map(KeyListEntry::getCommitId) .filter(Objects::nonNull) .distinct() .collect(Collectors.toList()), h -> null); commitLogEntries.forEach(commitLogEntryHandler); } remainingKeys.retainAll( remainingInKeyList.stream() .map(KeyListEntry::getKey) .collect(Collectors.toSet())); } }); } Map globals = contentIdsForGlobal.isEmpty() ? Collections.emptyMap() : fetchGlobalStates(ctx, contentIdsForGlobal); return nonGlobal.entrySet().stream() .collect( Collectors.toMap( Entry::getKey, e -> ContentAndState.of( e.getValue(), globals.get(keyToContentIds.get(e.getKey()))))); } /** * Fetches the global-state information for the given content-ids. * * @param ctx technical context * @param contentIds the content-ids to fetch * @return map of content-id to state */ protected final Map fetchGlobalStates( OP_CONTEXT ctx, Set contentIds) throws ReferenceNotFoundException { try (Traced ignore = trace("fetchGlobalStates").tag(TAG_COUNT, contentIds.size())) { return doFetchGlobalStates(ctx, contentIds); } } protected abstract Map doFetchGlobalStates( OP_CONTEXT ctx, Set contentIds) throws ReferenceNotFoundException; @VisibleForTesting public final Stream fetchKeyLists(OP_CONTEXT ctx, List keyListsIds) { if (keyListsIds.isEmpty()) { return Stream.empty(); } try (Traced ignore = trace("fetchKeyLists").tag(TAG_COUNT, keyListsIds.size())) { return doFetchKeyLists(ctx, keyListsIds); } } protected abstract Stream doFetchKeyLists(OP_CONTEXT ctx, List keyListsIds); /** * Write a new commit-entry, the given commit entry is to be persisted as is. All values of the * given {@link CommitLogEntry} can be considered valid and consistent. * *

Implementations however can enforce strict consistency checks/guarantees, like a best-effort * approach to prevent hash-collisions but without any other consistency checks/guarantees. */ protected final void writeIndividualCommit(OP_CONTEXT ctx, CommitLogEntry entry) throws ReferenceConflictException { try (Traced ignore = trace("writeIndividualCommit")) { doWriteIndividualCommit(ctx, entry); } } protected abstract void doWriteIndividualCommit(OP_CONTEXT ctx, CommitLogEntry entry) throws ReferenceConflictException; /** * Write multiple new commit-entries, the given commit entries are to be persisted as is. All * values of the * given {@link CommitLogEntry} can be considered valid and consistent. * *

Implementations however can enforce strict consistency checks/guarantees, like a best-effort * approach to prevent hash-collisions but without any other consistency checks/guarantees. */ protected final void writeMultipleCommits(OP_CONTEXT ctx, List entries) throws ReferenceConflictException { try (Traced ignore = trace("writeMultipleCommits").tag(TAG_COUNT, entries.size())) { doWriteMultipleCommits(ctx, entries); } } protected abstract void doWriteMultipleCommits(OP_CONTEXT ctx, List entries) throws ReferenceConflictException; @VisibleForTesting public final void writeKeyListEntities(OP_CONTEXT ctx, List newKeyListEntities) { try (Traced ignore = trace("writeKeyListEntities").tag(TAG_COUNT, newKeyListEntities.size())) { doWriteKeyListEntities(ctx, newKeyListEntities); } } protected abstract void doWriteKeyListEntities( OP_CONTEXT ctx, List newKeyListEntities); protected static final class ConflictingKeyCheckResult { private boolean sinceSeen; private CommitLogEntry headCommit; public boolean isSinceSeen() { return sinceSeen; } public CommitLogEntry getHeadCommit() { return headCommit; } } /** * Check whether the commits in the range {@code sinceCommitExcluding] .. [upToCommitIncluding} * contain any of the given {@link Key}s. * *

Conflicts are reported via {@code mismatches}. */ protected ConflictingKeyCheckResult checkConflictingKeysForCommit( OP_CONTEXT ctx, Hash upToCommitIncluding, Hash sinceCommitExcluding, Set keys, Consumer mismatches) throws ReferenceNotFoundException { ConflictingKeyCheckResult result = new ConflictingKeyCheckResult(); Stream log = readCommitLogStream(ctx, upToCommitIncluding); log = takeUntilExcludeLast( log, e -> { if (e.getHash().equals(upToCommitIncluding)) { result.headCommit = e; } if (e.getHash().equals(sinceCommitExcluding)) { result.sinceSeen = true; return true; } return false; }); Set handled = new HashSet<>(); log.forEach( e -> { e.getPuts() .forEach( a -> { if (keys.contains(a.getKey()) && handled.add(a.getKey())) { mismatches.accept( String.format( "Key '%s' has conflicting put-operation from commit '%s'.", a.getKey(), e.getHash().asString())); } }); e.getDeletes() .forEach( a -> { if (keys.contains(a) && handled.add(a)) { mismatches.accept( String.format( "Key '%s' has conflicting delete-operation from commit '%s'.", a, e.getHash().asString())); } }); }); return result; } protected final class CommonAncestorState { final Iterator toLog; final List toCommitHashesList; final Set toCommitHashes = new HashSet<>(); public CommonAncestorState(OP_CONTEXT ctx, Hash toHead, boolean trackCount) { this.toLog = Spliterators.iterator(readCommitLogHashes(ctx, toHead)); this.toCommitHashesList = trackCount ? new ArrayList<>() : null; } boolean fetchNext() { if (toLog.hasNext()) { Hash hash = toLog.next(); toCommitHashes.add(hash); if (toCommitHashesList != null) { toCommitHashesList.add(hash); } return true; } return false; } public boolean contains(Hash candidate) { return toCommitHashes.contains(candidate); } public int indexOf(Hash hash) { return toCommitHashesList.indexOf(hash); } } /** * Finds the common-ancestor of two commit-log-entries. If no common-ancestor is found, throws a * {@link ReferenceConflictException} or. Otherwise, this method returns the hash of the * common-ancestor. */ protected Hash findCommonAncestor(OP_CONTEXT ctx, Hash from, NamedRef toBranch, Hash toHead) throws ReferenceConflictException { // TODO this implementation requires guardrails: // max number of "to"-commits to fetch, max number of "from"-commits to fetch, // both impact the cost (CPU, memory, I/O) of a merge operation. CommonAncestorState commonAncestorState = new CommonAncestorState(ctx, toHead, false); Hash commonAncestorHash = findCommonAncestor(ctx, from, commonAncestorState, (dist, hash) -> hash); if (commonAncestorHash == null) { throw new ReferenceConflictException( String.format( "No common ancestor found for merge of '%s' into branch '%s'", from, toBranch.getName())); } return commonAncestorHash; } protected R findCommonAncestor( OP_CONTEXT ctx, Hash from, CommonAncestorState state, BiFunction result) { Iterator fromLog = Spliterators.iterator(readCommitLogHashes(ctx, from)); List fromCommitHashes = new ArrayList<>(); while (true) { boolean anyFetched = false; for (int i = 0; i < config.getParentsPerCommit(); i++) { if (state.fetchNext()) { anyFetched = true; } if (fromLog.hasNext()) { fromCommitHashes.add(fromLog.next()); anyFetched = true; } } if (!anyFetched) { return null; } for (int diffOnFrom = 0; diffOnFrom < fromCommitHashes.size(); diffOnFrom++) { Hash f = fromCommitHashes.get(diffOnFrom); if (state.contains(f)) { return result.apply(diffOnFrom, f); } } } } /** * For merge/transplant, verifies that the given commits do not touch any of the given keys. * * @param commitsChronological list of commit-log-entries, in order of commit-operations, * chronological order */ protected void checkForKeyCollisions( OP_CONTEXT ctx, Hash refHead, Set keysTouchedOnTarget, List commitsChronological) throws ReferenceConflictException, ReferenceNotFoundException { Set keyCollisions = new HashSet<>(); for (int i = commitsChronological.size() - 1; i >= 0; i--) { CommitLogEntry sourceCommit = commitsChronological.get(i); Stream.concat( sourceCommit.getPuts().stream().map(KeyWithBytes::getKey), sourceCommit.getDeletes().stream()) .filter(keysTouchedOnTarget::contains) .forEach(keyCollisions::add); } if (!keyCollisions.isEmpty()) { removeKeyCollisionsForNamespaces( ctx, refHead, commitsChronological.get(commitsChronological.size() - 1).getHash(), keyCollisions); if (!keyCollisions.isEmpty()) { throw new ReferenceConflictException( String.format( "The following keys have been changed in conflict: %s", keyCollisions.stream() .map(k -> String.format("'%s'", k.toString())) .collect(Collectors.joining(", ")))); } } } /** * If any key collision was found, we need to check whether the key collision was happening on a * Namespace and if so, remove that key collision, since Namespaces can be merged/transplanted * without problems. * * @param ctx The context * @param hashFromTarget The hash from the target branch * @param hashFromSource The hash from the source branch * @param keyCollisions The found key collisions * @throws ReferenceNotFoundException If the given reference could not be found */ private void removeKeyCollisionsForNamespaces( OP_CONTEXT ctx, Hash hashFromTarget, Hash hashFromSource, Set keyCollisions) throws ReferenceNotFoundException { Predicate>> isNamespace = e -> storeWorker.isNamespace(e.getValue().getRefState()); Set namespacesOnTarget = fetchValues(ctx, hashFromTarget, keyCollisions, ALLOW_ALL).entrySet().stream() .filter(isNamespace) .map(Entry::getKey) .collect(Collectors.toSet()); // this will be an implicit set intersection between the namespaces on source & target Set intersection = fetchValues(ctx, hashFromSource, namespacesOnTarget, ALLOW_ALL).entrySet().stream() .filter(isNamespace) .map(Entry::getKey) .collect(Collectors.toSet()); // remove all keys related to namespaces from the existing collisions intersection.forEach(keyCollisions::remove); } /** * For merge/transplant, collect the content-keys that were modified in the given list of entries. * * @param commitsReverseChronological list of commit-log-entries, in reverse order of * commit-operations, reverse chronological order */ protected Set collectModifiedKeys(List commitsReverseChronological) { Set keysTouchedOnTarget = new HashSet<>(); commitsReverseChronological.forEach( e -> { e.getPuts().stream().map(KeyWithBytes::getKey).forEach(keysTouchedOnTarget::add); e.getDeletes().forEach(keysTouchedOnTarget::remove); }); return keysTouchedOnTarget; } /** * For merge/transplant, applies one squashed commit derived from the given commits onto the * target-hash. */ protected Hash squashCommits( OP_CONTEXT ctx, long timeInMicros, Hash toHead, List commitsToMergeChronological, Consumer newKeyLists, MetadataRewriter rewriteMetadata, Predicate includeKeyPredicate) throws ReferenceConflictException, ReferenceNotFoundException { List commitMeta = new ArrayList<>(); Map puts = new HashMap<>(); Set deletes = new HashSet<>(); for (int i = commitsToMergeChronological.size() - 1; i >= 0; i--) { CommitLogEntry source = commitsToMergeChronological.get(i); for (Key delete : source.getDeletes()) { if (includeKeyPredicate.test(delete)) { deletes.add(delete); puts.remove(delete); } } for (KeyWithBytes put : source.getPuts()) { if (includeKeyPredicate.test(put.getKey())) { deletes.remove(put.getKey()); puts.put(put.getKey(), put); } } commitMeta.add(source.getMetadata()); } if (puts.isEmpty() && deletes.isEmpty()) { // Copied commit will not contain any operation, skip. return toHead; } ByteString newCommitMeta = rewriteMetadata.squash(commitMeta); CommitLogEntry targetHeadCommit = fetchFromCommitLog(ctx, toHead); int parentsPerCommit = config.getParentsPerCommit(); List parents = new ArrayList<>(parentsPerCommit); parents.add(toHead); long commitSeq; int keyListDistance; if (targetHeadCommit != null) { List p = targetHeadCommit.getParents(); parents.addAll(p.subList(0, Math.min(p.size(), parentsPerCommit - 1))); commitSeq = targetHeadCommit.getCommitSeq() + 1; keyListDistance = targetHeadCommit.getKeyListDistance(); } else { commitSeq = 1; keyListDistance = 0; } CommitLogEntry squashedCommit = buildIndividualCommit( ctx, timeInMicros, parents, commitSeq, newCommitMeta, puts.values(), deletes, keyListDistance, newKeyLists, h -> null); writeIndividualCommit(ctx, squashedCommit); return squashedCommit.getHash(); } /** For merge/transplant, applies the given commits onto the target-hash. */ protected Hash copyCommits( OP_CONTEXT ctx, long timeInMicros, Hash targetHead, List commitsChronological, Consumer newKeyLists, MetadataRewriter rewriteMetadata, Predicate includeKeyPredicate) throws ReferenceNotFoundException { int parentsPerCommit = config.getParentsPerCommit(); List parents = new ArrayList<>(parentsPerCommit); CommitLogEntry targetHeadCommit = fetchFromCommitLog(ctx, targetHead); long commitSeq; if (targetHeadCommit != null) { parents.addAll(targetHeadCommit.getParents()); commitSeq = targetHeadCommit.getCommitSeq() + 1; } else { commitSeq = 1L; } int keyListDistance = targetHeadCommit != null ? targetHeadCommit.getKeyListDistance() : 0; Map unwrittenCommits = new HashMap<>(); // Rewrite commits to transplant and store those in 'commitsToTransplantReverse' for (int i = commitsChronological.size() - 1; i >= 0; i--, commitSeq++) { CommitLogEntry sourceCommit = commitsChronological.get(i); List puts = sourceCommit.getPuts().stream() .filter(p -> includeKeyPredicate.test(p.getKey())) .collect(Collectors.toList()); List deletes = sourceCommit.getDeletes().stream() .filter(includeKeyPredicate) .collect(Collectors.toList()); if (puts.isEmpty() && deletes.isEmpty()) { // Copied commit will not contain any operation, skip. commitsChronological.remove(i); continue; } while (parents.size() > parentsPerCommit - 1) { parents.remove(parentsPerCommit - 1); } if (parents.isEmpty()) { parents.add(targetHead); } else { parents.add(0, targetHead); } ByteString updatedMetadata = rewriteMetadata.rewriteSingle(sourceCommit.getMetadata()); CommitLogEntry newEntry = buildIndividualCommit( ctx, timeInMicros, parents, commitSeq, updatedMetadata, puts, deletes, keyListDistance, newKeyLists, unwrittenCommits::get); keyListDistance = newEntry.getKeyListDistance(); unwrittenCommits.put(newEntry.getHash(), newEntry); if (!newEntry.getHash().equals(sourceCommit.getHash())) { commitsChronological.set(i, newEntry); } else { // Newly built CommitLogEntry is equal to the CommitLogEntry to transplant. // This can happen, if the commit to transplant has NO_ANCESTOR as its parent. commitsChronological.remove(i); } targetHead = newEntry.getHash(); } return targetHead; } /** * Verifies that the current global-states match the {@code expectedStates}, produces human * readable messages for the violations. */ protected void checkExpectedGlobalStates( OP_CONTEXT ctx, CommitParams commitParams, Consumer mismatches) throws ReferenceNotFoundException { Map> expectedStates = commitParams.getExpectedStates(); if (expectedStates.isEmpty()) { return; } Map globalStates = fetchGlobalStates(ctx, expectedStates.keySet()); for (Entry> expectedState : expectedStates.entrySet()) { ByteString currentState = globalStates.get(expectedState.getKey()); if (currentState == null) { if (expectedState.getValue().isPresent()) { mismatches.accept( String.format( "No current global-state for content-id '%s'.", expectedState.getKey())); } } else { if (!expectedState.getValue().isPresent()) { // This happens, when a table's being created on a branch, but that table already exists. mismatches.accept( String.format( "Global-state for content-id '%s' already exists.", expectedState.getKey())); } else if (!currentState.equals(expectedState.getValue().get())) { mismatches.accept( String.format( "Mismatch in global-state for content-id '%s'.", expectedState.getKey())); } } } } /** Load the refLog entry for the given hash, return {@code null}, if not found. */ protected final RefLog fetchFromRefLog(OP_CONTEXT ctx, Hash refLogId) { try (Traced ignore = trace("fetchFromRefLog").tag(TAG_HASH, refLogId != null ? refLogId.asString() : "HEAD")) { return doFetchFromRefLog(ctx, refLogId); } } protected abstract RefLog doFetchFromRefLog(OP_CONTEXT ctx, Hash refLogId); /** * Fetch multiple {@link RefLog refLog-entries} from the refLog. The returned list must have * exactly as many elements as in the parameter {@code hashes}. Non-existing hashes are returned * as {@code null}. */ protected final List fetchPageFromRefLog(OP_CONTEXT ctx, List hashes) { if (hashes.isEmpty()) { return Collections.emptyList(); } try (Traced ignore = trace("fetchPageFromRefLog") .tag(TAG_HASH, hashes.get(0).asString()) .tag(TAG_COUNT, hashes.size())) { return doFetchPageFromRefLog(ctx, hashes); } } protected abstract List doFetchPageFromRefLog(OP_CONTEXT ctx, List hashes); /** Reads from the refLog starting at the given refLog-hash. */ protected Stream readRefLogStream(OP_CONTEXT ctx, Hash initialHash) throws RefLogNotFoundException { Spliterator split = readRefLog(ctx, initialHash); return StreamSupport.stream(split, false); } protected Spliterator readRefLog(OP_CONTEXT ctx, Hash initialHash) throws RefLogNotFoundException { if (NO_ANCESTOR.equals(initialHash)) { return Spliterators.emptySpliterator(); } RefLog initial = fetchFromRefLog(ctx, initialHash); if (initial == null) { throw RefLogNotFoundException.forRefLogId(initialHash.asString()); } return logFetcher(ctx, initial, this::fetchPageFromRefLog, RefLog::getParents); } protected void tryLoopStateCompletion(@Nonnull Boolean success, TryLoopState state) { tryLoopFinished( success ? "success" : "fail", state.getRetries(), state.getDuration(NANOSECONDS)); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy