All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.projectnessie.versioned.persist.adapter.spi.AbstractDatabaseAdapter Maven / Gradle / Ivy

There is a newer version: 0.59.0
Show newest version
/*
 * Copyright (C) 2020 Dremio
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.projectnessie.versioned.persist.adapter.spi;

import static java.util.Collections.emptyList;
import static java.util.Collections.singletonList;
import static java.util.concurrent.TimeUnit.NANOSECONDS;
import static org.projectnessie.versioned.persist.adapter.KeyFilterPredicate.ALLOW_ALL;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterMetrics.tryLoopFinished;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashKey;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.hashNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.newHasher;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.referenceNotFound;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilExcludeLast;
import static org.projectnessie.versioned.persist.adapter.spi.DatabaseAdapterUtil.takeUntilIncludeLast;
import static org.projectnessie.versioned.persist.adapter.spi.Traced.trace;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Sets;
import com.google.common.hash.Hasher;
import com.google.errorprone.annotations.MustBeClosed;
import com.google.protobuf.ByteString;
import com.google.protobuf.UnsafeByteOperations;
import io.opentracing.Span;
import io.opentracing.Tracer;
import io.opentracing.log.Fields;
import io.opentracing.tag.Tags;
import io.opentracing.util.GlobalTracer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.Spliterators.AbstractSpliterator;
import java.util.concurrent.Callable;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import java.util.stream.IntStream;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import javax.annotation.Nonnull;
import org.projectnessie.model.Content;
import org.projectnessie.versioned.BranchName;
import org.projectnessie.versioned.ContentAttachment;
import org.projectnessie.versioned.Diff;
import org.projectnessie.versioned.GetNamedRefsParams;
import org.projectnessie.versioned.GetNamedRefsParams.RetrieveOptions;
import org.projectnessie.versioned.Hash;
import org.projectnessie.versioned.ImmutableKeyDetails;
import org.projectnessie.versioned.ImmutableMergeResult;
import org.projectnessie.versioned.ImmutableReferenceInfo;
import org.projectnessie.versioned.Key;
import org.projectnessie.versioned.MergeConflictException;
import org.projectnessie.versioned.MergeResult;
import org.projectnessie.versioned.MergeResult.ConflictType;
import org.projectnessie.versioned.MergeResult.KeyDetails;
import org.projectnessie.versioned.MergeType;
import org.projectnessie.versioned.MetadataRewriter;
import org.projectnessie.versioned.NamedRef;
import org.projectnessie.versioned.RefLogNotFoundException;
import org.projectnessie.versioned.ReferenceConflictException;
import org.projectnessie.versioned.ReferenceInfo;
import org.projectnessie.versioned.ReferenceInfo.CommitsAheadBehind;
import org.projectnessie.versioned.ReferenceNotFoundException;
import org.projectnessie.versioned.StoreWorker;
import org.projectnessie.versioned.TagName;
import org.projectnessie.versioned.persist.adapter.CommitLogEntry;
import org.projectnessie.versioned.persist.adapter.CommitLogEntry.KeyListVariant;
import org.projectnessie.versioned.persist.adapter.CommitParams;
import org.projectnessie.versioned.persist.adapter.ContentAndState;
import org.projectnessie.versioned.persist.adapter.ContentId;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapter;
import org.projectnessie.versioned.persist.adapter.DatabaseAdapterConfig;
import org.projectnessie.versioned.persist.adapter.Difference;
import org.projectnessie.versioned.persist.adapter.ImmutableCommitLogEntry;
import org.projectnessie.versioned.persist.adapter.KeyFilterPredicate;
import org.projectnessie.versioned.persist.adapter.KeyList;
import org.projectnessie.versioned.persist.adapter.KeyListEntity;
import org.projectnessie.versioned.persist.adapter.KeyListEntry;
import org.projectnessie.versioned.persist.adapter.KeyWithBytes;
import org.projectnessie.versioned.persist.adapter.MergeParams;
import org.projectnessie.versioned.persist.adapter.MetadataRewriteParams;
import org.projectnessie.versioned.persist.adapter.RefLog;
import org.projectnessie.versioned.persist.adapter.TransplantParams;
import org.projectnessie.versioned.persist.adapter.events.AdapterEvent;
import org.projectnessie.versioned.persist.adapter.events.AdapterEventConsumer;
import org.projectnessie.versioned.store.DefaultStoreWorker;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Contains all the database-independent logic for a Database-adapter.
 *
 * 

This class does not implement everything from {@link * org.projectnessie.versioned.persist.adapter.DatabaseAdapter}. * *

Implementations must consider that production environments may use instances of this class in * JAX-RS {@code RequestScope}, which means that it must be very cheap to create new instances of * the implementations. * *

Managed resources like a connection-pool must be managed outside of {@link * AbstractDatabaseAdapter} implementations. The recommended way to "inject" such managed resources * into short-lived {@link AbstractDatabaseAdapter} implementations is via a special configuration * attribute. * * @param context for each operation, so for each operation in {@link * org.projectnessie.versioned.persist.adapter.DatabaseAdapter} that requires database access. * For example, used to have one "borrowed" database connection per database-adapter operation. * @param configuration interface type for the concrete implementation */ public abstract class AbstractDatabaseAdapter< OP_CONTEXT extends AutoCloseable, CONFIG extends DatabaseAdapterConfig> implements DatabaseAdapter { private static final Logger LOGGER = LoggerFactory.getLogger(AbstractDatabaseAdapter.class); protected static final String TAG_HASH = "hash"; protected static final String TAG_COUNT = "count"; protected final CONFIG config; protected static final StoreWorker STORE_WORKER = DefaultStoreWorker.instance(); private final AdapterEventConsumer eventConsumer; @SuppressWarnings("UnstableApiUsage") public static final Hash NO_ANCESTOR = Hash.of( UnsafeByteOperations.unsafeWrap( newHasher().putString("empty", StandardCharsets.UTF_8).hash().asBytes())); protected static long COMMIT_LOG_HASH_SEED = 946928273206945677L; protected AbstractDatabaseAdapter(CONFIG config, AdapterEventConsumer eventConsumer) { Objects.requireNonNull(config, "config parameter must not be null"); this.config = config; this.eventConsumer = eventConsumer; } @Override public CONFIG getConfig() { return config; } @VisibleForTesting public AdapterEventConsumer getEventConsumer() { return eventConsumer; } @VisibleForTesting public abstract OP_CONTEXT borrowConnection(); @Override public Hash noAncestorHash() { return NO_ANCESTOR; } @Override public Stream fetchCommitLogEntries(Stream hashes) { OP_CONTEXT ctx = borrowConnection(); BatchSpliterator commitFetcher = new BatchSpliterator<>( config.getParentsPerCommit(), hashes, h -> fetchMultipleFromCommitLog(ctx, h, ignore -> null).spliterator(), 0); return StreamSupport.stream(commitFetcher, false) .onClose( () -> { try { ctx.close(); } catch (Exception e) { throw new RuntimeException(e); } }); } @Override public CommitLogEntry rebuildKeyList( CommitLogEntry entry, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { try (OP_CONTEXT ctx = borrowConnection()) { return buildKeyList(ctx, entry, h -> {}, inMemoryCommits); } catch (ReferenceNotFoundException e) { throw e; } catch (Exception e) { throw new RuntimeException(e); } } // ///////////////////////////////////////////////////////////////////////////////////////////// // DatabaseAdapter subclass API (protected) // ///////////////////////////////////////////////////////////////////////////////////////////// /** * Logic implementation of a commit-attempt. * * @param ctx technical operation-context * @param commitParams commit parameters * @param branchHead current HEAD of {@code branch} * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return optimistically written commit-log-entry */ protected CommitLogEntry commitAttempt( OP_CONTEXT ctx, long timeInMicros, Hash branchHead, CommitParams commitParams, Consumer newKeyLists) throws ReferenceNotFoundException, ReferenceConflictException { List mismatches = new ArrayList<>(); Callable validator = commitParams.getValidator(); if (validator != null) { try { validator.call(); } catch (RuntimeException e) { // just propagate the RuntimeException up throw e; } catch (Exception e) { throw new RuntimeException(e); } } checkContentKeysUnique(commitParams); // verify expected global-states checkExpectedGlobalStates(ctx, commitParams, mismatches::add); CommitLogEntry currentBranchEntry = checkForModifiedKeysBetweenExpectedAndCurrentCommit( ctx, commitParams, branchHead, mismatches); if (!mismatches.isEmpty()) { throw new ReferenceConflictException(String.join("\n", mismatches)); } int parentsPerCommit = config.getParentsPerCommit(); List newParents = new ArrayList<>(parentsPerCommit); newParents.add(branchHead); long commitSeq; if (currentBranchEntry != null) { List p = currentBranchEntry.getParents(); newParents.addAll(p.subList(0, Math.min(p.size(), parentsPerCommit - 1))); commitSeq = currentBranchEntry.getCommitSeq() + 1; } else { commitSeq = 1; } // Helps when building a new key-list to not fetch the current commit again. Function currentCommit = h -> h.equals(branchHead) ? currentBranchEntry : null; CommitLogEntry newBranchCommit = buildIndividualCommit( ctx, timeInMicros, newParents, commitSeq, commitParams.getCommitMetaSerialized(), commitParams.getPuts(), commitParams.getDeletes(), currentBranchEntry != null ? currentBranchEntry.getKeyListDistance() : 0, newKeyLists, currentCommit, emptyList()); writeIndividualCommit(ctx, newBranchCommit); persistAttachments(ctx, commitParams.getAttachments().stream()); return newBranchCommit; } private static void checkContentKeysUnique(CommitParams commitParams) { Set keys = new HashSet<>(); Set duplicates = new HashSet<>(); Stream.concat( Stream.concat( commitParams.getDeletes().stream(), commitParams.getPuts().stream().map(KeyWithBytes::getKey)), commitParams.getUnchanged().stream()) .forEach( key -> { if (!keys.add(key)) { duplicates.add(key); } }); if (!duplicates.isEmpty()) { throw new IllegalArgumentException( String.format( "Duplicate keys are not allowed in a commit: %s", duplicates.stream().map(Key::toString).collect(Collectors.joining(", ")))); } } /** * Logic implementation of a merge-attempt. * * @param ctx technical operation context * @param toHead current HEAD of {@code toBranch} * @param branchCommits consumer for the individual commits to merge * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return hash of the last commit-log-entry written to {@code toBranch} */ protected Hash mergeAttempt( OP_CONTEXT ctx, long timeInMicros, Hash toHead, Consumer branchCommits, Consumer newKeyLists, Consumer writtenCommits, MergeParams mergeParams, ImmutableMergeResult.Builder mergeResult) throws ReferenceNotFoundException, ReferenceConflictException { validateHashExists(ctx, mergeParams.getMergeFromHash()); // 1. ensure 'expectedHash' is a parent of HEAD-of-'toBranch' hashOnRef(ctx, mergeParams.getToBranch(), mergeParams.getExpectedHead(), toHead); mergeParams.getExpectedHead().ifPresent(mergeResult::expectedHash); mergeResult.targetBranch(mergeParams.getToBranch()).effectiveTargetHash(toHead); // 2. find nearest common-ancestor between 'from' + 'fromHash' Hash commonAncestor = findCommonAncestor(ctx, mergeParams.getMergeFromHash(), mergeParams.getToBranch(), toHead); mergeResult.commonAncestor(commonAncestor); // 3. Collect commit-log-entries List toEntriesReverseChronological; try (Stream commits = readCommitLogStream(ctx, toHead)) { toEntriesReverseChronological = takeUntilExcludeLast(commits, e -> e.getHash().equals(commonAncestor)) .collect(Collectors.toList()); } toEntriesReverseChronological.forEach(mergeResult::addTargetCommits); List commitsToMergeChronological; try (Stream commits = readCommitLogStream(ctx, mergeParams.getMergeFromHash())) { commitsToMergeChronological = takeUntilExcludeLast(commits, e -> e.getHash().equals(commonAncestor)) .collect(Collectors.toList()); } if (commitsToMergeChronological.isEmpty()) { // Nothing to merge, shortcut throw new IllegalArgumentException( String.format( "No hashes to merge from '%s' onto '%s' @ '%s'.", mergeParams.getMergeFromHash().asString(), mergeParams.getToBranch().getName(), toHead)); } commitsToMergeChronological.forEach(mergeResult::addSourceCommits); return mergeTransplantCommon( ctx, timeInMicros, toHead, branchCommits, newKeyLists, commitsToMergeChronological, toEntriesReverseChronological, mergeParams, mergeResult, writtenCommits, singletonList(commitsToMergeChronological.get(0).getHash())); } /** * Logic implementation of a transplant-attempt. * * @param ctx technical operation context * @param targetHead current HEAD of {@code targetBranch} * @param branchCommits consumer for the individual commits to merge * @param newKeyLists consumer for optimistically written {@link KeyListEntity}s * @return hash of the last commit-log-entry written to {@code targetBranch} */ protected Hash transplantAttempt( OP_CONTEXT ctx, long timeInMicros, Hash targetHead, Consumer branchCommits, Consumer newKeyLists, Consumer writtenCommits, TransplantParams transplantParams, ImmutableMergeResult.Builder mergeResult) throws ReferenceNotFoundException, ReferenceConflictException { if (transplantParams.getSequenceToTransplant().isEmpty()) { throw new IllegalArgumentException("No hashes to transplant given."); } transplantParams.getExpectedHead().ifPresent(mergeResult::expectedHash); mergeResult.targetBranch(transplantParams.getToBranch()).effectiveTargetHash(targetHead); // 1. ensure 'expectedHash' is a parent of HEAD-of-'targetBranch' & collect keys List targetEntriesReverseChronological = new ArrayList<>(); hashOnRef( ctx, targetHead, transplantParams.getToBranch(), transplantParams.getExpectedHead(), targetEntriesReverseChronological::add); targetEntriesReverseChronological.forEach(mergeResult::addTargetCommits); // Exclude the expected-hash on the target-branch from key-collisions check if (!targetEntriesReverseChronological.isEmpty() && transplantParams.getExpectedHead().isPresent() && targetEntriesReverseChronological .get(0) .getHash() .equals(transplantParams.getExpectedHead().get())) { targetEntriesReverseChronological.remove(0); } // 4. ensure 'sequenceToTransplant' is sequential int[] index = {transplantParams.getSequenceToTransplant().size() - 1}; Hash lastHash = transplantParams .getSequenceToTransplant() .get(transplantParams.getSequenceToTransplant().size() - 1); List commitsToTransplantChronological; try (Stream commits = readCommitLogStream(ctx, lastHash)) { commitsToTransplantChronological = takeUntilExcludeLast( commits, e -> { int i = index[0]--; if (i == -1) { return true; } if (!e.getHash().equals(transplantParams.getSequenceToTransplant().get(i))) { throw new IllegalArgumentException("Sequence of hashes is not contiguous."); } return false; }) .collect(Collectors.toList()); } commitsToTransplantChronological.forEach(mergeResult::addSourceCommits); // 5. check for key-collisions return mergeTransplantCommon( ctx, timeInMicros, targetHead, branchCommits, newKeyLists, commitsToTransplantChronological, targetEntriesReverseChronological, transplantParams, mergeResult, writtenCommits, emptyList()); } protected Hash mergeTransplantCommon( OP_CONTEXT ctx, long timeInMicros, Hash toHead, Consumer branchCommits, Consumer newKeyLists, List commitsToMergeChronological, List toEntriesReverseChronological, MetadataRewriteParams params, ImmutableMergeResult.Builder mergeResult, Consumer writtenCommits, List additionalParents) throws ReferenceConflictException, ReferenceNotFoundException { // Collect modified keys. Collections.reverse(toEntriesReverseChronological); Map keyDetailsMap = new HashMap<>(); Function mergeType = key -> params.getMergeTypes().getOrDefault(key, params.getDefaultMergeType()); Function keyDetails = key -> keyDetailsMap.computeIfAbsent( key, x -> KeyDetails.builder().mergeType(mergeType.apply(key))); BiConsumer, BiConsumer>> keysFromCommitsToKeyDetails = (commits, receiver) -> { Map> keyHashesMap = new HashMap<>(); Function> keyHashes = key -> keyHashesMap.computeIfAbsent(key, x -> new LinkedHashSet<>()); commits.forEach( commit -> { commit .getDeletes() .forEach(delete -> keyHashes.apply(delete).add(commit.getHash())); commit .getPuts() .forEach(put -> keyHashes.apply(put.getKey()).add(commit.getHash())); }); keyHashesMap.forEach((key, hashes) -> receiver.accept(keyDetails.apply(key), hashes)); }; Set keysTouchedOnTarget = new HashSet<>(); keysFromCommitsToKeyDetails.accept( commitsToMergeChronological.stream(), ImmutableKeyDetails.Builder::addAllSourceCommits); keysFromCommitsToKeyDetails.accept( toEntriesReverseChronological.stream() .peek( e -> { e.getPuts().stream().map(KeyWithBytes::getKey).forEach(keysTouchedOnTarget::add); e.getDeletes().forEach(keysTouchedOnTarget::remove); }), ImmutableKeyDetails.Builder::addAllTargetCommits); Predicate skipCheckPredicate = k -> mergeType.apply(k).isSkipCheck(); Predicate mergePredicate = k -> mergeType.apply(k).isMerge(); // Ignore keys in collision-check that will not be merged or collision-checked keysTouchedOnTarget.removeIf(skipCheckPredicate); boolean hasCollisions = hasKeyCollisions(ctx, toHead, keysTouchedOnTarget, commitsToMergeChronological, keyDetails); keyDetailsMap.forEach((key, details) -> mergeResult.putDetails(key, details.build())); mergeResult.wasSuccessful(!hasCollisions); if (hasCollisions && !params.isDryRun()) { MergeResult result = mergeResult.resultantTargetHash(toHead).build(); throw new MergeConflictException( String.format( "The following keys have been changed in conflict: %s", result.getDetails().entrySet().stream() .filter(e -> e.getValue().getConflictType() != ConflictType.NONE) .map(Map.Entry::getKey) .sorted() .map(key -> String.format("'%s'", key)) .collect(Collectors.joining(", "))), result); } if (params.isDryRun() || hasCollisions) { return toHead; } if (params.keepIndividualCommits()) { // re-apply commits in 'sequenceToTransplant' onto 'targetBranch' toHead = copyCommits( ctx, timeInMicros, toHead, commitsToMergeChronological, newKeyLists, params.getUpdateCommitMetadata(), mergePredicate); // Write commits writeMultipleCommits(ctx, commitsToMergeChronological); commitsToMergeChronological.stream() .peek(writtenCommits) .map(CommitLogEntry::getHash) .forEach(branchCommits); } else { CommitLogEntry squashed = squashCommits( ctx, timeInMicros, toHead, commitsToMergeChronological, newKeyLists, params.getUpdateCommitMetadata(), mergePredicate, additionalParents); if (squashed != null) { writtenCommits.accept(squashed); toHead = squashed.getHash(); } } return toHead; } /** * Compute the diff between two references. * * @param ctx technical operation context * @param from "from" reference to compute the difference from, appears on the "from" side in * {@link Diff} with hash in {@code from} to compute the diff for, must exist in {@code from} * @param to "to" reference to compute the difference from, appears on the "to" side in {@link * Diff} with hash in {@code to} to compute the diff for, must exist in {@code to} * @param keyFilter optional filter on key + content-id + content-type * @return computed difference */ @MustBeClosed protected Stream buildDiff( OP_CONTEXT ctx, Hash from, Hash to, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { // TODO this implementation works, but is definitely not the most efficient one. Set allKeys = new HashSet<>(); try (Stream s = keysForCommitEntry(ctx, from, keyFilter).map(KeyListEntry::getKey)) { s.forEach(allKeys::add); } try (Stream s = keysForCommitEntry(ctx, to, keyFilter).map(KeyListEntry::getKey)) { s.forEach(allKeys::add); } if (allKeys.isEmpty()) { // no keys, shortcut return Stream.empty(); } List allKeysList = new ArrayList<>(allKeys); Map fromValues = fetchValues(ctx, from, allKeysList, keyFilter); Map toValues = fetchValues(ctx, to, allKeysList, keyFilter); Function> valToContent = cs -> cs != null ? Optional.of(cs.getRefState()) : Optional.empty(); return IntStream.range(0, allKeys.size()) .mapToObj(allKeysList::get) .map( k -> { ContentAndState fromVal = fromValues.get(k); ContentAndState toVal = toValues.get(k); Optional f = valToContent.apply(fromVal); Optional t = valToContent.apply(toVal); if (f.equals(t)) { return null; } byte payload = fromVal != null ? fromVal.getPayload() : (toVal != null ? toVal.getPayload() : (byte) 0); Optional g = Optional.ofNullable( fromVal != null ? fromVal.getGlobalState() : (toVal != null ? toVal.getGlobalState() : null)); return Difference.of(payload, k, g, f, t); }) .filter(Objects::nonNull); } /** * Common functionality to filter and enhance based on the given {@link GetNamedRefsParams}. * * @param ctx database-adapter context * @param params defines which kind of references and which additional information shall be * retrieved * @param defaultBranchHead prerequisite, the hash of the default branch's HEAD commit (depending * on the database-adapter implementation). If {@code null}, {@link * #namedRefsWithDefaultBranchRelatedInfo(AutoCloseable, GetNamedRefsParams, Stream, Hash)} * will not add additional default-branch related information (common ancestor and commits * behind/ahead). * @param refs current {@link Stream} of {@link ReferenceInfo} to be enhanced. * @return filtered/enhanced stream based on {@code refs}. */ @MustBeClosed @SuppressWarnings("MustBeClosedChecker") protected Stream> namedRefsFilterAndEnhance( OP_CONTEXT ctx, GetNamedRefsParams params, Hash defaultBranchHead, Stream> refs) { refs = namedRefsMaybeFilter(params, refs); refs = namedRefsWithDefaultBranchRelatedInfo(ctx, params, refs, defaultBranchHead); refs = namedReferenceWithCommitMeta(ctx, params, refs); return refs; } /** Applies the reference type filter (tags or branches) to the Java stream. */ protected static Stream> namedRefsMaybeFilter( GetNamedRefsParams params, Stream> refs) { if (params.getBranchRetrieveOptions().isRetrieve() && params.getTagRetrieveOptions().isRetrieve()) { // No filtering necessary, if all named-reference types (tags and branches) are being fetched. return refs; } return refs.filter(ref -> namedRefsRetrieveOptionsForReference(params, ref).isRetrieve()); } protected static boolean namedRefsRequiresBaseReference(GetNamedRefsParams params) { return namedRefsRequiresBaseReference(params.getBranchRetrieveOptions()) || namedRefsRequiresBaseReference(params.getTagRetrieveOptions()); } protected static boolean namedRefsRequiresBaseReference( GetNamedRefsParams.RetrieveOptions retrieveOptions) { return retrieveOptions.isComputeAheadBehind() || retrieveOptions.isComputeCommonAncestor(); } protected static boolean namedRefsAnyRetrieves(GetNamedRefsParams params) { return params.getBranchRetrieveOptions().isRetrieve() || params.getTagRetrieveOptions().isRetrieve(); } protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference( GetNamedRefsParams params, ReferenceInfo ref) { return namedRefsRetrieveOptionsForReference(params, ref.getNamedRef()); } protected static GetNamedRefsParams.RetrieveOptions namedRefsRetrieveOptionsForReference( GetNamedRefsParams params, NamedRef ref) { if (ref instanceof BranchName) { return params.getBranchRetrieveOptions(); } if (ref instanceof TagName) { return params.getTagRetrieveOptions(); } throw new IllegalArgumentException("ref must be either BranchName or TabName, but is " + ref); } /** * Returns an updated {@link ReferenceInfo} with the commit-meta of the reference's HEAD commit. */ @MustBeClosed protected Stream> namedReferenceWithCommitMeta( OP_CONTEXT ctx, GetNamedRefsParams params, Stream> refs) { return refs.map( ref -> { if (!namedRefsRetrieveOptionsForReference(params, ref).isRetrieveCommitMetaForHead()) { return ref; } CommitLogEntry logEntry = fetchFromCommitLog(ctx, ref.getHash()); if (logEntry == null) { return ref; } return ImmutableReferenceInfo.builder() .from(ref) .headCommitMeta(logEntry.getMetadata()) .commitSeq(logEntry.getCommitSeq()) .build(); }); } /** * If necessary based on the given {@link GetNamedRefsParams}, updates the returned {@link * ReferenceInfo}s with the common-ancestor of the named reference and the default branch and the * number of commits behind/ahead compared to the default branch. * *

The common ancestor and/or information of commits behind/ahead is meaningless ({@code null}) * for the default branch. Both fields are also {@code null} if the named reference points to the * {@link #noAncestorHash()} (beginning of time). */ @MustBeClosed protected Stream> namedRefsWithDefaultBranchRelatedInfo( OP_CONTEXT ctx, GetNamedRefsParams params, Stream> refs, Hash defaultBranchHead) { if (defaultBranchHead == null) { // No enhancement of common ancestor and/or commits behind/ahead. return refs; } CommonAncestorState commonAncestorState = new CommonAncestorState( ctx, defaultBranchHead, params.getBranchRetrieveOptions().isComputeAheadBehind() || params.getTagRetrieveOptions().isComputeAheadBehind()); return refs.map( ref -> { if (ref.getNamedRef().equals(params.getBaseReference())) { return ref; } RetrieveOptions retrieveOptions = namedRefsRetrieveOptionsForReference(params, ref); ReferenceInfo updated = namedRefsRequiresBaseReference(retrieveOptions) ? findCommonAncestor( ctx, ref.getHash(), commonAncestorState, (diffOnFrom, hash) -> { ReferenceInfo newRef = ref; if (retrieveOptions.isComputeCommonAncestor()) { newRef = newRef.withCommonAncestor(hash); } if (retrieveOptions.isComputeAheadBehind()) { int behind = commonAncestorState.indexOf(hash); CommitsAheadBehind aheadBehind = CommitsAheadBehind.of(diffOnFrom, behind); newRef = newRef.withAheadBehind(aheadBehind); } return newRef; }) : null; return updated != null ? updated : ref; }); } /** * Convenience for {@link #hashOnRef(AutoCloseable, Hash, NamedRef, Optional, Consumer) * hashOnRef(ctx, knownHead, ref.getReference(), ref.getHashOnReference(), null)}. */ protected Hash hashOnRef( OP_CONTEXT ctx, NamedRef reference, Optional hashOnRef, Hash knownHead) throws ReferenceNotFoundException { return hashOnRef(ctx, knownHead, reference, hashOnRef, null); } /** * Ensures that {@code ref} exists and that the hash in {@code hashOnRef} exists in that * reference. * * @param ctx technical operation context * @param ref reference that must contain {@code hashOnRef} * @param knownHead current HEAD of {@code ref} * @param hashOnRef hash to verify whether it exists in {@code ref} * @param commitLogVisitor optional consumer that will receive all visited {@link * CommitLogEntry}s, can be {@code null}. * @return value of {@code hashOnRef} or, if {@code hashOnRef} is empty, {@code knownHead} * @throws ReferenceNotFoundException if either {@code ref} does not exist or {@code hashOnRef} * does not exist on {@code ref} */ protected Hash hashOnRef( OP_CONTEXT ctx, Hash knownHead, NamedRef ref, Optional hashOnRef, Consumer commitLogVisitor) throws ReferenceNotFoundException { if (hashOnRef.isPresent()) { Hash suspect = hashOnRef.get(); // If the client requests 'NO_ANCESTOR' (== beginning of time), skip the existence-check. if (suspect.equals(NO_ANCESTOR)) { if (commitLogVisitor != null) { try (Stream commits = readCommitLogStream(ctx, knownHead)) { commits.forEach(commitLogVisitor); } } return suspect; } try (Stream hashes = commitsForHashOnRef(ctx, knownHead, commitLogVisitor)) { if (hashes.noneMatch(suspect::equals)) { throw hashNotFound(ref, suspect); } return suspect; } } else { return knownHead; } } @MustBeClosed private Stream commitsForHashOnRef( OP_CONTEXT ctx, Hash knownHead, Consumer commitLogVisitor) throws ReferenceNotFoundException { if (commitLogVisitor != null) { return readCommitLogStream(ctx, knownHead) .peek(commitLogVisitor) .map(CommitLogEntry::getHash); } else { return readCommitLogHashesStream(ctx, knownHead); } } protected void validateHashExists(OP_CONTEXT ctx, Hash hash) throws ReferenceNotFoundException { if (!NO_ANCESTOR.equals(hash) && fetchFromCommitLog(ctx, hash) == null) { throw referenceNotFound(hash); } } /** Load the commit-log entry for the given hash, return {@code null}, if not found. */ @VisibleForTesting public final CommitLogEntry fetchFromCommitLog(OP_CONTEXT ctx, Hash hash) { if (hash.equals(NO_ANCESTOR)) { // Do not try to fetch NO_ANCESTOR - it won't exist. return null; } try (Traced ignore = trace("fetchFromCommitLog").tag(TAG_HASH, hash.asString())) { return doFetchFromCommitLog(ctx, hash); } } protected abstract CommitLogEntry doFetchFromCommitLog(OP_CONTEXT ctx, Hash hash); @Override @SuppressWarnings("MustBeClosedChecker") public Stream scanAllCommitLogEntries() { OP_CONTEXT ctx = borrowConnection(); return doScanAllCommitLogEntries(ctx) .onClose( () -> { try { ctx.close(); } catch (Exception e) { throw new RuntimeException(e); } }); } @MustBeClosed protected abstract Stream doScanAllCommitLogEntries(OP_CONTEXT c); /** * Fetch multiple {@link CommitLogEntry commit-log-entries} from the commit-log. The returned list * must have exactly as many elements as in the parameter {@code hashes}. Non-existing hashes are * returned as {@code null}. */ private List fetchMultipleFromCommitLog( OP_CONTEXT ctx, List hashes, @Nonnull Function inMemoryCommits) { List result = new ArrayList<>(hashes.size()); BitSet remainingHashes = null; // Prefetch commits already available in memory. Record indexes for the missing commits to // enable placing them in the correct positions later, when they are fetched from storage. for (int i = 0; i < hashes.size(); i++) { Hash hash = hashes.get(i); if (NO_ANCESTOR.equals(hash)) { result.add(null); continue; } CommitLogEntry found = inMemoryCommits.apply(hash); if (found != null) { result.add(found); } else { if (remainingHashes == null) { remainingHashes = new BitSet(); } result.add(null); // to be replaced with storage result below remainingHashes.set(i); } } if (remainingHashes != null) { List fromStorage; try (Traced ignore = trace("fetchPageFromCommitLog") .tag(TAG_HASH, hashes.get(0).asString()) .tag(TAG_COUNT, hashes.size())) { fromStorage = doFetchMultipleFromCommitLog( ctx, remainingHashes.stream().mapToObj(hashes::get).collect(Collectors.toList())); } // Fill the gaps in the final result list. Note that fetchPageFromCommitLog must return the // list of the same size as its `remainingHashes` parameter. Iterator iter = fromStorage.iterator(); remainingHashes.stream() .forEach( i -> { result.set(i, iter.next()); }); } return result; } protected abstract List doFetchMultipleFromCommitLog( OP_CONTEXT ctx, List hashes); /** Reads from the commit-log starting at the given commit-log-hash. */ @MustBeClosed protected Stream readCommitLogStream(OP_CONTEXT ctx, Hash initialHash) throws ReferenceNotFoundException { Spliterator split = readCommitLog(ctx, initialHash, h -> null); return StreamSupport.stream(split, false); } @MustBeClosed protected Stream readCommitLogStream( OP_CONTEXT ctx, Hash initialHash, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { Spliterator split = readCommitLog(ctx, initialHash, inMemoryCommits); return StreamSupport.stream(split, false); } protected Spliterator readCommitLog( OP_CONTEXT ctx, Hash initialHash, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { Preconditions.checkNotNull(inMemoryCommits, "in-memory commits cannot be null"); if (NO_ANCESTOR.equals(initialHash)) { return Spliterators.emptySpliterator(); } CommitLogEntry initial = inMemoryCommits.apply(initialHash); if (initial == null) { initial = fetchFromCommitLog(ctx, initialHash); } if (initial == null) { throw referenceNotFound(initialHash); } BiFunction, List> fetcher = (c, hashes) -> fetchMultipleFromCommitLog(c, hashes, inMemoryCommits); return logFetcher(ctx, initial, fetcher, CommitLogEntry::getParents); } /** * Like {@link #readCommitLogStream(AutoCloseable, Hash)}, but only returns the {@link Hash * commit-log-entry hashes}, which can be taken from {@link CommitLogEntry#getParents()}, thus no * need to perform a read-operation against every hash. */ @MustBeClosed protected Stream readCommitLogHashesStream(OP_CONTEXT ctx, Hash initialHash) { Spliterator split = readCommitLogHashes(ctx, initialHash); return StreamSupport.stream(split, false); } protected Spliterator readCommitLogHashes(OP_CONTEXT ctx, Hash initialHash) { return logFetcher( ctx, initialHash, (c, hashes) -> hashes, hash -> { CommitLogEntry entry = fetchFromCommitLog(ctx, hash); if (entry == null) { return emptyList(); } return entry.getParents(); }); } /** * Constructs a {@link Stream} of entries for either the global-state-log or a commit-log or a * reflog entry. Use {@link #readCommitLogStream(AutoCloseable, Hash)} or the similar * implementation for the global-log or reflog entry for non-transactional adapters. */ protected Spliterator logFetcher( OP_CONTEXT ctx, T initial, BiFunction, List> fetcher, Function> nextPage) { return logFetcherCommon(ctx, Collections.singletonList(initial), fetcher, nextPage); } protected Spliterator logFetcherWithPage( OP_CONTEXT ctx, List initialPage, BiFunction, List> fetcher, Function> nextPage) { return logFetcherCommon(ctx, fetcher.apply(ctx, initialPage), fetcher, nextPage); } private Spliterator logFetcherCommon( OP_CONTEXT ctx, List initial, BiFunction, List> fetcher, Function> nextPage) { return new AbstractSpliterator(Long.MAX_VALUE, 0) { private Iterator currentBatch; private boolean eof; private T previous; @Override public boolean tryAdvance(Consumer consumer) { if (eof) { return false; } else if (currentBatch == null) { currentBatch = initial.iterator(); } else if (!currentBatch.hasNext()) { if (previous == null) { eof = true; return false; } List page = nextPage.apply(previous); previous = null; if (!page.isEmpty()) { currentBatch = fetcher.apply(ctx, page).iterator(); if (!currentBatch.hasNext()) { eof = true; return false; } } else { eof = true; return false; } } T v = currentBatch.next(); if (v != null) { consumer.accept(v); previous = v; } return true; } }; } /** * Builds a {@link CommitLogEntry} using the given values. This function also includes a {@link * KeyList}, if triggered by the values of {@code currentKeyListDistance} and {@link * DatabaseAdapterConfig#getKeyListDistance()}, so read operations may happen. */ protected CommitLogEntry buildIndividualCommit( OP_CONTEXT ctx, long timeInMicros, List parentHashes, long commitSeq, ByteString commitMeta, Iterable puts, Iterable deletes, int currentKeyListDistance, Consumer newKeyLists, @Nonnull Function inMemoryCommits, Iterable additionalParents) throws ReferenceNotFoundException { Hash commitHash = individualCommitHash(parentHashes, commitMeta, puts, deletes); int keyListDistance = currentKeyListDistance + 1; CommitLogEntry entry = CommitLogEntry.of( timeInMicros, commitHash, commitSeq, parentHashes, commitMeta, puts, deletes, keyListDistance, null, emptyList(), emptyList(), additionalParents); if (keyListDistance >= config.getKeyListDistance()) { entry = buildKeyList(ctx, entry, newKeyLists, inMemoryCommits); } return entry; } /** Calculate the hash for the content of a {@link CommitLogEntry}. */ @SuppressWarnings("UnstableApiUsage") protected Hash individualCommitHash( List parentHashes, ByteString commitMeta, Iterable puts, Iterable deletes) { Hasher hasher = newHasher(); hasher.putLong(COMMIT_LOG_HASH_SEED); parentHashes.forEach(h -> hasher.putBytes(h.asBytes().asReadOnlyByteBuffer())); hasher.putBytes(commitMeta.asReadOnlyByteBuffer()); puts.forEach( e -> { hashKey(hasher, e.getKey()); hasher.putString(e.getContentId().getId(), StandardCharsets.UTF_8); hasher.putBytes(e.getValue().asReadOnlyByteBuffer()); }); deletes.forEach(e -> hashKey(hasher, e)); return Hash.of(UnsafeByteOperations.unsafeWrap(hasher.hash().asBytes())); } /** * Adds a complete key-list to the given {@link CommitLogEntry}, will read from the database. * *

The implementation fills {@link CommitLogEntry#getKeyList()} with the most recently updated * {@link Key}s. * *

If the calculated size of the database-object/row gets larger than {@link * DatabaseAdapterConfig#getMaxKeyListSize()}, the next {@link Key}s will be added to new {@link * KeyListEntity}s, each with a maximum size of {@link DatabaseAdapterConfig#getMaxKeyListSize()}. * *

The current implementation fetches all keys and "blindly" populated {@link * CommitLogEntry#getKeyList()} and nested {@link KeyListEntity} via {@link * CommitLogEntry#getKeyListsIds()}. So this implementation does not yet reuse previous {@link * KeyListEntity}s. A follow-up improvement should check if already existing {@link * KeyListEntity}s contain the same keys. This proposed optimization should be accompanied by an * optimized read of the keys: for example, if the set of changed keys only affects {@link * CommitLogEntry#getKeyList()} but not the keys via {@link KeyListEntity}, it is just unnecessary * to both read and re-write those rows for {@link KeyListEntity}. */ protected CommitLogEntry buildKeyList( OP_CONTEXT ctx, CommitLogEntry unwrittenEntry, Consumer newKeyLists, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { // Read commit-log until the previous persisted key-list Hash startHash = unwrittenEntry.getParents().get(0); // Return the new commit-log-entry with the complete-key-list ImmutableCommitLogEntry.Builder newCommitEntry = ImmutableCommitLogEntry.builder() .from(unwrittenEntry) .keyListDistance(0) .keyListVariant(KeyListVariant.OPEN_ADDRESSING); KeyListBuildState buildState = new KeyListBuildState( newCommitEntry, maxEntitySize(config.getMaxKeyListSize()) - entitySize(unwrittenEntry), maxEntitySize(config.getMaxKeyListEntitySize()), config.getKeyListHashLoadFactor(), this::entitySize); Set keysToEnhanceWithCommitId = new HashSet<>(); try (Stream keys = keysForCommitEntry(ctx, startHash, null, inMemoryCommits)) { keys.forEach( keyListEntry -> { if (keyListEntry.getCommitId() == null) { keysToEnhanceWithCommitId.add(keyListEntry.getKey()); } else { buildState.add(keyListEntry); } }); } if (!keysToEnhanceWithCommitId.isEmpty()) { // Found KeyListEntry w/o commitId, need to add that information. Spliterator clSplit = readCommitLog(ctx, startHash, inMemoryCommits); while (true) { boolean more = clSplit.tryAdvance( e -> { e.getDeletes().forEach(keysToEnhanceWithCommitId::remove); for (KeyWithBytes put : e.getPuts()) { if (keysToEnhanceWithCommitId.remove(put.getKey())) { KeyListEntry entry = KeyListEntry.of( put.getKey(), put.getContentId(), put.getPayload(), e.getHash()); buildState.add(entry); } } }); if (!more || keysToEnhanceWithCommitId.isEmpty()) { break; } } } List newKeyListEntities = buildState.finish(); // Inform the (CAS)-op-loop about the IDs of the KeyListEntities being optimistically written. newKeyListEntities.stream().map(KeyListEntity::getId).forEach(newKeyLists); // Write the new KeyListEntities if (!newKeyListEntities.isEmpty()) { writeKeyListEntities(ctx, newKeyListEntities); } // Return the new commit-log-entry with the complete-key-list return newCommitEntry.build(); } protected int maxEntitySize(int value) { return value; } /** Calculate the expected size of the given {@link CommitLogEntry} in the database (in bytes). */ protected abstract int entitySize(CommitLogEntry entry); /** Calculate the expected size of the given {@link CommitLogEntry} in the database (in bytes). */ protected abstract int entitySize(KeyListEntry entry); /** * If the current HEAD of the target branch for a commit/transplant/merge is not equal to the * expected/reference HEAD, verify that there is no conflict, like keys in the operations of the * commit(s) contained in keys of the commits 'expectedHead (excluding) .. currentHead * (including)'. * * @return commit log entry at {@code branchHead} */ protected CommitLogEntry checkForModifiedKeysBetweenExpectedAndCurrentCommit( OP_CONTEXT ctx, CommitParams commitParams, Hash branchHead, List mismatches) throws ReferenceNotFoundException { CommitLogEntry commitAtHead = null; if (commitParams.getExpectedHead().isPresent()) { Hash expectedHead = commitParams.getExpectedHead().get(); if (!expectedHead.equals(branchHead)) { Set operationKeys = Sets.newHashSetWithExpectedSize( commitParams.getDeletes().size() + commitParams.getUnchanged().size() + commitParams.getPuts().size()); operationKeys.addAll(commitParams.getDeletes()); operationKeys.addAll(commitParams.getUnchanged()); commitParams.getPuts().stream().map(KeyWithBytes::getKey).forEach(operationKeys::add); ConflictingKeyCheckResult conflictingKeyCheckResult = checkConflictingKeysForCommit( ctx, branchHead, expectedHead, operationKeys, mismatches::add); // If the expectedHead is the special value NO_ANCESTOR, which is not persisted, // ignore the fact that it has not been seen. Otherwise, raise a // ReferenceNotFoundException that the expected-hash does not exist on the target // branch. if (!conflictingKeyCheckResult.sinceSeen && !expectedHead.equals(NO_ANCESTOR)) { throw hashNotFound(commitParams.getToBranch(), expectedHead); } commitAtHead = conflictingKeyCheckResult.headCommit; } } if (commitAtHead == null) { commitAtHead = fetchFromCommitLog(ctx, branchHead); } return commitAtHead; } /** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */ @MustBeClosed protected Stream keysForCommitEntry( OP_CONTEXT ctx, Hash hash, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { return keysForCommitEntry(ctx, hash, keyFilter, h -> null); } /** Retrieve the content-keys and their types for the commit-log-entry with the given hash. */ @MustBeClosed protected Stream keysForCommitEntry( OP_CONTEXT ctx, Hash hash, KeyFilterPredicate keyFilter, @Nonnull Function inMemoryCommits) throws ReferenceNotFoundException { // walk the commit-logs in reverse order - starting with the last persisted key-list Set seen = new HashSet<>(); Predicate predicate = keyListEntry -> keyListEntry != null && seen.add(keyListEntry.getKey()); if (keyFilter != null) { predicate = predicate.and(kt -> keyFilter.check(kt.getKey(), kt.getContentId(), kt.getPayload())); } Predicate keyPredicate = predicate; @SuppressWarnings("MustBeClosedChecker") Stream log = takeUntilIncludeLast( readCommitLogStream(ctx, hash, inMemoryCommits), CommitLogEntry::hasKeySummary); return log.flatMap( e -> { // Add CommitLogEntry.deletes to "seen" so these keys won't be returned seen.addAll(e.getDeletes()); // Return from CommitLogEntry.puts first Stream stream = e.getPuts().stream() .map( put -> KeyListEntry.of( put.getKey(), put.getContentId(), put.getPayload(), e.getHash())) .filter(keyPredicate); if (e.hasKeySummary()) { // Return from CommitLogEntry.keyList after the keys in CommitLogEntry.puts KeyList embeddedKeyList = e.getKeyList(); if (embeddedKeyList != null) { Stream embedded = embeddedKeyList.getKeys().stream().filter(keyPredicate); stream = Stream.concat(stream, embedded); } List keyListIds = e.getKeyListsIds(); if (keyListIds != null && !keyListIds.isEmpty()) { // If there are nested key-lists, retrieve those lazily and add the keys from these Stream entities = Stream.of(keyListIds) .flatMap( ids -> { @SuppressWarnings("MustBeClosedChecker") Stream r = fetchKeyLists(ctx, ids); return r; }) .map(KeyListEntity::getKeys) .map(KeyList::getKeys) .flatMap(Collection::stream) .filter(keyPredicate); stream = Stream.concat(stream, entities); } } return stream; }); } /** * Fetch the global-state and per-ref content for the given {@link Key}s and {@link Hash * commitSha}. Non-existing keys must not be present in the returned map. */ protected Map fetchValues( OP_CONTEXT ctx, Hash refHead, Collection keys, KeyFilterPredicate keyFilter) throws ReferenceNotFoundException { Set remainingKeys = new HashSet<>(keys); Map nonGlobal = new HashMap<>(); Map keyToContentIds = new HashMap<>(); Set contentIdsForGlobal = new HashSet<>(); Consumer commitLogEntryHandler = entry -> { // remove deleted keys from keys to look for entry.getDeletes().forEach(remainingKeys::remove); // handle put operations for (KeyWithBytes put : entry.getPuts()) { if (!remainingKeys.remove(put.getKey())) { continue; } if (!keyFilter.check(put.getKey(), put.getContentId(), put.getPayload())) { continue; } nonGlobal.put(put.getKey(), ContentAndState.of(put.getPayload(), put.getValue())); keyToContentIds.put(put.getKey(), put.getContentId()); if (STORE_WORKER.requiresGlobalState(put.getPayload(), put.getValue())) { contentIdsForGlobal.add(put.getContentId()); } } }; // The algorithm is a bit complex, but not horribly complex. // // The commit-log `Stream` in the following try-with-resources fetches the commit-log until // all requested keys have been seen. // // When a commit-log-entry with key-lists is encountered, use the key-lists to determine if // and how the remaining keys need to be retrieved. // - If any of the requested keys is not in the key-lists, ignore it - it doesn't exist. // - If the `KeyListEntry` for a requested key contains the commit-ID, use the commit-ID to // directly access the commit that contains the `Put` operation for that key. // // Both the "outer" `Stream` and the result of the latter case (list of commit-log-entries) // share common functionality implemented in the function `commitLogEntryHandler`, which // handles the 'Put` operations. AtomicBoolean keyListProcessed = new AtomicBoolean(); try (Stream baseLog = readCommitLogStream(ctx, refHead); Stream log = takeUntilExcludeLast(baseLog, e -> remainingKeys.isEmpty())) { log.forEach( entry -> { commitLogEntryHandler.accept(entry); if (entry.hasKeySummary() && keyListProcessed.compareAndSet(false, true)) { // CommitLogEntry has a KeyList. // All keys in 'remainingKeys', that are _not_ in the KeyList(s), can be removed, // because at this point we know that these do not exist. // // But do only process the "newest" key-list - older key lists are irrelevant. // KeyListEntry written before Nessie 0.22.0 do not have the commit-ID field set, // which means the remaining commit-log needs to be searched for the commit that // added the key - but processing older key-lists "on the way" makes no sense, // because those will not have the key either. if (!remainingKeys.isEmpty()) { List keyListEntries; switch (entry.getKeyListVariant()) { case OPEN_ADDRESSING: keyListEntries = fetchValuesHandleOpenAddressingKeyList(ctx, remainingKeys, entry); break; case EMBEDDED_AND_EXTERNAL_MRU: keyListEntries = fetchValuesHandleKeyList(ctx, remainingKeys, entry); break; default: throw new IllegalStateException( "Unknown key list variant " + entry.getKeyListVariant()); } if (!keyListEntries.isEmpty()) { List commitLogEntries = fetchMultipleFromCommitLog( ctx, keyListEntries.stream() .map(KeyListEntry::getCommitId) .filter(Objects::nonNull) .distinct() .collect(Collectors.toList()), h -> null); commitLogEntries.forEach(commitLogEntryHandler); } // Older Nessie versions before 0.22.0 did not record the commit-ID for a key // so that we have to continue scanning the commit log for the remaining keys. // Newer Nessie versions since 0.22.0 do record the commit-ID, so we can safely // assume that remainingKeys do not exist. remainingKeys.retainAll( keyListEntries.stream() .filter(e -> e.getCommitId() == null) .map(KeyListEntry::getKey) .collect(Collectors.toSet())); } } }); } Map globals = contentIdsForGlobal.isEmpty() ? Collections.emptyMap() : fetchGlobalStates(ctx, contentIdsForGlobal); return nonGlobal.entrySet().stream() .collect( Collectors.toMap( Entry::getKey, e -> { ContentAndState cs = e.getValue(); ByteString global = globals.get(keyToContentIds.get(e.getKey())); return global != null ? ContentAndState.of(cs.getPayload(), cs.getRefState(), global) : cs; })); } /** * Handles key lists written with {@link CommitLogEntry.KeyListVariant#OPEN_ADDRESSING} (since * Nessie 0.31.0). */ private List fetchValuesHandleOpenAddressingKeyList( OP_CONTEXT ctx, Collection remainingKeys, CommitLogEntry entry) { FetchValuesUsingOpenAddressing helper = new FetchValuesUsingOpenAddressing(entry); List keyListEntries = new ArrayList<>(); // If one or more `Key`s could not be immediately found in their "natural" segment in round 0, // because the end of the segment was hit, the next segment(s) will be consulted for the // remaining keys. Each "wrap around" increments the 'round'. for (int round = 0; !remainingKeys.isEmpty(); round++) { // Figure out the key-list-entities to load List entitiesToFetch = helper.entityIdsToFetch(round, config.getKeyListEntityPrefetch(), remainingKeys); // Fetch the key-list-entities for the identified segments, store those if (!entitiesToFetch.isEmpty()) { try (Stream keyLists = fetchKeyLists(ctx, entitiesToFetch)) { keyLists.forEach(helper::entityLoaded); } } // Try to extract the key-list-entries for the remainingKeys and add to the result list remainingKeys = helper.checkForKeys(round, remainingKeys, keyListEntries::add); } return keyListEntries; } /** * Handles key lists written with {@link CommitLogEntry.KeyListVariant#EMBEDDED_AND_EXTERNAL_MRU} * (before Nessie 0.31.0). */ private List fetchValuesHandleKeyList( OP_CONTEXT ctx, Set remainingKeys, CommitLogEntry entry) { List keyListEntries = new ArrayList<>(); try (Stream keyLists = keyListsFromCommitLogEntry(ctx, entry)) { keyLists .flatMap(keyList -> keyList.getKeys().stream()) .filter(Objects::nonNull) .filter(keyListEntry -> remainingKeys.contains(keyListEntry.getKey())) .forEach(keyListEntries::add); } return keyListEntries; } @MustBeClosed private Stream keyListsFromCommitLogEntry(OP_CONTEXT ctx, CommitLogEntry entry) { KeyList embeddedKeyList = entry.getKeyList(); Stream keyList = embeddedKeyList != null ? Stream.of(embeddedKeyList) : Stream.empty(); List keyListIds = entry.getKeyListsIds(); if (keyListIds != null && !keyListIds.isEmpty()) { keyList = Stream.concat( Stream.of(entry.getKeyList()), Stream.of(keyListIds) // lazy fetch .flatMap( ids -> { @SuppressWarnings("MustBeClosedChecker") Stream r = fetchKeyLists(ctx, ids).map(KeyListEntity::getKeys); return r; })); } return keyList; } /** * Fetches the global-state information for the given content-ids. * * @param ctx technical context * @param contentIds the content-ids to fetch * @return map of content-id to state */ protected final Map fetchGlobalStates( OP_CONTEXT ctx, Set contentIds) throws ReferenceNotFoundException { try (Traced ignore = trace("fetchGlobalStates").tag(TAG_COUNT, contentIds.size())) { return doFetchGlobalStates(ctx, contentIds); } } protected abstract Map doFetchGlobalStates( OP_CONTEXT ctx, Set contentIds) throws ReferenceNotFoundException; @VisibleForTesting @MustBeClosed public final Stream fetchKeyLists(OP_CONTEXT ctx, List keyListsIds) { if (keyListsIds.isEmpty()) { return Stream.empty(); } try (Traced ignore = trace("fetchKeyLists").tag(TAG_COUNT, keyListsIds.size())) { return doFetchKeyLists(ctx, keyListsIds); } } @MustBeClosed protected abstract Stream doFetchKeyLists(OP_CONTEXT ctx, List keyListsIds); /** * Write a new commit-entry, the given commit entry is to be persisted as is. All values of the * given {@link CommitLogEntry} can be considered valid and consistent. * *

Implementations however can enforce strict consistency checks/guarantees, like a best-effort * approach to prevent hash-collisions but without any other consistency checks/guarantees. */ protected final void writeIndividualCommit(OP_CONTEXT ctx, CommitLogEntry entry) throws ReferenceConflictException { try (Traced ignore = trace("writeIndividualCommit")) { doWriteIndividualCommit(ctx, entry); } } protected abstract void doWriteIndividualCommit(OP_CONTEXT ctx, CommitLogEntry entry) throws ReferenceConflictException; protected final void writeMultipleCommits(OP_CONTEXT ctx, List entries) throws ReferenceConflictException { try (Traced ignore = trace("writeMultipleCommits").tag(TAG_COUNT, entries.size())) { doWriteMultipleCommits(ctx, entries); } } protected abstract void doWriteMultipleCommits(OP_CONTEXT ctx, List entries) throws ReferenceConflictException; protected abstract void doUpdateMultipleCommits(OP_CONTEXT ctx, List entries) throws ReferenceNotFoundException; @VisibleForTesting public final void writeKeyListEntities(OP_CONTEXT ctx, List newKeyListEntities) { try (Traced ignore = trace("writeKeyListEntities").tag(TAG_COUNT, newKeyListEntities.size())) { doWriteKeyListEntities(ctx, newKeyListEntities); } } protected abstract void doWriteKeyListEntities( OP_CONTEXT ctx, List newKeyListEntities); protected static final class ConflictingKeyCheckResult { private boolean sinceSeen; private CommitLogEntry headCommit; public boolean isSinceSeen() { return sinceSeen; } public CommitLogEntry getHeadCommit() { return headCommit; } } /** * Check whether the commits in the range {@code sinceCommitExcluding] .. [upToCommitIncluding} * contain any of the given {@link Key}s. * *

Conflicts are reported via {@code mismatches}. */ protected ConflictingKeyCheckResult checkConflictingKeysForCommit( OP_CONTEXT ctx, Hash upToCommitIncluding, Hash sinceCommitExcluding, Set keys, Consumer mismatches) throws ReferenceNotFoundException { ConflictingKeyCheckResult result = new ConflictingKeyCheckResult(); try (Stream commits = readCommitLogStream(ctx, upToCommitIncluding)) { Stream log = takeUntilExcludeLast( commits, e -> { if (e.getHash().equals(upToCommitIncluding)) { result.headCommit = e; } if (e.getHash().equals(sinceCommitExcluding)) { result.sinceSeen = true; return true; } return false; }); Set handled = new HashSet<>(); log.forEach( e -> { e.getPuts() .forEach( a -> { if (keys.contains(a.getKey()) && handled.add(a.getKey())) { mismatches.accept( String.format( "Key '%s' has conflicting put-operation from commit '%s'.", a.getKey(), e.getHash().asString())); } }); e.getDeletes() .forEach( a -> { if (keys.contains(a) && handled.add(a)) { mismatches.accept( String.format( "Key '%s' has conflicting delete-operation from commit '%s'.", a, e.getHash().asString())); } }); }); } return result; } protected final class CommonAncestorState { final Iterator toLog; final List toCommitHashesList; final Set toCommitHashes = new HashSet<>(); public CommonAncestorState(OP_CONTEXT ctx, Hash toHead, boolean trackCount) { this.toLog = Spliterators.iterator(readCommitLogHashes(ctx, toHead)); this.toCommitHashesList = trackCount ? new ArrayList<>() : null; } boolean fetchNext() { if (toLog.hasNext()) { Hash hash = toLog.next(); toCommitHashes.add(hash); if (toCommitHashesList != null) { toCommitHashesList.add(hash); } return true; } return false; } public boolean contains(Hash candidate) { return toCommitHashes.contains(candidate); } public int indexOf(Hash hash) { return toCommitHashesList.indexOf(hash); } } /** * Finds the common-ancestor of two commit-log-entries. If no common-ancestor is found, throws a * {@link ReferenceConflictException} or. Otherwise, this method returns the hash of the * common-ancestor. */ protected Hash findCommonAncestor(OP_CONTEXT ctx, Hash from, NamedRef toBranch, Hash toHead) throws ReferenceConflictException { // TODO this implementation requires guardrails: // max number of "to"-commits to fetch, max number of "from"-commits to fetch, // both impact the cost (CPU, memory, I/O) of a merge operation. CommonAncestorState commonAncestorState = new CommonAncestorState(ctx, toHead, false); Hash commonAncestorHash = findCommonAncestor(ctx, from, commonAncestorState, (dist, hash) -> hash); if (commonAncestorHash == null) { throw new ReferenceConflictException( String.format( "No common ancestor found for merge of '%s' into branch '%s'", from, toBranch.getName())); } return commonAncestorHash; } protected R findCommonAncestor( OP_CONTEXT ctx, Hash from, CommonAncestorState state, BiFunction result) { Iterator fromLog = Spliterators.iterator(readCommitLogHashes(ctx, from)); List fromCommitHashes = new ArrayList<>(); while (true) { boolean anyFetched = false; for (int i = 0; i < config.getParentsPerCommit(); i++) { if (state.fetchNext()) { anyFetched = true; } if (fromLog.hasNext()) { fromCommitHashes.add(fromLog.next()); anyFetched = true; } } if (!anyFetched) { return null; } for (int diffOnFrom = 0; diffOnFrom < fromCommitHashes.size(); diffOnFrom++) { Hash f = fromCommitHashes.get(diffOnFrom); if (state.contains(f)) { return result.apply(diffOnFrom, f); } } } } /** * For merge/transplant, verifies that the given commits do not touch any of the given keys. * * @param commitsChronological list of commit-log-entries, in order of commit-operations, * chronological order */ protected boolean hasKeyCollisions( OP_CONTEXT ctx, Hash refHead, Set keysTouchedOnTarget, List commitsChronological, Function keyDetails) throws ReferenceNotFoundException { Set keyCollisions = new HashSet<>(); for (int i = commitsChronological.size() - 1; i >= 0; i--) { CommitLogEntry sourceCommit = commitsChronological.get(i); Stream.concat( sourceCommit.getPuts().stream().map(KeyWithBytes::getKey), sourceCommit.getDeletes().stream()) .filter(keysTouchedOnTarget::contains) .forEach(keyCollisions::add); } if (!keyCollisions.isEmpty()) { removeKeyCollisionsForNamespaces( ctx, refHead, commitsChronological.get(commitsChronological.size() - 1).getHash(), keyCollisions); if (!keyCollisions.isEmpty()) { keyCollisions.forEach(key -> keyDetails.apply(key).conflictType(ConflictType.UNRESOLVABLE)); return true; } } return false; } /** * If any key collision was found, we need to check whether the key collision was happening on a * Namespace and if so, remove that key collision, since Namespaces can be merged/transplanted * without problems. * * @param ctx The context * @param hashFromTarget The hash from the target branch * @param hashFromSource The hash from the source branch * @param keyCollisions The found key collisions * @throws ReferenceNotFoundException If the given reference could not be found */ private void removeKeyCollisionsForNamespaces( OP_CONTEXT ctx, Hash hashFromTarget, Hash hashFromSource, Set keyCollisions) throws ReferenceNotFoundException { Predicate> isNamespace = e -> STORE_WORKER .getType(e.getValue().getPayload(), e.getValue().getRefState()) .equals(Content.Type.NAMESPACE); Set namespacesOnTarget = fetchValues(ctx, hashFromTarget, keyCollisions, ALLOW_ALL).entrySet().stream() .filter(isNamespace) .map(Entry::getKey) .collect(Collectors.toSet()); // this will be an implicit set intersection between the namespaces on source & target Set intersection = fetchValues(ctx, hashFromSource, namespacesOnTarget, ALLOW_ALL).entrySet().stream() .filter(isNamespace) .map(Entry::getKey) .collect(Collectors.toSet()); // remove all keys related to namespaces from the existing collisions intersection.forEach(keyCollisions::remove); } /** * For merge/transplant, applies one squashed commit derived from the given commits onto the * target-hash. */ protected CommitLogEntry squashCommits( OP_CONTEXT ctx, long timeInMicros, Hash toHead, List commitsToMergeChronological, Consumer newKeyLists, MetadataRewriter rewriteMetadata, Predicate includeKeyPredicate, Iterable additionalParents) throws ReferenceConflictException, ReferenceNotFoundException { List commitMeta = new ArrayList<>(); Map puts = new LinkedHashMap<>(); Set deletes = new LinkedHashSet<>(); for (int i = commitsToMergeChronological.size() - 1; i >= 0; i--) { CommitLogEntry source = commitsToMergeChronological.get(i); for (Key delete : source.getDeletes()) { if (includeKeyPredicate.test(delete)) { deletes.add(delete); puts.remove(delete); } } for (KeyWithBytes put : source.getPuts()) { if (includeKeyPredicate.test(put.getKey())) { deletes.remove(put.getKey()); puts.put(put.getKey(), put); } } commitMeta.add(source.getMetadata()); } if (puts.isEmpty() && deletes.isEmpty()) { // Copied commit will not contain any operation, skip. return null; } ByteString newCommitMeta = rewriteMetadata.squash(commitMeta); CommitLogEntry targetHeadCommit = fetchFromCommitLog(ctx, toHead); int parentsPerCommit = config.getParentsPerCommit(); List parents = new ArrayList<>(parentsPerCommit); parents.add(toHead); long commitSeq; int keyListDistance; if (targetHeadCommit != null) { List p = targetHeadCommit.getParents(); parents.addAll(p.subList(0, Math.min(p.size(), parentsPerCommit - 1))); commitSeq = targetHeadCommit.getCommitSeq() + 1; keyListDistance = targetHeadCommit.getKeyListDistance(); } else { commitSeq = 1; keyListDistance = 0; } CommitLogEntry squashedCommit = buildIndividualCommit( ctx, timeInMicros, parents, commitSeq, newCommitMeta, puts.values(), deletes, keyListDistance, newKeyLists, h -> null, additionalParents); if (commitsToMergeChronological.size() == 1) { CommitLogEntry single = commitsToMergeChronological.get(0); if (squashedCommit.getHash().equals(single.getHash()) && squashedCommit.getPuts().equals(single.getPuts()) && squashedCommit.getDeletes().equals(single.getDeletes()) && squashedCommit.getMetadata().equals(single.getMetadata())) { // Fast-forward merge return single; } } writeIndividualCommit(ctx, squashedCommit); return squashedCommit; } /** For merge/transplant, applies the given commits onto the target-hash. */ protected Hash copyCommits( OP_CONTEXT ctx, long timeInMicros, Hash targetHead, List commitsChronological, Consumer newKeyLists, MetadataRewriter rewriteMetadata, Predicate includeKeyPredicate) throws ReferenceNotFoundException { int parentsPerCommit = config.getParentsPerCommit(); List parents = new ArrayList<>(parentsPerCommit); CommitLogEntry targetHeadCommit = fetchFromCommitLog(ctx, targetHead); long commitSeq; if (targetHeadCommit != null) { parents.addAll(targetHeadCommit.getParents()); commitSeq = targetHeadCommit.getCommitSeq() + 1; } else { commitSeq = 1L; } int keyListDistance = targetHeadCommit != null ? targetHeadCommit.getKeyListDistance() : 0; Map unwrittenCommits = new HashMap<>(); // Rewrite commits to transplant and store those in 'commitsToTransplantReverse' for (int i = commitsChronological.size() - 1; i >= 0; i--, commitSeq++) { CommitLogEntry sourceCommit = commitsChronological.get(i); List puts = sourceCommit.getPuts().stream() .filter(p -> includeKeyPredicate.test(p.getKey())) .collect(Collectors.toList()); List deletes = sourceCommit.getDeletes().stream() .filter(includeKeyPredicate) .collect(Collectors.toList()); if (puts.isEmpty() && deletes.isEmpty()) { // Copied commit will not contain any operation, skip. commitsChronological.remove(i); continue; } while (parents.size() > parentsPerCommit - 1) { parents.remove(parentsPerCommit - 1); } if (parents.isEmpty()) { parents.add(targetHead); } else { parents.add(0, targetHead); } ByteString updatedMetadata = rewriteMetadata.rewriteSingle(sourceCommit.getMetadata()); CommitLogEntry newEntry = buildIndividualCommit( ctx, timeInMicros, parents, commitSeq, updatedMetadata, puts, deletes, keyListDistance, newKeyLists, unwrittenCommits::get, emptyList()); keyListDistance = newEntry.getKeyListDistance(); unwrittenCommits.put(newEntry.getHash(), newEntry); if (!newEntry.getHash().equals(sourceCommit.getHash())) { commitsChronological.set(i, newEntry); } else { // Newly built CommitLogEntry is equal to the CommitLogEntry to transplant. // This can happen, if the commit to transplant has NO_ANCESTOR as its parent. commitsChronological.remove(i); } targetHead = newEntry.getHash(); } return targetHead; } /** * Verifies that the current global-states match the {@code expectedStates}, produces human * readable messages for the violations. */ protected void checkExpectedGlobalStates( OP_CONTEXT ctx, CommitParams commitParams, Consumer mismatches) throws ReferenceNotFoundException { Map> expectedStates = commitParams.getExpectedStates(); if (expectedStates.isEmpty()) { return; } Map globalStates = fetchGlobalStates(ctx, expectedStates.keySet()); for (Entry> expectedState : expectedStates.entrySet()) { ByteString currentState = globalStates.get(expectedState.getKey()); if (currentState == null) { if (expectedState.getValue().isPresent()) { mismatches.accept( String.format( "No current global-state for content-id '%s'.", expectedState.getKey())); } } else { if (!expectedState.getValue().isPresent()) { // This happens, when a table's being created on a branch, but that table already exists. mismatches.accept( String.format( "Global-state for content-id '%s' already exists.", expectedState.getKey())); } else if (!currentState.equals(expectedState.getValue().get())) { mismatches.accept( String.format( "Mismatch in global-state for content-id '%s'.", expectedState.getKey())); } } } } /** Load the refLog entry for the given hash, return {@code null}, if not found. */ protected final RefLog fetchFromRefLog(OP_CONTEXT ctx, Hash refLogId) { try (Traced ignore = trace("fetchFromRefLog").tag(TAG_HASH, refLogId != null ? refLogId.asString() : "HEAD")) { return doFetchFromRefLog(ctx, refLogId); } } protected abstract RefLog doFetchFromRefLog(OP_CONTEXT ctx, Hash refLogId); /** * Fetch multiple {@link RefLog refLog-entries} from the refLog. The returned list must have * exactly as many elements as in the parameter {@code hashes}. Non-existing hashes are returned * as {@code null}. */ protected final List fetchPageFromRefLog(OP_CONTEXT ctx, List hashes) { if (hashes.isEmpty()) { return emptyList(); } try (Traced ignore = trace("fetchPageFromRefLog") .tag(TAG_HASH, hashes.get(0).asString()) .tag(TAG_COUNT, hashes.size())) { return doFetchPageFromRefLog(ctx, hashes); } } protected abstract List doFetchPageFromRefLog(OP_CONTEXT ctx, List hashes); /** Reads from the refLog starting at the given refLog-hash. */ @MustBeClosed protected Stream readRefLogStream(OP_CONTEXT ctx, Hash initialHash) throws RefLogNotFoundException { Spliterator split = readRefLog(ctx, initialHash); return StreamSupport.stream(split, false); } protected abstract Spliterator readRefLog(OP_CONTEXT ctx, Hash initialHash) throws RefLogNotFoundException; protected void tryLoopStateCompletion(@Nonnull Boolean success, TryLoopState state) { tryLoopFinished( success ? "success" : "fail", state.getRetries(), state.getDuration(NANOSECONDS)); } protected void repositoryEvent(Supplier> eventBuilder) { if (eventConsumer != null && eventBuilder != null) { AdapterEvent event = eventBuilder.get().eventTimeMicros(config.currentTimeInMicros()).build(); try { eventConsumer.accept(event); } catch (RuntimeException e) { repositoryEventDeliveryFailed(event, e); } } } private static void repositoryEventDeliveryFailed(AdapterEvent event, RuntimeException e) { LOGGER.warn( "Repository event delivery failed for operation type {}", event.getOperationType(), e); Tracer t = GlobalTracer.get(); Span span = t.activeSpan(); Span log = span.log( ImmutableMap.of( Fields.EVENT, Tags.ERROR.getKey(), Fields.MESSAGE, "Repository event delivery failed", Fields.ERROR_OBJECT, e)); Tags.ERROR.set(log, true); } protected abstract void persistAttachments(OP_CONTEXT ctx, Stream attachments); }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy