All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.projectnessie.versioned.persist.adapter.spi.FetchValuesUsingOpenAddressing Maven / Gradle / Ivy

/*
 * Copyright (C) 2022 Dremio
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.projectnessie.versioned.persist.adapter.spi;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.function.Consumer;
import org.agrona.collections.Hashing;
import org.agrona.collections.Object2IntHashMap;
import org.projectnessie.model.ContentKey;
import org.projectnessie.versioned.Hash;
import org.projectnessie.versioned.persist.adapter.CommitLogEntry;
import org.projectnessie.versioned.persist.adapter.KeyListEntity;
import org.projectnessie.versioned.persist.adapter.KeyListEntry;

/**
 * Processes {@link CommitLogEntry#getKeyList() embedded} and {@link CommitLogEntry#getKeyListsIds()
 * external} key-lists written by {@link KeyListBuildState}.
 *
 * 

This implementation can process arbitrary numbers of segments, the number of segments does not * need to be a power of 2 here. */ final class FetchValuesUsingOpenAddressing { final int[] keyListSegmentOffsets; final int keyListCount; final int openAddressingMask; // The major dimension is sized to match the number of segments (including embedded) // The zeroth element corresponds with the embedded segment final KeyListEntry[][] keyListsArray; final Object2IntHashMap entityIdToSegment; final List keyListIds; FetchValuesUsingOpenAddressing(CommitLogEntry entry) { List keyListSegmentOffsetsList = entry.getKeyListEntityOffsets(); keyListSegmentOffsets = keyListSegmentOffsetsList != null ? keyListSegmentOffsetsList.stream().mapToInt(Integer::intValue).toArray() : new int[0]; keyListCount = 1 + keyListSegmentOffsets.length; keyListsArray = new KeyListEntry[keyListCount][]; keyListsArray[0] = entry.getKeyList().getKeys().toArray(new KeyListEntry[0]); entityIdToSegment = new Object2IntHashMap<>(keyListCount, Hashing.DEFAULT_LOAD_FACTOR, -1); openAddressingMask = entry.getKeyListBucketCount() - 1; keyListIds = entry.getKeyListsIds(); } /** Calculates the open-addressing bucket for a key. */ int bucketForKey(ContentKey key) { return key.hashCode() & openAddressingMask; } /** * Identifies the segment for a bucket. Segment 0 is the embedded key list, segment 1 is the first * key-list-entity. */ @VisibleForTesting int segmentForKey(int bucket, int round) { int binIdx = Arrays.binarySearch(keyListSegmentOffsets, bucket); int segment = binIdx >= 0 ? binIdx + 1 : -binIdx - 1; return (segment + round) % keyListsArray.length; } /** * Identify the {@link KeyListEntity}s that need to be fetched to get the {@link KeyListEntry}s * for {@code remainingKeys}. */ List entityIdsToFetch( int round, int prefetchEntities, Collection remainingKeys) { // prefetchEntities is user-configurable. Throw an exception if it is negative, otherwise clamp // the nonnegative value to the range [0, keyListCount-1] = [0, keyListArray.length-1]. Preconditions.checkArgument( 0 <= prefetchEntities, "Key-list segment prefetch parameter %s cannot be negative", prefetchEntities); prefetchEntities = Math.min(prefetchEntities, keyListCount - 1); // Identify the key-list segments to fetch List entitiesToFetch = new ArrayList<>(); for (ContentKey key : remainingKeys) { int keyBucket = bucketForKey(key); int segment = segmentForKey(keyBucket, round); for (int prefetch = 0; ; prefetch++) { if (segment > 0) { // Decrement accounts for the embedded key-list segment int entitySegment = segment - 1; Hash entityId = keyListIds.get(entitySegment); if (entityIdToSegment.put(entityId, segment) == -1) { entitiesToFetch.add(entityId); } } if (prefetch >= prefetchEntities) { break; } segment++; if (segment == keyListCount) { segment = 0; } } } return entitiesToFetch; } /** * Callback to memoize a loaded {@link KeyListEntity} so it can be used in {@link * #checkForKeys(int, Collection, Consumer)}. */ void entityLoaded(KeyListEntity keyListEntity) { Integer index = entityIdToSegment.get(keyListEntity.getId()); keyListsArray[index] = keyListEntity.getKeys().getKeys().toArray(new KeyListEntry[0]); } /** * Find the {@link KeyListEntry}s for the {@code remainingKeys}, return the {@link ContentKey}s * that could not be found yet, but are likely in the next segment. */ Collection checkForKeys( int round, Collection remainingKeys, Consumer resultConsumer) { List keysForNextRound = new ArrayList<>(); // If we've already examined every segment (implying a miss on a completely full hashmap), // then return an empty collection early. Note that the initial segment may need to be examined // twice when the key is moved in front of its natural position due to key collisions. This // is achieved by the `less than` comparison with `round`. On the first round the initial // segment is scanned from the natural key position. On the last round the initial segment is // scanned from index zero. This may incur some extra key comparisons if the segment is full and // the scanning overtakes the "natural" key position on the last round. However, this is // probably a rare case and not worth complicating the lookup logic to avoid it. In any case, // the double lookup in the initial segment does not cause extra I/O because the segment is // pre-fetched by the time it is reused. if (keyListsArray.length < round) { return keysForNextRound; } for (ContentKey key : remainingKeys) { int keyBucket = bucketForKey(key); int segment = segmentForKey(keyBucket, round); int offsetInSegment = 0; if (round == 0) { offsetInSegment = keyBucket; if (segment > 0) { offsetInSegment -= keyListSegmentOffsets[segment - 1]; } } KeyListEntry[] keys = keyListsArray[segment]; for (int i = offsetInSegment; ; i++) { KeyListEntry keyListEntry = i < keys.length ? keys[i] : null; if (keyListEntry == null) { // key _not_ found if (keys.length == 0 && segment == 0) { // ... however, the first (embedded) segment may be empty due to size restrictions. // This means its keys had to be moved to key list entities - keep searching. keysForNextRound.add(key); } break; } else if (keyListEntry.getKey().equals(key)) { resultConsumer.accept(keyListEntry); break; } else if (i == keys.length - 1) { keysForNextRound.add(key); break; } } } return keysForNextRound; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy