All downloads are free. The search and download functionality uses the official Maven repository.

com.apple.foundationdb.async.RangeSet Maven / Gradle / Ivy

There is a newer version: 2.8.110.0
Show newest version
/*
 * RangeSet.java
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2015-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.apple.foundationdb.async;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.KeyValue;
import com.apple.foundationdb.Range;
import com.apple.foundationdb.ReadTransaction;
import com.apple.foundationdb.ReadTransactionContext;
import com.apple.foundationdb.TransactionContext;
import com.apple.foundationdb.subspace.Subspace;
import com.apple.foundationdb.tuple.ByteArrayUtil;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

/**
 * RangeSet supports efficient adding of ranges of keys into the database to support marking
 * work done elsewhere as completed as well as checking if specific keys are already completed.
 *
 * 

* This is useful if one is going to be doing work that will carve out pieces from another * subspace and work on those separately. The methods in here will allow for a (more-or-less) * append only set that can be used to keep track of the progress that that job is making. *

*/ @API(API.Status.MAINTAINED) public class RangeSet { @Nonnull private Subspace subspace; @Nonnull private static final byte[] FIRST_KEY = new byte[]{(byte)0x00}; @Nonnull private static final byte[] FINAL_KEY = new byte[]{(byte)0xff}; /** * Value indicating that there should be no limit. This should * be passed to {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) missingRanges} * to indicate that the read should not limit the number of results it returns. */ public static final int UNLIMITED = Integer.MAX_VALUE; /** * Creates a new RangeSet that will write its data to the given subspace provided. * The contents of this subspace should either be empty or contain the data * used by another RangeSet object. * @param subspace the subspace in which to write data */ public RangeSet(@Nonnull Subspace subspace) { this.subspace = subspace; } private static void checkKey(@Nonnull byte[] key) { if (key.length == 0 || ByteArrayUtil.compareUnsigned(key, FINAL_KEY) >= 0) { // NOTE: Perhaps this should instead return a completable future completed in exceptional state... throw new IllegalArgumentException("Key " + ByteArrayUtil.printable(key) + " outside of accepted key range of [\\x00,\\xff)"); } } private static void checkRange(@Nonnull byte[] begin, @Nonnull byte[] end) { if (ByteArrayUtil.compareUnsigned(begin, end) > 0) { throw new IllegalArgumentException("Inverted range; " + ByteArrayUtil.printable(begin) + " is greater than " + ByteArrayUtil.printable(end)); } } // This returns the next possible key after another key (i.e., a key that is greater than current key but // every key greater than this key will be greater than or equal to the returned key). @Nonnull private byte[] keyAfter(@Nonnull byte[] key) { byte[] ret = new byte[key.length + 1]; System.arraycopy(key, 0, ret, 0, key.length); ret[key.length] = (byte)0; return ret; } /** * Determines if a single key is contained within the range set. 
If it is, this will return * true, and if it is not, it will return false. In terms of isolation, this adds a read- * conflict to the key corresponding to the key being checked but to nothing else even * though it has to do a range read that might be larger. This means that updates to keys * before this won't conflict unless they actually change whether this key is contained within * the range set. * @param tc transaction or database in which to run operation * @param key the key to check presence in set * @return a future that contains whether some range in the set contains the key */ @Nonnull public CompletableFuture contains(@Nonnull TransactionContext tc, @Nonnull byte[] key) { checkKey(key); return tc.runAsync(tr -> { // Add a read conflict to only the key being checked so that if this gets // overwritten somewhere else, this causes a conflict. byte[] frobnicated = subspace.pack(key); tr.addReadConflictKey(frobnicated); AsyncIterator iterator = tr.snapshot().getRange(subspace.range().begin, keyAfter(frobnicated), 1, true).iterator(); return iterator.onHasNext().thenApply(hasNext -> { if (!hasNext) { return false; } else { byte[] endRange = iterator.next().getValue(); return ByteArrayUtil.compareUnsigned(key, endRange) < 0; } }); }); } /** * Inserts a range into the set. This behaves the same way as the four-parameter version of * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict * settings), but it gets its begin and end from the given {@link Range} object and assumes that * requiresEmpty is false, i.e., it is okay for there already to be data within the * given range. 
* * @param tc the transaction or database in which to operate * @param r the range to add to the set * @return a future that is true if there were any modifications to the database and false otherwise */ @Nonnull public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nonnull Range r) { return insertRange(tc, r.begin, r.end); } /** * Inserts a range into the set. This behaves the same way as the four-parmater version of * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict * settings), but it gets its begin and end from the given {@link Range} object. * * @param tc the transaction or database in which to operate * @param r the range to add to the set * @param requireEmpty whether this should only be added if this range is initally empty * @return a future that is true if there were any modifications to the database and false otherwise */ @Nonnull public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nonnull Range r, boolean requireEmpty) { return insertRange(tc, r.begin, r.end, requireEmpty); } /** * Inserts a range into the set. This behaves the same way as the four-parameter version of * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict * settings), but it assumes that requiresEmpty is false, i.e., it is okay for * there already to be data within the given range. * * @param tc the transaction or database in which to operate * @param begin the (inclusive) beginning of the range to add * @param end the (exclusive) end of the range to add * @return a future that is true if there were any modifications to the database and false otherwise */ @Nonnull public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end) { return insertRange(tc, begin, end, false); } /** * Inserts a range into the set. 
The range inserted will begin at begin (inclusive) and end at * end (exclusive). If the requireEmpty is set, then this will only actually change the * database in the case that the range being added is not yet included in the set. If this flag is set to * false, then this will "fill in the gaps" between ranges present so that the whole range is * present following this transactions operation. The return value will (when ready) be equal to true * if and only if there are changes (i.e., writes) to the database that need to be made, i.e., the range was not * already included in the set. If the initial end point is less than the begin point, then this will * throw an {@link IllegalArgumentException} indicating that one has passed an inverted range. If begin * and end are equal, then this will immediately return a future that is set to false * (corresponding to adding an empty range). If null is set for either endpoint, this will insert * a range all the way to the end of the total range. * *

* In terms of isolation, this method will add both read- and write-conflict ranges. It adds a read-conflict range * corresponding to the range being added, i.e., for the keys within the range from begin to end. * This is so that if this range is modified concurrently by another writer, this transaction will fail (as the exact * writes done depend on these keys not being modified.) It will also a write-conflict ranges corresponding * to all of the individual ranges added to the database. That means that if the range is initially empty, * a write-conflict range corresponding to the keys from begin to end. This is done * so that if another transaction checks to see if a key in the range we are writing is within the range set * and finds that it is not, this write will then cause that transaction to fail if it is committed after this * one. If the range is not empty initially, write conflict ranges are added for all of the "gaps" that have * to be added. (So, if the range is already full, then no write conflict ranges are added at all.) *

* * @param tc the transaction or database in which to operate * @param begin the (inclusive) beginning of the range to add * @param end the (exclusive) end of the range to add * @param requireEmpty whether this should only be added if this range is initially empty * @return a future that is true if there were any modifications to the database and false otherwise */ @Nonnull public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end, boolean requireEmpty) { byte[] beginNonNull = (begin == null) ? FIRST_KEY : begin; byte[] endNonNull = (end == null) ? FINAL_KEY : end; checkKey(beginNonNull); checkRange(beginNonNull, endNonNull); if (ByteArrayUtil.compareUnsigned(beginNonNull, endNonNull) == 0) { return AsyncUtil.READY_FALSE; } return tc.runAsync(tr -> { // Add a read range for the keys corresponding to the bounds of this range. byte[] frobnicatedBegin = subspace.pack(beginNonNull); byte[] frobnicatedEnd = subspace.pack(endNonNull); tr.addReadConflictRange(frobnicatedBegin, frobnicatedEnd); // Look to see what is already in this database to see what of this range is already present. // Note: the two range reads are done in parallel, which essentially means we get the before read // "for free". byte[] keyAfterBegin = keyAfter(frobnicatedBegin); ReadTransaction snapshot = tr.snapshot(); AsyncIterator beforeIterator = snapshot.getRange(subspace.range().begin, keyAfterBegin, 1, true).iterator(); AsyncIterator afterIterator = snapshot.getRange(keyAfterBegin, frobnicatedEnd, (requireEmpty ? 1 : ReadTransaction.ROW_LIMIT_UNLIMITED), false).iterator(); return beforeIterator.onHasNext().thenCompose(hasBefore -> { AtomicReference lastSeen = new AtomicReference<>(frobnicatedBegin); KeyValue before = hasBefore ? beforeIterator.next() : null; // If the before key is in some range, we don't have to update from before to the // end of that range. 
if (hasBefore) { byte[] beforeEnd = before.getValue(); if (ByteArrayUtil.compareUnsigned(beginNonNull, beforeEnd) < 0) { if (requireEmpty) { return AsyncUtil.READY_FALSE; } else { lastSeen.set(subspace.pack(beforeEnd)); } } } if (requireEmpty) { // If we will only add on the empty case, then the after iterator has to be empty. return afterIterator.onHasNext().thenApply(hasNext -> { if (hasNext) { return false; } else { if (before != null && ByteArrayUtil.compareUnsigned(beginNonNull, before.getValue()) == 0) { // This consolidation is done to make the simple case of a single writer // going forward more space compact. tr.addReadConflictKey(before.getKey()); tr.set(before.getKey(), endNonNull); } else { tr.set(frobnicatedBegin, endNonNull); } tr.addWriteConflictRange(frobnicatedBegin, frobnicatedEnd); return true; } }); } else { AtomicBoolean changed = new AtomicBoolean(false); // If we are allowing non-empty ranges, then we just need to fill in the gaps. return AsyncUtil.whileTrue(() -> { byte[] lastSeenBytes = lastSeen.get(); if (MoreAsyncUtil.isCompletedNormally(afterIterator.onHasNext()) && afterIterator.hasNext()) { KeyValue kv = afterIterator.next(); if (ByteArrayUtil.compareUnsigned(lastSeenBytes, kv.getKey()) < 0) { tr.set(lastSeenBytes, subspace.unpack(kv.getKey()).getBytes(0)); tr.addWriteConflictRange(lastSeenBytes, kv.getKey()); changed.set(true); } lastSeen.set(subspace.pack(kv.getValue())); } return afterIterator.onHasNext(); }, tc.getExecutor()).thenApply(vignore -> { byte[] lastSeenBytes = lastSeen.get(); // Get from lastSeen to the end (the last gap). if (ByteArrayUtil.compareUnsigned(lastSeenBytes, frobnicatedEnd) < 0) { tr.set(lastSeenBytes, endNonNull); tr.addWriteConflictRange(lastSeenBytes, frobnicatedEnd); changed.set(true); } return changed.get(); }); } }); }); } /** * Returns all of the ranges that are missing within this set as list. 
See the three-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[]) RangeSet.missingRanges} * for more details, but this will look from the beginning of the valid keys within this set to * the end and find any gaps between ranges that need to be filled. * * @param tc transaction that will be used to access the database * @return an iterable that will produce all of the missing ranges */ @Nonnull public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc) { return tc.readAsync(tr -> { AsyncIterable ranges = missingRanges(tr); return ranges.asList(); }); } /** * Returns all of the ranges that are missing within this set. See the three-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[]) RangeSet.missingRanges} * for more details, but this will look from the beginning of the valid keys within this set to * the end and find any gaps between ranges that need to be filled. * * @param tr transaction that will be used to access the database * @return an iterable that will produce all of the missing ranges */ @Nonnull public AsyncIterable missingRanges(@Nonnull ReadTransaction tr) { return missingRanges(tr, null, null); } /** * Returns all of the ranges that are missing within a given range as a list. See the four-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges} * for more details, but this will look for ranges that aren't already within the set. 
* * @param tc transaction that will be used to access the database * @param superRange the range within to search for additional ranges * @return an iterable that will produce all of the missing ranges */ @Nonnull public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nonnull Range superRange) { return tc.readAsync(tr -> { AsyncIterable ranges = missingRanges(tr, superRange); return ranges.asList(); }); } /** * Returns all of the ranges that are missing within a given range. See the four-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges} * for more details, but this will look for ranges that aren't already within the set. * * @param tr transaction that will be used to access the database * @param superRange the range within to search for additional ranges * @return an iterable that will produce all of the missing ranges */ @Nonnull public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nonnull Range superRange) { return missingRanges(tr, superRange.begin, superRange.end); } /** * Returns all of the ranges that are missing within a given set of bounds as a list. See the four-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges} * for more details, but this will look for ranges that aren't already within the set. * * @param tc transaction that will be used to access the database * @param begin the beginning (inclusive) of the range to look for gaps * @param end the end (inclusive) of the range to look for gaps * @return an iterable that will produce all of the missing ranges */ @Nonnull public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end) { return tc.readAsync(tr -> { AsyncIterable ranges = missingRanges(tr, begin, end); return ranges.asList(); }); } /** * Returns all of the ranges that are missing within a given set of bounds as a list. 
See the four-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges} * for more details, but this will look for ranges that aren't already within the set. It will not * limit the number of results that it will return. * * @param tr transaction that will be used to access the database * @param begin the beginning (inclusive) of the range to look for gaps * @param end the end (inclusive) of the range to look for gaps * @return an iterable that will produce all of the missing ranges */ @Nonnull public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nullable byte[] begin, @Nullable byte[] end) { return missingRanges(tr, begin, end, Integer.MAX_VALUE); } /** * Returns all of the ranges that are missing within a given set of bounds as a list. See the four-parameter * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges} * for more details, but this will look for ranges that aren't already within the set. It will not * limit the number of results that it will return. * * @param tc transaction that will be used to access the database * @param begin the beginning (inclusive) of the range to look for gaps * @param end the end (inclusive) of the range to look for gaps * @param limit the maximum number of results to return * @return an iterable that will produce all of the missing ranges */ @Nonnull public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end, int limit) { return tc.readAsync(tr -> { AsyncIterable ranges = missingRanges(tr, begin, end, limit); return ranges.asList(); }); } /** * Returns all of the ranges that are missing within a given set of bounds. In particular, this will look * for "gaps" in the key-value pairs between begin (inclusive) and end (exclusive) so that at the end, we * know what is missing. 
This takes in a read transaction (which could, theoretically, be a snapshot read * if we so desired). If this transaction is committed before the iterator is cancelled or completes, * this can cause problems. * * @param tr transaction that will be used to access the database * @param begin the beginning (inclusive) of the range to look for gaps * @param end the end (inclusive) of the range to look for gaps * @param limit the maximum number of results to return * @return an iterable that will produce all of the missing ranges */ @Nonnull public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nullable byte[] begin, @Nullable byte[] end, int limit) { byte[] beginNonNull = (begin == null) ? FIRST_KEY : begin; byte[] endNonNull = (end == null) ? FINAL_KEY : end; checkKey(beginNonNull); checkRange(beginNonNull, endNonNull); // Return an AsyncIterable with the pertinent information. return new AsyncIterable() { @Override public AsyncIterator iterator() { return new MissingRangeIterator(tr, beginNonNull, endNonNull, limit); } @Override public CompletableFuture> asList() { return AsyncUtil.collect(this); } }; } // Iterator that computes the missing ranges. It will go through and find gaps within the // range. It will stop after the limit has been acheived unless the limit is set // to UNLIMITED. 
private class MissingRangeIterator implements CloseableAsyncIterator { @Nonnull private final byte[] endNonNull; @Nonnull private AsyncIterator before; @Nonnull private AsyncIterator after; @Nonnull private byte[] currBegin; @Nullable private Range next; private boolean found; private int limit; private int numFound; private final Executor executor; @Nonnull private CompletableFuture nextFuture; public MissingRangeIterator(@Nonnull ReadTransaction tr, @Nonnull byte[] beginNonNull, @Nonnull byte[] endNonNull, int limit) { this.endNonNull = endNonNull; this.numFound = 0; this.limit = limit; byte[] frobnicatedBegin = subspace.pack(beginNonNull); byte[] frobnicatedEnd = subspace.pack(endNonNull); before = tr.getRange(subspace.range().begin, keyAfter(frobnicatedBegin), 1, true).iterator(); after = tr.getRange(keyAfter(frobnicatedBegin), frobnicatedEnd).iterator(); next = null; currBegin = beginNonNull; found = false; executor = tr.getExecutor(); nextFuture = before.onHasNext().thenAccept(hasBefore -> { if (hasBefore) { byte[] lastEnd = before.next().getValue(); //subspace.unpack(before.next().getValue()).getBytes(0); if (ByteArrayUtil.compareUnsigned(beginNonNull, lastEnd) < 0) { currBegin = lastEnd; } } }).thenCompose(vignore -> getNext()); } private CompletableFuture getNext() { return AsyncUtil.whileTrue(() -> { if (MoreAsyncUtil.isCompletedNormally(after.onHasNext())) { if (after.hasNext()) { KeyValue kv = after.next(); byte[] currBeg = currBegin; byte[] nextBeg = subspace.unpack(kv.getKey()).getBytes(0); if (ByteArrayUtil.compareUnsigned(currBeg, nextBeg) < 0) { next = new Range(currBeg, nextBeg); found = true; } currBegin = kv.getValue(); if (found) { return AsyncUtil.READY_FALSE; // stop looping. 
} else { return after.onHasNext(); } } else { return AsyncUtil.READY_FALSE; } } else { return after.onHasNext(); } }, executor).thenApply(vignore -> { if (found) { numFound += 1; return true; } else { if (ByteArrayUtil.compareUnsigned(currBegin, endNonNull) < 0) { next = new Range(currBegin, endNonNull); currBegin = endNonNull; return true; } else { close(); return false; } } }); } @Override public CompletableFuture onHasNext() { return nextFuture; } @Override public boolean hasNext() { return nextFuture.join(); } @Override public Range next() { if (!hasNext()) { throw new NoSuchElementException("Attempted to get next missing range when none were present"); } Range ret = next; found = false; if (limit == UNLIMITED || numFound < limit) { nextFuture = getNext(); } else { close(); nextFuture = AsyncUtil.READY_FALSE; } return ret; } @Override public void close() { MoreAsyncUtil.closeIterator(before); MoreAsyncUtil.closeIterator(after); } } /** * Clears the subspace used by this RangeSet instance. This will delete the records of any * data used by this set. * @param tc transaction or database in which to run operation * @return a future that is completed when the range has been cleared */ @Nonnull public CompletableFuture clear(@Nonnull TransactionContext tc) { return tc.runAsync(tr -> { tr.clear(subspace.range()); return AsyncUtil.DONE; }); } @Nonnull public CompletableFuture rep(@Nonnull ReadTransactionContext tc) { return tc.readAsync(tr -> { StringBuilder sb = new StringBuilder(); AsyncIterable iterable = tr.getRange(subspace.range()); return iterable.asList().thenApply((List list) -> { for (KeyValue kv : list) { byte[] key = subspace.unpack(kv.getKey()).getBytes(0); byte[] value = kv.getValue(); sb.append(ByteArrayUtil.printable(key)); sb.append(" -> "); sb.append(ByteArrayUtil.printable(value)); sb.append('\n'); } return sb.toString(); }); }); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy