com.apple.foundationdb.async.RangeSet Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of fdb-extensions Show documentation
Extensions to the FoundationDB Java API.
There is a newer version: 2.8.110.0
/*
 * RangeSet.java
 *
 * This source file is part of the FoundationDB open source project
 *
 * Copyright 2015-2018 Apple Inc. and the FoundationDB project authors
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.apple.foundationdb.async;

import com.apple.foundationdb.annotation.API;
import com.apple.foundationdb.KeyValue;
import com.apple.foundationdb.Range;
import com.apple.foundationdb.ReadTransaction;
import com.apple.foundationdb.ReadTransactionContext;
import com.apple.foundationdb.TransactionContext;
import com.apple.foundationdb.subspace.Subspace;
import com.apple.foundationdb.tuple.ByteArrayUtil;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.Executor;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;

/**
 * RangeSet supports efficient adding of ranges of keys into the database to support marking
 * work done elsewhere as completed as well as checking if specific keys are already completed.
 *
 * 
 * This is useful if one is going to be doing work that will carve out pieces from another
 * subspace and work on those separately. The methods in here will allow for a (more-or-less)
 * append only set that can be used to keep track of the progress that that job is making.
 * 
 */
@API(API.Status.MAINTAINED)
public class RangeSet {
    @Nonnull private Subspace subspace;
    @Nonnull private static final byte[] FIRST_KEY = new byte[]{(byte)0x00};
    @Nonnull private static final byte[] FINAL_KEY = new byte[]{(byte)0xff};

    /**
     * Value indicating that there should be no limit. This should
     * be passed to {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) missingRanges}
     * to indicate that the read should not limit the number of results it returns.
     */
    public static final int UNLIMITED = Integer.MAX_VALUE;

    /**
     * Creates a new RangeSet that will write its data to the given subspace provided.
     * The contents of this subspace should either be empty or contain the data
     * used by another RangeSet object.
     * @param subspace the subspace in which to write data
     */
    public RangeSet(@Nonnull Subspace subspace) {
        this.subspace = subspace;
    }

    private static void checkKey(@Nonnull byte[] key) {
        if (key.length == 0 || ByteArrayUtil.compareUnsigned(key, FINAL_KEY) >= 0) {
            // NOTE: Perhaps this should instead return a completable future completed in exceptional state...
            throw new IllegalArgumentException("Key " + ByteArrayUtil.printable(key) + " outside of accepted key range of [\\x00,\\xff)");
        }
    }

    private static void checkRange(@Nonnull byte[] begin, @Nonnull byte[] end) {
        if (ByteArrayUtil.compareUnsigned(begin, end) > 0) {
            throw new IllegalArgumentException("Inverted range; " + ByteArrayUtil.printable(begin) + " is greater than " + ByteArrayUtil.printable(end));
        }
    }

    // This returns the next possible key after another key (i.e., a key that is greater than current key but
    // every key greater than this key will be greater than or equal to the returned key).
    @Nonnull
    private byte[] keyAfter(@Nonnull byte[] key) {
        byte[] ret = new byte[key.length + 1];
        System.arraycopy(key, 0, ret, 0, key.length);
        ret[key.length] = (byte)0;
        return ret;
    }

    /**
     * Determines if a single key is contained within the range set. If it is, this will return
     * true, and if it is not, it will return false. In terms of isolation, this adds a read-
     * conflict to the key corresponding to the key being checked but to nothing else even
     * though it has to do a range read that might be larger. This means that updates to keys
     * before this won't conflict unless they actually change whether this key is contained within
     * the range set.
     * @param tc transaction or database in which to run operation
     * @param key the key to check presence in set
     * @return a future that contains whether some range in the set contains the key
     */
    @Nonnull
    public CompletableFuture contains(@Nonnull TransactionContext tc, @Nonnull byte[] key) {
        checkKey(key);
        return tc.runAsync(tr -> {
            // Add a read conflict to only the key being checked so that if this gets
            // overwritten somewhere else, this causes a conflict.
            byte[] frobnicated = subspace.pack(key);
            tr.addReadConflictKey(frobnicated);
            AsyncIterator iterator = tr.snapshot().getRange(subspace.range().begin, keyAfter(frobnicated), 1, true).iterator();
            return iterator.onHasNext().thenApply(hasNext -> {
                if (!hasNext) {
                    return false;
                } else {
                    byte[] endRange = iterator.next().getValue();
                    return ByteArrayUtil.compareUnsigned(key, endRange) < 0;
                }
            });
        });
    }

    /**
     * Inserts a range into the set. This behaves the same way as the four-parameter version of
     * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict
     * settings), but it gets its begin and end from the given {@link Range} object and assumes that
     * requiresEmpty is false, i.e., it is okay for there already to be data within the
     * given range.
     *
     * @param tc the transaction or database in which to operate
     * @param r the range to add to the set
     * @return a future that is true if there were any modifications to the database and false otherwise
     */
    @Nonnull
    public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nonnull Range r) {
        return insertRange(tc, r.begin, r.end);
    }

    /**
     * Inserts a range into the set. This behaves the same way as the four-parmater version of
     * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict
     * settings), but it gets its begin and end from the given {@link Range} object.
     *
     * @param tc the transaction or database in which to operate
     * @param r the range to add to the set
     * @param requireEmpty whether this should only be added if this range is initally empty
     * @return a future that is true if there were any modifications to the database and false otherwise
     */
    @Nonnull
    public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nonnull Range r, boolean requireEmpty) {
        return insertRange(tc, r.begin, r.end, requireEmpty);
    }

    /**
     * Inserts a range into the set. This behaves the same way as the four-parameter version of
     * {@link RangeSet#insertRange(TransactionContext, byte[], byte[], boolean) RangeSet.insertRange} (including conflict
     * settings), but it assumes that requiresEmpty is false, i.e., it is okay for
     * there already to be data within the given range.
     *
     * @param tc the transaction or database in which to operate
     * @param begin the (inclusive) beginning of the range to add
     * @param end the (exclusive) end of the range to add
     * @return a future that is true if there were any modifications to the database and false otherwise
     */
    @Nonnull
    public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end) {
        return insertRange(tc, begin, end, false);
    }

    /**
     * Inserts a range into the set. The range inserted will begin at begin (inclusive) and end at
     * end (exclusive). If the requireEmpty is set, then this will only actually change the
     * database in the case that the range being added is not yet included in the set. If this flag is set to
     * false, then this will "fill in the gaps" between ranges present so that the whole range is
     * present following this transactions operation. The return value will (when ready) be equal to true
     * if and only if there are changes (i.e., writes) to the database that need to be made, i.e., the range was not
     * already included in the set. If the initial end point is less than the begin point, then this will
     * throw an {@link IllegalArgumentException} indicating that one has passed an inverted range. If begin
     * and end are equal, then this will immediately return a future that is set to false
     * (corresponding to adding an empty range). If null is set for either endpoint, this will insert
     * a range all the way to the end of the total range.
     *
     * 
     * In terms of isolation, this method will add both read- and write-conflict ranges. It adds a read-conflict range
     * corresponding to the range being added, i.e., for the keys within the range from begin to end.
     * This is so that if this range is modified concurrently by another writer, this transaction will fail (as the exact
     * writes done depend on these keys not being modified.) It will also a write-conflict ranges corresponding
     * to all of the individual ranges added to the database. That means that if the range is initially empty,
     * a write-conflict range corresponding to the keys from begin to end. This is done
     * so that if another transaction checks to see if a key in the range we are writing is within the range set
     * and finds that it is not, this write will then cause that transaction to fail if it is committed after this
     * one. If the range is not empty initially, write conflict ranges are added for all of the "gaps" that have
     * to be added. (So, if the range is already full, then no write conflict ranges are added at all.)
     * 
     *
     * @param tc the transaction or database in which to operate
     * @param begin the (inclusive) beginning of the range to add
     * @param end the (exclusive) end of the range to add
     * @param requireEmpty whether this should only be added if this range is initially empty
     * @return a future that is true if there were any modifications to the database and false otherwise
     */
    @Nonnull
    public CompletableFuture insertRange(@Nonnull TransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end, boolean requireEmpty) {
        byte[] beginNonNull = (begin == null) ? FIRST_KEY : begin;
        byte[] endNonNull = (end == null) ? FINAL_KEY : end;
        checkKey(beginNonNull);
        checkRange(beginNonNull, endNonNull);

        if (ByteArrayUtil.compareUnsigned(beginNonNull, endNonNull) == 0) {
            return AsyncUtil.READY_FALSE;
        }

        return tc.runAsync(tr -> {
            // Add a read range for the keys corresponding to the bounds of this range.
            byte[] frobnicatedBegin = subspace.pack(beginNonNull);
            byte[] frobnicatedEnd = subspace.pack(endNonNull);
            tr.addReadConflictRange(frobnicatedBegin, frobnicatedEnd);

            // Look to see what is already in this database to see what of this range is already present.
            // Note: the two range reads are done in parallel, which essentially means we get the before read
            // "for free".
            byte[] keyAfterBegin = keyAfter(frobnicatedBegin);
            ReadTransaction snapshot = tr.snapshot();
            AsyncIterator beforeIterator = snapshot.getRange(subspace.range().begin, keyAfterBegin, 1, true).iterator();
            AsyncIterator afterIterator = snapshot.getRange(keyAfterBegin, frobnicatedEnd,
                    (requireEmpty ? 1 : ReadTransaction.ROW_LIMIT_UNLIMITED), false).iterator();

            return beforeIterator.onHasNext().thenCompose(hasBefore -> {
                AtomicReference lastSeen = new AtomicReference<>(frobnicatedBegin);
                KeyValue before = hasBefore ? beforeIterator.next() : null;

                // If the before key is in some range, we don't have to update from before to the
                // end of that range.
                if (hasBefore) {
                    byte[] beforeEnd = before.getValue();
                    if (ByteArrayUtil.compareUnsigned(beginNonNull, beforeEnd) < 0) {
                        if (requireEmpty) {
                            return AsyncUtil.READY_FALSE;
                        } else {
                            lastSeen.set(subspace.pack(beforeEnd));
                        }
                    }
                }

                if (requireEmpty) {
                    // If we will only add on the empty case, then the after iterator has to be empty.
                    return afterIterator.onHasNext().thenApply(hasNext -> {
                        if (hasNext) {
                            return false;
                        } else {
                            if (before != null && ByteArrayUtil.compareUnsigned(beginNonNull, before.getValue()) == 0) {
                                // This consolidation is done to make the simple case of a single writer
                                // going forward more space compact.
                                tr.addReadConflictKey(before.getKey());
                                tr.set(before.getKey(), endNonNull);
                            } else {
                                tr.set(frobnicatedBegin, endNonNull);
                            }
                            tr.addWriteConflictRange(frobnicatedBegin, frobnicatedEnd);
                            return true;
                        }
                    });
                } else {
                    AtomicBoolean changed = new AtomicBoolean(false);
                    // If we are allowing non-empty ranges, then we just need to fill in the gaps.
                    return AsyncUtil.whileTrue(() -> {
                        byte[] lastSeenBytes = lastSeen.get();
                        if (MoreAsyncUtil.isCompletedNormally(afterIterator.onHasNext()) && afterIterator.hasNext()) {
                            KeyValue kv = afterIterator.next();
                            if (ByteArrayUtil.compareUnsigned(lastSeenBytes, kv.getKey()) < 0) {
                                tr.set(lastSeenBytes, subspace.unpack(kv.getKey()).getBytes(0));
                                tr.addWriteConflictRange(lastSeenBytes, kv.getKey());
                                changed.set(true);
                            }
                            lastSeen.set(subspace.pack(kv.getValue()));
                        }
                        return afterIterator.onHasNext();
                    }, tc.getExecutor()).thenApply(vignore -> {
                        byte[] lastSeenBytes = lastSeen.get();
                        // Get from lastSeen to the end (the last gap).
                        if (ByteArrayUtil.compareUnsigned(lastSeenBytes, frobnicatedEnd) < 0) {
                            tr.set(lastSeenBytes, endNonNull);
                            tr.addWriteConflictRange(lastSeenBytes, frobnicatedEnd);
                            changed.set(true);
                        }
                        return changed.get();
                    });
                }
            });
        });
    }

    /**
     * Returns all of the ranges that are missing within this set as list. See the three-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[]) RangeSet.missingRanges}
     * for more details, but this will look from the beginning of the valid keys within this set to
     * the end and find any gaps between ranges that need to be filled.
     *
     * @param tc transaction that will be used to access the database
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc) {
        return tc.readAsync(tr -> {
            AsyncIterable ranges = missingRanges(tr);
            return ranges.asList();
        });
    }

    /**
     * Returns all of the ranges that are missing within this set. See the three-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[]) RangeSet.missingRanges}
     * for more details, but this will look from the beginning of the valid keys within this set to
     * the end and find any gaps between ranges that need to be filled.
     *
     * @param tr transaction that will be used to access the database
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public AsyncIterable missingRanges(@Nonnull ReadTransaction tr) {
        return missingRanges(tr, null, null);
    }

    /**
     * Returns all of the ranges that are missing within a given range as a list. See the four-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges}
     * for more details, but this will look for ranges that aren't already within the set.
     *
     * @param tc transaction that will be used to access the database
     * @param superRange the range within to search for additional ranges
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nonnull Range superRange) {
        return tc.readAsync(tr -> {
            AsyncIterable ranges = missingRanges(tr, superRange);
            return ranges.asList();
        });
    }

    /**
     * Returns all of the ranges that are missing within a given range. See the four-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges}
     * for more details, but this will look for ranges that aren't already within the set.
     *
     * @param tr transaction that will be used to access the database
     * @param superRange the range within to search for additional ranges
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nonnull Range superRange) {
        return missingRanges(tr, superRange.begin, superRange.end);
    }

    /**
     * Returns all of the ranges that are missing within a given set of bounds as a list. See the four-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges}
     * for more details, but this will look for ranges that aren't already within the set.
     *
     * @param tc transaction that will be used to access the database
     * @param begin the beginning (inclusive) of the range to look for gaps
     * @param end the end (inclusive) of the range to look for gaps
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end) {
        return tc.readAsync(tr -> {
            AsyncIterable ranges = missingRanges(tr, begin, end);
            return ranges.asList();
        });
    }

    /**
     * Returns all of the ranges that are missing within a given set of bounds as a list. See the four-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges}
     * for more details, but this will look for ranges that aren't already within the set. It will not
     * limit the number of results that it will return.
     *
     * @param tr transaction that will be used to access the database
     * @param begin the beginning (inclusive) of the range to look for gaps
     * @param end the end (inclusive) of the range to look for gaps
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nullable byte[] begin, @Nullable byte[] end) {
        return missingRanges(tr, begin, end, Integer.MAX_VALUE);
    }

    /**
     * Returns all of the ranges that are missing within a given set of bounds as a list. See the four-parameter
     * version of {@link RangeSet#missingRanges(ReadTransaction, byte[], byte[], int) RangeSet.missingRanges}
     * for more details, but this will look for ranges that aren't already within the set. It will not
     * limit the number of results that it will return.
     *
     * @param tc transaction that will be used to access the database
     * @param begin the beginning (inclusive) of the range to look for gaps
     * @param end the end (inclusive) of the range to look for gaps
     * @param limit the maximum number of results to return
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public CompletableFuture> missingRanges(@Nonnull ReadTransactionContext tc, @Nullable byte[] begin, @Nullable byte[] end, int limit) {
        return tc.readAsync(tr -> {
            AsyncIterable ranges = missingRanges(tr, begin, end, limit);
            return ranges.asList();
        });
    }

    /**
     * Returns all of the ranges that are missing within a given set of bounds. In particular, this will look
     * for "gaps" in the key-value pairs between begin (inclusive) and end (exclusive) so that at the end, we
     * know what is missing. This takes in a read transaction (which could, theoretically, be a snapshot read
     * if we so desired). If this transaction is committed before the iterator is cancelled or completes,
     * this can cause problems.
     *
     * @param tr transaction that will be used to access the database
     * @param begin the beginning (inclusive) of the range to look for gaps
     * @param end the end (inclusive) of the range to look for gaps
     * @param limit the maximum number of results to return
     * @return an iterable that will produce all of the missing ranges
     */
    @Nonnull
    public AsyncIterable missingRanges(@Nonnull ReadTransaction tr, @Nullable byte[] begin, @Nullable byte[] end, int limit) {
        byte[] beginNonNull = (begin == null) ? FIRST_KEY : begin;
        byte[] endNonNull = (end == null) ? FINAL_KEY : end;
        checkKey(beginNonNull);
        checkRange(beginNonNull, endNonNull);

        // Return an AsyncIterable with the pertinent information.
        return new AsyncIterable() {
            @Override
            public AsyncIterator iterator() {
                return new MissingRangeIterator(tr, beginNonNull, endNonNull, limit);
            }

            @Override
            public CompletableFuture> asList() {
                return AsyncUtil.collect(this);
            }
        };
    }

    // Iterator that computes the missing ranges. It will go through and find gaps within the
    // range. It will stop after the limit has been acheived unless the limit is set
    // to UNLIMITED.
    private class MissingRangeIterator implements CloseableAsyncIterator {
        @Nonnull private final byte[] endNonNull;
        @Nonnull private AsyncIterator before;
        @Nonnull private AsyncIterator after;

        @Nonnull private byte[] currBegin;
        @Nullable private Range next;
        private boolean found;
        private int limit;
        private int numFound;
        private final Executor executor;
        @Nonnull private CompletableFuture nextFuture;

        public MissingRangeIterator(@Nonnull ReadTransaction tr, @Nonnull byte[] beginNonNull, @Nonnull byte[] endNonNull, int limit) {
            this.endNonNull = endNonNull;
            this.numFound = 0;
            this.limit = limit;

            byte[] frobnicatedBegin = subspace.pack(beginNonNull);
            byte[] frobnicatedEnd = subspace.pack(endNonNull);
            before = tr.getRange(subspace.range().begin, keyAfter(frobnicatedBegin), 1, true).iterator();
            after = tr.getRange(keyAfter(frobnicatedBegin), frobnicatedEnd).iterator();
            next = null;
            currBegin = beginNonNull;
            found = false;

            executor = tr.getExecutor();
            nextFuture = before.onHasNext().thenAccept(hasBefore -> {
                if (hasBefore) {
                    byte[] lastEnd = before.next().getValue(); //subspace.unpack(before.next().getValue()).getBytes(0);
                    if (ByteArrayUtil.compareUnsigned(beginNonNull, lastEnd) < 0) {
                        currBegin = lastEnd;
                    }
                }
            }).thenCompose(vignore -> getNext());
        }

        private CompletableFuture getNext() {
            return AsyncUtil.whileTrue(() -> {
                if (MoreAsyncUtil.isCompletedNormally(after.onHasNext())) {
                    if (after.hasNext()) {
                        KeyValue kv = after.next();
                        byte[] currBeg = currBegin;
                        byte[] nextBeg = subspace.unpack(kv.getKey()).getBytes(0);
                        if (ByteArrayUtil.compareUnsigned(currBeg, nextBeg) < 0) {
                            next = new Range(currBeg, nextBeg);
                            found = true;
                        }
                        currBegin = kv.getValue();
                        if (found) {
                            return AsyncUtil.READY_FALSE; // stop looping.
                        } else {
                            return after.onHasNext();
                        }
                    } else {
                        return AsyncUtil.READY_FALSE;
                    }
                } else {
                    return after.onHasNext();
                }
            }, executor).thenApply(vignore -> {
                if (found) {
                    numFound += 1;
                    return true;
                } else {
                    if (ByteArrayUtil.compareUnsigned(currBegin, endNonNull) < 0) {
                        next = new Range(currBegin, endNonNull);
                        currBegin = endNonNull;
                        return true;
                    } else {
                        close();
                        return false;
                    }
                }
            });
        }

        @Override
        public CompletableFuture onHasNext() {
            return nextFuture;
        }

        @Override
        public boolean hasNext() {
            return nextFuture.join();
        }

        @Override
        public Range next() {
            if (!hasNext()) {
                throw new NoSuchElementException("Attempted to get next missing range when none were present");
            }
            Range ret = next;
            found = false;
            if (limit == UNLIMITED || numFound < limit) {
                nextFuture = getNext();
            } else {
                close();
                nextFuture = AsyncUtil.READY_FALSE;
            }
            return ret;
        }

        @Override
        public void close() {
            MoreAsyncUtil.closeIterator(before);
            MoreAsyncUtil.closeIterator(after);
        }
    }

    /**
     * Clears the subspace used by this RangeSet instance. This will delete the records of any
     * data used by this set.
     * @param tc transaction or database in which to run operation
     * @return a future that is completed when the range has been cleared
     */
    @Nonnull
    public CompletableFuture clear(@Nonnull TransactionContext tc) {
        return tc.runAsync(tr -> {
            tr.clear(subspace.range());
            return AsyncUtil.DONE;
        });
    }

    @Nonnull
    public CompletableFuture rep(@Nonnull ReadTransactionContext tc) {
        return tc.readAsync(tr -> {
            StringBuilder sb = new StringBuilder();
            AsyncIterable iterable = tr.getRange(subspace.range());
            return iterable.asList().thenApply((List list) -> {
                for (KeyValue kv : list) {
                    byte[] key = subspace.unpack(kv.getKey()).getBytes(0);
                    byte[] value = kv.getValue();
                    sb.append(ByteArrayUtil.printable(key));
                    sb.append(" -> ");
                    sb.append(ByteArrayUtil.printable(value));
                    sb.append('\n');
                }
                return sb.toString();
            });
        });
    }

}