All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.documentapi.VisitorIterator Maven / Gradle / Ivy

There is a newer version: 8.441.21
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.documentapi;

import com.yahoo.document.BucketId;
import com.yahoo.document.BucketIdFactory;
import com.yahoo.document.select.BucketSelector;
import com.yahoo.document.select.parser.ParseException;
import java.util.logging.Level;

import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.logging.Logger;

/**
 * 

Enables transparent iteration of super/sub-buckets

* *

Thread safety: safe for threads to hold their own iterators (no shared state), * as long as they also hold the ProgressToken object associated with it. No two * VisitorIterator instances may share the same progress token instance at the * same time. * Concurrent access to a single VisitorIterator instance is not safe and must * be handled atomically by the caller.

* * @author vekterli */ public class VisitorIterator { private final ProgressToken progressToken; private final BucketSource bucketSource; private int distributionBitCount; private static final Logger log = Logger.getLogger(VisitorIterator.class.getName()); public static class BucketProgress { private BucketId superbucket; private BucketId progress; public BucketProgress(BucketId superbucket, BucketId progress) { this.superbucket = superbucket; this.progress = progress; } public BucketId getProgress() { return progress; } public BucketId getSuperbucket() { return superbucket; } } /** * Provides an abstract interface to VisitorIterator for * how pending buckets are acquired, decoupling this from the iteration * itself. * * Important: it is the responsibility of the {@link BucketSource} implementation * to ensure that progress information is honored for (partially) finished buckets. * From the point of view of the iterator itself, it should not have to deal with * filtering away already finished buckets, as this is a detail best left to * bucket sources. */ protected static interface BucketSource { public boolean hasNext(); public boolean shouldYield(); public boolean visitsAllBuckets(); public BucketProgress getNext(); public long getTotalBucketCount(); public int getDistributionBitCount(); public void setDistributionBitCount(int distributionBitCount, ProgressToken progress); public void update(BucketId superbucket, BucketId progress, ProgressToken token); } /** * Provides a bucket source that encompasses the entire range available * through a given value of distribution bits */ protected static class DistributionRangeBucketSource implements BucketSource { private boolean flushActive = false; private int distributionBitCount; private long totalBucketsSplit; private long totalBucketsMerged; private final int slices; private final int sliceId; // Wouldn't need this if this were a non-static class, but do it for // the sake of keeping things identical in Java and C++ private ProgressToken progressToken; public DistributionRangeBucketSource(int distributionBitCount, ProgressToken progress, int slices, int sliceId) { if (slices < 1) { throw new IllegalArgumentException("slices must be positive, but was " + slices); } if (sliceId < 0 || sliceId >= slices) { throw new IllegalArgumentException("sliceId must be in [0, " + slices + "), but was " + sliceId); } this.slices = slices; this.sliceId = sliceId; this.totalBucketsSplit = 0; this.totalBucketsMerged = 0; this.progressToken = progress; // New progress token (could also be empty, in which this is a // no-op anyway) if (progressToken.getTotalBucketCount() == 0) { assert(progressToken.isEmpty()) : "inconsistent progress state"; progressToken.setTotalBucketCount(1L << distributionBitCount); progressToken.setDistributionBitCount(distributionBitCount); progressToken.setBucketCursor(0); progressToken.setFinishedBucketCount(0); this.distributionBitCount = distributionBitCount; } else { this.distributionBitCount = progressToken.getDistributionBitCount(); // Quick consistency check to ensure the user isn't trying to eg. // pass a progress token for an explicit document selection if (progressToken.getTotalBucketCount() != (1L << progressToken.getDistributionBitCount())) { throw new IllegalArgumentException("Total bucket count in existing progress is not " + "consistent with that of the current document selection"); } } if (!progress.isFinished()) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Importing unfinished progress token with " + "bits: " + progressToken.getDistributionBitCount() + ", active: " + progressToken.getActiveBucketCount() + ", pending: " + progressToken.getPendingBucketCount() + ", cursor: " + progressToken.getBucketCursor() + ", finished: " + progressToken.getFinishedBucketCount() + ", total: " + progressToken.getTotalBucketCount()); } if (!progress.isEmpty()) { // Lower all active to pending if (progressToken.getActiveBucketCount() > 0) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Progress token had active buckets upon range " + "construction. Setting these as pending"); } progressToken.setAllBucketsToState(ProgressToken.BucketState.BUCKET_PENDING); } // Fixup for any buckets that were active when progress was written // but are now pending and with wrong dist bits (used-bits). Buckets // split here may very well be split/merged again if we set a new dist // bit count, but that is the desired process correctInconsistentPending(progressToken.getDistributionBitCount()); // Fixup for bucket cursor in case of bucket space downscaling correctTruncatedBucketCursor(); if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Partial bucket space progress; continuing "+ "from position " + progressToken.getBucketCursor()); } } progressToken.setFinishedBucketCount(progressToken.getBucketCursor() - progressToken.getPendingBucketCount()); } else { assert(progressToken.getBucketCursor() == progressToken.getTotalBucketCount()); } // Should be all fixed up and good to go progressToken.setInconsistentState(false); skipToSlice(); } protected boolean isLosslessResetPossible() { // #pending must be equal to cursor, i.e. all buckets ever fetched // must be located in the set of pending if (progressToken.getPendingBucketCount() != progressToken.getBucketCursor()) { return false; } // Check if all pending buckets have a progress of 0 for (Map.Entry entry : progressToken.getBuckets().entrySet()) { if (entry.getValue().getState() != ProgressToken.BucketState.BUCKET_PENDING) { return false; } if (entry.getValue().getProgress().getId() != 0) { return false; } } return true; } /** * Ensure that a given ProgressToken instance only has * buckets pending that have a used-bits count of that of the * targetDistCits. This is done by splitting or merging * all inconsistent buckets until the desired state is reached. * * Time complexity is approx O(4bn) where b is the maximum * delta of bits to change anywhere in the set of pending and n * is the number of pending. This includes the time spent making shallow * map copies. * * @param targetDistBits The desired distribution bit count of the buckets */ private void correctInconsistentPending(int targetDistBits) { boolean maybeInconsistent = true; long bucketsSplit = 0, bucketsMerged = 0; long pendingBefore = progressToken.getPendingBucketCount(); ProgressToken p = progressToken; // Optimization: before doing any splitting/merging at all, we check // to see if we can't simply just reset the entire internal state // with the new distribution bit count. This ensures that if we go // from eg. 1 bit to 20 bits, we won't have to perform a grueling // half a million splits to cover the same bucket space as that 1 // single-bit bucket once did if (isLosslessResetPossible()) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "At start of bucket space and all " + "buckets have no progress; doing a lossless reset " + "instead of splitting/merging"); } assert(p.getActiveBucketCount() == 0); p.clearAllBuckets(); p.setBucketCursor(0); return; } while (maybeInconsistent) { BucketId lastMergedBucket = null; maybeInconsistent = false; // Make a shallow working copy of the bucket map. BucketKeyWrapper // keys are considered immutable, and should thus not be at risk // for being changed during the inner loop // Do separate passes for splitting and merging just to make // absolutely sure that the two ops won't step on each others' // toes. This isn't wildly efficient, but the data sets in question // are presumed to be low in size and this is presumed to be a very // infrequent operation TreeMap buckets = new TreeMap(p.getBuckets()); for (Map.Entry entry : buckets.entrySet()) { assert(entry.getValue().getState() == ProgressToken.BucketState.BUCKET_PENDING); BucketId pending = new BucketId(ProgressToken.keyToBucketId(entry.getKey().key())); if (pending.getUsedBits() < targetDistBits) { if (pending.getUsedBits() + 1 < targetDistBits) { maybeInconsistent = true; // Do another pass } p.splitPendingBucket(pending); ++bucketsSplit; } } // Make new map copy with potentially split buckets buckets = new TreeMap(p.getBuckets()); for (Map.Entry entry : buckets.entrySet()) { assert(entry.getValue().getState() == ProgressToken.BucketState.BUCKET_PENDING); BucketId pending = new BucketId(ProgressToken.keyToBucketId(entry.getKey().key())); if (pending.getUsedBits() > targetDistBits) { // If this is the right sibling of an already merged left sibling, // it's already been merged away, so we should skip it if (lastMergedBucket != null) { BucketId rightCheck = new BucketId(lastMergedBucket.getUsedBits(), lastMergedBucket.getId() | (1L << (lastMergedBucket.getUsedBits() - 1))); if (pending.equals(rightCheck)) { if (log.isLoggable(Level.FINEST)) { log.log(Level.FINEST, "Skipped " + pending + ", as it was right sibling of " + lastMergedBucket); } continue; } } if (pending.getUsedBits() - 1 > targetDistBits) { maybeInconsistent = true; // Do another pass } p.mergePendingBucket(pending); ++bucketsMerged; lastMergedBucket = pending; } } } if ((bucketsSplit > 0 || bucketsMerged > 0) && log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Existing progress' pending buckets had inconsistent " + "distribution bits; performed " + bucketsSplit + " split ops and " + bucketsMerged + " merge ops. Pending: " + pendingBefore + " -> " + p.getPendingBucketCount()); } totalBucketsSplit += bucketsSplit; totalBucketsMerged += bucketsMerged; } private void correctTruncatedBucketCursor() { // We've truncated the bucket cursor, but in doing so we might // have moved back beyond where there are pending buckets. Consider // having a cursor value of 3 at 31 bits and then moving to 11 bits. // With 1 pending we'll normally reach a cursor of 0, even though it // should be 1 for (ProgressToken.BucketKeyWrapper bucketKey : progressToken.getBuckets().keySet()) { BucketId bid = bucketKey.toBucketId(); long idx = bucketKey.key() >>> (64 - bid.getUsedBits()); if (bid.getUsedBits() == distributionBitCount && idx >= progressToken.getBucketCursor()) { progressToken.setBucketCursor(idx + 1); } } if (log.isLoggable(Level.FINEST)) { log.log(Level.FINEST, "New range bucket cursor is " + progressToken.getBucketCursor()); } } public boolean hasNext() { // There is a next bucket iff. there is a bucket no earlier than the cursor which // is contained in the bucket space, and is also 0 modulo our sliceId; or if we're // not yet properly initialised, with a real distribution bit count, we ignore this. long nextBucket = progressToken.getBucketCursor(); if (distributionBitCount != 1) { nextBucket += Math.floorMod(sliceId - nextBucket, slices); } return nextBucket < (1L << distributionBitCount); } public boolean shouldYield() { // If we need to flush all active buckets, stall the iteration until // this has been done return flushActive; } public boolean visitsAllBuckets() { return true; } public long getTotalBucketCount() { return 1L << distributionBitCount; } public BucketProgress getNext() { assert(hasNext()) : "getNext() called with hasNext() == false"; // Create the progress to return for creating visitors, and advance bucket cursor. BucketProgress progress = new BucketProgress(progressToken.getCurrentBucketId(), new BucketId()); progressToken.setBucketCursor(progressToken.getBucketCursor() + 1); // Skip ahead to our next next slice, to ensure we also exhaust the bucket space when // hasNext() turns false, but there are still super buckets left after the current. skipToSlice(); return progress; } // Advances the wrapped progress token's bucket cursor to our next slice, marking any skipped // buckets as complete, but only if we've been initialised with a proper distribution bit count. private void skipToSlice() { if (distributionBitCount == 1) return; while (progressToken.getBucketCursor() < getTotalBucketCount() && (progressToken.getBucketCursor() % slices) != sliceId) { progressToken.skipCurrentBucket(); } } public int getDistributionBitCount() { return distributionBitCount; } public void setDistributionBitCount(int distributionBitCount, ProgressToken progress) { this.distributionBitCount = distributionBitCount; // There might be a case where we're waiting for active buckets // already when a new distribution bit change comes in. If so, // don't do anything at all yet with the set of pending if (progressToken.getActiveBucketCount() > 0) { flushActive = true; if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Holding off new/pending buckets and consistency " + "correction until all " + progress.getActiveBucketCount() + " active buckets have been updated"); } progressToken.setInconsistentState(true); } else { // Only perform the actual distribution bit bucket ops if we've // got no pending buckets int delta = distributionBitCount - progressToken.getDistributionBitCount(); // Must do this before setting the bucket cursor to allow // reset-checking to be performed correctInconsistentPending(distributionBitCount); if (delta > 0) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Increasing distribution bits for full bucket " + "space range source from " + progressToken.getDistributionBitCount() + " to " + distributionBitCount); } progressToken.setFinishedBucketCount(progressToken.getFinishedBucketCount() << delta); // By n-doubling the position, the bucket key ordering ensures // we go from eg. 3:0x02 to 4:0x02 to 5:02 etc. progressToken.setBucketCursor(progressToken.getBucketCursor() << delta); } else if (delta < 0) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Decreasing distribution bits for full bucket " + "space range source from " + progressToken.getDistributionBitCount() + " to " + distributionBitCount + " bits"); } // Scale down bucket space and cursor progressToken.setBucketCursor(progressToken.getBucketCursor() >>> -delta); progressToken.setFinishedBucketCount(progressToken.getFinishedBucketCount() >>> -delta); } progressToken.setTotalBucketCount(1L << distributionBitCount); progressToken.setDistributionBitCount(distributionBitCount); correctTruncatedBucketCursor(); progressToken.setInconsistentState(false); skipToSlice(); } } public void update(BucketId superbucket, BucketId progress, ProgressToken token) { progressToken.updateProgress(superbucket, progress); if (superbucket.getUsedBits() != distributionBitCount) { if (!progress.equals(ProgressToken.FINISHED_BUCKET)) { // We should now always flush active buckets before doing a // consistency fix. This simplifies things greatly assert(flushActive); if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Received non-finished bucket " + superbucket + " with wrong distribution bit count (" + superbucket.getUsedBits() + "). Waiting to correct " + "until all active are done"); } } else { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Received finished bucket " + superbucket + " with wrong distribution bit count (" + superbucket.getUsedBits() + "). Waiting to correct " + "until all active are done"); } } } if (progressToken.getActiveBucketCount() == 0) { if (flushActive) { if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "All active buckets flushed, " + "correcting progress token and continuing normal operation"); } // Trigger the actual bucket state change this time setDistributionBitCount(distributionBitCount, progressToken); assert(progressToken.getDistributionBitCount() == distributionBitCount); } flushActive = false; // Update #finished since we might have had inconsistent active // buckets that have prevent us from getting a correct value. At // this point, however, all pending buckets should presumably be // at the same, correct dist bit count, so we can safely compute // a new count // TODO: ensure this is consistent if (progressToken.getPendingBucketCount() <= progressToken.getBucketCursor()) { progressToken.setFinishedBucketCount(progressToken.getBucketCursor() - progressToken.getPendingBucketCount()); } } } } /** * Provides an explicit set of bucket IDs to iterate over. Will immediately * set these as pending in the {@link ProgressToken}, as it is presumed this set is * rather small. Changing the distribution bit count for this source is * effectively a no-op, as explicit bucket IDs should not be implicitly * changed. */ protected static class ExplicitBucketSource implements BucketSource { private int distributionBitCount; private long totalBucketCount = 0; public ExplicitBucketSource(Set superbuckets, int distributionBitCount, ProgressToken progress) { this.distributionBitCount = progress.getDistributionBitCount(); this.totalBucketCount = superbuckets.size(); // New progress token? if (progress.getTotalBucketCount() == 0) { progress.setTotalBucketCount(this.totalBucketCount); progress.setDistributionBitCount(distributionBitCount); this.distributionBitCount = distributionBitCount; } else { // Quick consistency check to ensure the user isn't trying to eg. // pass a progress token for another document selection if (progress.getTotalBucketCount() != totalBucketCount || (progress.getFinishedBucketCount() + progress.getPendingBucketCount() + progress.getActiveBucketCount() != totalBucketCount)) { throw new IllegalArgumentException("Total bucket count in existing progress is not " + "consistent with that of the current document selection"); } if (progress.getBucketCursor() != 0) { // Trying to use a range source progress file throw new IllegalArgumentException("Cannot use given progress file with the "+ "current document selection"); } this.distributionBitCount = progress.getDistributionBitCount(); } if (progress.isFinished() || !progress.isEmpty()) return; for (BucketId id : superbuckets) { // Add all superbuckets with zero sub-bucket progress and pending progress.addBucket(id, new BucketId(), ProgressToken.BucketState.BUCKET_PENDING); } } public boolean hasNext() { return false; } public boolean shouldYield() { return false; } public boolean visitsAllBuckets() { return false; } public long getTotalBucketCount() { return totalBucketCount; } // All explicit buckets should have been placed in the progress // token during construction, so this method should never be called public BucketProgress getNext() { throw new IllegalStateException("getNext() called on ExplicitBucketSource"); } public int getDistributionBitCount() { return distributionBitCount; } public void setDistributionBitCount(int distributionBitCount, ProgressToken progress) { // Setting distribution bits for explicit bucket source is essentially // a no-op, since its buckets already are fixed at 32 used bits. progress.setDistributionBitCount(distributionBitCount); this.distributionBitCount = distributionBitCount; if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Set distribution bit count to " + distributionBitCount + " for explicit bucket source (no-op)"); } } public void update(BucketId superbucket, BucketId progress, ProgressToken token) { // Simply delegate to ProgressToken, as it maintains all progress state token.updateProgress(superbucket, progress); } } /** * @param bucketSource An instance of {@link BucketSource}, providing the working set for * the iterator * @param progressToken A {@link ProgressToken} instance, allowing the progress of * finished or partially finished buckets to be tracked * * @see BucketSource * @see ProgressToken */ private VisitorIterator(ProgressToken progressToken, BucketSource bucketSource) { assert(progressToken.getDistributionBitCount() == bucketSource.getDistributionBitCount()) : "inconsistent distribution bit counts"; this.distributionBitCount = progressToken.getDistributionBitCount(); this.progressToken = progressToken; this.bucketSource = bucketSource; } /** * @return The pair [superbucket, progress] that specifies the next iterable * bucket. When a superbucket is initially returned, the pair is equal to * that of [superbucket, 0], as there has been no progress into its sub-buckets * yet (if they exist). * * Precondition: hasNext() == true */ public BucketProgress getNext() { assert(progressToken.getDistributionBitCount() == bucketSource.getDistributionBitCount()) : "inconsistent distribution bit counts for progress and source"; assert(hasNext()); // We prioritize returning buckets in the pending map over those // that may be in the bucket source, since we want to avoid growing // the map too much if (progressToken.hasPending()) { // Find first pending bucket in token TreeMap buckets = progressToken.getBuckets(); ProgressToken.BucketEntry pending = null; BucketId superbucket = null; for (Map.Entry entry : buckets.entrySet()) { if (entry.getValue().getState() == ProgressToken.BucketState.BUCKET_PENDING) { pending = entry.getValue(); superbucket = new BucketId(ProgressToken.keyToBucketId(entry.getKey().key())); break; } } assert(pending != null) : "getNext() called with inconsistent state"; // Set bucket to active, since it's not awaiting an update pending.setState(ProgressToken.BucketState.BUCKET_ACTIVE); progressToken.setActiveBucketCount(progressToken.getActiveBucketCount() + 1); progressToken.setPendingBucketCount(progressToken.getPendingBucketCount() - 1); return new BucketProgress(superbucket, pending.getProgress()); } else { BucketProgress ret = bucketSource.getNext(); progressToken.addBucket(ret.getSuperbucket(), ret.getProgress(), ProgressToken.BucketState.BUCKET_ACTIVE); return ret; } } /** *

Check whether or not it is valid to call {@link #getNext()} with the current * iterator state.

* *

There exists a case wherein hasNext may return false before {@link #update} is * called, but true afterwards. This happens when the set of pending buckets is * empty, the bucket source is empty but the set of active buckets is * not. A future progress update on any of the buckets in the active set may * or may not make that bucket available to the pending set again. * This must be handled explicitly by the caller by checking {@link #isDone()} * and ensuring that {@link #update} is called before retrying hasNext.

* *

This method will also return false if the number of distribution bits have * changed and there are active buckets needing to be flushed before the * iterator will allow new buckets to be handed out.

* * @return Whether or not it is valid to call {@link #getNext()} with the current * iterator state. */ public boolean hasNext() { return (progressToken.hasPending() || bucketSource.hasNext()) && !bucketSource.shouldYield(); } /** * Check if the iterator is actually done * * @see #hasNext() * * @return true iff the bucket source is empty and * there are no pending or active buckets in the progress token. */ public boolean isDone() { return !(hasNext() || progressToken.hasActive()); } /** *

Tell the iterator that we've finished processing up to and * including progress. progress may be a sub-bucket or * the invalid 0-bucket (in case the caller fails to process the bucket and * must return it to the set of pending) or the special case BucketId(Integer.MAX_VALUE), * the latter indicating to the iterator that traversal is complete for * superbucket's tree. The null bucket should only be used if no * non-null updates have yet been given for the superbucket.

* *

It is a requirement that each superbucket returned by {@link #getNext()} must * eventually result in 1-n update operations, where the last update operation * has the special progress==super case.

* *

If the document selection used to create the iterator is unknown and there * were active buckets at the time of a distribution bit state change, such * a bucket passed to update() will be in an inconsistent state * with regards to the number of bits it uses. For unfinished buckets, this * is handled by splitting or merging it until it's consistent, depending on * whether or not it had a lower or higher distribution bit count than that of * the current system state. For finished buckets of a lower dist bit count, * the amount of finished buckets in the ProgressToken is adjusted upwards * to compensate for the fact that a bucket using fewer distribution bits * actually covers more of the bucket space than the ones that are currently * in use. For finished buckets of a higher dist bit count, the number of * finished buckets is not increased at that point in time, since * such a bucket doesn't actually cover an entire bucket with the current state.

* *

All this is done automatically and transparently to the caller once all * active buckets have been updated.

* * @param superbucket A valid bucket ID that has been retrieved earlier through * {@link #getNext()} * @param progress A bucket logically contained within super. Subsequent * updates for the same superbucket must have progress be in an increasing * order, where order is defined as the in-order traversal of the bucket split * tree. May also be the null bucket if the superbucket has not seen any "proper" * progress updates yet or the special case Integer.MAX_VALUE. Note that inconsistent * splitting might actually see progress as containing super * rather than vice versa, so this is explicitly allowed to pass by the code. */ public void update(BucketId superbucket, BucketId progress) { // Delegate to bucket source, as it knows how to deal with buckets // that are in an inconsistent state wrt distribution bit count bucketSource.update(superbucket, progress, progressToken); } /** * @return The total number of iterable buckets that remain to be processed * * Note: currently includes all non-finished (i.e. active and pending * buckets) as well */ public long getRemainingBucketCount() { return progressToken.getTotalBucketCount() - progressToken.getFinishedBucketCount(); } /** * @return Internal bucket source instance. Do NOT modify! */ protected BucketSource getBucketSource() { return bucketSource; } public ProgressToken getProgressToken() { return progressToken; } public int getDistributionBitCount() { return distributionBitCount; } /** *

Set the distribution bit count for the iterator and the buckets it * currently maintains and will return in the future.

* *

For document selections that result in a explicit set of buckets, this * is essentially a no-op, so in such a case, disregard the rest of this text.

* *

Changing the number of distribution bits for an unknown document * selection will effectively scale the bucket space that will be visited; * each bit increase or decrease doubling or halving its size, respectively. * When increasing, any pending buckets will be split to ensure the total * bucket space covered remains the same. Correspondingly, when decreasing, * any pending buckets will be merged appropriately.

* *

If there are buckets active at the time of the change, the actual * bucket splitting/merging operations are kept on hold until all active * buckets have been updated, at which point they will be automatically * performed. The iterator will force such an update by not giving out * any new or pending buckets until that happens.

* *

Note: when decreasing the number of distribution bits, * there is a chance of losing superbucket progress in a bucket that * is merged with another bucket, leading to potential duplicate * results.

* * @param distBits New system state distribution bit count */ public void setDistributionBitCount(int distBits) { if (distributionBitCount != distBits) { bucketSource.setDistributionBitCount(distBits, progressToken); distributionBitCount = distBits; if (log.isLoggable(Level.FINE)) { log.log(Level.FINE, "Set visitor iterator distribution bit count to " + distBits); } } } public boolean visitsAllBuckets() { return bucketSource.visitsAllBuckets(); } public static VisitorIterator createFromDocumentSelection( String documentSelection, BucketIdFactory idFactory, int distributionBitCount, ProgressToken progress) throws ParseException { return createFromDocumentSelection(documentSelection, idFactory, distributionBitCount, progress, 1, 0); } /** * Create a new VisitorIterator instance based on the given document * selection string. * * @param documentSelection Document selection string used to create the * VisitorIterator instance. Depending on the characteristics of the * selection, the iterator may iterate over only a small subset of the buckets or * every bucket in the system. Both cases will be handled efficiently. * @param idFactory {@link BucketId} factory specifying the number of distribution bits * to use et al. * @param progress A unique {@link ProgressToken} instance which is used for maintaining the state * of the iterator. Can not be shared with other iterator instances at the same time. * If progress contains work done in an earlier iteration run, the iterator will pick * up from where it left off * @return A new VisitorIterator instance * @throws ParseException if documentSelection fails to properly parse */ public static VisitorIterator createFromDocumentSelection( String documentSelection, BucketIdFactory idFactory, int distributionBitCount, ProgressToken progress, int slices, int sliceId) throws ParseException { BucketSelector bucketSel = new BucketSelector(idFactory); Set rawBuckets = bucketSel.getBucketList(documentSelection); BucketSource src; // Depending on whether the expression yielded an unknown number of // buckets, we create either an explicit bucket source or a distribution // bit-based range source if (rawBuckets == null) { // Range source src = new DistributionRangeBucketSource(distributionBitCount, progress, slices, sliceId); } else { // Explicit source src = new ExplicitBucketSource(rawBuckets, distributionBitCount, progress); } return new VisitorIterator(progress, src); } /** * Create a new VisitorIterator instance based on the given * set of buckets. This is supported for internal use only, and is required * by Synchronization. Use {@link #createFromDocumentSelection} instead for * all normal purposes. * * @param bucketsToVisit The set of buckets that will be visited * @param distributionBitCount Number of distribution bits to use * @param progress A unique ProgressToken instance which is used for maintaining the state * of the iterator. Can not be shared with other iterator instances at the same time. * If progress contains work done in an earlier iteration run, the iterator will pick * up from where it left off * @return A new VisitorIterator instance */ public static VisitorIterator createFromExplicitBucketSet( Set bucketsToVisit, int distributionBitCount, ProgressToken progress) { // For obvious reasons, always create an explicit source here BucketSource src = new ExplicitBucketSource(bucketsToVisit, distributionBitCount, progress); return new VisitorIterator(progress, src); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy