it.unimi.dsi.fastutil.shorts.ShortBigSpliterators Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of fastutil Show documentation
fastutil extends the Java Collections Framework by providing type-specific maps, sets, lists, and queues with a small memory footprint and fast access and insertion; it provides also big (64-bit) arrays, sets and lists, sorting algorithms, fast, practical I/O classes for binary and text files, and facilities for memory mapping large files. Note that if you have both this jar and fastutil-core.jar in your dependencies, fastutil-core.jar should be excluded.
There is a newer version: 8.5.13
Show newest version
/*
	* Copyright (C) 2019-2023 Sebastiano Vigna
	*
	* Licensed under the Apache License, Version 2.0 (the "License");
	* you may not use this file except in compliance with the License.
	* You may obtain a copy of the License at
	*
	*     http://www.apache.org/licenses/LICENSE-2.0
	*
	* Unless required by applicable law or agreed to in writing, software
	* distributed under the License is distributed on an "AS IS" BASIS,
	* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	* See the License for the specific language governing permissions and
	* limitations under the License.
	*/
package it.unimi.dsi.fastutil.shorts;

/**
 * A class providing static methods and objects that do useful things with type-specific
 * spliterators over big data structures (potentially more than {@link Integer#MAX_VALUE} items
 * long).
 *
 * <p>
 * Since the {@link java.util.Spliterator} interface already natively works with long indexes, most
 * of the utility methods reside in the regular {@code Spliterators} class.
 *
 * @author C. Sean Young <[email protected]>
 *
 * @since 8.5.0
 */
public final class ShortBigSpliterators {
	/**
	 * A skeletal implementation for a spliterator backed by an index based data store. High performance
	 * concrete implementations (like the main Spliterator of BigArrayBigList) generally should avoid
	 * using this and just implement the interface directly, but it should be decent for less
	 * performance critical implementations.
	 *
	 * <p>
	 * As the abstract methods in this class are used in inner loops, it is generally a good idea to
	 * declare the subclass {@code final} so as to encourage the JVM to inline them (or alternatively,
	 * override the abstract methods as final).
	 */
	public static abstract class AbstractIndexBasedSpliterator extends AbstractShortSpliterator {
		/**
		 * The current position index, the index of the item to be given after the next call to
		 * {@link #tryAdvance}.
		 *
		 * <p>
		 * This value will be between {@code minPos} and {@link #getMaxPos()} (exclusive) (on a best effort,
		 * so concurrent structural modifications may cause this to be violated, but that usually
		 * invalidates spliterators anyways). Thus {@code pos} being {@code minPos + 2} would mean
		 * {@link #tryAdvance} was called twice and the next call will give the third element of this
		 * spliterator.
		 */
		protected long pos;

		/**
		 * Creates a spliterator whose traversal starts at the given index.
		 *
		 * @param initialPos the initial value of {@link #pos}.
		 */
		protected AbstractIndexBasedSpliterator(long initialPos) {
			this.pos = initialPos;
		}

		// When you implement these, you should probably declare them final to encourage the JVM to inline
		// them.
		/**
		 * Get the item corresponding to the given index location.
		 *
		 * <p>
		 * Do not advance {@link #pos} in this method; the default {@link #tryAdvance} and
		 * {@link #forEachRemaining} methods take care of this.
		 *
		 * <p>
		 * The {@code location} given will be between {@code minPos} and {@link #getMaxPos()} (exclusive).
		 * Thus, a {@code location} of {@code minPos + 2} would mean {@link #tryAdvance} was called twice
		 * and this method should return what the next call to {@link #tryAdvance} should give.
		 *
		 * @param location the index of the item to fetch.
		 * @return the item at {@code location}.
		 */
		protected abstract short get(long location);

		/**
		 * The maximum pos can be, and is the logical end (exclusive) of the "range".
		 *
		 * <p>
		 * If pos is equal to the return of this method, this means the last element has been returned and
		 * the next call to {@link #tryAdvance} will return {@code false}.
		 *
		 * <p>
		 * Usually set to return the parent {@linkplain java.util.Collection#size() collection's size}, but
		 * does not have to be (for example, sublists and subranges).
		 *
		 * <p>
		 * This method allows the implementation to decide how it binds on the size (late or early).
		 * However, {@link EarlyBindingSizeIndexBasedSpliterator} and
		 * {@link LateBindingSizeIndexBasedSpliterator} give an implementation of this method for the two
		 * most common strategies.
		 *
		 * @return the exclusive upper bound for {@link #pos}.
		 */
		protected abstract long getMaxPos();

		/**
		 * Make a new spliterator to {@link #trySplit()} starting with the given {@code pos} and ending at
		 * the given {@code maxPos}.
		 *
		 * <p>
		 * An implementation is free to look at the range given, and if it deems it too small to split
		 * further, return {@code null}. In which case, {@link #trySplit()} will not modify the state of
		 * this spliterator.
		 *
		 * <p>
		 * Do not modify {@link #pos} in this method; the default {@link #trySplit()} method takes
		 * care of this.
		 *
		 * <p>
		 * To comply with the spec of {@link java.util.Spliterator#ORDERED}, this will only be called to
		 * create prefixes of the current sequence this spliterator is over, and this instance will start at
		 * the end of the returned sequence and have the same end point. As such, this method should also
		 * not change what {@link #getMaxPos()} returns.
		 *
		 * @param pos the first index (inclusive) the new spliterator covers.
		 * @param maxPos the end index (exclusive) the new spliterator covers.
		 * @return a spliterator over the given range, or {@code null} to refuse the split.
		 */
		protected abstract ShortSpliterator makeForSplit(long pos, long maxPos);

		/**
		 * Compute where to split on the next {@link #trySplit()}, given the current pos and
		 * {@link #getMaxPos()} (or any other metric the implementation wishes to use).
		 *
		 * <p>
		 * If a value {@code == pos} or {@code == getMaxPos()} is returned, the {@link #trySplit()} method
		 * will assume a split of size 0 was computed, and thus won't split or change state. If a value
		 * outside that range is returned, then {@link #trySplit()} will throw
		 * {@link IndexOutOfBoundsException}. In particular, this means that no handling of overflow or
		 * underflow is performed.
		 *
		 * @apiNote The reasoning behind the throwing if out of range behavior is that, even though it can
		 *          significantly slow the process of splitting, it is much better than risking a buggy
		 *          implementation causing splits to stop happening much earlier than intended. Also,
		 *          splitting is not usually in the "inner loop" of stream operations, so this slowness
		 *          isn't in the bottleneck. That and we have already warned that high performance
		 *          spliterators should prefer implementing all the methods themselves instead of through
		 *          this interface.
		 *
		 * @implSpec This default implementation is a simple split-by-2 strategy, dividing in the middle of
		 *           pos and {@link #getMaxPos()}. It is unspecified whether the first range or the second
		 *           range will be larger in the case of an odd length range.
		 *
		 * @return the index at which the next split should happen.
		 */
		protected long computeSplitPoint() {
			// Overflow safe midpoint computation.
			return pos + ((getMaxPos() - pos) / 2);
		}

		/**
		 * Verifies that a computed split point lies within {@code [pos, observedMax]}.
		 *
		 * @param splitPoint the candidate split index.
		 * @param observedMax the range end that was observed when the split point was computed.
		 * @throws IndexOutOfBoundsException if {@code splitPoint} is outside of that range.
		 */
		private void splitPointCheck(final long splitPoint, final long observedMax) {
			// TODO When minimum Java version becomes Java 9, use Objects.checkFromToIndex (after first letting
			// == max case pass through)
			if (splitPoint < pos || splitPoint > observedMax) {
				throw new IndexOutOfBoundsException("splitPoint " + splitPoint + " outside of range of current position " + pos + " and range end " + observedMax);
			}
		}

		// Since this is an index based spliterator, list characteristics make sense.
		@Override
		public int characteristics() {
			return ShortSpliterators.LIST_SPLITERATOR_CHARACTERISTICS;
		}

		/**
		 * Returns the distance between {@link #getMaxPos()} and the current {@link #pos}.
		 */
		@Override
		public long estimateSize() {
			return getMaxPos() - pos;
		}

		/**
		 * Hands the element at {@link #pos} to {@code action} and advances by one, if any element
		 * remains.
		 *
		 * @param action the consumer receiving the next element.
		 * @return {@code false} if no element remained, {@code true} otherwise.
		 * @throws NullPointerException if {@code action} is {@code null}, even when no element remains.
		 */
		@Override
		public boolean tryAdvance(final ShortConsumer action) {
			// Reject null eagerly: without this, an exhausted spliterator would silently accept a null
			// action, contrary to the java.util.Spliterator contract.
			java.util.Objects.requireNonNull(action);
			if (pos >= getMaxPos()) return false;
			action.accept(get(pos++));
			return true;
		}

		/**
		 * Hands every remaining element to {@code action}, in index order. The range end is read once,
		 * before the traversal starts.
		 *
		 * @param action the consumer receiving the remaining elements.
		 * @throws NullPointerException if {@code action} is {@code null}, even when no element remains.
		 */
		@Override
		public void forEachRemaining(final ShortConsumer action) {
			// Same eager null rejection as tryAdvance, so that an empty range does not mask a null action.
			java.util.Objects.requireNonNull(action);
			for (final long max = getMaxPos(); pos < max; ++pos) {
				action.accept(get(pos));
			}
		}

		/**
		 * Skips up to {@code n} elements by moving {@link #pos} forward, never past
		 * {@link #getMaxPos()}.
		 *
		 * @param n the number of elements to skip.
		 * @return the number of elements actually skipped.
		 * @throws IllegalArgumentException if {@code n} is negative.
		 */
		@Override
		public long skip(long n) {
			if (n < 0) throw new IllegalArgumentException("Argument must be nonnegative: " + n);
			final long max = getMaxPos();
			if (pos >= max) return 0;
			final long remaining = max - pos;
			if (n < remaining) {
				pos += n;
				return n;
			}
			// Fewer than n elements are left: consume them all and report how many there were.
			pos = max;
			return remaining;
		}

		/**
		 * {@inheritDoc}
		 *
		 * @implSpec This implementation always returns a prefix of the elements, in order to comply with
		 *           the {@link java.util.Spliterator#ORDERED} property. This means this current iterator
		 *           does not need to update what {@link #getMaxPos()} returns in response to this method
		 *           (but it may do "book-keeping" on it based on binding strategy).
		 *
		 *           <p>
		 *           The split point is computed by {@link #computeSplitPoint()}; see that method for
		 *           details.
		 *
		 * @throws IndexOutOfBoundsException if the return of {@link #computeSplitPoint()} was {@code < pos}
		 *             or {@code > getMaxPos()}.
		 */
		@Override
		public ShortSpliterator trySplit() {
			final long max = getMaxPos();
			final long splitPoint = computeSplitPoint();
			// A split point on either boundary means an empty prefix or an empty suffix: do not split.
			if (splitPoint == pos || splitPoint == max) return null;
			splitPointCheck(splitPoint, max);
			final ShortSpliterator maybeSplit = makeForSplit(pos, splitPoint);
			// Only give up the prefix if the implementation actually produced a spliterator for it.
			if (maybeSplit != null) this.pos = splitPoint;
			return maybeSplit;
		}
	}

	/**
	 * A skeletal implementation for a spliterator backed by an index based data store. High performance
	 * concrete implementations (like the main Spliterator of ArrayList) generally should avoid using
	 * this and just implement the interface directly, but it should be decent for less performance
	 * critical implementations.
	 *
	 * <p>
	 * This class implements an early binding strategy for {@link #getMaxPos()}. The last index this
	 * spliterator covers is fixed at construction time and does not vary on changes to the backing data
	 * store. This should usually be the {@linkplain java.util.Collection#size() size} of the backing
	 * data store (until a split at least), hence the class' name, but this is not required.
	 *
	 * <p>
	 * As the abstract methods in this class are used in inner loops, it is generally a good idea to
	 * declare the subclass {@code final} so as to encourage the JVM to inline them (or alternatively,
	 * override the abstract methods as final).
	 */
	public static abstract class EarlyBindingSizeIndexBasedSpliterator extends AbstractIndexBasedSpliterator {
		/** The exclusive upper bound of {@link #pos}, fixed at construction time. */
		protected final long maxPos;

		/**
		 * Creates a spliterator over the index range {@code [initialPos, maxPos)}.
		 *
		 * @param initialPos the index of the first element to be returned.
		 * @param maxPos the end (exclusive) of the range; it never changes afterwards.
		 */
		protected EarlyBindingSizeIndexBasedSpliterator(final long initialPos, final long maxPos) {
			super(initialPos);
			this.maxPos = maxPos;
		}

		/**
		 * Returns the range end that was supplied to the constructor.
		 */
		@Override
		protected final long getMaxPos() {
			return this.maxPos;
		}
	}

	/**
	 * A skeletal implementation for a spliterator backed by an index based data store. High performance
	 * concrete implementations (like the main Spliterator of ArrayList) generally should avoid using
	 * this and just implement the interface directly, but it should be decent for less performance
	 * critical implementations.
	 *
	 * <p>
	 * This class implements a late binding strategy. On a new, non-split instance, the
	 * {@link #getMaxPos() max pos} will track the given data store (usually its
	 * {@linkplain java.util.Collection#size() size}, hence the class' name). On the first
	 * {@linkplain #trySplit() split}, the last index will be read from the backing data store one last
	 * time and then be fixed for the remaining duration of this instance.
	 *
	 * <p>
	 * The returned split should also have a constant {@code maxPos}.
	 *
	 * <p>
	 * As the abstract methods in this class are used in inner loops, it is generally a good idea to
	 * declare the subclass {@code final} so as to encourage the JVM to inline them (or alternatively,
	 * override the abstract methods as final).
	 */
	public static abstract class LateBindingSizeIndexBasedSpliterator extends AbstractIndexBasedSpliterator {
		/** The maximum {@link #pos} can be, or -1 if it hasn't been fixed yet. */
		protected long maxPos = -1;
		/** Whether {@link #maxPos} has been fixed; starts out {@code false} (the field default). */
		private boolean maxPosFixed;

		/**
		 * Creates a spliterator whose range end is not fixed yet and is read from the backing store.
		 *
		 * @param initialPos the index of the first element to be returned.
		 */
		protected LateBindingSizeIndexBasedSpliterator(final long initialPos) {
			super(initialPos);
		}

		/**
		 * Creates a spliterator whose range end is already fixed.
		 *
		 * @param initialPos the index of the first element to be returned.
		 * @param fixedMaxPos the end (exclusive) of the range.
		 */
		protected LateBindingSizeIndexBasedSpliterator(final long initialPos, final long fixedMaxPos) {
			super(initialPos);
			maxPos = fixedMaxPos;
			maxPosFixed = true;
		}

		/**
		 * Return the maximum pos can be, dynamically tracking the backing data store.
		 *
		 * <p>
		 * This method will be the return value of {@link #getMaxPos()} until this spliterator is
		 * {@linkplain #trySplit() split}, in which case its final return value will be saved and remain
		 * constant for the rest of the duration of this instance.
		 *
		 * @return the current range end according to the backing data store.
		 */
		protected abstract long getMaxPosFromBackingStore();

		/**
		 * Returns the saved {@link #maxPos} once it has been fixed, and the live value from
		 * {@link #getMaxPosFromBackingStore()} before that.
		 */
		@Override
		protected final long getMaxPos() {
			if (maxPosFixed) return maxPos;
			return getMaxPosFromBackingStore();
		}

		/**
		 * Splits as the superclass does; after the first successful split, the range end is read from
		 * the backing store once more and fixed from then on.
		 */
		@Override
		public ShortSpliterator trySplit() {
			final ShortSpliterator prefix = super.trySplit();
			// Nothing to record if the split was refused or the range end is already fixed.
			if (prefix == null || maxPosFixed) return prefix;
			maxPos = getMaxPosFromBackingStore();
			maxPosFixed = true;
			return prefix;
		}
	}
}