All Downloads are FREE. Search and download functionalities are using the official Maven repository.

drv.BigArraysCommon.drv Maven / Gradle / Ivy

Go to download

fastutil extends the Java Collections Framework by providing type-specific maps, sets, lists, and queues with a small memory footprint and fast operations; it provides also big (64-bit) arrays, sets, and lists, sorting algorithms, fast, practical I/O classes for binary and text files, and facilities for memory mapping large files. This jar (fastutil-core.jar) contains data structures based on integers, longs, doubles, and objects, only; fastutil.jar contains all classes. If you have both jars in your dependencies, this jar should be excluded.

The newest version!
/*
 * Copyright (C) 2010-2022 Sebastiano Vigna
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * For the sorting code:
 *
 * Copyright (C) 1999 CERN - European Organization for Nuclear Research.
 *
 *   Permission to use, copy, modify, distribute and sell this software and
 *   its documentation for any purpose is hereby granted without fee,
 *   provided that the above copyright notice appear in all copies and that
 *   both that copyright notice and this permission notice appear in
 *   supporting documentation. CERN makes no representations about the
 *   suitability of this software for any purpose. It is provided "as is"
 *   without expressed or implied warranty.
 */

package it.unimi.dsi.fastutil;

import it.unimi.dsi.fastutil.ints.IntBigArrayBigList;
import it.unimi.dsi.fastutil.longs.LongComparator;
import it.unimi.dsi.fastutil.bytes.ByteBigArrays;
import it.unimi.dsi.fastutil.booleans.BooleanBigArrays;
import it.unimi.dsi.fastutil.chars.CharBigArrays;
import it.unimi.dsi.fastutil.shorts.ShortBigArrays;
import it.unimi.dsi.fastutil.ints.IntBigArrays;
import it.unimi.dsi.fastutil.longs.LongBigArrays;
import it.unimi.dsi.fastutil.floats.FloatBigArrays;
import it.unimi.dsi.fastutil.doubles.DoubleBigArrays;
import it.unimi.dsi.fastutil.objects.ObjectBigArrays;
import it.unimi.dsi.fastutil.booleans.BooleanArrays;
import it.unimi.dsi.fastutil.bytes.ByteArrays;
import it.unimi.dsi.fastutil.chars.CharArrays;
import it.unimi.dsi.fastutil.shorts.ShortArrays;
import it.unimi.dsi.fastutil.ints.IntArrays;
import it.unimi.dsi.fastutil.longs.LongArrays;
import it.unimi.dsi.fastutil.floats.FloatArrays;
import it.unimi.dsi.fastutil.doubles.DoubleArrays;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import java.util.concurrent.atomic.AtomicIntegerArray;
import java.util.concurrent.atomic.AtomicLongArray;

import java.util.Random;

/**
 * A class providing static methods and objects that do useful things with big
 * arrays.
 *
 * 

Introducing big arrays

* *

* A big array is an array-of-arrays representation of an array. The * length of a big array is bounded by {@link #SEGMENT_SIZE} * * {@link Integer#MAX_VALUE} = {@value #SEGMENT_SIZE} * (231 − * 1) rather than {@link Integer#MAX_VALUE}. The type of a big array is that of * an array-of-arrays, so a big array of integers is of type * {@code int[][]}. Note that {@link #SEGMENT_SIZE} has been chosen so that * a single segment is smaller than 231 bytes independently of the * data type. It might be enlarged in the future. * *

* If {@code a} is a big array, {@code a[0]}, {@code a[1]}, * … are called the segments of the big array. All segments, * except possibly for the last one, are of length {@link #SEGMENT_SIZE}. Given * an index {@code i} into a big array, there is an associated * {@linkplain #segment(long) segment} and an associated * {@linkplain #displacement(long) * displacement} into that segment. Access to single members happens by * means of accessors (see, e.g., {@link #get(int[][], long)} and * {@link #set(int[][], long, int)}), but you can also use the * methods {@link #segment(long)}/{@link #displacement(long)} to access entries * manually. * *

The intended usage of most of the methods of this class is that they * will be imported statically: for example, *

 * import static it.unimi.dsi.fastutil.BigArrays.copy;
 * import static it.unimi.dsi.fastutil.BigArrays.get;
 * import static it.unimi.dsi.fastutil.BigArrays.length;
 * import static it.unimi.dsi.fastutil.BigArrays.set;
 * 
* *

Dynamic binding will take care of selecting the right method depending * on the array type. * *

Scanning big arrays

* *

* You can scan a big array using the following idiomatic form: * *

 * for(int s = 0; s < a.length; s++) {
 *     final int[] t = a[s];
 *     final int l = t.length;
 *     for(int d = 0; d < l; d++) {
 *          do something with t[d]
 *     }
 * }
 * 
* * or using the simpler reversed version: * *
 * for(int s = a.length; s-- != 0;) {
 *     final int[] t = a[s];
 *     for(int d = t.length; d-- != 0;) {
 *         do something with t[d]
 *     }
 * }
 * 
*

* Inside the inner loop, the original index in {@code a} can be retrieved * using {@link #index(int, int) index(segment, displacement)}. You can also * use an additional long to keep track of the index. * *

* Note that caching is essential in making these loops essentially as fast as * those scanning standard arrays (as iterations of the outer loop happen very * rarely). Using loops of this kind is extremely faster than using a standard * loop and accessors. * *

* In some situations, you might want to iterate over a part of a big array * having an offset and a length. In this case, the idiomatic loops are as * follows: * *

 * for(int s = segment(offset); s < segment(offset + length + SEGMENT_MASK); s++) {
 *     final int[] t = a[s];
 *     final int l = (int)Math.min(t.length, offset + length - start(s));
 *     for(int d = (int)Math.max(0, offset - start(s)); d < l; d++) {
 *         do something with t[d]
 *     }
 * }
 * 
* * or, in a reversed form, * *
 * for(int s = segment(offset + length + SEGMENT_MASK); s-- != segment(offset);) {
 *     final int[] t = a[s];
 *     final int b = (int)Math.max(0, offset - start(s));
 *     for(int d = (int)Math.min(t.length, offset + length - start(s)); d-- != b ;) {
 *         do something with t[d]
 *     }
 * }
 * 
* *

Literal big arrays

* *

* A literal big array can be easily created by using the suitable type-specific * {@code wrap()} method (e.g., {@link BigArrays#wrap(int[])}) around a * standard Java literal array. * *

Atomic big arrays

* *

Limited support is available for atomic big arrays of integers and longs, with a similar syntax. Atomic big arrays are * arrays of instances of {@link java.util.concurrent.atomic.AtomicIntegerArray} or * {@link java.util.concurrent.atomic.AtomicLongArray} of length {@link #SEGMENT_SIZE} (or less, for * the last segment, as usual) and their size cannot be changed. Some methods from those classes are * available in {@link BigArrays} for atomic big arrays (e.g., * {@link BigArrays#incrementAndGet(AtomicIntegerArray[], long)}). * *

Big alternatives

* *

* If you find the kind of “bare hands” approach to big arrays not * enough object-oriented, please use big lists based on big arrays (e.g., * {@link IntBigArrayBigList}). Big arrays follow the Java tradition of * considering arrays as a “legal alien”—something in-between * an object and a primitive type. This approach lacks the consistency of a full * object-oriented approach, but provides some significant performance gains. * *

Additional methods

* *

In particular, the {@code ensureCapacity()}, {@code grow()}, * {@code trim()} and {@code setLength()} methods allow to handle * arrays much like array lists. * *

* In addition to commodity methods, this class contains {@link BigSwapper}-based * implementations of * {@linkplain #quickSort(long, long, LongComparator, BigSwapper) quicksort} and * of a stable, in-place * {@linkplain #mergeSort(long, long, LongComparator, BigSwapper) mergesort}. * These generic sorting methods can be used to sort any kind of list, but they * find their natural usage, for instance, in sorting big arrays in parallel. * * @see it.unimi.dsi.fastutil.Arrays */ public class BigArrays { /** * The shift used to compute the segment associated with an index * (equivalently, the logarithm of the segment size). */ public static final int SEGMENT_SHIFT = 27; /** * The current size of a segment (227) is the largest size that * makes the physical memory allocation for a single segment strictly * smaller than 231 bytes. */ public static final int SEGMENT_SIZE = 1 << SEGMENT_SHIFT; /** The mask used to compute the displacement associated to an index. */ public static final int SEGMENT_MASK = SEGMENT_SIZE - 1; protected BigArrays() { } /** * Computes the segment associated with a given index. * * @param index * an index into a big array. * @return the associated segment. */ public static int segment(final long index) { return (int) (index >>> SEGMENT_SHIFT); } /** * Computes the displacement associated with a given index. * * @param index * an index into a big array. * @return the associated displacement (in the associated * {@linkplain #segment(long) segment}). */ public static int displacement(final long index) { return (int) (index & SEGMENT_MASK); } /** * Computes the starting index of a given segment. * * @param segment * the segment of a big array. * @return the starting index of the segment. */ public static long start(final int segment) { return (long) segment << SEGMENT_SHIFT; } /** * Computes the nearest segment starting index of a given index. * *

This will either be {@code start(segment(index)} or {@code start(segment(index) + 1)}, * whichever is closer, given the bounds can be respected. If neither segment start is within * the bounds, then the index is returned unmodified. * *

This method can be useful for operations that seek to align on the outer array's boundaries * when possible. * * @implSpec The current implementation is branch heavy and is thus not suitable for use in * inner loops. However, it should be fine for the recursive step, where split points are * computed. * * @param index * an index into a big array. * @param min * the minimum (inclusive) valid index of the big array in question * @param max * the maximum (exclusive) valid index of the big array in question * @return the closest segment starting index to {@code index} * @since 8.5.0 */ public static long nearestSegmentStart(final long index, final long min, final long max) { // There probably is a less branchy, bit twiddly way to do this, but this is fine for now. // This isn't going to be used in inner loops, only the recursive call. final long lower = start(segment(index)); final long upper = start(segment(index) + 1); if (upper >= max) { if (lower < min) { return index; } return lower; } if (lower < min) return upper; // Overflow avoiding midpoint computation final long mid = lower + ((upper - lower) >> 1); return index <= mid ? lower : upper; } /** * Computes the index associated with given segment and displacement. * * @param segment * the segment of a big array. * @param displacement * the displacement into the segment. * @return the associated index: that is, {@link #segment(long) * segment(index(segment, displacement)) == segment} and * {@link #displacement(long) displacement(index(segment, * displacement)) == displacement}. */ public static long index(final int segment, final int displacement) { return start(segment) + displacement; } /** * Ensures that a range given by its first (inclusive) and last (exclusive) * elements fits a big array of given length. * *

* This method may be used whenever a big array range check is needed. * * @param bigArrayLength * a big-array length (must be nonnegative). * @param from * a start index (inclusive). * @param to * an end index (inclusive). * @throws IllegalArgumentException * if {@code from} is greater than {@code to}. * @throws ArrayIndexOutOfBoundsException * if {@code from} or {@code to} are greater than * {@code bigArrayLength} or negative. * * @implNote An {@code assert} checks whether {@code bigArrayLength} is nonnegative. */ public static void ensureFromTo(final long bigArrayLength, final long from, final long to) { assert bigArrayLength >= 0; if (from < 0) throw new ArrayIndexOutOfBoundsException("Start index (" + from + ") is negative"); if (from > to) throw new IllegalArgumentException("Start index (" + from + ") is greater than end index (" + to + ")"); if (to > bigArrayLength) throw new ArrayIndexOutOfBoundsException("End index (" + to + ") is greater than big-array length (" + bigArrayLength + ")"); } /** * Ensures that a range given by an offset and a length fits a big array of * given length. * *

* This method may be used whenever a big array range check is needed. * * @param bigArrayLength * a big-array length (must be nonnegative). * @param offset * a start index for the fragment * @param length * a length (the number of elements in the fragment). * @throws IllegalArgumentException * if {@code length} is negative. * @throws ArrayIndexOutOfBoundsException * if {@code offset} is negative or {@code offset} + * {@code length} is greater than * {@code bigArrayLength}. * * @implNote An {@code assert} checks whether {@code bigArrayLength} is nonnegative. */ public static void ensureOffsetLength(final long bigArrayLength, final long offset, final long length) { assert bigArrayLength >= 0; if (offset < 0) throw new ArrayIndexOutOfBoundsException("Offset (" + offset + ") is negative"); if (length < 0) throw new IllegalArgumentException("Length (" + length + ") is negative"); if (length > bigArrayLength - offset) throw new ArrayIndexOutOfBoundsException("Last index (" + Long.toUnsignedString(offset + length) + ") is greater than big-array length (" + bigArrayLength + ")"); } /** * Ensures that a big-array length is legal. * * @param bigArrayLength * a big-array length. * @throws IllegalArgumentException * if {@code length} is negative, or larger than or equal * to {@link #SEGMENT_SIZE} * {@link Integer#MAX_VALUE}. */ public static void ensureLength(final long bigArrayLength) { if (bigArrayLength < 0) throw new IllegalArgumentException("Negative big-array size: " + bigArrayLength); if (bigArrayLength >= (long) Integer.MAX_VALUE << SEGMENT_SHIFT) throw new IllegalArgumentException("Big-array size too big: " + bigArrayLength); } private static final int SMALL = 7; private static final int MEDIUM = 40; /** * Transforms two consecutive sorted ranges into a single sorted range. The * initial ranges are {@code [first, middle)} and * {@code [middle, last)}, and the resulting range is * {@code [first, last)}. Elements in the first input range will * precede equal elements in the second. */ private static void inPlaceMerge(final long from, long mid, final long to, final LongComparator comp, final BigSwapper swapper) { if (from >= mid || mid >= to) return; if (to - from == 2) { if (comp.compare(mid, from) < 0) { swapper.swap(from, mid); } return; } long firstCut; long secondCut; if (mid - from > to - mid) { firstCut = from + (mid - from) / 2; secondCut = lowerBound(mid, to, firstCut, comp); } else { secondCut = mid + (to - mid) / 2; firstCut = upperBound(from, mid, secondCut, comp); } long first2 = firstCut; long middle2 = mid; long last2 = secondCut; if (middle2 != first2 && middle2 != last2) { long first1 = first2; long last1 = middle2; while (first1 < --last1) swapper.swap(first1++, last1); first1 = middle2; last1 = last2; while (first1 < --last1) swapper.swap(first1++, last1); first1 = first2; last1 = last2; while (first1 < --last1) swapper.swap(first1++, last1); } mid = firstCut + (secondCut - mid); inPlaceMerge(from, firstCut, mid, comp, swapper); inPlaceMerge(mid, secondCut, to, comp, swapper); } /** * Performs a binary search on an already sorted range: finds the first * position where an element can be inserted without violating the ordering. * Sorting is by a user-supplied comparison function. * * @param mid * Beginning of the range. * @param to * One past the end of the range. * @param firstCut * Element to be searched for. * @param comp * Comparison function. * @return The largest index i such that, for every j in the range * {@code [first, i)}, {@code comp.apply(array[j], x)} is * {@code true}. */ private static long lowerBound(long mid, final long to, final long firstCut, final LongComparator comp) { long len = to - mid; while (len > 0) { long half = len / 2; long middle = mid + half; if (comp.compare(middle, firstCut) < 0) { mid = middle + 1; len -= half + 1; } else { len = half; } } return mid; } /** Returns the index of the median of three elements. */ private static long med3(final long a, final long b, final long c, final LongComparator comp) { final int ab = comp.compare(a, b); final int ac = comp.compare(a, c); final int bc = comp.compare(b, c); return (ab < 0 ? (bc < 0 ? b : ac < 0 ? c : a) : (bc > 0 ? b : ac > 0 ? c : a)); } /** * Sorts the specified range of elements using the specified big swapper and * according to the order induced by the specified comparator using * mergesort. * *

* This sort is guaranteed to be stable: equal elements will not be * reordered as a result of the sort. The sorting algorithm is an in-place * mergesort that is significantly slower than a standard mergesort, as its * running time is * O(n (log n)2), but it * does not allocate additional memory; as a result, it can be used as a * generic sorting algorithm. * * @param from * the index of the first element (inclusive) to be sorted. * @param to * the index of the last element (exclusive) to be sorted. * @param comp * the comparator to determine the order of the generic data * (arguments are positions). * @param swapper * an object that knows how to swap the elements at any two * positions. */ public static void mergeSort(final long from, final long to, final LongComparator comp, final BigSwapper swapper) { final long length = to - from; // Insertion sort on smallest arrays if (length < SMALL) { for (long i = from; i < to; i++) { for (long j = i; j > from && (comp.compare(j - 1, j) > 0); j--) { swapper.swap(j, j - 1); } } return; } // Recursively sort halves long mid = (from + to) >>> 1; mergeSort(from, mid, comp, swapper); mergeSort(mid, to, comp, swapper); // If list is already sorted, nothing left to do. This is an // optimization that results in faster sorts for nearly ordered lists. if (comp.compare(mid - 1, mid) <= 0) return; // Merge sorted halves inPlaceMerge(from, mid, to, comp, swapper); } /** * Sorts the specified range of elements using the specified big swapper and * according to the order induced by the specified comparator using * quicksort. * *

* The sorting algorithm is a tuned quicksort adapted from Jon L. Bentley * and M. Douglas McIlroy, “Engineering a Sort Function”, * Software: Practice and Experience, 23(11), pages 1249−1265, * 1993. * * @param from * the index of the first element (inclusive) to be sorted. * @param to * the index of the last element (exclusive) to be sorted. * @param comp * the comparator to determine the order of the generic data. * @param swapper * an object that knows how to swap the elements at any two * positions. */ public static void quickSort(final long from, final long to, final LongComparator comp, final BigSwapper swapper) { final long len = to - from; // Insertion sort on smallest arrays if (len < SMALL) { for (long i = from; i < to; i++) for (long j = i; j > from && (comp.compare(j - 1, j) > 0); j--) { swapper.swap(j, j - 1); } return; } // Choose a partition element, v long m = from + len / 2; // Small arrays, middle element if (len > SMALL) { long l = from, n = to - 1; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 long s = len / 8; l = med3(l, l + s, l + 2 * s, comp); m = med3(m - s, m, m + s, comp); n = med3(n - 2 * s, n - s, n, comp); } m = med3(l, m, n, comp); // Mid-size, med of 3 } // long v = x[m]; long a = from, b = a, c = to - 1, d = c; // Establish Invariant: v* (v)* v* while (true) { int comparison; while (b <= c && ((comparison = comp.compare(b, m)) <= 0)) { if (comparison == 0) { if (a == m) m = b; // moving target; DELTA to JDK !!! else if (b == m) m = a; // moving target; DELTA to JDK !!! swapper.swap(a++, b); } b++; } while (c >= b && ((comparison = comp.compare(c, m)) >= 0)) { if (comparison == 0) { if (c == m) m = d; // moving target; DELTA to JDK !!! else if (d == m) m = c; // moving target; DELTA to JDK !!! swapper.swap(c, d--); } c--; } if (b > c) break; if (b == m) m = d; // moving target; DELTA to JDK !!! else if (c == m) m = c; // moving target; DELTA to JDK !!! swapper.swap(b++, c--); } // Swap partition elements back to middle long s; long n = from + len; s = Math.min(a - from, b - a); vecSwap(swapper, from, b - s, s); s = Math.min(d - c, n - d - 1); vecSwap(swapper, b, n - s, s); // Recursively sort non-partition-elements if ((s = b - a) > 1) quickSort(from, from + s, comp, swapper); if ((s = d - c) > 1) quickSort(n - s, n, comp, swapper); } /** * Performs a binary search on an already-sorted range: finds the last * position where an element can be inserted without violating the ordering. * Sorting is by a user-supplied comparison function. * * @param from * Beginning of the range. * @param mid * One past the end of the range. * @param secondCut * Element to be searched for. * @param comp * Comparison function. * @return The largest index i such that, for every j in the range * {@code [first, i)}, {@code comp.apply(x, array[j])} is * {@code false}. */ private static long upperBound(long from, final long mid, final long secondCut, final LongComparator comp) { long len = mid - from; while (len > 0) { long half = len / 2; long middle = from + half; if (comp.compare(secondCut, middle) < 0) { len = half; } else { from = middle + 1; len -= half + 1; } } return from; } /** Swaps x[a .. (a+n-1)] with x[b .. (b+n-1)]. */ private static void vecSwap(final BigSwapper swapper, long from, long l, final long s) { for (int i = 0; i < s; i++, from++, l++) swapper.swap(from, l); } #include "src/it/unimi/dsi/fastutil/ByteBigArraysFragment.h" #undef KEY_CLASS_Byte #include "src/it/unimi/dsi/fastutil/IntBigArraysFragment.h" #undef KEY_CLASS_Integer #include "src/it/unimi/dsi/fastutil/LongBigArraysFragment.h" #undef KEY_CLASS_Long #include "src/it/unimi/dsi/fastutil/DoubleBigArraysFragment.h" #undef KEY_CLASS_Double #include "src/it/unimi/dsi/fastutil/BooleanBigArraysFragment.h" #undef KEY_CLASS_Boolean #include "src/it/unimi/dsi/fastutil/ShortBigArraysFragment.h" #undef KEY_CLASS_Short #include "src/it/unimi/dsi/fastutil/CharBigArraysFragment.h" #undef KEY_CLASS_Character #include "src/it/unimi/dsi/fastutil/FloatBigArraysFragment.h" #undef KEY_CLASS_Float #undef KEYS_PRIMITIVE #include "src/it/unimi/dsi/fastutil/ObjectBigArraysFragment.h" public static void main(final String arg[]) { int[][] a = IntBigArrays.newBigArray(1L << Integer.parseInt(arg[0])); long x, y, z, start; for (int k = 10; k-- != 0;) { start = -System.currentTimeMillis(); x = 0; for (long i = length(a); i-- != 0;) x ^= i ^ get(a, i); if (x == 0) System.err.println(); System.out.println("Single loop: " + (start + System.currentTimeMillis()) + "ms"); start = -System.currentTimeMillis(); y = 0; for (int i = a.length; i-- != 0;) { final int[] t = a[i]; for (int d = t.length; d-- != 0;) y ^= t[d] ^ index(i, d); } if (y == 0) System.err.println(); if (x != y) throw new AssertionError(); System.out.println("Double loop: " + (start + System.currentTimeMillis()) + "ms"); z = 0; long j = length(a); for (int i = a.length; i-- != 0;) { final int[] t = a[i]; for (int d = t.length; d-- != 0;) y ^= t[d] ^ --j; } if (z == 0) System.err.println(); if (x != z) throw new AssertionError(); System.out.println("Double loop (with additional index): " + (start + System.currentTimeMillis()) + "ms"); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy