All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cern.colt.Partitioning Maven / Gradle / Ivy

Go to download

Parallel Colt is a multithreaded version of Colt - a library for high performance scientific computing in Java. It contains efficient algorithms for data analysis, linear algebra, multi-dimensional arrays, Fourier transforms, statistics and histogramming.

The newest version!
/*
Copyright (C) 1999 CERN - European Organization for Nuclear Research.
Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose 
is hereby granted without fee, provided that the above copyright notice appear in all copies and 
that both that copyright notice and this permission notice appear in supporting documentation. 
CERN makes no representations about the suitability of this software for any purpose. 
It is provided "as is" without expressed or implied warranty.
 */
package cern.colt;

import cern.colt.function.tint.IntComparator;
import cern.colt.list.tdouble.DoubleArrayList;
import cern.colt.list.tint.IntArrayList;

/**
 * Given some interval boundaries, partitions arrays such that all elements
 * falling into an interval are placed next to each other.
 * 

* The algorithms partition arrays into two or more intervals. They distinguish * between synchronously partitioning either one, two or three arrays. * They further come in templated versions, either partitioning int[] * arrays or double[] arrays. *

* You may want to start out reading about the simplest case: Partitioning one * int[] array into two intervals. To do so, read * {@link #partition(int[],int,int,int)}. * * Next, building upon that foundation comes a method partitioning * int[] arrays into multiple intervals. See * {@link #partition(int[],int,int,int[],int,int,int[])} for related * documentation. *

* All other methods are no different from the ones you now already understand, * except that they operate on slightly different data types. *

* Performance *

* Partitioning into two intervals is O( N ). Partitioning into k * intervals is O( N * log(k)). Constants factors are minimized. No * temporary memory is allocated; Partitioning is in-place. * * @see cern.colt.matrix.tdouble.algo.DoublePartitioning * * @author [email protected] * @version 1.0, 03-Jul-99 */ public class Partitioning extends Object { private static final int SMALL = 7; private static final int MEDIUM = 40; // benchmark only protected static int steps = 0; public static int swappedElements = 0; /** * Makes this class non instantiable, but still let's others inherit from * it. */ protected Partitioning() { } /** * Finds the given key "a" within some generic data using the binary search * algorithm. * * @param a * the index of the key to search for. * @param from * the leftmost search position, inclusive. * @param to * the rightmost search position, inclusive. * @param comp * the comparator determining the order of the generic data. * Takes as first argument the index a within the * generic splitters s. Takes as second argument the * index b within the generic data g. * @return index of the search key, if it is contained in the list; * otherwise, (-(insertion point) - 1). The * insertion point is defined as the the point at which the * value would be inserted into the list: the index of the first * element greater than the key, or list.length, if all * elements in the list are less than the specified key. Note that * this guarantees that the return value will be >= 0 if and only * if the key is found. */ private static int binarySearchFromTo(int a, int from, int to, IntComparator comp) { while (from <= to) { int mid = (from + to) / 2; int comparison = comp.compare(mid, a); if (comparison < 0) from = mid + 1; else if (comparison > 0) to = mid - 1; else return mid; // key found } return -(from + 1); // key not found. 
} /** * Same as {@link #dualPartition(int[],int[],int,int,int[],int,int,int[])} * except that it synchronously partitions double[] rather * than int[] arrays. */ public static void dualPartition(double[] list, double[] secondary, int from, int to, double[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { double splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = dualPartition(list, secondary, from, to, splitter); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. 
if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { dualPartition(list, secondary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { dualPartition(list, secondary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } } /** * Same as {@link #dualPartition(int[],int[],int,int,int)} except that it * synchronously partitions double[] rather than * int[] arrays. */ public static int dualPartition(double[] list, double[] secondary, int from, int to, double splitter) { double element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from] = element; element = secondary[i]; secondary[i] = secondary[from]; secondary[from++] = element; } } return from - 1; } /** * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that * this method synchronously partitions two arrays at the same time; * both arrays are partially sorted according to the elements of the primary * array. In other words, each time an element in the primary array is moved * from index A to B, the correspoding element within the secondary array is * also moved from index A to B. *

* Use cases: *

* Imagine having a large list of 2-dimensional points. If memory consumption * and performance matter, it is a good idea to physically lay them out as * two 1-dimensional arrays (using something like Point2D objects * would be prohibitively expensive, both in terms of time and space). Now * imagine wanting to histogram the points. We may want to partially sort * the points by x-coordinate into intervals. This method efficiently does * the job. *

* Performance: *

* Same as for single-partition methods. */ public static void dualPartition(int[] list, int[] secondary, int from, int to, int[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { int splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = dualPartition(list, secondary, from, to, splitter); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. 
if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { dualPartition(list, secondary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { dualPartition(list, secondary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } } /** * Same as {@link #partition(int[],int,int,int)} except that this method * synchronously partitions two arrays at the same time; both arrays * are partially sorted according to the elements of the primary array. In * other words, each time an element in the primary array is moved from * index A to B, the correspoding element within the secondary array is also * moved from index A to B. *

* Performance: *

* Same as for single-partition methods. */ public static int dualPartition(int[] list, int[] secondary, int from, int to, int splitter) { int element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from] = element; element = secondary[i]; secondary[i] = secondary[from]; secondary[from++] = element; } } return from - 1; } /** * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that * it generically partitions arbitrary shaped data (for example * matrices or multiple arrays) rather than int[] arrays. *

* This method operates on arbitrary shaped data and arbitrary shaped * splitters. In fact, it has no idea what kind of data by what kind of * splitters it is partitioning. Comparisons and swapping are delegated to * user provided objects which know their data and can do the job. *

* Lets call the generic data g (it may be a matrix, one array, * three linked lists or whatever). Lets call the generic splitters * s. This class takes a user comparison function operating on two * indexes (a,b), namely an {@link IntComparator}. The comparison * function determines whether s[a] is equal, less or greater than * g[b]. This method can then decide to swap the data g[b] * with the data g[c] (yes, c, not a). It calls a * user provided {@link cern.colt.Swapper} object that knows how to swap the * data of these two indexes. *

* Again, note the details: Comparisons compare s[a] with * g[b]. Swaps swap g[b] with g[c]. Prior to * calling this method, the generic splitters s must be sorted * ascending and must not contain multiple equal values. These preconditions * are not checked; be sure that they are met. * * @param from * the index of the first element within g to be * considered. * @param to * the index of the last element within g to be * considered. The method considers the elements * g[from] .. g[to]. * * * @param splitFrom * the index of the first splitter element to be considered. * @param splitTo * the index of the last splitter element to be considered. The * method considers the splitter elements * s[splitFrom] .. s[splitTo]. * * @param splitIndexes * a list into which this method fills the indexes of elements * delimiting intervals. Upon return * splitIndexes[splitFrom..splitTo] will be set * accordingly. Therefore, must satisfy * splitIndexes.length > splitTo. * * @param comp * the comparator comparing a splitter with an element of the * generic data. Takes as first argument the index a * within the generic splitters s. Takes as second * argument the index b within the generic data * g. * @param comp2 * the comparator to determine the order of the generic data. * Takes as first argument the index a within the * generic data g. Takes as second argument the index * b within the generic data g. * @param comp3 * the comparator comparing a splitter with another splitter. * Takes as first argument the index a within the * generic splitters s. Takes as second argument the * index b within the generic splitters g. * @param swapper * an object that knows how to swap the elements at any two * indexes (a,b). Takes as first argument the index b * within the generic data g. Takes as second argument * the index c within the generic data g. * *

* Tip: Normally you will have * splitIndexes.length == s.length as well as * from==0, to==g.length-1 and * splitFrom==0, splitTo==s.length-1. * * @see Sorting#binarySearchFromTo(int,int,IntComparator) */ public static void genericPartition(int from, int to, int splitFrom, int splitTo, int[] splitIndexes, IntComparator comp, IntComparator comp2, IntComparator comp3, Swapper swapper) { int splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(l, l + s, l + 2 * s, comp2); m = med3(m - s, m, m + s, comp2); n = med3(n - 2 * s, n - s, n, comp2); } m = med3(l, m, n, comp2); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = binarySearchFromTo(m, splitFrom, splitTo, comp); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = medianIndex; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. 
to int splitIndex = genericPartition(from, to, splitter, comp, swapper); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(comp3.compare(splitter, i) < 0))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(comp3.compare(splitter, i) > 0))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { genericPartition(from, splitIndex, splitFrom, medianIndex - 1, splitIndexes, comp, comp2, comp3, swapper); } // recursively partition right half if (medianIndex + 1 <= splitTo) { genericPartition(splitIndex + 1, to, medianIndex + 1, splitTo, splitIndexes, comp, comp2, comp3, swapper); } } /** * Same as {@link #partition(int[],int,int,int)} except that it * generically partitions arbitrary shaped data (for example matrices * or multiple arrays) rather than int[] arrays. */ private static int genericPartition(int from, int to, int splitter, IntComparator comp, Swapper swapper) { for (int i = from - 1; ++i <= to;) { if (comp.compare(splitter, i) > 0) { // swap x[i] with x[from] swapper.swap(i, from); from++; } } return from - 1; } /** * Returns the index of the median of the three indexed elements. */ private static int med3(double x[], int a, int b, int c) { return (x[a] < x[b] ? (x[b] < x[c] ? b : x[a] < x[c] ? c : a) : (x[b] > x[c] ? b : x[a] > x[c] ? c : a)); } /** * Returns the index of the median of the three indexed elements. */ private static int med3(int x[], int a, int b, int c) { return (x[a] < x[b] ? (x[b] < x[c] ? b : x[a] < x[c] ? c : a) : (x[b] > x[c] ? b : x[a] > x[c] ? 
c : a)); } /** * Returns the index of the median of the three indexed chars. */ private static int med3(Object x[], int a, int b, int c, java.util.Comparator comp) { int ab = comp.compare(x[a], x[b]); int ac = comp.compare(x[a], x[c]); int bc = comp.compare(x[b], x[c]); return (ab < 0 ? (bc < 0 ? b : ac < 0 ? c : a) : (bc > 0 ? b : ac > 0 ? c : a)); } /** * Returns the index of the median of the three indexed chars. */ private static int med3(int a, int b, int c, IntComparator comp) { int ab = comp.compare(a, b); int ac = comp.compare(a, c); int bc = comp.compare(b, c); return (ab < 0 ? (bc < 0 ? b : ac < 0 ? c : a) : (bc > 0 ? b : ac > 0 ? c : a)); } /** * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that * it partitions double[] rather than int[] arrays. */ public static void partition(double[] list, int from, int to, double[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { double splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. 
the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = partition(list, from, to, splitter); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } } /** * Same as {@link #partition(int[],int,int,int)} except that it partitions * double[] rather than int[] arrays. 
*/ public static int partition(double[] list, int from, int to, double splitter) { double element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from++] = element; } } return from - 1; } /** * Partitions (partially sorts) the given list such that all elements * falling into some intervals are placed next to each other. Returns the * indexes of elements delimiting intervals. *

* Example: *

* list = (7, 4, 5, 50, 6, 4, 3, 6), splitters = (5, 10, 30) * defines the three intervals [-infinity,5), [5,10), [10,30). Lets * define to sort the entire list (from=0, to=7) using all * splitters (splitFrom==0, splitTo=2). *

* The method modifies the list to be * list = (4, 4, 3, 6, 7, 5, 6, 50) and returns the * splitIndexes = (2, 6, 6). In other words, *

    *
  • All values list[0..2] fall into [-infinity,5). *
  • All values list[3..6] fall into [5,10). *
  • All values list[7..6] fall into [10,30), i.e. no * elements, since 7>6. *
  • All values list[7 .. 7=list.length-1] fall into * [30,infinity]. *
  • In general, all values * list[splitIndexes[j-1]+1 .. splitIndexes[j]] fall into interval * j. *
* As can be seen, the list is partially sorted such that values falling * into a certain interval are placed next to each other. Note that * within an interval, elements are entirely unsorted. They are only * sorted across interval boundaries. In particular, this partitioning * algorithm is not stable: the relative order of elements is not * preserved (Producing a stable algorithm would require no more than minor * modifications to method partition(int[],int,int,int)). *

* More formally, this method guarantees that upon return * for all j = splitFrom .. splitTo there holds:
* for all i = splitIndexes[j-1]+1 .. splitIndexes[j]: splitters[j-1] <= list[i] < splitters[j]. *

* Performance: *

* Let N=to-from+1 be the number of elements to be partitioned. Let * k=splitTo-splitFrom+1 be the number of splitter elements. Then * we have the following time complexities *

    *
  • Worst case: O( N * log(k) ). *
  • Average case: O( N * log(k) ). *
  • Best case: O( N ). In general, the more uniform (skewed) the * data is spread across intervals, the more performance approaches the * worst (best) case. If no elements fall into the given intervals, running * time is linear. *
* No temporary memory is allocated; the sort is in-place. *

* Implementation: *

* The algorithm can be seen as a Bentley/McIlroy quicksort where swapping * and insertion sort are omitted. It is designed to detect and take * advantage of skew while maintaining good performance in the uniform case. * * @param list * the list to be partially sorted. * * @param from * the index of the first element within list to be * considered. * @param to * the index of the last element within list to be * considered. The method considers the elements * list[from] .. list[to]. * * @param splitters * the values at which the list shall be split into intervals. * Must be sorted ascending and must not contain multiple * identical values. These preconditions are not checked; be sure * that they are met. * * @param splitFrom * the index of the first splitter element to be considered. * @param splitTo * the index of the last splitter element to be considered. The * method considers the splitter elements * splitters[splitFrom] .. splitters[splitTo]. * * @param splitIndexes * a list into which this method fills the indexes of elements * delimiting intervals. Upon return * splitIndexes[splitFrom..splitTo] will be set * accordingly. Therefore, must satisfy * splitIndexes.length > splitTo. *

* Tip: Normally you will have * splitIndexes.length == splitters.length as well as * from==0, to==list.length-1 and * splitFrom==0, splitTo==splitters.length-1. * * @see cern.colt.Arrays * @see cern.colt.GenericSorting * @see java.util.Arrays */ public static void partition(int[] list, int from, int to, int[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { int element, splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. 
medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); // int key = list[m]; /* * if (splitTo-splitFrom+1 < 5) { // on short lists linear search is * quicker int i=splitFrom-1; while (++i <= splitTo && list[i] < * key); if (i > splitTo || list[i] > key) i = -i-1; // not found * medianIndex = i; } */ // else { /* * * int low = splitFrom; int high = splitTo; int comparison; * * int mid=0; while (low <= high) { mid = (low + high) / 2; * comparison = splitters[mid]-key; if (comparison < 0) low = mid + * 1; else if (comparison > 0) high = mid - 1; else break; //return * mid; // key found } medianIndex = mid; if (low > high) * medianIndex = -(medianIndex + 1); // key not found. //} */ if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // System.out.println("medianIndex="+medianIndex); // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to // Could simply call: int splitIndex = partition(list, from, to, splitter); // but for speed the code is manually inlined. /* * steps += to-from+1; int head = from; for (int i=from-1; ++i<=to; ) { // * swap all elements < splitter to front element = list[i]; if (element < * splitter) { list[i] = list[head]; list[head++] = element; * //swappedElements++; } } int splitIndex = head-1; */ // System.out.println("splitIndex="+splitIndex); splitIndexes[medianIndex] = splitIndex; // if (splitFrom == splitTo) return; // done // Optimization: Handle special cases to cut down recursions. 
if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { // System.out.println("1.recursive: from="+from+", // to="+splitIndex+", splitFrom="+splitFrom+", // splitTo="+(medianIndex-1)); partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { // System.out.println("2.recursive: from="+(splitIndex+1)+", // to="+to+", splitFrom="+(medianIndex+1)+", splitTo="+splitTo); partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } // System.out.println("BACK TRACKING\n\n"); } /** * Partitions (partially sorts) the given list such that all elements * falling into the given interval are placed next to each other. Returns * the index of the element delimiting the interval. *

* Example: *

* list = (7, 4, 5, 50, 6, 4, 3, 6), splitter = 5 defines the two * intervals [-infinity,5), [5,+infinity]. *

* The method modifies the list to be * list = (4, 4, 3, 50, 6, 7, 5, 6) and returns the split index * 2. In other words, *

    *
  • All values list[0..2] fall into [-infinity,5). *
  • All values list[3=2+1 .. 7=list.length-1] fall into * [5,+infinity]. *
* As can be seen, the list is partially sorted such that values falling * into a certain interval are placed next to each other. Note that * within an interval, elements are entirely unsorted. They are only * sorted across interval boundaries. In particular, this partitioning * algorithm is not stable. *

* More formally, this method guarantees that upon return there holds: *

    *
  • for all i = from .. returnValue: list[i] < splitter and *
  • for all * i = returnValue+1 .. list.length-1: !(list[i] < splitter). *
*

* Performance: *

* Let N=to-from+1 be the number of elements to be partially * sorted. Then the time complexity is O( N ). No temporary memory * is allocated; the sort is in-place. * *

* * @param list * the list to be partially sorted. * * @param from * the index of the first element within list to be * considered. * @param to * the index of the last element within list to be * considered. The method considers the elements * list[from] .. list[to]. * * @param splitter * the value at which the list shall be split. * * @return the index of the largest element falling into the interval * [-infinity,splitter), as seen after partitioning. */ public static int partition(int[] list, int from, int to, int splitter) { steps += to - from + 1; /* * System.out.println(); if (from<=to) { System.out.println("SORT * WORKING: from="+from+", to="+to+", splitter="+splitter); } else { * System.out.println("SORT WORKING: NOTHING TO DO."); } */ // returns index of last element < splitter /* * for (int i=from-1; ++i<=to; ) { if (list[i] < splitter) { int * element = list[i]; list[i] = list[from]; list[from++] = element; } } */ int element; for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from++] = element; // swappedElements++; } } // if (from<=to) System.out.println("Swapped "+(head-from)+" elements"); /* * //JAL: int first = from; int last = to+1; --first; while (true) { * while (++first < last && list[first] < splitter); while (first < * --last && !(list[last] < splitter)); if (first >= last) return * first-1; int tmp = list[first]; list[first] = list[last]; list[last] = * tmp; } */ /* * System.out.println("splitter="+splitter); * System.out.println("before="+new IntArrayList(list)); int head = * from; int trail = to; int element; while (head<=trail) { head--; * while (++head < trail && list[head] < splitter); * * trail++; while (--trail > head && list[trail] >= splitter); * * if (head != trail) { element = list[head]; list[head] = list[trail]; * list[trail] = element; } head++; trail--; System.out.println("after * ="+new IntArrayList(list)+", head="+head); } */ /* * 
//System.out.println("splitter="+splitter); * //System.out.println("before="+new IntArrayList(list)); to++; //int * head = from; int element; //int oldHead; while (--to >= from) { * element = list[to]; if (element < splitter) { from--; while (++from < * to && list[from] < splitter); //if (head != to) { list[to] = * list[from]; list[from++] = element; //oldHead = list[head]; * //list[head] = element; //list[i] = oldHead; * * //head++; //} //head++; } //System.out.println("after ="+new * IntArrayList(list)+", head="+head); } */ /* * int i=from-1; int head = from; int trail = to; while (++i <= trail) { * int element = list[i]; if (element < splitter) { if (head == i) * head++; else { // swap list[i] with list[from] int oldHead = * list[head]; int oldTrail = list[trail]; list[head++] = element; * list[i--] = oldTrail; list[trail--] = oldHead; } } * //System.out.println(new IntArrayList(list)); } */ return from - 1; // return head-1; } /** * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that * it partitions Object[] rather than int[] arrays. */ public static void partition(Object[] list, int from, int to, Object[] splitters, int splitFrom, int splitTo, int[] splitIndexes, java.util.Comparator comp) { Object splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. 
int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s, comp); m = med3(list, m - s, m, m + s, comp); n = med3(list, n - 2 * s, n - s, n, comp); } m = med3(list, l, m, n, comp); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo, comp); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = partition(list, from, to, splitter, comp); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. 
if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(comp.compare(splitter, splitters[i]) < 0))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(comp.compare(splitter, splitters[i]) > 0))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { partition(list, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes, comp); } // recursively partition right half if (medianIndex + 1 <= splitTo) { partition(list, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes, comp); } } /** * Same as {@link #partition(int[],int,int,int)} except that it * synchronously partitions the objects of the given list by the * order of the given comparator. */ public static int partition(Object[] list, int from, int to, Object splitter, java.util.Comparator comp) { Object element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (comp.compare(element, splitter) < 0) { // swap x[i] with x[from] list[i] = list[from]; list[from] = element; from++; } } return from - 1; } /** * Equivalent to * partition(list.elements(), from, to, splitters.elements(), 0, splitters.size()-1, splitIndexes.elements()) * . */ public static void partition(DoubleArrayList list, int from, int to, DoubleArrayList splitters, IntArrayList splitIndexes) { partition(list.elements(), from, to, splitters.elements(), 0, splitters.size() - 1, splitIndexes.elements()); } /** * Equivalent to * partition(list.elements(), from, to, splitters.elements(), 0, splitters.size()-1, splitIndexes.elements()) * . 
*/ public static void partition(IntArrayList list, int from, int to, IntArrayList splitters, IntArrayList splitIndexes) { partition(list.elements(), from, to, splitters.elements(), 0, splitters.size() - 1, splitIndexes.elements()); } /** * Same as * {@link #triplePartition(int[],int[],int[],int,int,int[],int,int,int[])} * except that it synchronously partitions double[] rather * than int[] arrays. */ public static void triplePartition(double[] list, double[] secondary, double[] tertiary, int from, int to, double[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { double splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. 
// Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = triplePartition(list, secondary, tertiary, from, to, splitter); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { triplePartition(list, secondary, tertiary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { triplePartition(list, secondary, tertiary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } } /** * Same as {@link #triplePartition(int[],int[],int[],int,int,int)} except * that it synchronously partitions double[] rather than * int[] arrays. 
*/ public static int triplePartition(double[] list, double[] secondary, double[] tertiary, int from, int to, double splitter) { double element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from] = element; element = secondary[i]; secondary[i] = secondary[from]; secondary[from] = element; element = tertiary[i]; tertiary[i] = tertiary[from]; tertiary[from++] = element; } } return from - 1; } /** * Same as {@link #partition(int[],int,int,int[],int,int,int[])} except that * this method synchronously partitions three arrays at the same * time; all three arrays are partially sorted according to the elements of * the primary array. In other words, each time an element in the primary * array is moved from index A to B, the correspoding element within the * secondary array as well as the corresponding element within the tertiary * array are also moved from index A to B. *

* Use cases: *

* Imagine having a large list of 3-dimensional points. If memory consumption
* and performance matter, it is a good idea to physically lay them out as
* three 1-dimensional arrays (using something like Point3D objects
* would be prohibitively expensive, both in terms of time and space). Now
* imagine wanting to histogram the points. We may want to partially sort
* the points by x-coordinate into intervals. This method efficiently does
* the job.
*

* Performance: *

* Same as for single-partition methods. */ public static void triplePartition(int[] list, int[] secondary, int[] tertiary, int from, int to, int[] splitters, int splitFrom, int splitTo, int[] splitIndexes) { int splitter; // int, double --> template type dependent if (splitFrom > splitTo) return; // nothing to do if (from > to) { // all bins are empty from--; for (int i = splitFrom; i <= splitTo;) splitIndexes[i++] = from; return; } // Choose a partition (pivot) index, m // Ideally, the pivot should be the median, because a median splits a // list into two equal sized sublists. // However, computing the median is expensive, so we use an // approximation. int medianIndex; if (splitFrom == splitTo) { // we don't really have a choice medianIndex = splitFrom; } else { // we do have a choice int m = (from + to) / 2; // Small arrays, middle element int len = to - from + 1; if (len > SMALL) { int l = from; int n = to; if (len > MEDIUM) { // Big arrays, pseudomedian of 9 int s = len / 8; l = med3(list, l, l + s, l + 2 * s); m = med3(list, m - s, m, m + s); n = med3(list, n - 2 * s, n - s, n); } m = med3(list, l, m, n); // Mid-size, pseudomedian of 3 } // Find the splitter closest to the pivot, i.e. the splitter that // best splits the list into two equal sized sublists. medianIndex = Sorting.binarySearchFromTo(splitters, list[m], splitFrom, splitTo); if (medianIndex < 0) medianIndex = -medianIndex - 1; // not found if (medianIndex > splitTo) medianIndex = splitTo; // not found, one past the end } splitter = splitters[medianIndex]; // Partition the list according to the splitter, i.e. // Establish invariant: list[i] < splitter <= list[j] for // i=from..medianIndex and j=medianIndex+1 .. to int splitIndex = triplePartition(list, secondary, tertiary, from, to, splitter); splitIndexes[medianIndex] = splitIndex; // Optimization: Handle special cases to cut down recursions. 
if (splitIndex < from) { // no element falls into this bin // all bins with splitters[i] <= splitter are empty int i = medianIndex - 1; while (i >= splitFrom && (!(splitter < splitters[i]))) splitIndexes[i--] = splitIndex; splitFrom = medianIndex + 1; } else if (splitIndex >= to) { // all elements fall into this bin // all bins with splitters[i] >= splitter are empty int i = medianIndex + 1; while (i <= splitTo && (!(splitter > splitters[i]))) splitIndexes[i++] = splitIndex; splitTo = medianIndex - 1; } // recursively partition left half if (splitFrom <= medianIndex - 1) { triplePartition(list, secondary, tertiary, from, splitIndex, splitters, splitFrom, medianIndex - 1, splitIndexes); } // recursively partition right half if (medianIndex + 1 <= splitTo) { triplePartition(list, secondary, tertiary, splitIndex + 1, to, splitters, medianIndex + 1, splitTo, splitIndexes); } } /** * Same as {@link #partition(int[],int,int,int)} except that this method * synchronously partitions three arrays at the same time; all three * arrays are partially sorted according to the elements of the primary * array. In other words, each time an element in the primary array is moved * from index A to B, the correspoding element within the secondary array as * well as the corresponding element within the tertiary array are also * moved from index A to B. *

* Performance: *

* Same as for single-partition methods. */ public static int triplePartition(int[] list, int[] secondary, int[] tertiary, int from, int to, int splitter) { int element; // int, double --> template type dependent for (int i = from - 1; ++i <= to;) { element = list[i]; if (element < splitter) { // swap x[i] with x[from] list[i] = list[from]; list[from] = element; element = secondary[i]; secondary[i] = secondary[from]; secondary[from] = element; element = tertiary[i]; tertiary[i] = tertiary[from]; tertiary[from++] = element; } } return from - 1; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy