// smile.sort.HeapSelect -- Maven / Gradle / Ivy
/******************************************************************************
* Confidential Proprietary *
* (c) Copyright Haifeng Li 2011, All Rights Reserved *
******************************************************************************/
package smile.sort;
/**
* This class tracks the smallest values seen thus far in a stream of values.
* This implements a single-pass selection for large data sets. That is,
* we have a stream of input values, each of which we get to see only once. We
* want to be able to report at any time, say after n values, the i-th smallest
* value see so far.
*
* @author Haifeng Li
*/
public class HeapSelect> {
/**
* The heap size.
*/
private int k;
/**
* The number of objects that have been added into heap.
*/
private int n;
/**
* True if the heap is fully sorted.
*/
private boolean sorted;
/**
* The heap array.
*/
private T[] heap;
/**
* Constructor.
* @param heap the array to store smallest values to track.
*/
public HeapSelect(T[] heap) {
this.heap = heap;
k = heap.length;
n = 0;
sorted = false;
}
/**
* Assimilate a new value from the stream.
*/
public void add(T datum) {
sorted = false;
if (n < k) {
heap[n++] = datum;
if (n == k) {
heapify(heap);
}
} else {
n++;
if (datum.compareTo(heap[0]) < 0) {
heap[0] = datum;
SortUtils.siftDown(heap, 0, k-1);
}
}
}
/**
* In case of avoiding creating new objects frequently, one may check and
* update the peek object directly and call this method to sort the internal
* array in heap order.
*/
public void heapify() {
if (n < k) {
throw new IllegalStateException();
}
SortUtils.siftDown(heap, 0, k-1);
}
/**
* Returns the k-th smallest value seen so far.
*/
public T peek() {
return heap[0];
}
/**
* Returns the i-th smallest value seen so far. i = 0 returns the smallest
* value seen, i = 1 the second largest, ..., i = k-1 the last position
* tracked. Also, i must be less than the number of previous assimilated.
*/
public T get(int i) {
if (i > Math.min(k, n) - 1) {
throw new IllegalArgumentException("HeapSelect i is greater than the number of data received so far.");
}
if (i == k-1) {
return heap[0];
}
if (!sorted) {
sort(heap, Math.min(k,n));
sorted = true;
}
return heap[k-1-i];
}
/**
* Sort the smallest values.
*/
public void sort() {
if (!sorted) {
sort(heap, Math.min(k,n));
sorted = true;
}
}
/**
* Place the array in max-heap order. Note that the array is not fully sorted.
*/
private static > void heapify(T[] arr) {
int n = arr.length;
for (int i = n / 2 - 1; i >= 0; i--)
SortUtils.siftDown(arr, i, n - 1);
}
/**
* Sorts the specified array into descending order. It is based on Shell
* sort, which is very efficient because the array is almost sorted by
* heapifying.
*/
private static > void sort(T[] a, int n) {
int inc = 1;
do {
inc *= 3;
inc++;
} while (inc <= n);
do {
inc /= 3;
for (int i = inc; i < n; i++) {
T v = a[i];
int j = i;
while (a[j - inc].compareTo(v) < 0) {
a[j] = a[j - inc];
j -= inc;
if (j < inc) {
break;
}
}
a[j] = v;
}
} while (inc > 1);
}
}