smile.sort.IntHeapSelect Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of openchemlib Show documentation
Show all versions of openchemlib Show documentation
Open Source Chemistry Library
/*******************************************************************************
* Copyright (c) 2010 Haifeng Li
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
package smile.sort;
/**
* This class tracks the smallest values seen thus far in a stream of values.
* This implements a single-pass selection for large data sets. That is,
* we have a stream of input values, each of which we get to see only once. We
* want to be able to report at any time, say after n values, the i-th smallest
* value seen so far.
*
* @author Haifeng Li
*/
public class IntHeapSelect {
    /**
     * The heap size, i.e. the number of smallest values to track.
     */
    private final int k;
    /**
     * The number of values that have been passed to {@link #add(int)} so far.
     * This keeps counting after the heap is full, so it may exceed k.
     */
    private int n;
    /**
     * True if the tracked values are currently fully sorted (descending).
     */
    private boolean sorted;
    /**
     * The heap array. Only the first min(k, n) slots hold tracked values.
     */
    private final int[] heap;

    /**
     * Constructor.
     * @param k the heap size.
     */
    public IntHeapSelect(int k) {
        this(new int[k]);
    }

    /**
     * Constructor. The given array is used directly as the backing storage
     * (it is not copied), so its contents are overwritten as values are added.
     * @param heap the array to store smallest values to track.
     */
    public IntHeapSelect(int[] heap) {
        this.heap = heap;
        k = heap.length;
        n = 0;
        sorted = false;
    }

    /**
     * Assimilate a new value from the stream.
     * @param datum the new value.
     */
    public void add(int datum) {
        sorted = false;
        if (n < k) {
            // Still filling the array; heap order is established only once
            // all k slots are occupied.
            heap[n++] = datum;
            if (n == k) {
                heapify(heap);
            }
        } else {
            n++;
            // heap[0] is the largest of the k tracked values; a new value is
            // only interesting if it is smaller than that.
            if (datum < heap[0]) {
                heap[0] = datum;
                SortUtils.siftDown(heap, 0, k - 1);
            }
        }
    }

    /**
     * Returns the k-th smallest value seen so far. The result is only
     * meaningful once at least k values have been added; before that the
     * array has not yet been put in heap order.
     * @return the value stored at the root of the heap.
     */
    public int peek() {
        return heap[0];
    }

    /**
     * Returns the i-th smallest value seen so far. i = 0 returns the smallest
     * value seen, i = 1 the second smallest, ..., i = k-1 the last position
     * tracked. Also, i must be less than the number of values assimilated
     * so far.
     * @param i the zero-based rank of the value to return.
     * @return the i-th smallest value seen so far.
     * @throws IllegalArgumentException if i is negative or not less than
     *         min(k, number of values added).
     */
    public int get(int i) {
        // Number of slots of the array that actually hold tracked values.
        int m = Math.min(k, n);

        if (i < 0) {
            throw new IllegalArgumentException("HeapSelect i is negative: " + i);
        }

        if (i > m - 1) {
            throw new IllegalArgumentException("HeapSelect i is greater than the number of data received so far.");
        }

        // Only reachable when the heap is full (i <= m - 1 and i == k - 1
        // imply m == k): the largest tracked value sits at the root, so no
        // sorting is needed.
        if (i == k - 1) {
            return heap[0];
        }

        sort();

        // The first m slots are in descending order, so the smallest value is
        // at index m - 1. Indexing from k - 1 would read unfilled slots when
        // fewer than k values have been added.
        return heap[m - 1 - i];
    }

    /**
     * Sort the smallest values. After this call the tracked values are stored
     * in descending order in the first min(k, n) slots of the heap array.
     */
    public void sort() {
        if (!sorted) {
            sort(heap, Math.min(k, n));
            sorted = true;
        }
    }

    /**
     * Place the array in max-heap order. Note that the array is not fully sorted.
     * @param arr the array to heapify in place.
     */
    private static void heapify(int[] arr) {
        int n = arr.length;
        for (int i = n / 2 - 1; i >= 0; i--) {
            SortUtils.siftDown(arr, i, n - 1);
        }
    }

    /**
     * Sorts the first n elements of the specified array into descending
     * order. It is based on Shell sort, which is very efficient because the
     * array is almost sorted by heapifying.
     * @param a the array to sort in place.
     * @param n the number of leading elements to sort.
     */
    private static void sort(int[] a, int n) {
        // Find the first increment of the sequence 1, 4, 13, 40, ... (3h + 1)
        // that exceeds n.
        int inc = 1;
        do {
            inc *= 3;
            inc++;
        } while (inc <= n);

        // Walk the increments back down to 1, doing a gapped insertion sort
        // (descending) at each step.
        do {
            inc /= 3;
            for (int i = inc; i < n; i++) {
                int v = a[i];
                int j = i;
                while (a[j - inc] < v) {
                    a[j] = a[j - inc];
                    j -= inc;
                    if (j < inc) {
                        break;
                    }
                }
                a[j] = v;
            }
        } while (inc > 1);
    }
}