com.browseengine.bobo.util.BigNestedIntArray Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bobo-browse Show documentation
Show all versions of bobo-browse Show documentation
Bobo is a Faceted Search implementation written purely in Java, an extension of Apache Lucene
The newest version!
/**
*
*/
package com.browseengine.bobo.util;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.util.OpenBitSet;
import com.browseengine.bobo.facets.data.TermValueList;
import com.browseengine.bobo.query.scoring.FacetTermScoringFunction;
public final class BigNestedIntArray {
public static final int MAX_ITEMS = 1024;
private static final int MAX_SLOTS = 1024;
private static final int SLOTID_MASK = 0x3FF;
private static final int PAGEID_SHIFT = 10;
private static final int COUNT_MASK = 0x7FF;
private static final int VALIDX_SHIFT = 11;
private static final int ROUNDING = 255;
private static final int MISSING = Integer.MIN_VALUE;
private static final int[] MISSING_PAGE;
static {
MISSING_PAGE = new int[MAX_SLOTS];
Arrays.fill(MISSING_PAGE, MISSING);
}
private int _maxItems = MAX_ITEMS;
private int[][] _list;
private int _size;
static final private String[] EMPTY = new String[0];
static public abstract class Loader {
private int[][] _list;
private int _curPageNo;
private int[] _curPage;
private int _curSlot;
private int _curData;
private int[][] _reuse;
private int[] _reuseIdx;
public int reuseUsage;
private static Comparator COMPARE_ARRAYSIZE = new Comparator() {
@Override
public int compare(int[] o1, int[] o2) {
if (o1 == null || o2 == null) {
if (o1 != null) return -1;
if (o2 != null) return 1;
return 0;
}
return (o1.length - o2.length);
}
};
private void reclaim(int[][] list) {
_reuse = null;
_reuseIdx = null;
reuseUsage = 0;
if (list != null && list.length > 0) {
Arrays.sort(list, COMPARE_ARRAYSIZE); // sort by size
for (int i = (list.length - 1); i >= 0; i--) {
if (list[i] != null) {
_reuse = list;
_reuseIdx = list[i]; // use the largest page for tracking
break;
}
}
if (_reuseIdx == null) return;
Arrays.fill(_reuseIdx, -1);
for (int i = 0; i < list.length; i++) {
if (list[i] == null) break;
int idx = (list[i]).length - 1;
if (idx >= 0 && _reuseIdx[idx] == -1) _reuseIdx[idx] = i;
}
}
}
private int[] alloc(int size) {
size += (ROUNDING - 1);
size -= (size % ROUNDING);
if (_reuseIdx != null && _reuseIdx.length >= size) {
int location = _reuseIdx[size - 1];
if (location >= 0 && location < _reuse.length) {
int[] page = _reuse[location];
if (page != null && page.length == size) {
// found a reusable page
_reuseIdx[size - 1]++;
_reuse[location] = null;
if (page == _reuseIdx) {
// find a replacement page for reuseIdx
for (int i = location; i >= 0; i--) {
if (_reuse[i] != null) {
_reuseIdx = _reuse[i];
System.arraycopy(page, 0, _reuseIdx, 0, _reuseIdx.length);
}
}
}
reuseUsage += size;
return page;
} else {
// no more page with this size
_reuseIdx[size - 1] = -1;
}
}
}
return new int[size];
}
/**
* initializes the loading context
* @param size
*/
final public void initialize(int size, int[][] oldList) {
reclaim(oldList);
_list = new int[(size + MAX_SLOTS - 1) / MAX_SLOTS][];
_curPageNo = 0;
_curSlot = 0;
_curData = MAX_SLOTS;
_curPage = new int[MAX_SLOTS * 2];
}
/**
* finishes loading
*/
final public int[][] finish() {
if (_list.length > _curPageNo) {
// save the last page
while (_curSlot < MAX_SLOTS) {
_curPage[_curSlot++] = MISSING;
}
_list[_curPageNo] = copyPageTo(alloc(_curData));
}
_reuse = null;
_reuseIdx = null;
return _list;
}
/**
* loads data
*/
public abstract void load() throws Exception;
/**
* reserves storage for the next int array data
* @param id
* @param size
*/
final protected void reserve(int id, int size) {
final int pageNo = (id >> PAGEID_SHIFT);
final int slotId = (id & SLOTID_MASK);
if (pageNo != _curPageNo) {
if (pageNo < _curPageNo) throw new IllegalArgumentException("id is out of order");
// save the current page
while (_curSlot < MAX_SLOTS) {
_curPage[_curSlot++] = MISSING;
}
_list[_curPageNo++] = copyPageTo(alloc(_curData));
_curSlot = 0;
_curData = MAX_SLOTS;
while (_curPageNo < pageNo) {
_list[_curPageNo++] = null;
}
} else {
if (_curPageNo == pageNo && _curSlot > slotId) throw new IllegalArgumentException(
"id is out of order");
}
while (_curSlot < slotId) {
_curPage[_curSlot++] = MISSING;
}
if (_curPage.length <= _curData + size) {
// double the size of the variable part at least
_curPage = copyPageTo(new int[_curPage.length
+ Math.max((_curPage.length - MAX_SLOTS), size)]);
}
}
/**
* stores int array data. must call reserve(int,int) first to allocate storage
* @param data
* @param off
* @param len
*/
final protected void store(int[] data, int off, int len) {
if (len == 0) {
_curPage[_curSlot] = MISSING;
} else if (len == 1 && data[off] >= 0) {
_curPage[_curSlot] = data[off];
} else {
_curPage[_curSlot] = ((-_curData) << VALIDX_SHIFT | len);
System.arraycopy(data, off, _curPage, _curData, len);
_curData += len;
}
_curSlot++;
}
protected void add(int id, int[] data, int off, int len) {
reserve(id, len);
store(data, off, len);
}
/**
* allocates storage for future calls of setData.
* @param id
* @param len
*/
protected void allocate(int id, int len, boolean nonNegativeIntOnly) {
reserve(id, len);
if (len == 0) {
_curPage[_curSlot] = MISSING;
} else if (len == 1 && nonNegativeIntOnly) {
_curPage[_curSlot] = 0;
} else {
_curPage[_curSlot] = ((-_curData) << VALIDX_SHIFT);
_curData += len;
}
_curSlot++;
}
protected int[] copyPageTo(int[] dst) {
System.arraycopy(_curPage, 0, dst, 0, _curData);
return dst;
}
}
/**
* Constructs BigNEstedIntArray
* @throws Exception
*/
public BigNestedIntArray() {
}
/**
* set maximum number of items per doc.
* @param maxItems
*/
public void setMaxItems(int maxItems) {
_maxItems = Math.min(maxItems, MAX_ITEMS);
}
/**
* get maximum number of items per doc.
* @return maxItems
*/
public int getMaxItems() {
return _maxItems;
}
/**
* loads data using the loader
* @param size
* @param loader
* @throws Exception
*/
public final void load(int size, Loader loader) throws Exception {
_size = size;
loader.initialize(size, _list);
if (size > 0) {
loader.load();
}
_list = loader.finish();
}
public int size() {
return _size;
}
/**
* gets an int data at [id][idx]
* @param id
* @param idx
* @param defaultValue
*/
public final int getData(int id, int idx, int defaultValue) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return defaultValue;
int val = page[id & SLOTID_MASK];
if (val >= 0) {
return val;
} else if (val == MISSING) {
return defaultValue;
} else {
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
return page[idx - val];
}
}
/**
* gets an int data at [id]
* @param id
* @param buf
* @return length
*/
public final int getData(int id, int[] buf) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return 0;
int val = page[id & SLOTID_MASK];
if (val >= 0) {
buf[0] = val;
return 1;
} else if (val == MISSING) {
return 0;
} else {
final int num = (val & COUNT_MASK);
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
System.arraycopy(page, (-val), buf, 0, num);
return num;
}
}
/**
* translates the int value using the val list
* @param id
* @param valarray
*/
public final String[] getTranslatedData(int id, TermValueList> valarray) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
return EMPTY;
} else {
int val = page[id & SLOTID_MASK];
if (val >= 0) {
return new String[] { valarray.get(val) };
} else if (val == MISSING) {
return EMPTY;
} else {
final int num = (val & COUNT_MASK);
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
String[] ret = new String[num];
for (int i = 0; i < num; i++) {
ret[i] = valarray.get(page[i - val]);
}
return ret;
}
}
}
/**
* translates the int value using the val list
* @param id
* @param valarray
*/
public final Object[] getRawData(int id, TermValueList> valarray) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
return EMPTY;
} else {
int val = page[id & SLOTID_MASK];
if (val >= 0) {
return new Object[] { valarray.getRawValue(val) };
} else if (val == MISSING) {
return EMPTY;
} else {
final int num = (val & COUNT_MASK);
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
Object[] ret = new Object[num];
for (int i = 0; i < num; i++) {
ret[i] = valarray.getRawValue(page[i - val]);
}
return ret;
}
}
}
public final float getScores(int id, int[] freqs, float[] boosts,
FacetTermScoringFunction function) {
function.clearScores();
final int[] page = _list[id >> PAGEID_SHIFT];
int val = page[id & SLOTID_MASK];
if (val >= 0) {
return function.score(freqs[val], boosts[val]);
} else {
final int num = (val & COUNT_MASK);
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
int idx;
for (int i = 0; i < num; i++) {
idx = page[i - val];
function.scoreAndCollect(freqs[idx], boosts[idx]);
}
return function.getCurrentScore();
}
}
public final int compare(int i, int j) {
final int[] page1 = _list[i >> PAGEID_SHIFT];
final int[] page2 = _list[j >> PAGEID_SHIFT];
if (page1 == null) {
if (page2 == null) return 0;
else return -1;
} else {
if (page2 == null) return 1;
}
final int val1 = page1[i & SLOTID_MASK];
final int val2 = page2[j & SLOTID_MASK];
if (val1 >= 0 && val2 >= 0) return val1 - val2;
if (val1 >= 0) {
if (val2 == MISSING) return 1;
int idx = -(val2 >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int val = val1 - page2[idx];
if (val == 0) {
return -1;
} else {
return val;
}
}
if (val2 >= 0) {
if (val1 == MISSING) return -1;
int idx = -(val1 >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int val = page1[idx] - val2;
if (val == 0) {
return 1;
} else {
return val;
}
}
if (val1 == MISSING) {
if (val2 == MISSING) {
return 0;
} else return -1;
} else {
if (val2 == MISSING) {
return 1;
}
}
int idx1 = -(val1 >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int len1 = (val1 & COUNT_MASK);
int idx2 = -(val2 >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int len2 = (val2 & COUNT_MASK);
for (int k = 0; k < len1; ++k) {
if (k >= len2) {
return 1;
}
int compVal = page1[idx1 + k] - page2[idx2 + k];
if (compVal != 0) return compVal;
}
if (len1 == len2) return 0;
return -1;
}
public final boolean contains(int id, int value) {
return contains(id, value, false);
}
public final boolean contains(int id, int value, boolean withMissing) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
if (withMissing && value == 0) {
return true;
} else {
return false;
}
}
final int val = page[id & SLOTID_MASK];
if (val >= 0) {
return (val == value);
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
if (page[idx++] == value) return true;
}
} else if (withMissing) {
return (value == 0);
}
return false;
}
/**
* @param id - documentID
* @param startValueId - inclusive
* @param endValueId - exclusive
*/
public final boolean containsValueInRange(int id, int startValueId, int endValueId) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return false;
final int val = page[id & SLOTID_MASK];
if (val >= 0) {
return val >= startValueId && val < endValueId;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
if (page[idx] >= startValueId && page[idx] < endValueId) return true;
idx++;
}
}
return false;
}
public final boolean contains(int id, OpenBitSet values) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return false;
final int val = page[id & SLOTID_MASK];
if (val >= 0) {
return (values.fastGet(val));
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
if (values.fastGet(page[idx++])) return true;
}
}
return false;
}
public final int findValue(int value, int id, int maxID) {
return findValue(value, id, maxID, false);
}
public final int findValue(int value, int id, int maxID, boolean withMissing) {
int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
while (true) {
final int val = page[id & SLOTID_MASK];
if (val >= 0) {
if (val == value) return id;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
if (page[idx++] == value) return id;
}
} else if (withMissing) {
if (0 == value) return id;
}
if (id >= maxID) break;
if (((++id) & SLOTID_MASK) == 0) {
page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
}
}
return DocIdSetIterator.NO_MORE_DOCS;
}
public final int findValues(OpenBitSet values, int id, int maxID) {
return findValues(values, id, maxID, false);
}
public final int findValues(OpenBitSet values, int id, int maxID, boolean withMissing) {
int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
while (true) {
final int val = page[id & SLOTID_MASK];
if (val >= 0) {
if (values.fastGet(val)) return id;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
if (values.fastGet(page[idx++])) return id;
}
} else if (withMissing) {
if (values.fastGet(0)) return id;
}
if (id >= maxID) break;
if ((++id & SLOTID_MASK) == 0) {
page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
}
}
return DocIdSetIterator.NO_MORE_DOCS;
}
public final int findValuesInRange(int startIndex, int endIndex, int id, int maxID) {
int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
while (true) {
int val = page[id & SLOTID_MASK];
if (val >= 0) {
if (val >= startIndex && val <= endIndex) return id;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT);// signed shift, remember this is a negative number
int end = idx + (val & COUNT_MASK);
while (idx < end) {
val = page[idx++];
if (val >= startIndex && val <= endIndex) return id;
}
}
if (id >= maxID) break;
if ((++id & SLOTID_MASK) == 0) {
page = _list[id >> PAGEID_SHIFT];
if (page == null) page = MISSING_PAGE;
}
}
return DocIdSetIterator.NO_MORE_DOCS;
}
public final int count(final int id, final int[] count) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
count[0]++;
return 0;
}
int val = page[id & SLOTID_MASK];
if (val >= 0) {
count[val]++;
return 1;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
int cnt = (val & COUNT_MASK);
int end = idx + cnt;
while (idx < end) {
count[page[idx++]]++;
}
return cnt;
}
count[0]++;
return 0;
}
public final void countNoReturn(final int id, final int[] count) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
count[0]++;
return;
}
int val = page[id & SLOTID_MASK];
if (val >= 0) {
count[val]++;
return;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
int cnt = (val & COUNT_MASK);
int end = idx + cnt;
while (idx < end) {
count[page[idx++]]++;
}
return;
}
count[0]++;
return;
}
public final void countNoReturn(final int id, final BigSegmentedArray count) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
count.add(0, count.get(0) + 1);
return;
}
int val = page[id & SLOTID_MASK];
if (val >= 0) {
count.add(val, count.get(val) + 1);
return;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
int cnt = (val & COUNT_MASK);
int end = idx + cnt;
while (idx < end) {
count.add(page[idx], count.get(page[idx]) + 1);
idx++;
}
return;
}
count.add(0, count.get(0) + 1);
return;
}
public final void countNoReturnWithFilter(final int id, final int[] count, OpenBitSet filter) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
count[0]++;
return;
}
int val = page[id & SLOTID_MASK];
if (val >= 0) {
if (filter.fastGet(val)) {
count[val]++;
}
return;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
int cnt = (val & COUNT_MASK);
int end = idx + cnt;
while (idx < end) {
int value = page[idx++];
if (filter.fastGet(value)) {
count[value]++;
}
}
return;
}
count[0]++;
return;
}
public final void countNoReturnWithFilter(final int id, final BigSegmentedArray count,
OpenBitSet filter) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) {
count.add(0, count.get(0) + 1);
return;
}
int val = page[id & SLOTID_MASK];
if (val >= 0) {
if (filter.fastGet(val)) {
count.add(val, count.get(val) + 1);
}
return;
} else if (val != MISSING) {
int idx = -(val >> VALIDX_SHIFT); // signed shift, remember val is a negative number
int cnt = (val & COUNT_MASK);
int end = idx + cnt;
while (idx < end) {
int value = page[idx++];
if (filter.fastGet(value)) {
count.add(value, count.get(value) + 1);
}
}
return;
}
count.add(0, count.get(0) + 1);
return;
}
/**
* returns the number data items for id
* @param id
*/
public final int getNumItems(int id) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return 0;
int val = page[id & SLOTID_MASK];
if (val >= 0) return 1;
if (val == MISSING) return 0;
return (val & COUNT_MASK);
}
/**
* adds Data to id
*/
public final boolean addData(final int id, final int data) {
final int[] page = _list[id >> PAGEID_SHIFT];
if (page == null) return true;
final int slotId = (id & SLOTID_MASK);
int val = page[slotId];
if (val == MISSING) {
return true; // don't store
} else if (val >= 0) {
page[slotId] = data; // only one value
return true;
} else {
int num = (val & COUNT_MASK);
if (num >= _maxItems) return false;
val >>= VALIDX_SHIFT; // signed shift, remember this is a negative number
page[num - val] = data;
val = ((val << VALIDX_SHIFT) | (num + 1));
page[slotId] = val;
return true;
}
}
/**
* A loader that buffer all data in memory, then load them to BigNestedIntArray.
* Data does not need to be sorted prior to the operation.
* Note that this loader supports only non-negative integer data.
*/
public final static class BufferedLoader extends Loader {
private static int EOD = Integer.MIN_VALUE;
private static int SEGSIZE = 8;
private int _size;
private final BigIntArray _info;
private BigIntBuffer _buffer;
private int _maxItems;
public BufferedLoader(int size, int maxItems, BigIntBuffer buffer) {
_size = size;
_maxItems = Math.min(maxItems, BigNestedIntArray.MAX_ITEMS);
_info = new BigIntArray(size << 1); // pointer and count
_info.fill(EOD);
_buffer = buffer;
}
public BufferedLoader(int size) {
this(size, MAX_ITEMS, new BigIntBuffer());
}
/**
* resets loader. This also resets underlying BigIntBuffer.
*/
public void reset(int size, int maxItems, BigIntBuffer buffer) {
if (size >= capacity()) throw new IllegalArgumentException("unable to change size");
_size = size;
_maxItems = maxItems;
_info.fill(EOD);
_buffer = buffer;
}
/**
* adds a pair of id and value to the buffer
* @param id
* @param val
*/
public final boolean add(int id, int val) {
int ptr = _info.get(id << 1);
if (ptr == EOD) {
// 1st insert
_info.add(id << 1, val);
return true;
}
int cnt = _info.get((id << 1) + 1);
if (cnt == EOD) {
// 2nd insert
_info.add((id << 1) + 1, val);
return true;
}
if (ptr >= 0) {
// this id has two values stored in-line.
int firstVal = ptr;
int secondVal = cnt;
ptr = _buffer.alloc(SEGSIZE);
_buffer.set(ptr++, EOD);
_buffer.set(ptr++, firstVal);
_buffer.set(ptr++, secondVal);
_buffer.set(ptr++, val);
cnt = 3;
} else {
ptr = (-ptr);
if (cnt >= _maxItems) return false; // exceeded the limit
if ((ptr % SEGSIZE) == 0) {
int oldPtr = ptr;
ptr = _buffer.alloc(SEGSIZE);
_buffer.set(ptr++, (-oldPtr));
}
_buffer.set(ptr++, val);
cnt++;
}
_info.add(id << 1, (-ptr));
_info.add((id << 1) + 1, cnt);
return true;
}
private final int readToBuf(int id, int[] buf) {
int ptr = _info.get(id << 1);
int cnt = _info.get((id << 1) + 1);
int i;
if (ptr >= 0) {
// read in-line data
i = 0;
buf[i++] = ptr;
if (cnt >= 0) buf[i++] = cnt;
return i;
}
// read from segments
i = cnt;
while (ptr != EOD) {
ptr = (-ptr) - 1;
int val;
while ((val = _buffer.get(ptr--)) >= 0) {
buf[--i] = val;
}
ptr = val;
}
if (i > 0) {
throw new RuntimeException("error reading buffered data back");
}
return cnt;
}
@Override
public final void load() {
int[] buf = new int[MAX_ITEMS];
int size = _size;
for (int i = 0; i < size; i++) {
int count = readToBuf(i, buf);
if (count > 0) {
add(i, buf, 0, count);
}
}
}
public final int capacity() {
return _info.capacity() >> 1;
}
}
}