com.browseengine.bobo.facets.impl.RangeFacetCountCollector Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bobo-browse Show documentation
Show all versions of bobo-browse Show documentation
Bobo is a Faceted Search implementation written purely in Java, an extension of Apache Lucene
The newest version!
package com.browseengine.bobo.facets.impl;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.filter.FacetRangeFilter;
import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;
public class RangeFacetCountCollector implements FacetCountCollector {
private final FacetSpec _ospec;
protected BigSegmentedArray _count;
private int _countlength;
private final BigSegmentedArray _array;
protected FacetDataCache> _dataCache;
private final String _name;
private final TermStringList _predefinedRanges;
private int[][] _predefinedRangeIndexes;
/**
 * Creates a collector that counts hits per value index of the given data cache.
 *
 * @param name facet name reported by {@code getName()}
 * @param dataCache data cache supplying {@code freqs} and {@code orderArray}
 * @param docBase not used by this constructor
 * @param ospec facet spec consulted when building facet lists; may be {@code null}
 * @param predefinedRanges range strings to report on, or {@code null} for none; the supplied
 *        list is not modified, a sorted copy is kept internally
 */
public RangeFacetCountCollector(String name, FacetDataCache<?> dataCache, int docBase,
    FacetSpec ospec, List<String> predefinedRanges) {
  _name = name;
  _dataCache = dataCache;
  _countlength = _dataCache.freqs.length;
  _count = new LazyBigIntArray(_countlength);
  _array = _dataCache.orderArray;
  _ospec = ospec;
  if (predefinedRanges == null) {
    _predefinedRanges = null;
  } else {
    // Sort a private copy: the caller's list may be shared between collectors or be
    // unmodifiable, so it must not be reordered in place.
    List<String> sortedRanges = new ArrayList<String>(predefinedRanges);
    Collections.sort(sortedRanges);
    _predefinedRanges = new TermStringList();
    _predefinedRanges.addAll(sortedRanges);

    // Parse every range string once up front; entry i belongs to the i-th sorted range.
    _predefinedRangeIndexes = new int[_predefinedRanges.size()][];
    int i = 0;
    for (String range : _predefinedRanges) {
      _predefinedRangeIndexes[i++] = FacetRangeFilter.parse(_dataCache, range);
    }
  }
}
/**
 * Gets the distribution of the collected counts. When predefined ranges are available, the
 * result holds one summed count per predefined range (in predefined-range order); otherwise
 * the raw per-value count array is returned as is.
 *
 * @return per-range sums when predefined ranges exist, else the live per-value counts
 */
@Override
public BigSegmentedArray getCountDistribution() {
  if (_predefinedRangeIndexes == null) {
    return _count;
  }
  BigSegmentedArray dist = new LazyBigIntArray(_predefinedRangeIndexes.length);
  int n = 0;
  for (int[] range : _predefinedRangeIndexes) {
    int sum = 0;
    // The end index is inclusive, exactly as getFacet, getFacetHitsCount, getFacets and
    // iterator treat it; stopping at "< end" would drop the last value of every range.
    for (int i = range[0]; i <= range[1]; ++i) {
      sum += _count.get(i);
    }
    dist.add(n++, sum);
  }
  return dist;
}
/**
 * Returns the facet name this collector was constructed with.
 *
 * @return the value of the {@code name} constructor argument
 */
@Override
public String getName() {
return _name;
}
/**
 * Builds the facet for a single range expression by summing the collected counts over the
 * value indexes the expression parses to (both bounds inclusive).
 *
 * @param value range expression handed to {@code FacetRangeFilter.parse}
 * @return a facet carrying {@code value} and the summed count, or {@code null} when the
 *         expression does not parse to an index pair
 */
@Override
public BrowseFacet getFacet(String value) {
  final int[] bounds = FacetRangeFilter.parse(_dataCache, value);
  if (bounds == null) {
    return null;
  }
  int total = 0;
  int idx = bounds[0];
  while (idx <= bounds[1]) {
    total += _count.get(idx);
    idx++;
  }
  return new BrowseFacet(value, total);
}
/**
 * Sums the collected counts over the value indexes a range expression parses to (both bounds
 * inclusive).
 *
 * @param value range expression; cast to {@code String} before parsing
 * @return the summed count, or {@code 0} when the expression does not parse to an index pair
 */
@Override
public int getFacetHitsCount(Object value) {
  final String rangeText = (String) value;
  final int[] bounds = FacetRangeFilter.parse(_dataCache, rangeText);
  if (bounds == null) {
    return 0;
  }
  int total = 0;
  for (int idx = bounds[0], last = bounds[1]; idx <= last; idx++) {
    total += _count.get(idx);
  }
  return total;
}
/**
 * Records one hit for the given document: looks up the document's value index in the order
 * array and stores that index's count incremented by one.
 *
 * @param docid document id used to index the order array
 */
@Override
public void collect(int docid) {
  final int valueIndex = _array.get(docid);
  final int previous = _count.get(valueIndex);
  _count.add(valueIndex, previous + 1);
}
/**
 * Replaces the collected counts with the data cache's {@code freqs} array instead of counting
 * document by document, and refreshes the cached length to match.
 */
@Override
public final void collectAll() {
// Counts are rebuilt wholesale from the cache's freqs via BigIntArray.fromArray.
_count = BigIntArray.fromArray(_dataCache.freqs);
_countlength = _dataCache.freqs.length;
}
/**
 * Replaces every element of {@code facets}, in place, with a {@code RangeFacet} that carries
 * the same hit count and uses the element's value as both the lower and the upper bound.
 *
 * @param facets array whose elements are overwritten
 */
void convertFacets(BrowseFacet[] facets) {
  for (int pos = 0; pos < facets.length; pos++) {
    final BrowseFacet source = facets[pos];
    final int hitCount = source.getFacetValueHitCount();
    final String bound = source.getValue();

    final RangeFacet converted = new RangeFacet();
    converted.setValues(bound, bound);
    converted.setFacetValueHitCount(hitCount);
    facets[pos] = converted;
  }
}
/**
 * Builds one facet per predefined range whose summed hit count reaches the spec's minimum hit
 * count, in predefined-range order. The spec's max count is not applied by this method.
 *
 * @return the qualifying facets, or {@code FacetCountCollector.EMPTY_FACET_LIST} when there is
 *         no facet spec or no predefined ranges
 */
@Override
public List<BrowseFacet> getFacets() {
  if (_ospec == null || _predefinedRangeIndexes == null) {
    return FacetCountCollector.EMPTY_FACET_LIST;
  }
  final int minCount = _ospec.getMinHitCount();
  List<BrowseFacet> facetColl = new ArrayList<BrowseFacet>(_predefinedRanges.size());
  for (int k = 0; k < _predefinedRangeIndexes.length; ++k) {
    // Sum the per-value counts over the range; both bounds are inclusive.
    int count = 0;
    final int end = _predefinedRangeIndexes[k][1];
    for (int idx = _predefinedRangeIndexes[k][0]; idx <= end; ++idx) {
      count += _count.get(idx);
    }
    if (count >= minCount) {
      facetColl.add(new BrowseFacet(_predefinedRanges.get(k), count));
    }
  }
  return facetColl;
}
/**
 * Builds the facets for the predefined ranges, honouring the spec's minimum hit count, max
 * count and ordering.
 *
 * <p>With {@code FacetSortSpec.OrderValueAsc} the facets come back in predefined-range order,
 * cut off once the max count is reached. For any other ordering the range indexes go through a
 * bounded priority queue driven by the hit-count comparator ({@code OrderHitsDesc}) or by the
 * spec's custom comparator factory.
 *
 * @return the selected facets, or {@code FacetCountCollector.EMPTY_FACET_LIST} when there is no
 *         facet spec or no predefined ranges
 * @throws IllegalArgumentException if a custom ordering is requested but the spec supplies no
 *         comparator factory
 */
public List<BrowseFacet> getFacetsNew() {
  if (_ospec == null || _predefinedRangeIndexes == null) {
    return FacetCountCollector.EMPTY_FACET_LIST;
  }

  final int rangeTotal = _predefinedRangeIndexes.length;
  final int minCount = _ospec.getMinHitCount();
  int maxNumOfFacets = _ospec.getMaxCount();
  // A non-positive or oversized max count means "all predefined ranges".
  if (maxNumOfFacets <= 0 || maxNumOfFacets > rangeTotal) {
    maxNumOfFacets = rangeTotal;
  }

  // Summed hit count per predefined range; both range bounds are inclusive.
  final BigSegmentedArray rangeCount = new LazyBigIntArray(rangeTotal);
  for (int k = 0; k < rangeTotal; ++k) {
    int count = 0;
    final int end = _predefinedRangeIndexes[k][1];
    for (int idx = _predefinedRangeIndexes[k][0]; idx <= end; ++idx) {
      count += _count.get(idx);
    }
    rangeCount.add(k, count);
  }

  final FacetSortSpec sortspec = _ospec.getOrderBy();
  if (sortspec == FacetSortSpec.OrderValueAsc) {
    List<BrowseFacet> inValueOrder = new ArrayList<BrowseFacet>(maxNumOfFacets);
    for (int k = 0; k < rangeTotal; ++k) {
      if (rangeCount.get(k) >= minCount) {
        inValueOrder.add(new BrowseFacet(_predefinedRanges.get(k), rangeCount.get(k)));
      }
      if (inValueOrder.size() >= maxNumOfFacets) {
        break;
      }
    }
    return inValueOrder;
  }

  ComparatorFactory comparatorFactory;
  if (sortspec == FacetSortSpec.OrderHitsDesc) {
    comparatorFactory = new FacetHitcountComparatorFactory();
  } else {
    comparatorFactory = _ospec.getCustomComparatorFactory();
  }
  if (comparatorFactory == null) {
    throw new IllegalArgumentException("facet comparator factory not specified");
  }

  final IntComparator comparator = comparatorFactory.newComparator(
      new FieldValueAccessor() {
        @Override
        public String getFormatedValue(int index) {
          return _predefinedRanges.get(index);
        }

        @Override
        public Object getRawValue(int index) {
          return _predefinedRanges.getRawValue(index);
        }
      }, rangeCount);

  // -1 is never a valid range index, so the queue can use it as its "empty" marker.
  final int forbidden = -1;
  IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, maxNumOfFacets,
      forbidden);
  for (int i = 0; i < rangeTotal; ++i) {
    if (rangeCount.get(i) >= minCount) {
      pq.offer(i);
    }
  }

  // Each polled entry is prepended, so the returned list is in reverse polling order.
  LinkedList<BrowseFacet> ranked = new LinkedList<BrowseFacet>();
  int val;
  while ((val = pq.pollInt()) != forbidden) {
    ranked.addFirst(new BrowseFacet(_predefinedRanges.get(val), rangeCount.get(val)));
  }
  return ranked;
}
/**
 * A {@code BrowseFacet} whose value is rendered as {@code [lower TO upper]} from a pair of
 * bound strings.
 */
private static class RangeFacet extends BrowseFacet {
  private static final long serialVersionUID = 1L;

  /** Lower bound as last passed to {@link #setValues(String, String)}. */
  String _lower;
  /** Upper bound as last passed to {@link #setValues(String, String)}. */
  String _upper;

  RangeFacet() {
  }

  /**
   * Stores both bounds and sets the facet value to {@code "[" + lower + " TO " + upper + "]"}.
   *
   * @param lower text placed before {@code " TO "}
   * @param upper text placed after {@code " TO "}
   */
  void setValues(String lower, String upper) {
    _lower = lower;
    _upper = upper;
    final String rendered = "[" + _lower + " TO " + _upper + "]";
    setValue(rendered);
  }
}
/**
 * No-op in this implementation: the method body is empty.
 */
@Override
public void close() {
// Deliberately left empty; this method performs no work.
}
/**
 * Creates an iterator over the predefined ranges paired with their summed hit counts (both
 * range bounds inclusive).
 *
 * @return a {@code DefaultFacetIterator} over the predefined ranges, or {@code null} when no
 *         predefined ranges were supplied
 */
@Override
public FacetIterator iterator() {
  if (_predefinedRanges == null) {
    return null;
  }
  final int rangeTotal = _predefinedRangeIndexes.length;
  final BigSegmentedArray rangeCounts = new LazyBigIntArray(rangeTotal);
  for (int k = 0; k < rangeTotal; k++) {
    final int[] bounds = _predefinedRangeIndexes[k];
    int hits = 0;
    for (int idx = bounds[0]; idx <= bounds[1]; idx++) {
      hits += _count.get(idx);
    }
    rangeCounts.add(k, rangeCounts.get(k) + hits);
  }
  return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.size(), true);
}
}