/*
 * com.browseengine.bobo.facets.impl.SimpleGroupbyFacetHandler
 * (bobo-browse artifact; available for Maven / Gradle / Ivy).
 *
 * Bobo is a faceted search implementation written purely in Java, as an extension of Apache Lucene.
 *
 * Source listing of the newest published version.
 */
package com.browseengine.bobo.facets.impl;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.ScoreDoc;
import com.browseengine.bobo.api.BoboSegmentReader;
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.facets.FacetHandler.FacetDataNone;
import com.browseengine.bobo.facets.filter.RandomAccessAndFilter;
import com.browseengine.bobo.facets.filter.RandomAccessFilter;
import com.browseengine.bobo.sort.DocComparator;
import com.browseengine.bobo.sort.DocComparatorSource;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;
public class SimpleGroupbyFacetHandler extends FacetHandler {
private final LinkedHashSet _fieldsSet;
private ArrayList _facetHandlers;
private Map _facetHandlerMap;
private static final String SEP = ",";
private final String _sep;
public SimpleGroupbyFacetHandler(String name, LinkedHashSet dependsOn, String separator) {
super(name, dependsOn);
_fieldsSet = dependsOn;
_facetHandlers = null;
_facetHandlerMap = null;
_sep = separator;
}
public SimpleGroupbyFacetHandler(String name, LinkedHashSet dependsOn) {
this(name, dependsOn, SEP);
}
@Override
public RandomAccessFilter buildRandomAccessFilter(String value, Properties selectionProperty)
throws IOException {
List filterList = new ArrayList();
String[] vals = value.split(_sep);
for (int i = 0; i < vals.length; ++i) {
SimpleFacetHandler handler = _facetHandlers.get(i);
BrowseSelection sel = new BrowseSelection(handler.getName());
sel.addValue(vals[i]);
filterList.add(handler.buildFilter(sel));
}
return new RandomAccessAndFilter(filterList);
}
@Override
public FacetCountCollectorSource getFacetCountCollectorSource(final BrowseSelection sel,
final FacetSpec fspec) {
return new FacetCountCollectorSource() {
@Override
public FacetCountCollector getFacetCountCollector(BoboSegmentReader reader, int docBase) {
ArrayList collectorList = new ArrayList(
_facetHandlers.size());
for (SimpleFacetHandler facetHandler : _facetHandlers) {
collectorList.add((DefaultFacetCountCollector) (facetHandler
.getFacetCountCollectorSource(sel, fspec).getFacetCountCollector(reader, docBase)));
}
return new GroupbyFacetCountCollector(_name, fspec,
collectorList.toArray(new DefaultFacetCountCollector[collectorList.size()]),
reader.maxDoc(), _sep);
}
};
}
@Override
public String[] getFieldValues(BoboSegmentReader reader, int id) {
ArrayList valList = new ArrayList();
for (FacetHandler> handler : _facetHandlers) {
StringBuffer buf = new StringBuffer();
boolean firsttime = true;
String[] vals = handler.getFieldValues(reader, id);
if (vals != null && vals.length > 0) {
if (!firsttime) {
buf.append(",");
} else {
firsttime = false;
}
for (String val : vals) {
buf.append(val);
}
}
valList.add(buf.toString());
}
return valList.toArray(new String[valList.size()]);
}
@Override
public Object[] getRawFieldValues(BoboSegmentReader reader, int id) {
return getFieldValues(reader, id);
}
@Override
public DocComparatorSource getDocComparatorSource() {
return new DocComparatorSource() {
@Override
public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
ArrayList comparatorList = new ArrayList(_fieldsSet.size());
for (FacetHandler> handler : _facetHandlers) {
comparatorList.add(handler.getDocComparatorSource().getComparator(reader, docbase));
}
return new GroupbyDocComparator(comparatorList.toArray(new DocComparator[comparatorList
.size()]));
}
};
}
@Override
public FacetDataNone load(BoboSegmentReader reader) throws IOException {
_facetHandlers = new ArrayList(_fieldsSet.size());
_facetHandlerMap = new HashMap(_fieldsSet.size());
for (String name : _fieldsSet) {
FacetHandler> handler = reader.getFacetHandler(name);
if (handler == null || !(handler instanceof SimpleFacetHandler)) {
throw new IllegalStateException("only simple facet handlers supported");
}
SimpleFacetHandler sfh = (SimpleFacetHandler) handler;
_facetHandlers.add(sfh);
_facetHandlerMap.put(name, sfh);
}
return FacetDataNone.instance;
}
private static class GroupbyDocComparator extends DocComparator {
private final DocComparator[] _comparators;
public GroupbyDocComparator(DocComparator[] comparators) {
_comparators = comparators;
}
@Override
public final int compare(ScoreDoc d1, ScoreDoc d2) {
int retval = 0;
for (DocComparator comparator : _comparators) {
retval = comparator.compare(d1, d2);
if (retval != 0) break;
}
return retval;
}
@SuppressWarnings("rawtypes")
@Override
public final Comparable> value(final ScoreDoc doc) {
return new Comparable() {
@SuppressWarnings("unchecked")
@Override
public int compareTo(Object o) {
int retval = 0;
for (DocComparator comparator : _comparators) {
retval = comparator.value(doc).compareTo(o);
if (retval != 0) break;
}
return retval;
}
};
}
}
private static class GroupbyFacetCountCollector implements FacetCountCollector {
private final DefaultFacetCountCollector[] _subcollectors;
private final String _name;
private final FacetSpec _fspec;
private final BigSegmentedArray _count;
private final int _countlength;
private final int[] _lens;
private final int _maxdoc;
private final String _sep;
public GroupbyFacetCountCollector(String name, FacetSpec fspec,
DefaultFacetCountCollector[] subcollectors, int maxdoc, String sep) {
_name = name;
_fspec = fspec;
_subcollectors = subcollectors;
_sep = sep;
int totalLen = 1;
_lens = new int[_subcollectors.length];
for (int i = 0; i < _subcollectors.length; ++i) {
_lens[i] = _subcollectors[i]._countlength;
totalLen *= _lens[i];
}
_countlength = totalLen;
_count = new LazyBigIntArray(_countlength);
_maxdoc = maxdoc;
}
@Override
final public void collect(int docid) {
int idx = 0;
int i = 0;
int segsize = _countlength;
for (DefaultFacetCountCollector subcollector : _subcollectors) {
segsize = segsize / _lens[i++];
idx += (subcollector._dataCache.orderArray.get(docid) * segsize);
}
_count.add(idx, _count.get(idx) + 1);
}
@Override
public void collectAll() {
for (int i = 0; i < _maxdoc; ++i) {
collect(i);
}
}
@Override
public BigSegmentedArray getCountDistribution() {
return _count;
}
@Override
public String getName() {
return _name;
}
@Override
public BrowseFacet getFacet(String value) {
String[] vals = value.split(_sep);
if (vals.length == 0) return null;
StringBuffer buf = new StringBuffer();
int startIdx = 0;
int segLen = _countlength;
for (int i = 0; i < vals.length; ++i) {
if (i > 0) {
buf.append(_sep);
}
int index = _subcollectors[i]._dataCache.valArray.indexOf(vals[i]);
String facetName = _subcollectors[i]._dataCache.valArray.get(index);
buf.append(facetName);
segLen /= _subcollectors[i]._countlength;
startIdx += index * segLen;
}
int count = 0;
for (int i = startIdx; i < startIdx + segLen; ++i) {
count += _count.get(i);
}
BrowseFacet f = new BrowseFacet(buf.toString(), count);
return f;
}
@Override
public int getFacetHitsCount(Object value) {
String[] vals = ((String) value).split(_sep);
if (vals.length == 0) return 0;
int startIdx = 0;
int segLen = _countlength;
for (int i = 0; i < vals.length; ++i) {
int index = _subcollectors[i]._dataCache.valArray.indexOf(vals[i]);
segLen /= _subcollectors[i]._countlength;
startIdx += index * segLen;
}
int count = 0;
for (int i = startIdx; i < startIdx + segLen; ++i)
count += _count.get(i);
return count;
}
private final String getFacetString(int idx) {
StringBuffer buf = new StringBuffer();
int i = 0;
for (int len : _lens) {
if (i > 0) {
buf.append(_sep);
}
int adjusted = idx * len;
int bucket = adjusted / _countlength;
buf.append(_subcollectors[i]._dataCache.valArray.get(bucket));
idx = adjusted % _countlength;
i++;
}
return buf.toString();
}
private final Object[] getRawFaceValue(int idx) {
Object[] retVal = new Object[_lens.length];
int i = 0;
for (int len : _lens) {
int adjusted = idx * len;
int bucket = adjusted / _countlength;
retVal[i++] = _subcollectors[i]._dataCache.valArray.getRawValue(bucket);
idx = adjusted % _countlength;
}
return retVal;
}
@Override
public List getFacets() {
if (_fspec != null) {
int minCount = _fspec.getMinHitCount();
int max = _fspec.getMaxCount();
if (max <= 0) max = _countlength;
FacetSortSpec sortspec = _fspec.getOrderBy();
List facetColl;
if (sortspec == FacetSortSpec.OrderValueAsc) {
facetColl = new ArrayList(max);
for (int i = 1; i < _countlength; ++i) // exclude zero
{
int hits = _count.get(i);
if (hits >= minCount) {
BrowseFacet facet = new BrowseFacet(getFacetString(i), hits);
facetColl.add(facet);
}
if (facetColl.size() >= max) break;
}
} else {
ComparatorFactory comparatorFactory;
if (sortspec == FacetSortSpec.OrderHitsDesc) {
comparatorFactory = new FacetHitcountComparatorFactory();
} else {
comparatorFactory = _fspec.getCustomComparatorFactory();
}
if (comparatorFactory == null) {
throw new IllegalArgumentException("facet comparator factory not specified");
}
IntComparator comparator = comparatorFactory.newComparator(new FieldValueAccessor() {
@Override
public String getFormatedValue(int index) {
return getFacetString(index);
}
@Override
public Object getRawValue(int index) {
return getRawFaceValue(index);
}
}, _count);
facetColl = new LinkedList();
final int forbidden = -1;
IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);
for (int i = 1; i < _countlength; ++i) // exclude zero
{
int hits = _count.get(i);
if (hits >= minCount) {
if (!pq.offer(i)) {
// pq is full. we can safely ignore any facet with <=hits.
minCount = hits + 1;
}
}
}
int val;
while ((val = pq.pollInt()) != forbidden) {
BrowseFacet facet = new BrowseFacet(getFacetString(val), _count.get(val));
((LinkedList) facetColl).addFirst(facet);
}
}
return facetColl;
} else {
return FacetCountCollector.EMPTY_FACET_LIST;
}
}
@Override
public void close() {
// TODO Auto-generated method stub
}
@Override
public FacetIterator iterator() {
return new GroupByFacetIterator();
}
public class GroupByFacetIterator extends FacetIterator {
private int _index;
public GroupByFacetIterator() {
_index = 0;
facet = null;
count = 0;
}
/*
* (non-Javadoc)
* @see com.browseengine.bobo.api.FacetIterator#next()
*/
@Override
public Comparable> next() {
if ((_index >= 0) && !hasNext()) throw new NoSuchElementException(
"No more facets in this iteration");
_index++;
facet = getFacetString(_index);
count = _count.get(_index);
return facet;
}
/*
* (non-Javadoc)
* @see java.util.Iterator#hasNext()
*/
@Override
public boolean hasNext() {
return (_index < (_countlength - 1));
}
/*
* (non-Javadoc)
* @see java.util.Iterator#remove()
*/
@Override
public void remove() {
throw new UnsupportedOperationException("remove() method not supported for Facet Iterators");
}
/*
* (non-Javadoc)
* @see com.browseengine.bobo.api.FacetIterator#next(int)
*/
@Override
public Comparable> next(int minHits) {
if ((_index >= 0) && !hasNext()) {
count = 0;
facet = null;
return null;
}
do {
_index++;
} while ((_index < (_countlength - 1)) && (_count.get(_index) < minHits));
if (_count.get(_index) >= minHits) {
facet = getFacetString(_index);
count = _count.get(_index);
} else {
count = 0;
facet = null;
}
return facet;
}
/**
* The string from here should be already formatted. No need to reformat.
* @see com.browseengine.bobo.api.FacetIterator#format(java.lang.Object)
*/
@Override
public String format(Object val) {
return (String) val;
}
}
}
}