All downloads are free. Search and download functionality uses the official Maven repository.

com.browseengine.bobo.facets.impl.SimpleGroupbyFacetHandler Maven / Gradle / Ivy

Go to download

Bobo is a Faceted Search implementation written purely in Java, an extension of Apache Lucene

The newest version!
package com.browseengine.bobo.facets.impl;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Properties;
import java.util.regex.Pattern;

import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.search.ScoreDoc;

import com.browseengine.bobo.api.BoboSegmentReader;
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.FacetCountCollectorSource;
import com.browseengine.bobo.facets.FacetHandler;
import com.browseengine.bobo.facets.FacetHandler.FacetDataNone;
import com.browseengine.bobo.facets.filter.RandomAccessAndFilter;
import com.browseengine.bobo.facets.filter.RandomAccessFilter;
import com.browseengine.bobo.sort.DocComparator;
import com.browseengine.bobo.sort.DocComparatorSource;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;

public class SimpleGroupbyFacetHandler extends FacetHandler {
  /** Names of the facet handlers this handler groups by, in iteration (group-by) order. */
  private final LinkedHashSet<String> _fieldsSet;
  /** Handlers resolved from {@link #_fieldsSet} by {@code load}; {@code null} until then. */
  private ArrayList<SimpleFacetHandler> _facetHandlers;
  /** The same handlers keyed by field name; {@code null} until {@code load} has run. */
  private Map<String, SimpleFacetHandler> _facetHandlerMap;

  /** Separator used when the two-argument constructor is called. */
  private static final String SEP = ",";
  /** Separator placed between the per-field parts of a composite facet value. */
  private final String _sep;

  /**
   * Creates a group-by handler over the named facet handlers with an explicit separator.
   *
   * @param name name of this facet handler
   * @param dependsOn names of the handlers to group by; also handed to the superclass
   * @param separator string placed between the per-field parts of a composite value
   */
  public SimpleGroupbyFacetHandler(String name, LinkedHashSet dependsOn, String separator) {
    super(name, dependsOn);
    _sep = separator;
    _fieldsSet = dependsOn;
    // Both lookups are filled in by load(); they stay unset until then.
    _facetHandlerMap = null;
    _facetHandlers = null;
  }

  /**
   * Creates a group-by handler that separates the per-field parts with {@code ","}.
   *
   * @param name name of this facet handler
   * @param dependsOn names of the handlers to group by
   */
  public SimpleGroupbyFacetHandler(String name, LinkedHashSet dependsOn) {
    this(name, dependsOn, SEP);
  }

  /**
   * Builds a filter matching documents whose grouped fields carry the given composite value.
   *
   * <p>{@code value} is split on the configured separator; part {@code i} becomes a selection on
   * the i-th underlying facet handler, and the resulting filters are combined with AND. A value
   * with fewer parts than grouped fields constrains only the leading fields.
   *
   * @param value composite value, e.g. {@code "red,large"} with the default separator
   * @param selectionProperty not read by this implementation
   * @return the conjunction of the per-field filters
   * @throws IllegalArgumentException if {@code value} has more parts than there are grouped fields
   * @throws IOException declared by the signature; presumably raised by the underlying handlers
   */
  @Override
  public RandomAccessFilter buildRandomAccessFilter(String value, Properties selectionProperty)
      throws IOException {
    // The separator is literal text, not a regular expression: quote it so that separators such
    // as "|" or "." split where intended.
    String[] vals = value.split(Pattern.quote(_sep));
    if (vals.length > _facetHandlers.size()) {
      throw new IllegalArgumentException("value '" + value + "' has " + vals.length
          + " parts but only " + _facetHandlers.size() + " fields are grouped");
    }
    List<RandomAccessFilter> filterList = new ArrayList<RandomAccessFilter>(vals.length);
    for (int i = 0; i < vals.length; ++i) {
      SimpleFacetHandler handler = _facetHandlers.get(i);
      BrowseSelection sel = new BrowseSelection(handler.getName());
      sel.addValue(vals[i]);
      filterList.add(handler.buildFilter(sel));
    }
    return new RandomAccessAndFilter(filterList);
  }

  /**
   * Returns a source whose collectors count hits per combination of the grouped fields.
   *
   * <p>For each segment the source asks every underlying handler for its own collector (each is
   * cast to {@code DefaultFacetCountCollector}) and wraps them, in handler order, in a
   * {@code GroupbyFacetCountCollector}.
   *
   * @param sel selection forwarded to the underlying handlers
   * @param fspec facet spec forwarded to the underlying handlers and to the group-by collector
   * @return the collector source
   */
  @Override
  public FacetCountCollectorSource getFacetCountCollectorSource(final BrowseSelection sel,
      final FacetSpec fspec) {
    return new FacetCountCollectorSource() {

      @Override
      public FacetCountCollector getFacetCountCollector(BoboSegmentReader reader, int docBase) {
        final int handlerCount = _facetHandlers.size();
        DefaultFacetCountCollector[] perField = new DefaultFacetCountCollector[handlerCount];
        int slot = 0;
        for (SimpleFacetHandler facetHandler : _facetHandlers) {
          FacetCountCollectorSource source = facetHandler.getFacetCountCollectorSource(sel, fspec);
          perField[slot++] =
              (DefaultFacetCountCollector) source.getFacetCountCollector(reader, docBase);
        }
        return new GroupbyFacetCountCollector(_name, fspec, perField, reader.maxDoc(), _sep);
      }
    };
  }

  /**
   * Returns, for each grouped field in handler order, the formatted value of the given document.
   *
   * <p>Element {@code i} of the result comes from the i-th underlying handler: every string that
   * handler reports for the document is appended without a delimiter, and a handler reporting
   * {@code null} or nothing contributes the empty string.
   *
   * <p>NOTE(review): if a field can report several values per document they run together here;
   * confirm whether a delimiter between them was intended.
   *
   * @param reader segment reader the document belongs to
   * @param id document id, passed through to the underlying handlers
   * @return one string per grouped field
   */
  @Override
  public String[] getFieldValues(BoboSegmentReader reader, int id) {
    String[] result = new String[_facetHandlers.size()];
    int slot = 0;
    for (FacetHandler handler : _facetHandlers) {
      StringBuilder buf = new StringBuilder();
      String[] vals = handler.getFieldValues(reader, id);
      if (vals != null) {
        for (String val : vals) {
          buf.append(val);
        }
      }
      result[slot++] = buf.toString();
    }
    return result;
  }

  /**
   * Returns the same per-field strings as {@code getFieldValues}; no separate raw form is built.
   *
   * @param reader segment reader the document belongs to
   * @param id document id
   * @return the formatted per-field values, typed as {@code Object[]}
   */
  @Override
  public Object[] getRawFieldValues(BoboSegmentReader reader, int id) {
    final String[] formatted = getFieldValues(reader, id);
    return formatted;
  }

  /**
   * Returns a comparator source that orders documents field by field.
   *
   * <p>For each segment one comparator is obtained from every underlying handler, in handler
   * order, and the resulting array is wrapped in a {@code GroupbyDocComparator}.
   *
   * @return the comparator source
   */
  @Override
  public DocComparatorSource getDocComparatorSource() {
    return new DocComparatorSource() {

      @Override
      public DocComparator getComparator(AtomicReader reader, int docbase) throws IOException {
        DocComparator[] perField = new DocComparator[_facetHandlers.size()];
        int slot = 0;
        for (FacetHandler handler : _facetHandlers) {
          DocComparatorSource source = handler.getDocComparatorSource();
          perField[slot++] = source.getComparator(reader, docbase);
        }
        return new GroupbyDocComparator(perField);
      }
    };
  }

  /**
   * Resolves every grouped field name to its facet handler on the given reader.
   *
   * <p>The handler list and the name-to-handler map are rebuilt from scratch on each call. No
   * facet data of its own is loaded; the shared {@code FacetDataNone} instance is returned.
   *
   * @param reader segment reader used to look the handlers up
   * @return {@code FacetDataNone.instance}
   * @throws IllegalStateException if a field has no handler or its handler is not a
   *         {@code SimpleFacetHandler}
   * @throws IOException declared by the signature
   */
  @Override
  public FacetDataNone load(BoboSegmentReader reader) throws IOException {
    final int fieldCount = _fieldsSet.size();
    _facetHandlers = new ArrayList<SimpleFacetHandler>(fieldCount);
    _facetHandlerMap = new HashMap<String, SimpleFacetHandler>(fieldCount);
    for (String fieldName : _fieldsSet) {
      FacetHandler resolved = reader.getFacetHandler(fieldName);
      // instanceof is false for null, so a missing handler is rejected here as well.
      if (!(resolved instanceof SimpleFacetHandler)) {
        throw new IllegalStateException("only simple facet handlers supported");
      }
      SimpleFacetHandler simple = (SimpleFacetHandler) resolved;
      _facetHandlers.add(simple);
      _facetHandlerMap.put(fieldName, simple);
    }
    return FacetDataNone.instance;
  }

  /**
   * Orders documents by several comparators in sequence: the first comparator that reports a
   * non-zero result decides, and documents equal under all of them compare as equal.
   */
  private static class GroupbyDocComparator extends DocComparator {
    private final DocComparator[] _comparators;

    /**
     * @param comparators per-field comparators, consulted in array order
     */
    public GroupbyDocComparator(DocComparator[] comparators) {
      _comparators = comparators;
    }

    @Override
    public final int compare(ScoreDoc d1, ScoreDoc d2) {
      for (int i = 0; i < _comparators.length; ++i) {
        int diff = _comparators[i].compare(d1, d2);
        if (diff != 0) {
          return diff;
        }
      }
      return 0;
    }

    /**
     * Returns a lazily evaluated sort value for {@code doc}: the per-field values are only
     * fetched when the returned object is compared.
     */
    @SuppressWarnings("rawtypes")
    @Override
    public final Comparable value(final ScoreDoc doc) {
      return new Comparable() {

        @SuppressWarnings("unchecked")
        @Override
        public int compareTo(Object o) {
          // NOTE(review): every per-field value is compared against the same argument "o";
          // confirm that callers pass something each field's value can be compared with.
          for (int i = 0; i < _comparators.length; ++i) {
            int diff = _comparators[i].value(doc).compareTo(o);
            if (diff != 0) {
              return diff;
            }
          }
          return 0;
        }
      };
    }
  }

  private static class GroupbyFacetCountCollector implements FacetCountCollector {

    // Per-field collectors, in group-by order; their value lists and order arrays are read here.
    private final DefaultFacetCountCollector[] _subcollectors;
    // Name returned by getName().
    private final String _name;
    // Facet spec consulted by getFacets(); may be null, in which case no facets are reported.
    private final FacetSpec _fspec;
    // One counter per combination of per-field indexes; has _countlength entries.
    private final BigSegmentedArray _count;
    // Product of all entries of _lens.
    private final int _countlength;
    // _lens[i] is the _countlength of _subcollectors[i].
    private final int[] _lens;
    // Upper bound (exclusive) of the doc ids visited by collectAll().
    private final int _maxdoc;
    // Separator between the per-field parts of a composite facet value.
    private final String _sep;

    /**
     * Creates a collector with one counter per combination of the sub-collectors' buckets.
     *
     * @param name name reported by {@code getName()}
     * @param fspec facet spec used by {@code getFacets()}; may be {@code null}
     * @param subcollectors per-field collectors, in group-by order
     * @param maxdoc number of documents visited by {@code collectAll()}
     * @param sep separator between the per-field parts of a composite value
     * @throws IllegalArgumentException if the combined number of buckets does not fit in an
     *         {@code int}
     */
    public GroupbyFacetCountCollector(String name, FacetSpec fspec,
        DefaultFacetCountCollector[] subcollectors, int maxdoc, String sep) {
      _name = name;
      _fspec = fspec;
      _subcollectors = subcollectors;
      _sep = sep;
      _maxdoc = maxdoc;
      _lens = new int[_subcollectors.length];
      // Accumulate in a long: the product of a few large value lists can exceed the int range,
      // and a silently wrapped length would corrupt every bucket index derived from it.
      long totalLen = 1;
      for (int i = 0; i < _subcollectors.length; ++i) {
        _lens[i] = _subcollectors[i]._countlength;
        totalLen *= _lens[i];
        if (totalLen > Integer.MAX_VALUE) {
          throw new IllegalArgumentException("group-by over '" + name
              + "' needs more than Integer.MAX_VALUE buckets");
        }
      }
      _countlength = (int) totalLen;
      _count = new LazyBigIntArray(_countlength);
    }

    /**
     * Counts one hit for the document.
     *
     * <p>The document's order value in each sub-collector is scaled by that field's stride (the
     * total bucket count divided by the sizes of this and all preceding fields) and the scaled
     * values are summed into a single bucket index, whose counter is then incremented.
     *
     * @param docid document to count
     */
    @Override
    final public void collect(int docid) {
      int bucket = 0;
      int stride = _countlength;
      for (int dim = 0; dim < _subcollectors.length; ++dim) {
        stride /= _lens[dim];
        bucket += _subcollectors[dim]._dataCache.orderArray.get(docid) * stride;
      }
      _count.add(bucket, _count.get(bucket) + 1);
    }

    /** Counts every document id from 0 up to, but excluding, the max doc given at construction. */
    @Override
    public void collectAll() {
      int docid = 0;
      while (docid < _maxdoc) {
        collect(docid);
        docid++;
      }
    }

    /** Returns the counter array backing this collector itself, not a copy. */
    @Override
    public BigSegmentedArray getCountDistribution() {
      return _count;
    }

    /** Returns the name passed to the constructor. */
    @Override
    public String getName() {
      return _name;
    }

    /**
     * Looks up the facet for a composite value or a prefix of one.
     *
     * <p>{@code value} is split on the separator. Each part selects a bucket of the corresponding
     * sub-collector; when fewer parts than grouped fields are given, the counts of all buckets
     * sharing that prefix are summed.
     *
     * <p>A part that is not found in its field's value list, or a value with more parts than
     * grouped fields, yields a facet carrying the requested value and a count of zero. This
     * assumes {@code valArray.indexOf} returns a negative number for an absent value, as the
     * {@code List} convention does. TODO confirm against the value-list implementation.
     *
     * @param value composite value, parts separated by the configured separator
     * @return the facet with its hit count, or {@code null} if {@code value} has no parts
     */
    @Override
    public BrowseFacet getFacet(String value) {
      // The separator is literal text, not a regular expression.
      String[] vals = value.split(Pattern.quote(_sep));
      if (vals.length == 0) return null;
      if (vals.length > _subcollectors.length) {
        // More parts than grouped fields: no bucket can match.
        return new BrowseFacet(value, 0);
      }

      StringBuilder buf = new StringBuilder();
      int startIdx = 0;
      int segLen = _countlength;

      for (int i = 0; i < vals.length; ++i) {
        int index = _subcollectors[i]._dataCache.valArray.indexOf(vals[i]);
        if (index < 0) {
          // Unknown value for this field: using the index would read outside the value list
          // and point at unrelated buckets.
          return new BrowseFacet(value, 0);
        }
        if (i > 0) {
          buf.append(_sep);
        }
        buf.append(_subcollectors[i]._dataCache.valArray.get(index));

        segLen /= _subcollectors[i]._countlength;
        startIdx += index * segLen;
      }

      // All buckets in [startIdx, startIdx + segLen) share the requested prefix.
      int count = 0;
      for (int i = startIdx; i < startIdx + segLen; ++i) {
        count += _count.get(i);
      }

      return new BrowseFacet(buf.toString(), count);
    }

    /**
     * Returns the hit count for a composite value or a prefix of one.
     *
     * <p>{@code value} is cast to {@code String} and split on the separator; the counts of all
     * buckets matching the given parts are summed. A part that is not found in its field's value
     * list, or a value with more parts than grouped fields, yields zero. This assumes
     * {@code valArray.indexOf} returns a negative number for an absent value. TODO confirm.
     *
     * @param value composite value as a {@code String}
     * @return the summed hit count, or 0 when nothing matches
     */
    @Override
    public int getFacetHitsCount(Object value) {
      // The separator is literal text, not a regular expression.
      String[] vals = ((String) value).split(Pattern.quote(_sep));
      if (vals.length == 0 || vals.length > _subcollectors.length) return 0;
      int startIdx = 0;
      int segLen = _countlength;

      for (int i = 0; i < vals.length; ++i) {
        int index = _subcollectors[i]._dataCache.valArray.indexOf(vals[i]);
        if (index < 0) {
          // Unknown value for this field: a negative index would shift startIdx onto
          // unrelated buckets.
          return 0;
        }
        segLen /= _subcollectors[i]._countlength;
        startIdx += index * segLen;
      }

      // All buckets in [startIdx, startIdx + segLen) share the requested prefix.
      int count = 0;
      for (int i = startIdx; i < startIdx + segLen; ++i) {
        count += _count.get(i);
      }

      return count;
    }

    /**
     * Decodes a bucket index into its composite facet value.
     *
     * <p>For each field in turn the running remainder is multiplied by the field's size; the
     * quotient by the total bucket count is the field's value index and the remainder carries on
     * to the next field. The per-field values are joined with the separator.
     *
     * @param idx bucket index in {@code [0, _countlength)}
     * @return the per-field values joined by the separator
     */
    private final String getFacetString(int idx) {
      StringBuilder buf = new StringBuilder();
      // The scaled remainder can reach (_countlength - 1) * len, which exceeds the int range
      // for larger bucket spaces, so the arithmetic is done in long.
      long rest = idx;
      for (int i = 0; i < _lens.length; ++i) {
        if (i > 0) {
          buf.append(_sep);
        }
        long adjusted = rest * _lens[i];
        int bucket = (int) (adjusted / _countlength);
        buf.append(_subcollectors[i]._dataCache.valArray.get(bucket));
        rest = adjusted % _countlength;
      }
      return buf.toString();
    }

    /**
     * Decodes a bucket index into the raw per-field values.
     *
     * <p>Uses the same decoding as {@code getFacetString}, but returns each field's raw value
     * (via {@code valArray.getRawValue}) in its own array slot instead of joining strings.
     *
     * @param idx bucket index in {@code [0, _countlength)}
     * @return one raw value per grouped field, in group-by order
     */
    private final Object[] getRawFaceValue(int idx) {
      Object[] retVal = new Object[_lens.length];
      // long arithmetic: the scaled remainder can exceed the int range for larger bucket spaces.
      long rest = idx;
      // Slot i must be filled from sub-collector i. Writing "retVal[i++] = _subcollectors[i]..."
      // would evaluate the right-hand "i" after the increment and read the next sub-collector.
      for (int i = 0; i < _lens.length; ++i) {
        long adjusted = rest * _lens[i];
        int bucket = (int) (adjusted / _countlength);
        retVal[i] = _subcollectors[i]._dataCache.valArray.getRawValue(bucket);
        rest = adjusted % _countlength;
      }
      return retVal;
    }

    /**
     * Returns the facets selected by the facet spec.
     *
     * <p>Bucket 0 is never reported. With {@code OrderValueAsc} the buckets are scanned in index
     * order and the first {@code max} buckets meeting the minimum hit count are returned. With
     * any other ordering the buckets are ranked through a bounded priority queue, using the hit
     * count comparator for {@code OrderHitsDesc} and the spec's custom comparator factory
     * otherwise. A non-positive max count in the spec means "no limit".
     *
     * @return the selected facets, or {@code FacetCountCollector.EMPTY_FACET_LIST} when the
     *         collector has no facet spec
     * @throws IllegalArgumentException if a custom ordering is requested but the spec carries no
     *         comparator factory
     */
    @Override
    public List getFacets() {
      if (_fspec == null) {
        return FacetCountCollector.EMPTY_FACET_LIST;
      }

      int minCount = _fspec.getMinHitCount();
      int max = _fspec.getMaxCount();
      if (max <= 0) {
        max = _countlength;
      }
      final FacetSortSpec sortspec = _fspec.getOrderBy();

      if (sortspec == FacetSortSpec.OrderValueAsc) {
        List<BrowseFacet> ascending = new ArrayList<BrowseFacet>(max);
        // Start at 1: bucket 0 is excluded.
        for (int bucket = 1; bucket < _countlength; ++bucket) {
          int hits = _count.get(bucket);
          if (hits >= minCount) {
            ascending.add(new BrowseFacet(getFacetString(bucket), hits));
          }
          if (ascending.size() >= max) {
            break;
          }
        }
        return ascending;
      }

      ComparatorFactory comparatorFactory;
      if (sortspec == FacetSortSpec.OrderHitsDesc) {
        comparatorFactory = new FacetHitcountComparatorFactory();
      } else {
        comparatorFactory = _fspec.getCustomComparatorFactory();
      }
      if (comparatorFactory == null) {
        throw new IllegalArgumentException("facet comparator factory not specified");
      }

      IntComparator comparator = comparatorFactory.newComparator(new FieldValueAccessor() {

        @Override
        public String getFormatedValue(int index) {
          return getFacetString(index);
        }

        @Override
        public Object getRawValue(int index) {
          return getRawFaceValue(index);
        }

      }, _count);

      final int forbidden = -1;
      IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, max, forbidden);

      // Start at 1: bucket 0 is excluded.
      for (int bucket = 1; bucket < _countlength; ++bucket) {
        int hits = _count.get(bucket);
        if (hits >= minCount && !pq.offer(bucket)) {
          // The queue is full and rejected this bucket, so anything with <= hits is ignored
          // from now on.
          minCount = hits + 1;
        }
      }

      // Each polled bucket goes to the front, so the list ends up in reverse poll order.
      LinkedList<BrowseFacet> ranked = new LinkedList<BrowseFacet>();
      for (int val = pq.pollInt(); val != forbidden; val = pq.pollInt()) {
        ranked.addFirst(new BrowseFacet(getFacetString(val), _count.get(val)));
      }
      return ranked;
    }

    /** Does nothing; this collector performs no cleanup here. */
    @Override
    public void close() {
      // Intentionally empty.
    }

    /** Returns a new iterator over this collector's buckets, positioned before the first one. */
    @Override
    public FacetIterator iterator() {
      return new GroupByFacetIterator();
    }

    /**
     * Walks the enclosing collector's buckets in index order, exposing each bucket's composite
     * value and hit count through the inherited {@code facet} and {@code count} fields. Bucket 0
     * is never returned: the cursor starts at 0 and is advanced before each read.
     */
    public class GroupByFacetIterator extends FacetIterator {

      /** Index of the bucket returned most recently; 0 before the first call. */
      private int _index;

      public GroupByFacetIterator() {
        _index = 0;
        facet = null;
        count = 0;
      }

      /**
       * Advances to the next bucket regardless of its hit count.
       *
       * @return the composite value of that bucket
       * @throws NoSuchElementException if the last bucket has already been returned
       * @see com.browseengine.bobo.api.FacetIterator#next()
       */
      @Override
      public Comparable next() {
        if (!hasNext()) {
          throw new NoSuchElementException("No more facets in this iteration");
        }
        ++_index;
        facet = getFacetString(_index);
        count = _count.get(_index);
        return facet;
      }

      /**
       * @return {@code true} while the cursor is before the last bucket
       * @see java.util.Iterator#hasNext()
       */
      @Override
      public boolean hasNext() {
        return (_countlength - 1) > _index;
      }

      /**
       * Always fails; facet iterators are read-only.
       *
       * @see java.util.Iterator#remove()
       */
      @Override
      public void remove() {
        throw new UnsupportedOperationException("remove() method not supported for Facet Iterators");
      }

      /**
       * Advances to the next bucket with at least {@code minHits} hits.
       *
       * @param minHits minimum hit count a bucket must have to be returned
       * @return the composite value of that bucket, or {@code null} (with {@code count} reset to
       *         0) when no remaining bucket qualifies
       * @see com.browseengine.bobo.api.FacetIterator#next(int)
       */
      @Override
      public Comparable next(int minHits) {
        if (!hasNext()) {
          count = 0;
          facet = null;
          return null;
        }

        // Always move at least one bucket forward, then skip buckets below the threshold
        // without running past the last bucket.
        final int last = _countlength - 1;
        ++_index;
        while (_index < last && _count.get(_index) < minHits) {
          ++_index;
        }

        final int hits = _count.get(_index);
        if (hits >= minHits) {
          facet = getFacetString(_index);
          count = hits;
        } else {
          // Stopped on the last bucket and it does not qualify either.
          count = 0;
          facet = null;
        }
        return facet;
      }

      /**
       * Values handed out by this iterator are already formatted strings, so the argument is
       * simply cast.
       *
       * @see com.browseengine.bobo.api.FacetIterator#format(java.lang.Object)
       */
      @Override
      public String format(Object val) {
        return (String) val;
      }
    }

  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy