All downloads are FREE. Search and download functionality uses the official Maven repository.

com.browseengine.bobo.facets.impl.RangeFacetCountCollector Maven / Gradle / Ivy

Go to download

Bobo is a faceted search implementation written purely in Java, built as an extension of Apache Lucene.

The newest version!
package com.browseengine.bobo.facets.impl;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.filter.FacetRangeFilter;
import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;

/**
 * Facet count collector for range facets.
 *
 * <p>Hits are counted per term index (the value read from the data cache's order array for each
 * collected document). When predefined ranges are supplied, each range string is translated once,
 * at construction time, into a pair of term indexes by {@link FacetRangeFilter#parse}; the
 * per-range hit count is then the sum of the per-term counts between those two indexes, both
 * ends included.
 */
public class RangeFacetCountCollector implements FacetCountCollector {
  private final FacetSpec _ospec;
  /** Per-term hit counts, indexed by term index. */
  protected BigSegmentedArray _count;
  private int _countlength;
  /** Maps a document id to its term index. */
  private final BigSegmentedArray _array;
  protected FacetDataCache _dataCache;
  private final String _name;
  /** Sorted predefined range strings, or {@code null} when none were supplied. */
  private final TermStringList _predefinedRanges;
  /** For each predefined range, the {start, end} term indexes produced by the range parser. */
  private int[][] _predefinedRangeIndexes;

  /**
   * Creates a collector.
   *
   * @param name facet name reported by {@link #getName()}
   * @param dataCache data cache supplying the frequency array and the order array
   * @param docBase not used by this implementation
   * @param ospec facet spec controlling min hit count, max count and ordering; may be
   *        {@code null}, in which case the facet lists are empty
   * @param predefinedRanges range strings to report on, or {@code null}; the list itself is
   *        never modified
   */
  public RangeFacetCountCollector(String name, FacetDataCache dataCache, int docBase,
      FacetSpec ospec, List predefinedRanges) {
    _name = name;
    _dataCache = dataCache;
    _countlength = _dataCache.freqs.length;
    _count = new LazyBigIntArray(_countlength);
    _array = _dataCache.orderArray;
    _ospec = ospec;

    if (predefinedRanges == null) {
      _predefinedRanges = null;
      return;
    }

    // Sort a private copy: sorting the argument in place would reorder a list the caller may
    // share with other collectors, and would fail outright on an unmodifiable list.
    // The parameter is a raw List, so this conversion is unchecked; elements are expected to be
    // range strings, exactly as the original in-place sort and addAll required.
    @SuppressWarnings("unchecked")
    List<String> sortedRanges = new ArrayList<String>(predefinedRanges);
    Collections.sort(sortedRanges);

    _predefinedRanges = new TermStringList();
    _predefinedRanges.addAll(sortedRanges);

    _predefinedRangeIndexes = new int[_predefinedRanges.size()][];
    int slot = 0;
    for (String range : _predefinedRanges) {
      _predefinedRangeIndexes[slot++] = FacetRangeFilter.parse(_dataCache, range);
    }
  }

  /**
   * Sums the per-term counts for term indexes {@code first} through {@code last}, both inclusive.
   * Yields 0 when {@code first > last}.
   */
  private int sumCounts(int first, int last) {
    int total = 0;
    for (int i = first; i <= last; ++i) {
      total += _count.get(i);
    }
    return total;
  }

  /**
   * Builds an array holding, for each predefined range (in sorted range order), the sum of the
   * per-term counts it covers. Must only be called when predefined ranges exist.
   */
  private BigSegmentedArray sumPredefinedRanges() {
    BigSegmentedArray sums = new LazyBigIntArray(_predefinedRangeIndexes.length);
    for (int k = 0; k < _predefinedRangeIndexes.length; ++k) {
      int[] bounds = _predefinedRangeIndexes[k];
      sums.add(k, sumCounts(bounds[0], bounds[1]));
    }
    return sums;
  }

  /**
   * Gets the distribution of the value arrays. When predefined ranges are available, this returns
   * the distribution by predefined ranges (one entry per range); otherwise it returns the
   * per-term count array itself.
   */
  @Override
  public BigSegmentedArray getCountDistribution() {
    if (_predefinedRangeIndexes == null) {
      return _count;
    }
    // The end index is inclusive here, as it is in every other per-range summation in this
    // class; the previous exclusive bound left out the last term of each range.
    return sumPredefinedRanges();
  }

  @Override
  public String getName() {
    return _name;
  }

  /**
   * Returns the facet for an arbitrary range string.
   *
   * @param value range string to parse against the data cache
   * @return a facet carrying {@code value} and its summed hit count, or {@code null} when the
   *         range parser returns {@code null}
   */
  @Override
  public BrowseFacet getFacet(String value) {
    int[] bounds = FacetRangeFilter.parse(_dataCache, value);
    if (bounds == null) {
      return null;
    }
    return new BrowseFacet(value, sumCounts(bounds[0], bounds[1]));
  }

  /**
   * Returns the summed hit count for an arbitrary range.
   *
   * @param value range string (cast to {@code String})
   * @return the hit count, or 0 when the range parser returns {@code null}
   */
  @Override
  public int getFacetHitsCount(Object value) {
    int[] bounds = FacetRangeFilter.parse(_dataCache, (String) value);
    return bounds == null ? 0 : sumCounts(bounds[0], bounds[1]);
  }

  /** Records one hit for the term index that {@code docid} maps to. */
  @Override
  public void collect(int docid) {
    int termIndex = _array.get(docid);
    _count.add(termIndex, _count.get(termIndex) + 1);
  }

  /** Replaces the counts with the data cache's frequency array, i.e. counts every document. */
  @Override
  public final void collectAll() {
    _count = BigIntArray.fromArray(_dataCache.freqs);
    _countlength = _dataCache.freqs.length;
  }

  /**
   * Replaces every element of {@code facets}, in place, with a {@code RangeFacet} whose lower and
   * upper bound are both the original facet's value and whose hit count is preserved.
   */
  void convertFacets(BrowseFacet[] facets) {
    for (int i = 0; i < facets.length; ++i) {
      BrowseFacet source = facets[i];
      String val = source.getValue();
      RangeFacet converted = new RangeFacet();
      converted.setValues(val, val);
      converted.setFacetValueHitCount(source.getFacetValueHitCount());
      facets[i] = converted;
    }
  }

  /**
   * Returns one facet per predefined range whose hit count reaches the spec's minimum hit count,
   * in sorted range order. The spec's max count and ordering are not applied here.
   *
   * @return the facets, or the shared empty list when there is no spec or no predefined ranges
   */
  @Override
  public List getFacets() {
    if (_ospec == null || _predefinedRangeIndexes == null) {
      return FacetCountCollector.EMPTY_FACET_LIST;
    }

    int minCount = _ospec.getMinHitCount();
    List<BrowseFacet> facetColl = new ArrayList<BrowseFacet>(_predefinedRanges.size());
    for (int k = 0; k < _predefinedRangeIndexes.length; ++k) {
      int[] bounds = _predefinedRangeIndexes[k];
      int hits = sumCounts(bounds[0], bounds[1]);
      if (hits >= minCount) {
        facetColl.add(new BrowseFacet(_predefinedRanges.get(k), hits));
      }
    }
    return facetColl;
  }

  /**
   * Variant of {@link #getFacets()} that also honours the spec's max count and ordering.
   *
   * <p>With {@code OrderValueAsc} the first qualifying ranges (in sorted range order) are
   * returned. Otherwise the ranges are ranked through a bounded priority queue using the
   * hit-count comparator ({@code OrderHitsDesc}) or the spec's custom comparator factory.
   *
   * @return the facets, or the shared empty list when there is no spec or no predefined ranges
   * @throws IllegalArgumentException if a custom ordering is requested but the spec supplies no
   *         comparator factory
   */
  public List getFacetsNew() {
    if (_ospec == null || _predefinedRangeIndexes == null) {
      return FacetCountCollector.EMPTY_FACET_LIST;
    }

    int minCount = _ospec.getMinHitCount();
    int maxNumOfFacets = _ospec.getMaxCount();
    // A non-positive or oversized limit means "all predefined ranges".
    if (maxNumOfFacets <= 0 || maxNumOfFacets > _predefinedRangeIndexes.length) {
      maxNumOfFacets = _predefinedRangeIndexes.length;
    }

    final BigSegmentedArray rangeCount = sumPredefinedRanges();
    FacetSortSpec sortspec = _ospec.getOrderBy();

    if (sortspec == FacetSortSpec.OrderValueAsc) {
      List<BrowseFacet> byValue = new ArrayList<BrowseFacet>(maxNumOfFacets);
      for (int k = 0; k < _predefinedRangeIndexes.length; ++k) {
        if (rangeCount.get(k) >= minCount) {
          byValue.add(new BrowseFacet(_predefinedRanges.get(k), rangeCount.get(k)));
        }
        if (byValue.size() >= maxNumOfFacets) {
          break;
        }
      }
      return byValue;
    }

    ComparatorFactory comparatorFactory;
    if (sortspec == FacetSortSpec.OrderHitsDesc) {
      comparatorFactory = new FacetHitcountComparatorFactory();
    } else {
      comparatorFactory = _ospec.getCustomComparatorFactory();
    }
    if (comparatorFactory == null) {
      throw new IllegalArgumentException("facet comparator factory not specified");
    }

    // The comparator ranks range positions; it resolves values through the predefined range list.
    FieldValueAccessor rangeAccessor = new FieldValueAccessor() {
      @Override
      public String getFormatedValue(int index) {
        return _predefinedRanges.get(index);
      }

      @Override
      public Object getRawValue(int index) {
        return _predefinedRanges.getRawValue(index);
      }
    };
    final IntComparator comparator = comparatorFactory.newComparator(rangeAccessor, rangeCount);

    // -1 is never a valid range position, so it serves as the queue's "empty" marker.
    final int forbidden = -1;
    IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, maxNumOfFacets,
        forbidden);
    for (int i = 0; i < _predefinedRangeIndexes.length; ++i) {
      if (rangeCount.get(i) >= minCount) {
        pq.offer(i);
      }
    }

    // Each polled entry is inserted at the head, so the result is in reverse polling order.
    LinkedList<BrowseFacet> ranked = new LinkedList<BrowseFacet>();
    int val;
    while ((val = pq.pollInt()) != forbidden) {
      ranked.addFirst(new BrowseFacet(_predefinedRanges.get(val), rangeCount.get(val)));
    }
    return ranked;
  }

  /** A facet whose value is rendered as {@code [lower TO upper]}. */
  private static class RangeFacet extends BrowseFacet {
    private static final long serialVersionUID = 1L;

    String _lower;
    String _upper;

    RangeFacet() {
    }

    /** Stores both bounds and sets the facet value to {@code [lower TO upper]}. */
    void setValues(String lower, String upper) {
      _lower = lower;
      _upper = upper;
      setValue("[" + _lower + " TO " + _upper + "]");
    }
  }

  /** Does nothing. */
  @Override
  public void close() {
    // Intentionally empty.
  }

  /**
   * Returns an iterator over the predefined ranges and their summed hit counts.
   *
   * @return the iterator, or {@code null} when no predefined ranges were supplied
   */
  @Override
  public FacetIterator iterator() {
    if (_predefinedRanges == null) {
      return null;
    }
    BigSegmentedArray rangeCounts = sumPredefinedRanges();
    return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.size(), true);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy