All downloads are free. The search and download features use the official Maven repository.

com.browseengine.bobo.facets.impl.PathFacetCountCollector Maven / Gradle / Ivy

Go to download

Bobo is a faceted search implementation written purely in Java, built as an extension of Apache Lucene.

The newest version!
package com.browseengine.bobo.facets.impl;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;

import org.apache.log4j.Logger;

import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.BoundedPriorityQueue;
import com.browseengine.bobo.util.LazyBigIntArray;
import com.browseengine.bobo.util.ListMerger;

public class PathFacetCountCollector implements FacetCountCollector {
  private static final Logger log = Logger.getLogger(PathFacetCountCollector.class.getName());
  private final BrowseSelection _sel;
  protected BigSegmentedArray _count;
  private final String _name;
  private final String _sep;
  private final BigSegmentedArray _orderArray;
  protected final FacetDataCache _dataCache;
  private final ComparatorFactory _comparatorFactory;
  private final int _minHitCount;
  private int _maxCount;
  private String[] _stringData;
  private final char[] _sepArray;
  private int _patStart;
  private int _patEnd;

  PathFacetCountCollector(String name, String sep, BrowseSelection sel, FacetSpec ospec,
      FacetDataCache dataCache) {
    _sel = sel;
    _name = name;
    _dataCache = dataCache;
    _sep = sep;
    _sepArray = sep.toCharArray();
    _count = new LazyBigIntArray(_dataCache.freqs.length);
    log.info(name + ": " + _count.size());
    _orderArray = _dataCache.orderArray;
    _minHitCount = ospec.getMinHitCount();
    _maxCount = ospec.getMaxCount();
    if (_maxCount < 1) {
      _maxCount = _count.size();
    }
    FacetSortSpec sortOption = ospec.getOrderBy();
    switch (sortOption) {
    case OrderHitsDesc:
      _comparatorFactory = new FacetHitcountComparatorFactory();
      break;
    case OrderValueAsc:
      _comparatorFactory = null;
      break;
    case OrderByCustom:
      _comparatorFactory = ospec.getCustomComparatorFactory();
      break;
    default:
      throw new IllegalArgumentException("invalid sort option: " + sortOption);
    }
    Pattern.compile(_sep);
    _stringData = new String[10];
    _patStart = 0;
    _patEnd = 0;
  }

  @Override
  public BigSegmentedArray getCountDistribution() {
    return _count;
  }

  @Override
  public String getName() {
    return _name;
  }

  @Override
  public void collect(int docid) {
    int i = _orderArray.get(docid);
    _count.add(i, _count.get(i) + 1);
  }

  @Override
  public void collectAll() {
    _count = BigIntArray.fromArray(_dataCache.freqs);
  }

  @Override
  public BrowseFacet getFacet(String value) {
    return null;
  }

  @Override
  public int getFacetHitsCount(Object value) {
    return 0;
  }

  private void ensureCapacity(int minCapacity) {
    int oldCapacity = _stringData.length;
    if (minCapacity > oldCapacity) {
      Object oldData[] = _stringData;
      int newCapacity = (oldCapacity * 3) / 2 + 1;
      if (newCapacity < minCapacity) newCapacity = minCapacity;
      // minCapacity is usually close to size, so this is a win:
      _stringData = new String[newCapacity];
      System.arraycopy(oldData, 0, _stringData, Math.min(oldData.length, newCapacity), newCapacity);
    }
  }

  private int patListSize() {
    return (_patEnd - _patStart);
  }

  public boolean splitString(String input) {
    _patStart = 0;
    _patEnd = 0;
    char[] str = input.toCharArray();
    int index = 0;
    int sepindex = 0;
    int tokStart = -1;
    int tokEnd = 0;
    while (index < input.length()) {
      for (sepindex = 0; (sepindex < _sepArray.length)
          && (str[index + sepindex] == _sepArray[sepindex]); sepindex++)
        ;
      if (sepindex == _sepArray.length) {
        index += _sepArray.length;
        if (tokStart >= 0) {
          ensureCapacity(_patEnd + 1);
          tokEnd++;
          _stringData[_patEnd++] = input.substring(tokStart, tokEnd);
        }
        tokStart = -1;
      } else {
        if (tokStart < 0) {
          tokStart = index;
          tokEnd = index;
        } else {
          tokEnd++;
        }
        index++;
      }
    }

    if (_patEnd == 0) return false;

    if (tokStart >= 0) {
      ensureCapacity(_patEnd + 1);
      tokEnd++;
      _stringData[_patEnd++] = input.substring(tokStart, tokEnd);
    }

    // let gc do its job
    str = null;

    // Construct result
    while (_patEnd > 0 && _stringData[patListSize() - 1].equals("")) {
      _patEnd--;
    }
    return true;
  }

  private List getFacetsForPath(String selectedPath, int depth, boolean strict,
      int minCount, int maxCount) {
    LinkedList list = new LinkedList();

    BoundedPriorityQueue pq = null;
    if (_comparatorFactory != null) {
      final Comparator comparator = _comparatorFactory.newComparator();

      pq = new BoundedPriorityQueue(new Comparator() {

        @Override
        public int compare(BrowseFacet o1, BrowseFacet o2) {
          return -comparator.compare(o1, o2);
        }

      }, maxCount);
    }

    String[] startParts = null;
    int startDepth = 0;

    if (selectedPath != null && selectedPath.length() > 0) {
      startParts = selectedPath.split(_sep);
      startDepth = startParts.length;
      if (!selectedPath.endsWith(_sep)) {
        selectedPath += _sep;
      }
    }

    String currentPath = null;
    int currentCount = 0;

    int wantedDepth = startDepth + depth;

    int index = 0;
    if (selectedPath != null && selectedPath.length() > 0) {
      index = _dataCache.valArray.indexOf(selectedPath);
      if (index < 0) {
        index = -(index + 1);
      }
    }

    StringBuffer buf = new StringBuffer();
    for (int i = index; i < _count.size(); ++i) {
      if (_count.get(i) >= minCount) {
        String path = _dataCache.valArray.get(i);
        // if (path==null || path.equals(selectedPath)) continue;

        int subCount = _count.get(i);

        // do not use Java split string in a loop !
        // String[] pathParts=path.split(_sep);
        int pathDepth = 0;
        if (!splitString(path)) {
          pathDepth = 0;
        } else {
          pathDepth = patListSize();
        }

        int tmpdepth = 0;
        if ((startDepth == 0) || (startDepth > 0 && path.startsWith(selectedPath))) {
          buf.delete(0, buf.length());
          int minDepth = Math.min(wantedDepth, pathDepth);
          tmpdepth = 0;
          for (int k = _patStart; ((k < _patEnd) && (tmpdepth < minDepth)); ++k, tmpdepth++) {
            buf.append(_stringData[k]);
            if (!_stringData[k].endsWith(_sep)) {
              if (pathDepth != wantedDepth || k < (wantedDepth - 1)) buf.append(_sep);
            }
          }
          String wantedPath = buf.toString();
          if (currentPath == null) {
            currentPath = wantedPath;
            currentCount = subCount;
          } else if (wantedPath.equals(currentPath)) {
            if (!strict) {
              currentCount += subCount;
            }
          } else {
            boolean directNode = false;

            if (wantedPath.endsWith(_sep)) {
              if (currentPath.equals(wantedPath.substring(0, wantedPath.length() - 1))) {
                directNode = true;
              }
            }

            if (strict) {
              if (directNode) {
                currentCount += subCount;
              } else {
                BrowseFacet ch = new BrowseFacet(currentPath, currentCount);
                if (pq != null) {
                  pq.add(ch);
                } else {
                  if (list.size() < maxCount) {
                    list.add(ch);
                  }
                }
                currentPath = wantedPath;
                currentCount = subCount;
              }
            } else {
              if (!directNode) {
                BrowseFacet ch = new BrowseFacet(currentPath, currentCount);
                if (pq != null) {
                  pq.add(ch);
                } else {
                  if (list.size() < maxCount) {
                    list.add(ch);
                  }
                }
                currentPath = wantedPath;
                currentCount = subCount;
              } else {
                currentCount += subCount;
              }
            }
          }
        } else {
          break;
        }
      }
    }

    if (currentPath != null && currentCount > 0) {
      BrowseFacet ch = new BrowseFacet(currentPath, currentCount);
      if (pq != null) {
        pq.add(ch);
      } else {
        if (list.size() < maxCount) {
          list.add(ch);
        }
      }
    }

    if (pq != null) {
      BrowseFacet val;
      while ((val = pq.poll()) != null) {
        list.addFirst(val);
      }
    }

    return list;
  }

  @Override
  public List getFacets() {
    Properties props = _sel == null ? null : _sel.getSelectionProperties();
    int depth = PathFacetHandler.getDepth(props);
    boolean strict = PathFacetHandler.isStrict(props);

    String[] paths = _sel == null ? null : _sel.getValues();
    if (paths == null || paths.length == 0) {
      return getFacetsForPath(null, depth, strict, _minHitCount, _maxCount);
    }

    if (paths.length == 1) return getFacetsForPath(paths[0], depth, strict, _minHitCount, _maxCount);

    LinkedList finalList = new LinkedList();
    ArrayList> iterList = new ArrayList>(paths.length);
    for (String path : paths) {
      List subList = getFacetsForPath(path, depth, strict, _minHitCount, _maxCount);
      if (subList.size() > 0) {
        iterList.add(subList.iterator());
      }
    }
    @SuppressWarnings("unchecked")
    Iterator finalIter = ListMerger.mergeLists(iterList
        .toArray((Iterator[]) new Iterator[iterList.size()]),
      _comparatorFactory == null ? new FacetValueComparatorFactory().newComparator()
          : _comparatorFactory.newComparator());
    while (finalIter.hasNext()) {
      BrowseFacet f = finalIter.next();
      finalList.addFirst(f);
    }
    return finalList;
  }

  @Override
  public void close() {
    // TODO Auto-generated method stub
  }

  @Override
  public FacetIterator iterator() {
    Properties props = _sel == null ? null : _sel.getSelectionProperties();
    int depth = PathFacetHandler.getDepth(props);
    boolean strict = PathFacetHandler.isStrict(props);
    List finalList;

    String[] paths = _sel == null ? null : _sel.getValues();
    if (paths == null || paths.length == 0) {
      finalList = getFacetsForPath(null, depth, strict, Integer.MIN_VALUE, _count.size());
      return new PathFacetIterator(finalList);
    }

    if (paths.length == 1) {
      finalList = getFacetsForPath(paths[0], depth, strict, Integer.MIN_VALUE, _count.size());
      return new PathFacetIterator(finalList);
    }

    finalList = new LinkedList();
    ArrayList> iterList = new ArrayList>(paths.length);
    for (String path : paths) {
      List subList = getFacetsForPath(path, depth, strict, Integer.MIN_VALUE,
        _count.size());
      if (subList.size() > 0) {
        iterList.add(subList.iterator());
      }
    }

    @SuppressWarnings("unchecked")
    Iterator finalIter = ListMerger.mergeLists(iterList
        .toArray((Iterator[]) new Iterator[iterList.size()]),
      _comparatorFactory == null ? new FacetValueComparatorFactory().newComparator()
          : _comparatorFactory.newComparator());
    while (finalIter.hasNext()) {
      BrowseFacet f = finalIter.next();
      finalList.add(f);
    }
    return new PathFacetIterator(finalList);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy