All downloads are free. The search and download functionality uses the official Maven repository.

com.browseengine.bobo.facets.impl.RangeFacetCountCollector Maven / Gradle / Ivy

There is a newer version: 3.1.2
Show newest version
/**
 * This software is licensed to you under the Apache License, Version 2.0 (the
 * "Apache License").
 *
 * LinkedIn's contributions are made under the Apache License. If you contribute
 * to the Software, the contributions will be deemed to have been made under the
 * Apache License, unless you expressly indicate otherwise. Please do not make any
 * contributions that would be inconsistent with the Apache License.
 *
 * You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, this software
 * distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
 * License for the specific language governing permissions and limitations for the
 * software governed under the Apache License.
 *
 * © 2012 LinkedIn Corp. All Rights Reserved.  
 */

package com.browseengine.bobo.facets.impl;

import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;

import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermStringList;
import com.browseengine.bobo.facets.filter.FacetRangeFilter;
import com.browseengine.bobo.util.BigIntArray;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;

public class RangeFacetCountCollector implements FacetCountCollector
{
  private final FacetSpec _ospec;
  protected BigSegmentedArray _count;
  private int _countlength;
  private final BigSegmentedArray _array;
  protected FacetDataCache _dataCache;
  private final String _name;
  private final TermStringList _predefinedRanges;
  private int[][] _predefinedRangeIndexes;
  private int _docBase;
  
  /**
   * Creates a collector backed by the given facet data cache and, when predefined
   * ranges are supplied, resolves every range string to its index pair via
   * {@link FacetRangeFilter#parse}.
   *
   * @param name value stored as this collector's name
   * @param dataCache facet data; the length of its {@code freqs} sizes the count array
   *          and its {@code orderArray} is retained
   * @param docBase document base stored on the collector
   * @param ospec facet spec stored on the collector; may be {@code null}
   * @param predefinedRanges range strings, or {@code null} when there are none. The list
   *          is copied before sorting, so the caller's list is never reordered and may
   *          be unmodifiable or shared between collectors.
   */
  public RangeFacetCountCollector(String name,FacetDataCache dataCache,int docBase,FacetSpec ospec,List predefinedRanges)
  {
    _name = name;
    _dataCache = dataCache;
    _countlength = _dataCache.freqs.length;
    _count = new LazyBigIntArray(_countlength);
    _array = _dataCache.orderArray;
    _docBase = docBase;
    _ospec = ospec;

    if (predefinedRanges != null)
    {
      // Sort a private copy: sorting the argument in place would mutate a list the
      // caller may share with other collectors, and fails on unmodifiable lists.
      List<String> sortedRanges = new ArrayList<String>(predefinedRanges);
      Collections.sort(sortedRanges);

      _predefinedRanges = new TermStringList();
      _predefinedRanges.addAll(sortedRanges);

      // One index pair per range, in the same (sorted) order as _predefinedRanges.
      _predefinedRangeIndexes = new int[_predefinedRanges.size()][];
      int i = 0;
      for (String range : _predefinedRanges)
      {
        _predefinedRangeIndexes[i++] = FacetRangeFilter.parse(_dataCache, range);
      }
    }
    else
    {
      _predefinedRanges = null;
    }
  }
  
  /**
   * Gets the distribution of counts over the value array. When predefined ranges are available, the distribution is returned per predefined range instead.
   */
  public BigSegmentedArray getCountDistribution()
  {
    BigSegmentedArray dist;
    if (_predefinedRangeIndexes!=null)
    {
      dist = new LazyBigIntArray(_predefinedRangeIndexes.length);
      int n=0;
      for (int[] range : _predefinedRangeIndexes)
      {
        int start = range[0];
        int end = range[1];
        
        int sum = 0;
        for (int i=start;i list = new ArrayList();

      for (int i=0;i getFacets() {
    // NOTE(review): the line above is damaged in this copy of the source (text between
    // angle brackets appears to have been stripped during extraction); it is kept
    // verbatim. What follows is the body of getFacets().
    //
    // Builds one BrowseFacet per predefined range whose accumulated count is at least
    // the spec's minimum hit count. Facets are emitted in predefined-range index order;
    // the max-count limit is not applied here (that code is commented out below).
    // Returns FacetCountCollector.EMPTY_FACET_LIST when there is no facet spec or no
    // predefined ranges.
    if (_ospec!=null){
      if (_predefinedRangeIndexes!=null)
      {
        int minCount=_ospec.getMinHitCount();
        //int maxNumOfFacets = _ospec.getMaxCount();
        //if (maxNumOfFacets <= 0 || maxNumOfFacets > _predefinedRangeIndexes.length) maxNumOfFacets = _predefinedRangeIndexes.length;
        
        int[] rangeCount = new int[_predefinedRangeIndexes.length];
       
        // Total for range k = sum of _count over indexes [k][0] .. [k][1], both inclusive.
        for (int k=0;k<_predefinedRangeIndexes.length;++k)
        {
          int count = 0;
          int idx = _predefinedRangeIndexes[k][0];
          int end = _predefinedRangeIndexes[k][1];
          while(idx <= end)
          {
            count += _count.get(idx++);
          }
          rangeCount[k] = count;
        }
        
        // Keep only the ranges that reach the minimum hit count.
        List facetColl = new ArrayList(_predefinedRanges.size());
          for (int k=0;k<_predefinedRangeIndexes.length;++k)
          {
            if(rangeCount[k] >= minCount)
            {
              BrowseFacet choice=new BrowseFacet(_predefinedRanges.get(k), rangeCount[k]);
              facetColl.add(choice);
            }
            //if(facetColl.size() >= maxNumOfFacets) break;
          }
        return facetColl;
      }
      else
      {
        // No predefined ranges: nothing to report.
        return FacetCountCollector.EMPTY_FACET_LIST;
      }
    }
    else
    {
      // No facet spec: nothing to report.
      return FacetCountCollector.EMPTY_FACET_LIST;
    }
  }
  
  /**
   * Builds the facets for the predefined ranges, applying the minimum hit count,
   * the maximum number of facets and the ordering configured on the facet spec.
   *
   * @return the selected facets, or {@code FacetCountCollector.EMPTY_FACET_LIST} when
   *         there is no facet spec or there are no predefined ranges
   * @throws IllegalArgumentException if the ordering is neither by value nor by hits
   *         and the spec supplies no custom comparator factory
   */
  public List getFacetsNew() {
    if (_ospec == null || _predefinedRangeIndexes == null)
    {
      return FacetCountCollector.EMPTY_FACET_LIST;
    }

    final int minCount = _ospec.getMinHitCount();
    final int rangeTotal = _predefinedRangeIndexes.length;

    // A non-positive or oversized limit means "all ranges".
    int limit = _ospec.getMaxCount();
    if (limit <= 0 || limit > rangeTotal)
    {
      limit = rangeTotal;
    }

    // Fold the per-value counts into one total per range (both bounds inclusive).
    final BigSegmentedArray rangeCount = new LazyBigIntArray(rangeTotal);
    for (int k = 0; k < rangeTotal; ++k)
    {
      final int first = _predefinedRangeIndexes[k][0];
      final int last = _predefinedRangeIndexes[k][1];
      int total = 0;
      for (int idx = first; idx <= last; ++idx)
      {
        total += _count.get(idx);
      }
      rangeCount.add(k, total);
    }

    final FacetSortSpec sortspec = _ospec.getOrderBy();
    if (sortspec == FacetSortSpec.OrderValueAsc)
    {
      // Ranges are already in value order: take qualifying ones until the limit is hit.
      final List<BrowseFacet> ascending = new ArrayList<BrowseFacet>(limit);
      for (int k = 0; k < rangeTotal && ascending.size() < limit; ++k)
      {
        final int hits = rangeCount.get(k);
        if (hits >= minCount)
        {
          ascending.add(new BrowseFacet(_predefinedRanges.get(k), hits));
        }
      }
      return ascending;
    }

    final ComparatorFactory comparatorFactory;
    if (sortspec == FacetSortSpec.OrderHitsDesc)
    {
      comparatorFactory = new FacetHitcountComparatorFactory();
    }
    else
    {
      comparatorFactory = _ospec.getCustomComparatorFactory();
    }
    if (comparatorFactory == null)
    {
      throw new IllegalArgumentException("facet comparator factory not specified");
    }

    final FieldValueAccessor accessor = new FieldValueAccessor()
    {
      public String getFormatedValue(int index)
      {
        return _predefinedRanges.get(index);
      }

      public Object getRawValue(int index)
      {
        return _predefinedRanges.getRawValue(index);
      }
    };
    final IntComparator comparator = comparatorFactory.newComparator(accessor, rangeCount);

    // Bounded queue of range indexes; 'forbidden' is the sentinel pollInt() yields when empty.
    final int forbidden = -1;
    final IntBoundedPriorityQueue pq = new IntBoundedPriorityQueue(comparator, limit, forbidden);
    for (int i = 0; i < rangeTotal; ++i)
    {
      if (rangeCount.get(i) >= minCount)
      {
        pq.offer(i);
      }
    }

    // Each polled entry is pushed to the front, so the result is in reverse poll order.
    final LinkedList<BrowseFacet> ranked = new LinkedList<BrowseFacet>();
    for (int val = pq.pollInt(); val != forbidden; val = pq.pollInt())
    {
      ranked.addFirst(new BrowseFacet(_predefinedRanges.get(val), rangeCount.get(val)));
    }
    return ranked;
  }
  
  /**
   * A {@link BrowseFacet} that remembers a lower and an upper bound and exposes them
   * as a single value of the form {@code [lower TO upper]}.
   */
  private static class RangeFacet extends BrowseFacet{
    private static final long serialVersionUID = 1L;

    String _lower;
    String _upper;

    RangeFacet(){
    }

    /**
     * Stores both bounds and sets the facet value to {@code [lower TO upper]}.
     *
     * @param lower lower bound text
     * @param upper upper bound text
     */
    void setValues(String lower, String upper) {
      _lower = lower;
      _upper = upper;
      final String rendered = "[" + lower + " TO " + upper + "]";
      setValue(rendered);
    }
  }

  /**
   * Empty implementation: the body performs no work.
   */
  public void close()
  {
    // NOTE(review): this began as an auto-generated stub; confirm that nothing held by
    // this collector needs to be released here.
  }    

  /**
   * Creates an iterator over the predefined ranges and their accumulated counts.
   *
   * @return a {@code DefaultFacetIterator} over the predefined ranges, or {@code null}
   *         when no predefined ranges were configured
   */
  public FacetIterator iterator() {
    if (_predefinedRanges == null)
    {
      return null;
    }

    final int rangeTotal = _predefinedRangeIndexes.length;
    final BigSegmentedArray rangeCounts = new LazyBigIntArray(rangeTotal);
    for (int k = 0; k < rangeTotal; ++k)
    {
      final int first = _predefinedRangeIndexes[k][0];
      final int last = _predefinedRangeIndexes[k][1];

      // Sum the per-value counts covered by this range (both bounds inclusive).
      int total = 0;
      for (int idx = first; idx <= last; ++idx)
      {
        total += _count.get(idx);
      }
      rangeCounts.add(k, rangeCounts.get(k) + total);
    }
    return new DefaultFacetIterator(_predefinedRanges, rangeCounts, rangeCounts.size(), true);
  }  
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy