All Downloads are FREE. Search and download functionalities are using the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
com.browseengine.bobo.facets.impl.DefaultFacetCountCollector Maven / Gradle / Ivy
/**
* This software is licensed to you under the Apache License, Version 2.0 (the
* "Apache License").
*
* LinkedIn's contributions are made under the Apache License. If you contribute
* to the Software, the contributions will be deemed to have been made under the
* Apache License, unless you expressly indicate otherwise. Please do not make any
* contributions that would be inconsistent with the Apache License.
*
* You may obtain a copy of the Apache License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, this software
* distributed under the Apache License is distributed on an "AS IS" BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the Apache
* License for the specific language governing permissions and limitations for the
* software governed under the Apache License.
*
* © 2012 LinkedIn Corp. All Rights Reserved.
*/
package com.browseengine.bobo.facets.impl;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.management.MBeanServer;
import javax.management.ObjectName;
import javax.management.StandardMBean;
import org.apache.log4j.Logger;
import com.browseengine.bobo.api.BrowseFacet;
import com.browseengine.bobo.api.BrowseSelection;
import com.browseengine.bobo.api.ComparatorFactory;
import com.browseengine.bobo.api.FacetIterator;
import com.browseengine.bobo.api.FacetSpec;
import com.browseengine.bobo.api.FacetSpec.FacetSortSpec;
import com.browseengine.bobo.api.FieldValueAccessor;
import com.browseengine.bobo.facets.FacetCountCollector;
import com.browseengine.bobo.facets.data.FacetDataCache;
import com.browseengine.bobo.facets.data.TermDoubleList;
import com.browseengine.bobo.facets.data.TermFloatList;
import com.browseengine.bobo.facets.data.TermIntList;
import com.browseengine.bobo.facets.data.TermLongList;
import com.browseengine.bobo.facets.data.TermShortList;
import com.browseengine.bobo.facets.data.TermValueList;
import com.browseengine.bobo.jmx.JMXUtil;
import com.browseengine.bobo.util.BigSegmentedArray;
import com.browseengine.bobo.util.IntBoundedPriorityQueue;
import com.browseengine.bobo.util.IntBoundedPriorityQueue.IntComparator;
import com.browseengine.bobo.util.LazyBigIntArray;
import com.browseengine.bobo.util.MemoryManager;
import com.browseengine.bobo.util.MemoryManagerAdminMBean;
public abstract class DefaultFacetCountCollector implements FacetCountCollector
{
private static final Logger log = Logger.getLogger(DefaultFacetCountCollector.class.getName());
protected final FacetSpec _ospec;
public BigSegmentedArray _count;
public int _countlength;
protected FacetDataCache _dataCache;
private final String _name;
protected final BrowseSelection _sel;
protected final BigSegmentedArray _array;
private int _docBase;
protected final LinkedList intarraylist = new LinkedList();
private Iterator _iterator;
private boolean _closed = false;
protected static MemoryManager intarraymgr = new MemoryManager(new MemoryManager.Initializer()
{
public void init(BigSegmentedArray buf)
{
buf.fill(0);
}
public BigSegmentedArray newInstance(int size)
{
return new LazyBigIntArray(size);
}
public int size(BigSegmentedArray buf)
{
assert buf != null;
return buf.size();
}
});
static{
try{
// register memory manager mbean
MBeanServer mbeanServer = java.lang.management.ManagementFactory.getPlatformMBeanServer();
ObjectName mbeanName = new ObjectName(JMXUtil.JMX_DOMAIN,"name","DefaultFacetCountCollector-MemoryManager");
StandardMBean mbean = new StandardMBean(intarraymgr.getAdminMBean(), MemoryManagerAdminMBean.class);
mbeanServer.registerMBean(mbean, mbeanName);
}
catch(Exception e){
log.error(e.getMessage(),e);
}
}
public DefaultFacetCountCollector(String name,FacetDataCache dataCache,int docBase,
BrowseSelection sel,FacetSpec ospec)
{
_sel = sel;
_ospec = ospec;
_name = name;
_dataCache=dataCache;
_countlength = _dataCache.freqs.length;
if (_dataCache.freqs.length <= 3096)
{
_count = new LazyBigIntArray(_countlength);
} else
{
_count = intarraymgr.get(_countlength);
intarraylist.add(_count);
}
_array = _dataCache.orderArray;
_docBase = docBase;
}
public String getName()
{
return _name;
}
abstract public void collect(int docid);
abstract public void collectAll();
public BrowseFacet getFacet(String value)
{
if (_closed)
{
throw new IllegalStateException("This instance of count collector for " + _name + " was already closed");
}
BrowseFacet facet = null;
int index=_dataCache.valArray.indexOf(value);
if (index >=0 ){
facet = new BrowseFacet(_dataCache.valArray.get(index),_count.get(index));
}
else{
facet = new BrowseFacet(_dataCache.valArray.format(value),0);
}
return facet;
}
public int getFacetHitsCount(Object value)
{
if (_closed)
{
throw new IllegalStateException("This instance of count collector for " + _name + " was already closed");
}
int index=_dataCache.valArray.indexOf(value);
if (index >= 0)
{
return _count.get(index);
}
else{
return 0;
}
}
public BigSegmentedArray getCountDistribution()
{
return _count;
}
public FacetDataCache getFacetDataCache(){
return _dataCache;
}
public static List getFacets(FacetSpec ospec, BigSegmentedArray count, int countlength, final TermValueList> valList){
if (ospec!=null)
{
int minCount=ospec.getMinHitCount();
int max=ospec.getMaxCount();
if (max <= 0) max=countlength;
List facetColl;
FacetSortSpec sortspec = ospec.getOrderBy();
if (sortspec == FacetSortSpec.OrderValueAsc)
{
facetColl=new ArrayList(max);
for (int i = 1; i < countlength;++i) // exclude zero
{
int hits=count.get(i);
if (hits>=minCount)
{
BrowseFacet facet=new BrowseFacet(valList.get(i),hits);
facetColl.add(facet);
}
if (facetColl.size()>=max) break;
}
}
else //if (sortspec == FacetSortSpec.OrderHitsDesc)
{
ComparatorFactory comparatorFactory;
if (sortspec == FacetSortSpec.OrderHitsDesc){
comparatorFactory = new FacetHitcountComparatorFactory();
}
else{
comparatorFactory = ospec.getCustomComparatorFactory();
}
if (comparatorFactory == null){
throw new IllegalArgumentException("facet comparator factory not specified");
}
final IntComparator comparator = comparatorFactory.newComparator(new FieldValueAccessor(){
public String getFormatedValue(int index) {
return valList.get(index);
}
public Object getRawValue(int index) {
return valList.getRawValue(index);
}
}, count);
facetColl=new LinkedList();
final int forbidden = -1;
IntBoundedPriorityQueue pq=new IntBoundedPriorityQueue(comparator,max, forbidden);
for (int i=1;i=minCount)
{
pq.offer(i);
}
}
int val;
while((val = pq.pollInt()) != forbidden)
{
BrowseFacet facet=new BrowseFacet(valList.get(val),count.get(val));
((LinkedList)facetColl).addFirst(facet);
}
}
return facetColl;
}
else
{
return FacetCountCollector.EMPTY_FACET_LIST;
}
}
public List getFacets() {
if (_closed)
{
throw new IllegalStateException("This instance of count collector for " + _name + " was already closed");
}
return getFacets(_ospec,_count, _countlength, _dataCache.valArray);
}
@Override
public void close()
{
if (_closed)
{
log.warn("This instance of count collector for '" + _name + "' was already closed. This operation is no-op.");
return;
}
_closed = true;
while(!intarraylist.isEmpty())
{
intarraymgr.release(intarraylist.poll());
}
}
/**
* This function returns an Iterator to visit the facets in value order
* @return The Iterator to iterate over the facets in value order
*/
public FacetIterator iterator()
{
if (_closed)
{
throw new IllegalStateException("This instance of count collector for '" + _name + "' was already closed");
}
if (_dataCache.valArray.getType().equals(Integer.class))
{
return new DefaultIntFacetIterator((TermIntList) _dataCache.valArray, _count, _countlength, false);
} else if (_dataCache.valArray.getType().equals(Long.class))
{
return new DefaultLongFacetIterator((TermLongList) _dataCache.valArray, _count, _countlength, false);
} else if (_dataCache.valArray.getType().equals(Short.class))
{
return new DefaultShortFacetIterator((TermShortList) _dataCache.valArray, _count, _countlength, false);
} else if (_dataCache.valArray.getType().equals(Float.class))
{
return new DefaultFloatFacetIterator((TermFloatList) _dataCache.valArray, _count, _countlength, false);
} else if (_dataCache.valArray.getType().equals(Double.class))
{
return new DefaultDoubleFacetIterator((TermDoubleList) _dataCache.valArray, _count, _countlength, false);
} else
return new DefaultFacetIterator(_dataCache.valArray, _count, _countlength, false);
}
}