com.browseengine.bobo.facets.data.MultiValueWithWeightFacetDataCache Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of bobo-browse Show documentation
Bobo is a Faceted Search implementation written purely in Java, an extension of Apache Lucene
The newest version!
package com.browseengine.bobo.facets.data;
import it.unimi.dsi.fastutil.ints.IntArrayList;
import java.io.IOException;
import org.apache.lucene.index.AtomicReader;
import org.apache.lucene.index.DocsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.OpenBitSet;
import com.browseengine.bobo.api.BoboSegmentReader.WorkArea;
import com.browseengine.bobo.util.BigNestedIntArray;
import com.browseengine.bobo.util.BigNestedIntArray.BufferedLoader;
public class MultiValueWithWeightFacetDataCache extends MultiValueFacetDataCache {
  private static final long serialVersionUID = 1L;

  /** Per-document weight lists, parallel to the value ids stored in {@code _nestedArray}. */
  public final BigNestedIntArray _weightArray;

  public MultiValueWithWeightFacetDataCache() {
    super();
    _weightArray = new BigNestedIntArray();
  }

  /**
   * Loads multi-value facet data with per-value weights. Indexed terms are expected to be encoded
   * as {@code value + '\u0000' + weight}; terms without the separator are skipped entirely. Several
   * terms (one per distinct weight) may map to the same value, so a new value slot is opened only
   * when the value part changes. This method uses a workarea to prepare loading.
   *
   * @param fieldName the facet field to load
   * @param reader segment reader supplying terms and postings
   * @param listFactory factory for the term value list; a {@link TermStringList} is used when null
   * @param workArea scratch buffers reused across loads
   * @throws IOException on index access failure
   */
  @Override
  public void load(String fieldName, AtomicReader reader, TermListFactory listFactory,
      WorkArea workArea) throws IOException {
    String field = fieldName.intern();
    int maxdoc = reader.maxDoc();
    BufferedLoader loader = getBufferedLoader(maxdoc, workArea);
    BufferedLoader weightLoader = getBufferedLoader(maxdoc, null);

    @SuppressWarnings("unchecked")
    TermValueList list = (listFactory == null ? (TermValueList) new TermStringList()
        : listFactory.createTermList());
    IntArrayList minIDList = new IntArrayList();
    IntArrayList maxIDList = new IntArrayList();
    IntArrayList freqList = new IntArrayList();
    OpenBitSet bitset = new OpenBitSet(maxdoc + 1);

    int negativeValueCount = getNegativeValueCount(reader, field);
    int t = 1; // valid term id starts from 1; slot 0 is reserved for "document has no value"
    list.add(null);
    minIDList.add(-1);
    maxIDList.add(-1);
    freqList.add(0);

    _overflow = false;

    String pre = null;
    int df = 0;
    int minID = -1;
    int maxID = -1;
    int docID = -1;
    int valId = 0;

    Terms terms = reader.terms(field);
    if (terms != null) {
      TermsEnum termsEnum = terms.iterator(null);
      BytesRef text;
      while ((text = termsEnum.next()) != null) {
        String strText = text.utf8ToString();
        String val = null;
        int weight = 0;
        // Terms are encoded as "<value>\u0000<weight>"; a term without the
        // separator carries no weight and is ignored.
        String[] split = strText.split("\u0000");
        if (split.length > 1) {
          val = split[0];
          // NOTE(review): throws NumberFormatException if the suffix is not an
          // integer — presumably guaranteed by the indexing side; confirm.
          weight = Integer.parseInt(split[split.length - 1]);
        } else {
          continue;
        }

        // New value encountered: flush the stats of the previous value and
        // open a fresh slot.
        if (pre == null || !val.equals(pre)) {
          if (pre != null) {
            freqList.add(df);
            minIDList.add(minID);
            maxIDList.add(maxID);
          }

          list.add(val);

          df = 0;
          minID = -1;
          maxID = -1;
          valId = (t - 1 < negativeValueCount) ? (negativeValueCount - t + 1) : t;
          t++;
        }

        Term term = new Term(field, strText);
        DocsEnum docsEnum = reader.termDocsEnum(term);
        if (docsEnum != null) {
          while ((docID = docsEnum.nextDoc()) != DocsEnum.NO_MORE_DOCS) {
            df++;
            if (!loader.add(docID, valId)) {
              logOverflow(fieldName);
            } else {
              weightLoader.add(docID, weight);
            }
            // BUGFIX: minID is (re)set to -1 per value, and doc IDs are
            // non-negative, so "docID < minID" alone could never be true and
            // every value's minID stayed -1. Accept the first doc ID as well.
            if (minID == -1 || docID < minID) {
              minID = docID;
            }
            bitset.fastSet(docID);
            while (docsEnum.nextDoc() != DocsEnum.NO_MORE_DOCS) {
              docID = docsEnum.docID();
              df++;
              if (!loader.add(docID, valId)) {
                logOverflow(fieldName);
              } else {
                weightLoader.add(docID, weight);
              }
              bitset.fastSet(docID);
            }
            // Postings are doc-ordered, so the last doc seen is the largest
            // for this term; still compare because the same value spans terms.
            if (docID > maxID) maxID = docID;
          }
        }
        pre = val;
      }
      // Flush the stats of the final value.
      if (pre != null) {
        freqList.add(df);
        minIDList.add(minID);
        maxIDList.add(maxID);
      }
    }

    list.seal();

    try {
      _nestedArray.load(maxdoc + 1, loader);
      _weightArray.load(maxdoc + 1, weightLoader);
    } catch (IOException e) {
      // Re-thrown explicitly so index I/O failures are not wrapped by the
      // broad catch below.
      throw e;
    } catch (Exception e) {
      throw new RuntimeException("failed to load due to " + e.toString(), e);
    }

    this.valArray = list;
    this.freqs = freqList.toIntArray();
    this.minIDs = minIDList.toIntArray();
    this.maxIDs = maxIDList.toIntArray();

    // Slot 0 represents documents with no value in this field: scan from both
    // ends for the first/last doc without a real value.
    int doc = 0;
    while (doc < maxdoc && !_nestedArray.contains(doc, 0, true)) {
      ++doc;
    }
    if (doc < maxdoc) {
      this.minIDs[0] = doc;
      doc = maxdoc - 1;
      while (doc >= 0 && !_nestedArray.contains(doc, 0, true)) {
        --doc;
      }
      this.maxIDs[0] = doc;
    }
    // bitset holds every doc that got at least one value; the rest are "no value".
    this.freqs[0] = maxdoc - (int) bitset.cardinality();
  }
}