com.jaeksoft.searchlib.facet.Facet Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you will be able to quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2008-2015 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.facet;
import it.unimi.dsi.fastutil.Arrays;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermFreqVector;
import org.roaringbitmap.RoaringBitmap;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.facet.FacetCounter.FacetSorter;
import com.jaeksoft.searchlib.index.FieldCacheIndex;
import com.jaeksoft.searchlib.index.ReaderAbstract;
import com.jaeksoft.searchlib.result.collector.DocIdInterface;
import com.jaeksoft.searchlib.schema.SchemaField;
import com.jaeksoft.searchlib.schema.TermVector;
import com.jaeksoft.searchlib.util.Timer;
public class Facet implements Iterable> {
protected FacetField facetField;
private Map facetMap;
protected transient List> list = null;
public Facet() {
list = null;
facetMap = new HashMap();
}
public Facet(FacetField facetField) {
this();
this.facetField = facetField;
}
private Facet(FacetField facetField, String[] terms, int[] counts) {
this(facetField);
int i = 0;
int minCount = facetField.getMinCount();
for (int count : counts) {
String term = terms[i];
if (term != null && count >= minCount)
facetMap.put(term, new FacetCounter(count));
i++;
}
}
private Facet(FacetField facetField, Map facetMap) {
this(facetField);
int minCount = facetField.getMinCount();
for (Map.Entry entry : facetMap.entrySet()) {
String term = entry.getKey();
FacetCounter counter = entry.getValue();
if (term != null && counter.count >= minCount)
this.facetMap.put(term, counter);
}
}
public FacetField getFacetField() {
return this.facetField;
}
protected void sum(Facet facet) {
if (facet == null)
return;
for (Map.Entry entry : facet) {
String term = entry.getKey();
if (term == null)
continue;
FacetCounter value = entry.getValue();
FacetCounter count = facetMap.get(term);
if (count == null)
facetMap.put(term, new FacetCounter(value));
else
count.add(value);
}
}
public List> getList() {
synchronized (this) {
if (list != null)
return list;
list = new ArrayList>(
facetMap.entrySet());
list = limitOrderBy(facetField, list);
return list;
}
}
private Map.Entry get(int i) {
return getList().get(i);
}
@Override
public Iterator> iterator() {
return getList().iterator();
}
public int getTermCount() {
return getList().size();
}
public String getTerm(int i) {
return get(i).getKey();
}
public long getCount(int i) {
return get(i).getValue().count;
}
final static protected Facet facetMultivalued(ReaderAbstract reader,
SchemaField schemaField, DocIdInterface docIdInterface,
FacetField facetField, Timer timer) throws IOException,
SearchLibException {
String fieldName = facetField.getName();
if (schemaField.getTermVector() == TermVector.NO) {
FieldCacheIndex stringIndex = reader.getStringIndex(fieldName);
int[] countIndex = computeMultivaluedTD(reader, fieldName,
stringIndex, docIdInterface);
return new Facet(facetField, stringIndex.lookup, countIndex);
} else {
Map facetMap = computeMultivaluedTFV(reader,
fieldName, docIdInterface);
return new Facet(facetField, facetMap);
}
}
final static protected Facet facetSingleValue(ReaderAbstract reader,
DocIdInterface collector, FacetField facetField, Timer timer)
throws IOException {
String fieldName = facetField.getName();
FieldCacheIndex stringIndex = reader.getStringIndex(fieldName);
int[] countIndex = computeSinglevalued(stringIndex, collector);
return new Facet(facetField, stringIndex.lookup, countIndex);
}
final private static int[] computeMultivaluedTD(ReaderAbstract reader,
String fieldName, FieldCacheIndex stringIndex,
DocIdInterface docIdInterface) throws IOException,
SearchLibException {
int[] countIndex = new int[stringIndex.lookup.length];
int indexPos = 0;
if (docIdInterface.getSize() == 0)
return countIndex;
int[] docs = new int[100];
int[] freqs = new int[100];
RoaringBitmap bitset = docIdInterface.getBitSet();
Term oTerm = new Term(fieldName);
for (String term : stringIndex.lookup) {
if (term != null) {
Term t = oTerm.createTerm(term);
TermDocs termDocs = reader.getTermDocs(t);
int l;
while ((l = termDocs.read(docs, freqs)) > 0)
for (int i = 0; i < l; i++)
if (freqs[i] > 0)
if (bitset.contains(docs[i]))
countIndex[indexPos]++;
termDocs.close();
}
indexPos++;
}
return countIndex;
}
final private static Map computeMultivaluedTFV(
ReaderAbstract reader, String fieldName,
DocIdInterface docIdInterface) throws IOException,
SearchLibException {
Map termMap = new HashMap();
if (docIdInterface.getSize() == 0)
return termMap;
for (int docId : docIdInterface.getIds()) {
TermFreqVector tfv = reader.getTermFreqVector(docId, fieldName);
if (tfv == null)
continue;
String[] terms = tfv.getTerms();
int[] freqs = tfv.getTermFrequencies();
if (terms == null || freqs == null)
continue;
int i = 0;
for (String term : terms) {
if (freqs[i++] > 0) {
FacetCounter facetItem = termMap.get(term);
if (facetItem == null)
termMap.put(term, new FacetCounter(1));
else
facetItem.increment();
}
}
}
return termMap;
}
final private static int[] computeSinglevalued(FieldCacheIndex stringIndex,
DocIdInterface collector) throws IOException {
int[] countArray = new int[stringIndex.lookup.length];
int[] order = stringIndex.order;
int i = collector.getSize();
for (int id : collector.getIds()) {
if (i == 0)
break;
countArray[order[id]]++;
i--;
}
return countArray;
}
final private static List> limitOrderBy(
FacetField facetField, List> list) {
FacetSorter facetSorter = FacetSorter.getSorter(list,
facetField.getOrderBy());
if (facetSorter == null)
return list;
Arrays.quickSort(0, list.size(), facetSorter, facetSorter);
Integer limit = facetField.getLimit();
if (limit == null)
return list;
if (list.size() <= limit)
return list;
return list.subList(0, limit);
}
}