// org.pageseeder.flint.lucene.facet.FlexibleFieldFacet (Maven / Gradle / Ivy)
/*
* Copyright 2015 Allette Systems (Australia)
* http://www.allette.com.au
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.pageseeder.flint.lucene.facet;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.*;
import org.pageseeder.flint.lucene.search.DocumentCounter;
import org.pageseeder.flint.lucene.search.Filter;
import org.pageseeder.flint.lucene.search.Terms;
import org.pageseeder.flint.lucene.util.Beta;
import org.pageseeder.flint.lucene.util.Bucket;
import org.pageseeder.flint.lucene.util.Bucket.Entry;
import org.pageseeder.xmlwriter.XMLWriter;
import java.io.IOException;
import java.util.List;
/**
* A facet implementation using a simple index field.
*
* @author Jean-Baptiste Reure
*
* @version 5.1.3
*/
@Beta
public abstract class FlexibleFieldFacet extends FlexibleFacet {

  /**
   * The default number of facet values if not specified.
   */
  public static final int DEFAULT_MAX_NUMBER_OF_VALUES = 10;

  /**
   * The maximum number of distinct terms the field may have for the facet to be computed
   * (no limit is enforced when this value is zero or negative).
   */
  private final int _maxTerms;

  /**
   * The facet values and their cardinality, as produced by the last computation
   * (<code>null</code> when nothing was collected).
   */
  private transient Bucket<String> bucket;

  /**
   * The total number of terms found in the search results,
   * or -1 when the last computation only checked for the existence of results (size 0).
   */
  private transient int totalTerms = -1;

  /**
   * If there are results containing the field used in this facet
   */
  private transient boolean hasResults = false;

  /**
   * Creates a new facet with the specified name;
   *
   * @param name     The name of the facet.
   * @param maxterms The maximum number of terms to return
   */
  protected FlexibleFieldFacet(String name, int maxterms) {
    super(name);
    this._maxTerms = maxterms;
  }

  /**
   * Computes each facet option as a flexible facet.
   *
   * <p>All filters but the ones using the same field as this facet are applied to the base query
   * before computing the numbers.
   *
   * <p>If the base query is <code>null</code>, this method delegates to
   * {@link #compute(IndexSearcher, int)} and the filters are ignored.
   *
   * <p>If <code>size</code> is 0, no values are collected: the computation only establishes whether
   * at least one result contains this facet's field (see {@link #hasResults()}).
   *
   * <p>If a maximum number of terms was specified (greater than 0) and the field has more terms
   * than that, the computation stops without collecting any value.
   *
   * @param searcher the index search to use.
   * @param base     the base query.
   * @param filters  the filters applied to the base query (ignored if the base query is null)
   * @param size     the maximum number of field values to compute.
   *
   * @throws IOException              if thrown by the searcher.
   * @throws IllegalArgumentException if <code>size</code> is negative.
   */
  public void compute(IndexSearcher searcher, Query base, List<Filter> filters, int size) throws IOException {
    // validate up-front so that the contract is the same with or without a base query
    if (size < 0) throw new IllegalArgumentException("size < 0");
    // If the base is null, simply calculate for each term
    if (base == null) {
      compute(searcher, size);
      return;
    }
    // reset
    this.totalTerms = size == 0 ? -1 : 0;
    this.hasResults = false;
    this.bucket = null;
    // re-compute the query without the corresponding filter (for flexible facets)
    Query filtered = base;
    if (filters != null) {
      this.flexible = true;
      for (Filter filter : filters) {
        if (!this._name.equals(filter.name())) {
          filtered = filter.filterQuery(filtered);
        }
      }
    }
    // existence check only: try a single wildcard query first, go through all terms if it fails
    if (size == 0) {
      try {
        BooleanQuery.Builder query = new BooleanQuery.Builder();
        query.add(filtered, Occur.MUST);
        query.add(new WildcardQuery(new Term(this._name, "*")), Occur.MUST);
        TopDocs td = searcher.search(query.build(), 1);
        this.hasResults = td.totalHits.value > 0;
        return;
      } catch (Exception ignored) {
        // deliberate best effort: any failure of the shortcut falls back to the term-by-term check below
      }
    }
    // find all terms
    List<Term> terms = Terms.terms(searcher.getIndexReader(), name());
    if (this._maxTerms > 0 && terms.size() > this._maxTerms) return;
    if (size == 0) {
      // we just want to know if there are results,
      // so load only one and stop when we get one
      for (Term t : terms) {
        TopDocs td = searcher.search(restrictToTerm(filtered, t), 1);
        if (td.totalHits.value > 0) {
          this.hasResults = true;
          return;
        }
      }
      return;
    }
    // count results for each term
    DocumentCounter counter = new DocumentCounter();
    Bucket<String> values = new Bucket<>(size);
    for (Term t : terms) {
      searcher.search(restrictToTerm(filtered, t), counter);
      int count = counter.getCount();
      values.add(t.text(), count);
      counter.reset();
      if (count > 0) {
        this.totalTerms++;
        this.hasResults = true;
      }
    }
    this.bucket = values;
  }

  /**
   * Builds the query matching the documents that match both the filtered query and the term.
   *
   * @param filtered the query to restrict.
   * @param t        the term that must also match (turned into a query by {@link #termToQuery(Term)}).
   *
   * @return the conjunction of both queries.
   */
  private Query restrictToTerm(Query filtered, Term t) {
    BooleanQuery.Builder query = new BooleanQuery.Builder();
    query.add(filtered, Occur.MUST);
    query.add(termToQuery(t), Occur.MUST);
    return query.build();
  }

  /**
   * Computes each facet option, without any filter.
   *
   * <p>Same as <code>compute(searcher, base, null, size);</code>.
   *
   * @see #compute(IndexSearcher, Query, List, int)
   *
   * @param searcher the index search to use.
   * @param base     the base query.
   * @param size     the maximum number of field values to compute.
   *
   * @throws IOException              if thrown by the searcher.
   * @throws IllegalArgumentException if <code>size</code> is negative.
   */
  public void compute(IndexSearcher searcher, Query base, int size) throws IOException {
    compute(searcher, base, null, size);
  }

  /**
   * Computes each facet option, without any filter.
   *
   * <p>Same as <code>compute(searcher, base, 10);</code>.
   *
   * <p>Defaults to 10.
   *
   * @see #compute(IndexSearcher, Query, int)
   *
   * @param searcher the index search to use.
   * @param base     the base query.
   *
   * @throws IOException if thrown by the searcher.
   */
  public void compute(IndexSearcher searcher, Query base) throws IOException {
    compute(searcher, base, null, DEFAULT_MAX_NUMBER_OF_VALUES);
  }

  /**
   * Computes each facet option as a flexible facet.
   *
   * <p>Same as <code>compute(searcher, base, filters, 10);</code>.
   *
   * <p>Defaults to 10.
   *
   * @see #compute(IndexSearcher, Query, List, int)
   *
   * @param searcher the index search to use.
   * @param base     the base query.
   * @param filters  the filters applied to the base query
   *
   * @throws IOException if thrown by the searcher.
   */
  public void compute(IndexSearcher searcher, Query base, List<Filter> filters) throws IOException {
    compute(searcher, base, filters, DEFAULT_MAX_NUMBER_OF_VALUES);
  }

  /**
   * Computes each facet option without a base query.
   *
   * <p>Same as <code>compute(searcher, 10);</code>.
   *
   * <p>Defaults to 10.
   *
   * @see #compute(IndexSearcher, int)
   *
   * @param searcher the index search to use.
   *
   * @throws IOException if thrown by the searcher.
   */
  public void compute(IndexSearcher searcher) throws IOException {
    compute(searcher, DEFAULT_MAX_NUMBER_OF_VALUES);
  }

  /**
   * Computes each facet option without a base query.
   *
   * <p>If <code>size</code> is 0, no values are collected: the facet has results as soon as
   * the index contains at least one term for this facet's field.
   *
   * <p>If a maximum number of terms was specified (greater than 0) and the field has more terms
   * than that, the computation stops without collecting any value.
   *
   * @param searcher the index search to use.
   * @param size     the number of facet values to calculate.
   *
   * @throws IOException              if thrown by the searcher.
   * @throws IllegalArgumentException if <code>size</code> is negative.
   */
  public void compute(IndexSearcher searcher, int size) throws IOException {
    // same contract as the computation with a base query
    if (size < 0) throw new IllegalArgumentException("size < 0");
    if (size == 0) {
      // reset
      this.totalTerms = -1;
      this.bucket = null;
      // check if there are terms
      this.hasResults = !Terms.terms(searcher.getIndexReader(), this._name).isEmpty();
      return;
    }
    // reset
    this.totalTerms = 0;
    this.hasResults = false;
    this.bucket = null;
    // find all terms
    List<Term> terms = Terms.terms(searcher.getIndexReader(), this._name);
    if (this._maxTerms > 0 && terms.size() > this._maxTerms) return;
    Bucket<String> values = new Bucket<>(size);
    DocumentCounter counter = new DocumentCounter();
    for (Term t : terms) {
      searcher.search(termToQuery(t), counter);
      values.add(t.text(), counter.getCount());
      counter.reset();
      // every term of the field is counted here, whatever its cardinality
      this.totalTerms++;
      this.hasResults = true;
    }
    // set bucket
    this.bucket = values;
  }

  /**
   * Create a query for the term given, using the numeric type if there is one.
   *
   * @param t the term
   *
   * @return the query
   */
  protected abstract Query termToQuery(Term t);

  /**
   * Writes a single facet value as XML; called by {@link #toXML(XMLWriter)} for each collected value.
   *
   * @param term        the text of the term.
   * @param cardinality the number of results counted for this term.
   * @param xml         the XML writer to write to.
   *
   * @throws IOException if thrown by the XML writer.
   */
  protected abstract void termToXML(String term, int cardinality, XMLWriter xml) throws IOException;

  /**
   * Writes this facet as a <code>facet</code> element with the attributes <code>name</code>,
   * <code>type</code>, <code>flexible</code> and <code>has-results</code>.
   *
   * <p>When the terms were counted (total is not -1), <code>has-results</code> is derived from the
   * total and a <code>total-terms</code> attribute is added; the collected values, if any, are then
   * written using {@link #termToXML(String, int, XMLWriter)}.
   *
   * @param xml the XML writer to write to.
   *
   * @throws IOException if thrown by the XML writer.
   */
  @Override
  public void toXML(XMLWriter xml) throws IOException {
    xml.openElement("facet", true);
    xml.attribute("name", this._name);
    xml.attribute("type", getType());
    xml.attribute("flexible", String.valueOf(this.flexible));
    if (this.totalTerms == -1) {
      // existence check only: no term was counted
      xml.attribute("has-results", this.hasResults ? "true" : "false");
    } else {
      xml.attribute("has-results", this.totalTerms > 0 ? "true" : "false");
      xml.attribute("total-terms", this.totalTerms);
    }
    if (this.bucket != null) {
      for (Entry<String> e : this.bucket.entrySet()) {
        termToXML(e.item(), e.count(), xml);
      }
    }
    xml.closeElement();
  }

  /**
   * @return the values collected by the last computation; <code>null</code> if none were collected
   *         (not computed yet, size was 0, or the field had more terms than the maximum allowed).
   */
  public Bucket<String> getValues() {
    return this.bucket;
  }

  /**
   * @return the number of terms counted by the last computation,
   *         or -1 if it only checked for the existence of results.
   */
  public int getTotalTerms() {
    return this.totalTerms;
  }

  /**
   * @return <code>true</code> if the last computation found results containing this facet's field.
   */
  public boolean hasResults() {
    return this.hasResults;
  }

}