org.apache.solr.handler.component.FacetComponent Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.component;
import java.io.IOException;
import java.net.URL;
import java.util.*;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.FacetParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.SolrException;
import org.apache.solr.request.SimpleFacets;
import org.apache.lucene.util.OpenBitSet;
import org.apache.solr.search.QueryParsing;
import org.apache.solr.schema.FieldType;
import org.apache.lucene.queryParser.ParseException;
/**
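* Search component that computes facet counts for a request, both for a
* single-node request and for a distributed (sharded) request.
* TODO: expand this description.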
*
* @version $Id: FacetComponent.java 781801 2009-06-04 17:28:56Z yonik $
* @since solr 1.3
*/
public class FacetComponent extends SearchComponent
{
/** Identifier string of this component ("facet"). */
public static final String COMPONENT_NAME = "facet";
/**
 * Switches faceting on for this request when the {@code facet} parameter is true:
 * asks the builder to produce a doc set and raises its {@code doFacets} flag.
 * Leaves the builder untouched otherwise.
 *
 * @param rb the response builder for the current request
 */
@Override
public void prepare(ResponseBuilder rb) throws IOException
{
  final boolean facetingRequested = rb.req.getParams().getBool(FacetParams.FACET, false);
  if (!facetingRequested) {
    return;
  }
  rb.setNeedDocSet(true);
  rb.doFacets = true;
}
/**
 * Computes the facet counts over the doc set of the current results and adds them
 * to the response under the name {@code "facet_counts"}. Does nothing when
 * {@code rb.doFacets} is not set.
 *
 * @param rb the response builder for the current request
 */
@Override
public void process(ResponseBuilder rb) throws IOException
{
  if (!rb.doFacets) {
    return;
  }
  final SolrParams requestParams = rb.req.getParams();
  final SimpleFacets facetCounter =
      new SimpleFacets(rb.req, rb.getResults().docSet, requestParams, rb);
  // TODO ???? add this directly to the response, or to the builder?
  rb.rsp.add("facet_counts", facetCounter.getFacetCounts());
}
// Opening of a local-params facet command of the form "{!terms=$"; the refinement code
// appends the name of the request parameter that carries the comma-joined term list.
private static final String commandPrefix = "{!" + CommonParams.TERMS + "=$";
/**
 * Distributed-mode hook for faceting. Returns {@code ResponseBuilder.STAGE_DONE}
 * straight away when faceting is not enabled. At {@code STAGE_GET_FIELDS} it builds,
 * per shard, the facet refinement commands for every field facet that needs them and
 * attaches them to an outgoing single-shard GET_FIELDS request for that shard if one
 * exists, otherwise to a newly created request that asks for zero rows.
 *
 * NOTE(review): the text of this span is truncated in at least two places (the loop
 * headers starting "for (int shardNum=0; shardNum" and "for (int i=0; i" are cut off).
 * The end of this method and the beginning of the code that follows it are therefore
 * not visible here; the lines from the second cut onward appear to belong to a
 * different method that adjusts outgoing shard requests - confirm against the
 * version-controlled original before changing anything.
 */
@Override
public int distributedProcess(ResponseBuilder rb) throws IOException {
if (!rb.doFacets) {
return ResponseBuilder.STAGE_DONE;
}
if (rb.stage == ResponseBuilder.STAGE_GET_FIELDS) {
// overlap facet refinement requests (those shards that we need a count for
// particular facet values from), where possible, with
// the requests to get fields (because we know that is the
// only other required phase).
// We do this in distributedProcess so we can look at all of the
// requests in the outgoing queue at once.
// NOTE(review): the loop header on the next line is cut off after "shardNum"; the
// declaration of "refinements" has been fused onto it.
for (int shardNum=0; shardNum refinements = null;
for (DistribFieldFacet dff : rb._facetInfo.facets.values()) {
if (!dff.needRefinements) continue;
List refList = dff._toRefine[shardNum];
if (refList == null || refList.size()==0) continue;
String key = dff.getKey(); // reuse the same key that was used for the main facet
String termsKey = key + "__terms";
String termsVal = StrUtils.join(refList, ',');
String facetCommand;
// add terms into the original facet.field command
// do it via parameter reference to avoid another layer of encoding.
if (dff.localParams != null) {
facetCommand = commandPrefix+termsKey + " " + dff.facetStr.substring(2);
} else {
facetCommand = commandPrefix+termsKey+'}'+dff.field;
}
if (refinements == null) {
refinements = new ArrayList();
}
// each refinement is stored as three consecutive entries: command, terms key, terms value
refinements.add(facetCommand);
refinements.add(termsKey);
refinements.add(termsVal);
}
if (refinements == null) continue;
String shard = rb.shards[shardNum];
ShardRequest refine = null;
boolean newRequest = false;
// try to find a request that is already going out to that shard.
// If nshards becomes too great, we may want to move to hashing for better
// scalability.
for (ShardRequest sreq : rb.outgoing) {
if ((sreq.purpose & ShardRequest.PURPOSE_GET_FIELDS)!=0
&& sreq.shards != null
&& sreq.shards.length==1
&& sreq.shards[0].equals(shard))
{
refine = sreq;
break;
}
}
if (refine == null) {
// we didn't find any other suitable requests going out to that shard, so
// create one ourselves.
newRequest = true;
refine = new ShardRequest();
refine.shards = new String[]{rb.shards[shardNum]};
refine.params = new ModifiableSolrParams(rb.req.getParams());
// don't request any documents
refine.params.remove(CommonParams.START);
refine.params.set(CommonParams.ROWS,"0");
}
refine.purpose |= ShardRequest.PURPOSE_REFINE_FACETS;
refine.params.set(FacetParams.FACET, "true");
refine.params.remove(FacetParams.FACET_FIELD);
refine.params.remove(FacetParams.FACET_QUERY);
// NOTE(review): the loop header on the next line is cut off after "i"; what follows
// is the tail of a condition from code that is not visible in this span.
for (int i=0; i 0) {
// set the initial limit higher to increase accuracy
dff.initialLimit = dff.offset + dff.limit;
dff.initialLimit = (int)(dff.initialLimit * 1.5) + 10;
} else {
dff.initialLimit = dff.limit;
}
// TEST: Uncomment the following line when testing to suppress over-requesting facets and
// thus cause more facet refinement queries.
// if (dff.limit > 0) dff.initialLimit = dff.offset + dff.limit;
sreq.params.set(paramStart + FacetParams.FACET_LIMIT, dff.initialLimit);
}
} else {
// turn off faceting on other requests
sreq.params.set(FacetParams.FACET, "false");
// we could optionally remove faceting params
}
}
/**
 * Routes the responses of a finished shard request to the matching merge step:
 * requests flagged {@code PURPOSE_GET_FACETS} go to {@code countFacets}; otherwise
 * requests flagged {@code PURPOSE_REFINE_FACETS} go to {@code refineFacets}.
 * Nothing happens when faceting is not enabled on the builder.
 *
 * @param rb   the response builder for the current request
 * @param sreq the shard request whose responses have arrived
 */
@Override
public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
  if (rb.doFacets) {
    final boolean carriesCounts = (sreq.purpose & ShardRequest.PURPOSE_GET_FACETS) != 0;
    if (carriesCounts) {
      countFacets(rb, sreq);
      return;
    }
    final boolean carriesRefinements = (sreq.purpose & ShardRequest.PURPOSE_REFINE_FACETS) != 0;
    if (carriesRefinements) {
      refineFacets(rb, sreq);
    }
  }
}
// Merges the first-phase facet responses of a shard request. For every shard response it
// reads the "facet_counts" section and its "facet_queries" entry; the visible tail of the
// method records, per shard, the terms whose counts must be re-requested by appending the
// term name to dff._toRefine[shardNum] and setting dff.needRefinements.
//
// NOTE(review): this span is truncated in three places (the loop headers starting
// "for (int i=0; i", "for (int shardNum=0; shardNum" twice). The merging of field facets
// and the exact refinement condition are not visible here - confirm against the
// version-controlled original.
private void countFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp: sreq.responses) {
int shardNum = rb.getShardNum(srsp.getShard());
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
// handle facet queries
NamedList facet_queries = (NamedList)facet_counts.get("facet_queries");
if (facet_queries != null) {
// NOTE(review): the loop header on the next line is cut off after "i"; the text after
// it is the tail of a comment from code that is not visible in this span.
for (int i=0; i= smallestCount, then flag for refinement
long maxCount = sfc.count;
// NOTE(review): the loop header on the next line is cut off after "shardNum".
for (int shardNum=0; shardNum= smallestCount) {
// TODO: on a tie, we could check the term values
needRefinement = true;
}
}
if (needRefinement) {
// add a query for each shard missing the term that needs refinement
// NOTE(review): the loop header on the next line is cut off after "shardNum".
for (int shardNum=0; shardNum0) {
dff.needRefinements = true;
List lst = dff._toRefine[shardNum];
if (lst == null) {
// lazily create the per-shard refinement list
lst = dff._toRefine[shardNum] = new ArrayList();
}
lst.add(sfc.name);
}
}
}
}
}
}
// Handles the responses of a facet refinement request: for every shard response it reads
// the "facet_counts" section and, from it, the "facet_fields" entry.
//
// NOTE(review): this span is truncated - the loop header starting "for (int i=0; i" is
// cut off and fused with the tail of a comment that belongs to the code after this
// method. The loop body and the end of the method are not visible here.
private void refineFacets(ResponseBuilder rb, ShardRequest sreq) {
FacetInfo fi = rb._facetInfo;
for (ShardResponse srsp: sreq.responses) {
// int shardNum = rb.getShardNum(srsp.shard);
NamedList facet_counts = (NamedList)srsp.getSolrResponse().getResponse().get("facet_counts");
NamedList facet_fields = (NamedList)facet_counts.get("facet_fields");
for (int i=0; i tags for smaller facet counts (better back compatibility)
/**
 * Boxes a count as an {@code Integer} when it can be represented as an int without
 * loss, and as a {@code Long} otherwise.
 *
 * The upper bound is kept exclusive so that every non-negative input yields the same
 * result as before. The lower bound is new: without it a value below
 * {@code Integer.MIN_VALUE} passed the check and was silently truncated by the cast.
 *
 * @param val the count to box
 * @return an {@code Integer} for values in {@code [Integer.MIN_VALUE, Integer.MAX_VALUE)},
 *         otherwise a {@code Long}
 */
private Number num(long val) {
  // Deliberately an if/else and not a conditional expression: "cond ? (int) val : val"
  // would promote both operands to long and always box a Long.
  if (val >= Integer.MIN_VALUE && val < Integer.MAX_VALUE) {
    return (int) val;
  } else {
    return val;
  }
}
/**
 * Converts a boxed count to an {@code Integer} when it can be represented as an int
 * without loss; otherwise returns the given {@code Long} instance unchanged.
 *
 * The upper bound is kept exclusive so that every non-negative input yields the same
 * result as before. The lower bound is new: without it a value below
 * {@code Integer.MIN_VALUE} passed the check and was silently truncated by
 * {@code intValue()}.
 *
 * @param val the count to convert; must not be null
 * @return an {@code Integer} for values in {@code [Integer.MIN_VALUE, Integer.MAX_VALUE)},
 *         otherwise {@code val} itself
 */
private Number num(Long val) {
  final long raw = val.longValue();
  if (raw >= Integer.MIN_VALUE && raw < Integer.MAX_VALUE) {
    return val.intValue();
  } else {
    return val;
  }
}
/////////////////////////////////////////////
/// SolrInfoMBean
////////////////////////////////////////////
/**
 * @return a short human-readable description of this component
 */
@Override
public String getDescription()
{
  return "Handle Faceting";
}
/**
 * @return the version-control revision keyword string recorded for this source file
 */
@Override
public String getVersion()
{
  return "$Revision: 781801 $";
}
/**
 * @return the version-control id keyword string recorded for this source file
 */
@Override
public String getSourceId()
{
  return "$Id: FacetComponent.java 781801 2009-06-04 17:28:56Z yonik $";
}
/**
 * @return the version-control URL keyword string recorded for this source file
 */
@Override
public String getSource()
{
  return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/src/java/org/apache/solr/handler/component/FacetComponent.java $";
}
/**
 * @return always {@code null}; this component does not supply documentation URLs
 */
@Override
public URL[] getDocs()
{
  return null;
}
/**
 * This API is experimental and subject to change
 *
 * Holds the facet.query and facet.field requests parsed from the request parameters.
 * Both maps are keyed by the facet's response key ({@link FacetBase#getKey()}) and
 * keep the order in which the parameters were given.
 */
public static class FacetInfo {
  /** Parsed facet.query requests, by response key. Null until {@code parse} has run. */
  public LinkedHashMap<String,QueryFacet> queryFacets;
  /** Parsed facet.field requests, by response key. Null until {@code parse} has run. */
  public LinkedHashMap<String,DistribFieldFacet> facets;

  /**
   * Replaces both maps with fresh ones built from the {@code facet.query} and
   * {@code facet.field} values in {@code params}. A later facet with the same key
   * as an earlier one replaces it.
   *
   * @param params the parameters to read the facet requests from
   * @param rb     the response builder handed to each created facet object
   */
  void parse(SolrParams params, ResponseBuilder rb) {
    // Type arguments are spelled out (no diamond) to stay compatible with the
    // language level the rest of this file is written for.
    queryFacets = new LinkedHashMap<String,QueryFacet>();
    facets = new LinkedHashMap<String,DistribFieldFacet>();

    String[] facetQs = params.getParams(FacetParams.FACET_QUERY);
    if (facetQs != null) {
      for (String query : facetQs) {
        QueryFacet queryFacet = new QueryFacet(rb, query);
        queryFacets.put(queryFacet.getKey(), queryFacet);
      }
    }

    String[] facetFs = params.getParams(FacetParams.FACET_FIELD);
    if (facetFs != null) {
      for (String field : facetFs) {
        DistribFieldFacet ff = new DistribFieldFacet(rb, field);
        facets.put(ff.getKey(), ff);
      }
    }
  }
}
/**
 * This API is experimental and subject to change
 *
 * Common state of a single facet request: its type, the raw parameter value, the
 * thing being faceted on, any local params, and the key it is reported under.
 */
public static class FacetBase {
  String facetType; // facet.field, facet.query, etc (make enum?)
  String facetStr; // original parameter value of facetStr
  String facetOn; // the field or query, absent localParams if appropriate
  private String key; // label in the response for the result... "foo" for {!key=foo}myfield
  SolrParams localParams; // any local params for the facet

  /**
   * Parses the local params of {@code facetStr} and derives {@code facetOn} and the
   * response key from them.
   *
   * @param rb        the response builder; its request parameters are passed to the
   *                  local-params parser
   * @param facetType the facet parameter name this request came from
   * @param facetStr  the raw parameter value
   * @throws SolrException with {@code BAD_REQUEST} when the local params cannot be parsed
   */
  public FacetBase(ResponseBuilder rb, String facetType, String facetStr) {
    this.facetType = facetType;
    this.facetStr = facetStr;

    final SolrParams parsedLocalParams;
    try {
      parsedLocalParams = QueryParsing.getLocalParams(facetStr, rb.req.getParams());
    } catch (ParseException e) {
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
    }
    this.localParams = parsedLocalParams;

    // Without local params both the target and the label are the raw string.
    String target = facetStr;
    String label = facetStr;
    if (parsedLocalParams != null) {
      final boolean isQueryFacet = facetType.equals(FacetParams.FACET_QUERY);
      if (!isQueryFacet) {
        // remove local params unless it's a query
        target = parsedLocalParams.get(CommonParams.VALUE);
        label = target;
      }
      // an explicit output key always wins
      label = parsedLocalParams.get(CommonParams.OUTPUT_KEY, label);
    }
    this.facetOn = target;
    this.key = label;
  }

  /** returns the key in the response that this facet will be under */
  public String getKey() {
    return key;
  }

  /** returns the facet type this object was created with */
  public String getType() {
    return facetType;
  }
}
/**
 * This API is experimental and subject to change
 *
 * A single facet.query request together with its count.
 */
public static class QueryFacet extends FacetBase {
  /** The count recorded for this query. */
  public long count;

  /**
   * @param rb       the response builder for the current request
   * @param facetStr the raw facet.query parameter value
   */
  public QueryFacet(ResponseBuilder rb, String facetStr)
  {
    super(rb, FacetParams.FACET_QUERY, facetStr);
  }
}
/**
 * This API is experimental and subject to change
 *
 * A single facet.field request together with the per-field facet parameters
 * (offset, limit, mincount, sort, missing, prefix) resolved for that field.
 */
public static class FieldFacet extends FacetBase {
  public String field; // the field to facet on... "myfield" for {!key=foo}myfield
  public FieldType ftype;
  public int offset;
  public int limit;
  public int minCount;
  public String sort;
  public boolean missing;
  public String prefix;
  public long missingCount;

  /**
   * @param rb       the response builder; its request supplies parameters and schema
   * @param facetStr the raw facet.field parameter value
   */
  public FieldFacet(ResponseBuilder rb, String facetStr)
  {
    super(rb, FacetParams.FACET_FIELD, facetStr);
    fillParams(rb, rb.req.getParams(), facetOn);
  }

  /**
   * Resolves the per-field facet parameters for {@code field} and stores them in
   * this object's fields.
   */
  private void fillParams(ResponseBuilder rb, SolrParams params, String field)
  {
    this.field = field;
    this.ftype = rb.req.getSchema().getFieldTypeNoEx(this.field);
    this.offset = params.getFieldInt(field, FacetParams.FACET_OFFSET, 0);
    this.limit = params.getFieldInt(field, FacetParams.FACET_LIMIT, 100);

    final Integer explicitMinCount = params.getFieldInt(field, FacetParams.FACET_MINCOUNT);
    if (explicitMinCount != null) {
      this.minCount = explicitMinCount;
    } else {
      // No mincount given: fall back to the zeros flag. Zero counts are included
      // unless the flag is explicitly false.
      final Boolean zeros = params.getFieldBool(field, FacetParams.FACET_ZEROS);
      final boolean zerosExplicitlyOff = zeros != null && !zeros;
      this.minCount = zerosExplicitlyOff ? 1 : 0;
    }

    this.missing = params.getFieldBool(field, FacetParams.FACET_MISSING, false);

    // default to sorting by count if there is a limit.
    final String defaultSort =
        limit > 0 ? FacetParams.FACET_SORT_COUNT : FacetParams.FACET_SORT_INDEX;
    final String requestedSort = params.getFieldParam(field, FacetParams.FACET_SORT, defaultSort);
    // map the legacy spellings onto the current sort names
    if (requestedSort.equals(FacetParams.FACET_SORT_COUNT_LEGACY)) {
      this.sort = FacetParams.FACET_SORT_COUNT;
    } else if (requestedSort.equals(FacetParams.FACET_SORT_INDEX_LEGACY)) {
      this.sort = FacetParams.FACET_SORT_INDEX;
    } else {
      this.sort = requestedSort;
    }

    this.prefix = params.getFieldParam(field, FacetParams.FACET_PREFIX);
  }
}
/**
 * This API is experimental and subject to change
 *
 * Field facet state for a distributed request: per-shard refinement lists, per-shard
 * upper bounds for terms a shard did not report, per-shard bitsets of seen terms, and
 * the accumulated term counts.
 *
 * NOTE(review): the text of this class is truncated inside add() - the loop header
 * starting "for (int i=0; i" is cut off and fused with the comparator of a sort
 * accessor whose own header is not visible. Confirm against the version-controlled
 * original before changing anything.
 */
public static class DistribFieldFacet extends FieldFacet {
public List[] _toRefine; // a List of refinements needed, one for each shard.
// SchemaField sf; // currently unneeded
// the max possible count for a term appearing on no list
public long missingMaxPossible;
// the max possible count for a missing term for each shard (indexed by shardNum)
public long[] missingMax;
public OpenBitSet[] counted; // a bitset for each shard, keeping track of which terms seen
// accumulated counts; the values are ShardFacetCount objects (see getCountSorted)
public HashMap counts = new HashMap(128);
public int termNum;
public int initialLimit; // how many terms requested in first phase
public boolean needRefinements;
// the array produced by the most recent sort accessor call
public ShardFacetCount[] countSorted;
// Sizes the per-shard arrays missingMax and counted from the number of shards in rb.
DistribFieldFacet(ResponseBuilder rb, String facetStr) {
super(rb, facetStr);
// sf = rb.req.getSchema().getField(field);
missingMax = new long[rb.shards.length];
counted = new OpenBitSet[rb.shards.length];
}
// NOTE(review): only the first lines of add() are visible; its loop header below is cut
// off and the remainder up to the closing brace is the tail of a different method that
// sorts by the indexed form of the term, stores the array in countSorted and returns it.
void add(int shardNum, NamedList shardCounts, int numRequested) {
int sz = shardCounts.size();
int numReceived = sz;
OpenBitSet terms = new OpenBitSet(termNum+sz);
long last = 0;
for (int i=0; i() {
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
return o1.indexed.compareTo(o2.indexed);
}
});
countSorted = arr;
return arr;
}
// Returns all accumulated counts as an array sorted by descending count, with ties
// ordered by the indexed form of the term; the array is also stored in countSorted.
public ShardFacetCount[] getCountSorted() {
ShardFacetCount[] arr = counts.values().toArray(new ShardFacetCount[counts.size()]);
Arrays.sort(arr, new Comparator() {
public int compare(ShardFacetCount o1, ShardFacetCount o2) {
if (o2.count < o1.count) return -1;
else if (o1.count < o2.count) return 1;
return o1.indexed.compareTo(o2.indexed);
}
});
countSorted = arr;
return arr;
}
// returns the max possible value this ShardFacetCount could have for this shard
// (assumes the shard did not report a count for this value)
// The sfc argument is currently not consulted; only the per-shard bound is returned.
long maxPossible(ShardFacetCount sfc, int shardNum) {
return missingMax[shardNum];
// TODO: could store the last term in the shard to tell if this term
// comes before or after it. If it comes before, we could subtract 1
}
}
/**
 * This API is experimental and subject to change
 *
 * Mutable holder for one facet term: its name, the indexed form used for ordering,
 * its count, and its term number.
 */
public static class ShardFacetCount {
  public String name;
  public String indexed; // the indexed form of the name... used for comparisons.
  public long count;
  public int termNum; // term number starting at 0 (used in bit arrays)

  /**
   * @return a debug representation of the form {@code {term=NAME,termNum=N,count=C}}
   */
  @Override
  public String toString() {
    return "{term=" + name + ",termNum=" + termNum + ",count=" + count + "}";
  }
}
}