// org.apache.solr.handler.component.TermsComponent (Maven / Gradle / Ivy artifact listing)
// The newest version!
package org.apache.solr.handler.component;
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
import org.apache.solr.request.SimpleFacets.CountPair;
import org.apache.solr.util.BoundedTreeSet;
import java.io.IOException;
/**
 * Returns TermEnum information, useful for things like auto suggest.
 *
 * @see org.apache.solr.common.params.TermsParams
 * @see org.apache.lucene.index.TermEnum
 */
public class TermsComponent extends SearchComponent {
public static final int UNLIMITED_MAX_COUNT = -1;
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (params.getBool(TermsParams.TERMS, false)) {
String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
String[] fields = params.getParams(TermsParams.TERMS_FIELD);
if (fields != null && fields.length > 0) {
NamedList terms = new NamedList();
rb.rsp.add("terms", terms);
int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
if (limit < 0) {
limit = Integer.MAX_VALUE;
}
String upperStr = params.get(TermsParams.TERMS_UPPER);
boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
if (freqmax<0) {
freqmax = Integer.MAX_VALUE;
}
String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
for (int j = 0; j < fields.length; j++) {
String field = StringHelper.intern(fields[j]);
FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
if (ft==null) ft = new StrField();
// If no lower bound was specified, use the prefix
String lower = lowerStr==null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
if (lower == null) lower="";
String upper = upperStr==null ? null : (raw ? upperStr : ft.toInternal(upperStr));
Term lowerTerm = new Term(field, lower);
Term upperTerm = upper==null ? null : new Term(field, upper);
TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
int i = 0;
BoundedTreeSet> queue = (sort ? new BoundedTreeSet>(limit) : null);
NamedList fieldTerms = new NamedList();
terms.add(field, fieldTerms);
Term lowerTestTerm = termEnum.term();
//Only advance the enum if we are excluding the lower bound and the lower Term actually matches
if (lowerTestTerm!=null && lowerIncl == false && lowerTestTerm.field() == field // intern'd comparison
&& lowerTestTerm.text().equals(lower)) {
termEnum.next();
}
while (i0 || (upperCmp==0 && !upperIncl)) break;
}
// This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
int docFreq = termEnum.docFreq();
if (docFreq >= freqmin && docFreq <= freqmax) {
// add the term to the list
String label = raw ? indexedText : ft.indexedToReadable(indexedText);
if (sort) {
queue.add(new CountPair(label, docFreq));
} else {
fieldTerms.add(label, docFreq);
i++;
}
}
termEnum.next();
}
termEnum.close();
if (sort) {
for (CountPair item : queue) {
if (i < limit) {
fieldTerms.add(item.key, item.val);
i++;
} else {
break;
}
}
}
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
}
}
}
public void prepare(ResponseBuilder rb) throws IOException {
//nothing to do
}
public String getVersion() {
return "$Revision: 807289 $";
}
public String getSourceId() {
return "$Id: TermsComponent.java 807289 2009-08-24 15:56:32Z yonik $";
}
public String getSource() {
return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/src/java/org/apache/solr/handler/component/TermsComponent.java $";
}
public String getDescription() {
return "A Component for working with Term Enumerators";
}
}