All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.handler.component.TermsComponent Maven / Gradle / Ivy

The newest version!
package org.apache.solr.handler.component;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermEnum;
import org.apache.lucene.util.StringHelper;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.CommonParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.StrField;
import org.apache.solr.request.SimpleFacets.CountPair;
import org.apache.solr.util.BoundedTreeSet;

import java.io.IOException;


/**
 * Return TermEnum information, useful for things like auto suggest.
 *
 * @see org.apache.solr.common.params.TermsParams
 *      See Lucene's TermEnum class
 */
public class TermsComponent extends SearchComponent {
  public static final int UNLIMITED_MAX_COUNT = -1;


  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (params.getBool(TermsParams.TERMS, false)) {
      String lowerStr = params.get(TermsParams.TERMS_LOWER, null);
      String[] fields = params.getParams(TermsParams.TERMS_FIELD);
      if (fields != null && fields.length > 0) {
        NamedList terms = new NamedList();
        rb.rsp.add("terms", terms);
        int limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
        if (limit < 0) {
          limit = Integer.MAX_VALUE;
        }
        String upperStr = params.get(TermsParams.TERMS_UPPER);
        boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
        boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
        boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
                          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
        int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1); // initialize freqmin
        int freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT); // initialize freqmax
        if (freqmax<0) {
          freqmax = Integer.MAX_VALUE;
        }
        String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
        boolean raw = params.getBool(TermsParams.TERMS_RAW, false);
        for (int j = 0; j < fields.length; j++) {
          String field = StringHelper.intern(fields[j]);
          FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
          if (ft==null) ft = new StrField();

          // If no lower bound was specified, use the prefix
          String lower = lowerStr==null ? prefix : (raw ? lowerStr : ft.toInternal(lowerStr));
          if (lower == null) lower="";
          String upper = upperStr==null ? null : (raw ? upperStr : ft.toInternal(upperStr));

          Term lowerTerm = new Term(field, lower);
          Term upperTerm = upper==null ? null : new Term(field, upper);
          
          TermEnum termEnum = rb.req.getSearcher().getReader().terms(lowerTerm); //this will be positioned ready to go
          int i = 0;
          BoundedTreeSet> queue = (sort ? new BoundedTreeSet>(limit) : null); 
          NamedList fieldTerms = new NamedList();
          terms.add(field, fieldTerms);
          Term lowerTestTerm = termEnum.term();

          //Only advance the enum if we are excluding the lower bound and the lower Term actually matches
          if (lowerTestTerm!=null && lowerIncl == false && lowerTestTerm.field() == field  // intern'd comparison
                  && lowerTestTerm.text().equals(lower)) {
            termEnum.next();
          }

          while (i0 || (upperCmp==0 && !upperIncl)) break;
            }

            // This is a good term in the range.  Check if mincount/maxcount conditions are satisfied.
            int docFreq = termEnum.docFreq();
            if (docFreq >= freqmin && docFreq <= freqmax) {
              // add the term to the list
              String label = raw ? indexedText : ft.indexedToReadable(indexedText);
              if (sort) {
                queue.add(new CountPair(label, docFreq));
              } else {
                fieldTerms.add(label, docFreq);
                i++;
              }
            }

            termEnum.next();
          }

          termEnum.close();
          
          if (sort) {
            for (CountPair item : queue) {
              if (i < limit) {
                fieldTerms.add(item.key, item.val);
                i++;
              } else {
                break;
              }
            }
          }
        }
      } else {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No terms.fl parameter specified");
      }
    }
  }

  public void prepare(ResponseBuilder rb) throws IOException {
    //nothing to do
  }

  public String getVersion() {
    return "$Revision: 807289 $";
  }

  public String getSourceId() {
    return "$Id: TermsComponent.java 807289 2009-08-24 15:56:32Z yonik $";
  }

  public String getSource() {
    return "$URL: https://svn.apache.org/repos/asf/lucene/solr/branches/branch-1.4/src/java/org/apache/solr/handler/component/TermsComponent.java $";
  }

  public String getDescription() {
    return "A Component for working with Term Enumerators";
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy