/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.handler.component;

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.regex.Pattern;

import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermStates;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.CharsRefBuilder;
import org.apache.lucene.util.StringHelper;
import org.apache.lucene.util.mutable.MutableValue;
import org.apache.solr.client.solrj.response.TermsResponse;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.cloud.ClusterState;
import org.apache.solr.common.params.ModifiableSolrParams;
import org.apache.solr.common.params.ShardParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.params.TermsParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.SimpleOrderedMap;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.common.util.Utils;
import org.apache.solr.handler.component.HttpShardHandlerFactory.WhitelistHostChecker;
import org.apache.solr.request.SimpleFacets.CountPair;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.schema.StrField;
import org.apache.solr.search.PointMerger;
import org.apache.solr.search.SolrIndexSearcher;
import org.apache.solr.util.BoundedTreeSet;

/**
 * Return TermEnum information, useful for things like auto suggest.
 *
 * 
 * <searchComponent name="termsComponent" class="solr.TermsComponent"/>
 *
 * <requestHandler name="/terms" class="solr.SearchHandler">
 *   <lst name="defaults">
 *     <bool name="terms">true</bool>
 *   </lst>
 *   <arr name="components">
 *     <str>termsComponent</str>
 *   </arr>
 * </requestHandler>
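 *
 * <p>For illustration only, a sketch (not part of this class) of querying the handler
 * configured above via SolrJ; the Solr URL, core name, and field are assumptions:</p>
 *
 * <pre>
 * SolrClient client = new HttpSolrClient.Builder("http://localhost:8983/solr/techproducts").build();
 * SolrQuery query = new SolrQuery();
 * query.setRequestHandler("/terms");
 * query.set(TermsParams.TERMS, true);                 // enable this component
 * query.set(TermsParams.TERMS_FIELD, "name");         // field to enumerate
 * query.set(TermsParams.TERMS_PREFIX_STR, "a");       // only terms starting with "a"
 * QueryResponse rsp = client.query(query);
 * TermsResponse terms = rsp.getTermsResponse();       // per-field terms with document frequencies
 * </pre>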
 *
 * @see org.apache.solr.common.params.TermsParams
 *      See Lucene's TermEnum class
 */
public class TermsComponent extends SearchComponent {
  public static final int UNLIMITED_MAX_COUNT = -1;
  public static final String COMPONENT_NAME = "terms";

  // This needs to be created here too, because Solr doesn't call init(...) on default components. Bug?
  private WhitelistHostChecker whitelistHostChecker = new WhitelistHostChecker(null,
      !HttpShardHandlerFactory.doGetDisableShardsWhitelist());

  @Override
  public void init(NamedList args) {
    super.init(args);
    whitelistHostChecker = new WhitelistHostChecker(
        (String) args.get(HttpShardHandlerFactory.INIT_SHARDS_WHITELIST),
        !HttpShardHandlerFactory.doGetDisableShardsWhitelist());
  }

  @Override
  public void prepare(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    // the "terms" parameter is also used by the JSON Facet API, so parsing it as a boolean
    // here would cause errors; compare the raw string value instead
    if (params.get(TermsParams.TERMS, "false").equals("true")) {
      rb.doTerms = true;
    } else {
      return;
    }

    // TODO: temporary... this should go in a different component.
    String shards = params.get(ShardParams.SHARDS);
    if (shards != null) {
      rb.isDistrib = true;
      if (params.get(ShardParams.SHARDS_QT) == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "No shards.qt parameter specified");
      }
      List<String> lst = StrUtils.splitSmart(shards, ",", true);
      checkShardsWhitelist(rb, lst);
      rb.shards = lst.toArray(new String[lst.size()]);
    }
  }

  protected void checkShardsWhitelist(final ResponseBuilder rb, final List<String> lst) {
    final List<String> urls = new LinkedList<>();
    for (final String ele : lst) {
      urls.addAll(StrUtils.splitSmart(ele, '|'));
    }
    if (whitelistHostChecker.isWhitelistHostCheckingEnabled()
        && rb.req.getCore().getCoreContainer().getZkController() == null
        && !whitelistHostChecker.hasExplicitWhitelist()) {
      throw new SolrException(ErrorCode.FORBIDDEN, "TermsComponent " + HttpShardHandlerFactory.INIT_SHARDS_WHITELIST
          + " not configured but required when using the '" + ShardParams.SHARDS
          + "' parameter with the TermsComponent."
          + HttpShardHandlerFactory.SET_SOLR_DISABLE_SHARDS_WHITELIST_CLUE);
    } else {
      ClusterState cs = null;
      if (rb.req.getCore().getCoreContainer().getZkController() != null) {
        cs = rb.req.getCore().getCoreContainer().getZkController().getClusterState();
      }
      whitelistHostChecker.checkWhitelist(cs, urls.toString(), urls);
    }
  }
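
  // The response built by process() below is a NamedList keyed by field name. For illustration
  // (an assumed example, rendered as JSON): "terms": {"name": {"apple": 5, "aperture": 3}}.
  // With terms.ttf=true each term instead maps to its stats, e.g. {"df": 5, "ttf": 12}
  // (see addTermToNamedList near the bottom of this file).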
  @Override
  public void process(ResponseBuilder rb) throws IOException {
    SolrParams params = rb.req.getParams();
    if (!params.get(TermsParams.TERMS, "false").equals("true")) {
      return;
    }

    String[] fields = params.getParams(TermsParams.TERMS_FIELD);

    NamedList<Object> termsResult = new SimpleOrderedMap<>();
    rb.rsp.add(TermsParams.TERMS, termsResult);

    if (fields == null || fields.length == 0) {
      return;
    }

    boolean termStats = params.getBool(TermsParams.TERMS_STATS, false);
    if (termStats) {
      NamedList<Number> stats = new SimpleOrderedMap<>();
      rb.rsp.add("indexstats", stats);
      collectStats(rb.req.getSearcher(), stats);
    }

    String termList = params.get(TermsParams.TERMS_LIST);
    boolean includeTotalTermFreq = params.getBool(TermsParams.TERMS_TTF, false);
    if (termList != null) {
      fetchTerms(rb.req.getSearcher(), fields, termList, includeTotalTermFreq, termsResult);
      return;
    }

    int _limit = params.getInt(TermsParams.TERMS_LIMIT, 10);
    final int limit = _limit < 0 ? Integer.MAX_VALUE : _limit;
    String lowerStr = params.get(TermsParams.TERMS_LOWER);
    String upperStr = params.get(TermsParams.TERMS_UPPER);
    boolean upperIncl = params.getBool(TermsParams.TERMS_UPPER_INCLUSIVE, false);
    boolean lowerIncl = params.getBool(TermsParams.TERMS_LOWER_INCLUSIVE, true);
    boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
        params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));
    int freqmin = params.getInt(TermsParams.TERMS_MINCOUNT, 1);
    int _freqmax = params.getInt(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
    final int freqmax = _freqmax < 0 ? Integer.MAX_VALUE : _freqmax;
    String prefix = params.get(TermsParams.TERMS_PREFIX_STR);
    String regexp = params.get(TermsParams.TERMS_REGEXP_STR);
    Pattern pattern = regexp != null ? Pattern.compile(regexp, resolveRegexpFlags(params)) : null;

    boolean raw = params.getBool(TermsParams.TERMS_RAW, false);

    final LeafReader indexReader = rb.req.getSearcher().getSlowAtomicReader();

    for (String field : fields) {
      NamedList<Object> fieldTerms = new NamedList<>();
      termsResult.add(field, fieldTerms);

      Terms terms = indexReader.terms(field);

      if (terms == null) {
        // field does not exist in terms index.  Check points.
        SchemaField sf = rb.req.getSchema().getFieldOrNull(field);
        if (sf != null && sf.getType().isPointField()) {
          // FIXME: terms.ttf=true is not supported for pointFields
          if (lowerStr != null || upperStr != null || prefix != null || regexp != null) {
            throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
                String.format(Locale.ROOT,
                    "The terms component does not support Points-based fields with sorting or with parameters %s,%s,%s,%s ",
                    TermsParams.TERMS_LOWER, TermsParams.TERMS_UPPER,
                    TermsParams.TERMS_PREFIX_STR, TermsParams.TERMS_REGEXP_STR));
          }

          PointMerger.ValueIterator valueIterator =
              new PointMerger.ValueIterator(sf, rb.req.getSearcher().getRawReader().leaves());
          MutableValue mv = valueIterator.getMutableValue();

          if (sort) {
            BoundedTreeSet<CountPair<MutableValue, Integer>> queue = new BoundedTreeSet<>(limit);

            for (; ; ) {
              long count = valueIterator.getNextCount();
              if (count < 0) break;
              if (count < freqmin || count > freqmax) continue;
              if (queue.size() < limit || queue.last().val < count
                  || (queue.last().val == count && queue.last().key.compareTo(mv) < 0)) {
                queue.add(new CountPair<>(mv.duplicate(), (int) count));
              }
            }

            for (CountPair<MutableValue, Integer> item : queue) {
              fieldTerms.add(Utils.OBJECT_TO_STRING.apply(item.key.toObject()), item.val);
            }
            continue;
          } else {
            /***
            // streaming solution that is deferred until writing the response
            // TODO: we can't use the streaming solution until XML writer supports PushWriter!
            termsResult.add(field, (MapWriter) ew -> {
              int num = 0;
              for (;;) {
                long count = valueIterator.getNextCount();
                if (count < 0) break;
                if (count < freqmin || count > freqmax) continue;
                if (++num > limit) break;
                // match the numeric type of terms
                ew.put(Utils.OBJECT_TO_STRING.apply(mv.toObject()), (int) count);
              }
            });
            ***/

            int num = 0;
            for (;;) {
              long count = valueIterator.getNextCount();
              if (count < 0) break;
              if (count < freqmin || count > freqmax) continue;
              if (++num > limit) break;
              // match the numeric type of terms
              fieldTerms.add(Utils.OBJECT_TO_STRING.apply(mv.toObject()), (int) count);
            }
            continue;
          }
        }
        continue;
      }

      FieldType ft = raw ? null : rb.req.getSchema().getFieldTypeNoEx(field);
      if (ft == null) {
        ft = new StrField();
      }

      // prefix must currently be text
      BytesRef prefixBytes = prefix == null ? null : new BytesRef(prefix);

      BytesRef upperBytes = null;
      if (upperStr != null) {
        BytesRefBuilder b = new BytesRefBuilder();
        ft.readableToIndexed(upperStr, b);
        upperBytes = b.get();
      }

      BytesRef lowerBytes;
      if (lowerStr == null) {
        // If no lower bound was specified, use the prefix
        lowerBytes = prefixBytes;
      } else {
        if (raw) {
          // TODO: how to handle binary? perhaps we don't for "raw"... or if the field exists
          // perhaps we detect if the FieldType is non-character and expect hex if so?
          lowerBytes = new BytesRef(lowerStr);
        } else {
          BytesRefBuilder b = new BytesRefBuilder();
          ft.readableToIndexed(lowerStr, b);
          lowerBytes = b.get();
        }
      }

      TermsEnum termsEnum = terms.iterator();
      BytesRef term = null;

      if (lowerBytes != null) {
        if (termsEnum.seekCeil(lowerBytes) == TermsEnum.SeekStatus.END) {
          termsEnum = null;
        } else {
          term = termsEnum.term();
          // Only advance the enum if we are excluding the lower bound and the lower Term actually matches
          if (lowerIncl == false && term.equals(lowerBytes)) {
            term = termsEnum.next();
          }
        }
      } else {
        // position termsEnum on first term
        term = termsEnum.next();
      }

      int i = 0;
      BoundedTreeSet<TermsResponse.Term> queue =
          (sort ? new BoundedTreeSet<>(limit, new TermCountComparator()) : null);
      CharsRefBuilder external = new CharsRefBuilder();
      while (term != null && (i < limit || sort)) {
        boolean externalized = false; // did we fill in "external" yet for this term?

        // stop if the prefix doesn't match
        if (prefixBytes != null && !StringHelper.startsWith(term, prefixBytes)) break;

        if (pattern != null) {
          // indexed text or external text?
          // TODO: support "raw" mode?
          ft.indexedToReadable(term, external);
          externalized = true;
          if (!pattern.matcher(external.get()).matches()) {
            term = termsEnum.next();
            continue;
          }
        }

        if (upperBytes != null) {
          int upperCmp = term.compareTo(upperBytes);
          // if we are past the upper term, or equal to it (when we don't include the upper bound), then stop
          if (upperCmp > 0 || (upperCmp == 0 && !upperIncl)) break;
        }

        // This is a good term in the range. Check if mincount/maxcount conditions are satisfied.
        int docFreq = termsEnum.docFreq();
        if (docFreq >= freqmin && docFreq <= freqmax) {
          // TODO: handle raw somehow
          if (!externalized) {
            ft.indexedToReadable(term, external);
          }

          // add the term to the list
          if (sort) {
            queue.add(new TermsResponse.Term(external.toString(), docFreq, termsEnum.totalTermFreq()));
          } else {
            addTermToNamedList(fieldTerms, external.toString(), docFreq,
                termsEnum.totalTermFreq(), includeTotalTermFreq);
            i++;
          }
        }

        term = termsEnum.next();
      }

      if (sort) {
        for (TermsResponse.Term item : queue) {
          if (i >= limit) {
            break;
          }
          addTermToNamedList(fieldTerms, item.getTerm(), item.getFrequency(),
              item.getTotalTermFreq(), includeTotalTermFreq);
          i++;
        }
      }
    }
  }

  int resolveRegexpFlags(SolrParams params) {
    String[] flagParams = params.getParams(TermsParams.TERMS_REGEXP_FLAG);
    if (flagParams == null) {
      return 0;
    }
    int flags = 0;
    for (String flagParam : flagParams) {
      try {
        flags |= TermsParams.TermsRegexpFlag.valueOf(flagParam.toUpperCase(Locale.ROOT)).getValue();
      } catch (IllegalArgumentException iae) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
            "Unknown terms regex flag '" + flagParam + "'");
      }
    }
    return flags;
  }

  @Override
  public int distributedProcess(ResponseBuilder rb) throws IOException {
    if (!rb.doTerms) {
      return ResponseBuilder.STAGE_DONE;
    }

    if (rb.stage == ResponseBuilder.STAGE_EXECUTE_QUERY) {
      TermsHelper th = rb._termsHelper;
      if (th == null) {
        th = rb._termsHelper = new TermsHelper();
        th.init(rb.req.getParams());
      }
      ShardRequest sreq = createShardQuery(rb.req.getParams());
      rb.addRequest(this, sreq);
    }

    if (rb.stage < ResponseBuilder.STAGE_EXECUTE_QUERY) {
      return ResponseBuilder.STAGE_EXECUTE_QUERY;
    } else {
      return ResponseBuilder.STAGE_DONE;
    }
  }

  @Override
  public void handleResponses(ResponseBuilder rb, ShardRequest sreq) {
    if (!rb.doTerms || (sreq.purpose & ShardRequest.PURPOSE_GET_TERMS) == 0) {
      return;
    }
    TermsHelper th = rb._termsHelper;
    if (th != null) {
      for (ShardResponse srsp : sreq.responses) {
        @SuppressWarnings("unchecked")
        NamedList<NamedList<Object>> terms =
            (NamedList<NamedList<Object>>) srsp.getSolrResponse().getResponse().get("terms");
        th.parse(terms);

        @SuppressWarnings("unchecked")
        NamedList<Number> stats =
            (NamedList<Number>) srsp.getSolrResponse().getResponse().get("indexstats");
        if (stats != null) {
          th.numDocs += stats.get("numDocs").longValue();
          th.stats = true;
        }
      }
    }
  }

  @Override
  public void finishStage(ResponseBuilder rb) {
    if (!rb.doTerms || rb.stage != ResponseBuilder.STAGE_EXECUTE_QUERY) {
      return;
    }

    TermsHelper ti = rb._termsHelper;
    NamedList<Object> terms = ti.buildResponse();

    rb.rsp.add("terms", terms);
    if (ti.stats) {
      NamedList<Number> stats = new SimpleOrderedMap<>();
      stats.add("numDocs", ti.numDocs);
      rb.rsp.add("indexstats", stats);
    }
    rb._termsHelper = null;
  }
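
  // Sketch of the distributed flow implemented above, with illustrative numbers (not from any
  // real index): each shard executes the request produced by createShardQuery() below and
  // returns its terms with per-shard counts; handleResponses() hands them to TermsHelper.parse(),
  // which sums frequencies per term (e.g. shard1 reports {foo: 3} and shard2 reports {foo: 5},
  // so the merged entry is foo -> 8); finishStage() then applies the original sort,
  // mincount/maxcount, and limit to the merged totals.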
  static ShardRequest createShardQuery(SolrParams params) {
    ShardRequest sreq = new ShardRequest();
    sreq.purpose = ShardRequest.PURPOSE_GET_TERMS;

    // base shard request on original parameters
    sreq.params = new ModifiableSolrParams(params);

    // if using index-order, we can send all parameters to all shards
    // since all required data are returned within the first n rows
    String actualSort = sreq.params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT);
    boolean fast = actualSort.equals(TermsParams.TERMS_SORT_INDEX)
        && sreq.params.getLong(TermsParams.TERMS_MINCOUNT, 0) <= 1
        && sreq.params.getLong(TermsParams.TERMS_MAXCOUNT, -1) <= 0;

    if (!fast) {
      // remove any limits for shards: we want them to return all possible
      // responses so we can calculate the correct merged counts;
      // don't sort by count, to avoid that unnecessary overhead on the shards
      sreq.params.remove(TermsParams.TERMS_MAXCOUNT);
      sreq.params.remove(TermsParams.TERMS_MINCOUNT);
      sreq.params.set(TermsParams.TERMS_LIMIT, -1);
      sreq.params.set(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_INDEX);
    }

    return sreq;
  }

  public static class TermsHelper {
    // map to store returned terms
    private HashMap<String, HashMap<String, TermsResponse.Term>> fieldmap;
    private SolrParams params;
    public long numDocs = 0;
    public boolean stats;

    public TermsHelper() {
      fieldmap = new HashMap<>(5);
    }

    public void init(SolrParams params) {
      this.params = params;
      String[] fields = params.getParams(TermsParams.TERMS_FIELD);
      if (fields != null) {
        for (String field : fields) {
          // TODO: not sure 128 is the best starting size; it is used here because
          // that is what the facet code uses
          fieldmap.put(field, new HashMap<String, TermsResponse.Term>(128));
        }
      }
    }

    public void parse(NamedList<NamedList<Object>> terms) {
      // exit if there are no terms
      if (terms == null) {
        return;
      }

      TermsResponse termsResponse = new TermsResponse(terms);

      // loop through each field and add each term+freq to the map
      for (Map.Entry<String, HashMap<String, TermsResponse.Term>> entry : fieldmap.entrySet()) {
        List<TermsResponse.Term> termlist = termsResponse.getTerms(entry.getKey());

        // skip this field if there are no terms
        if (termlist == null) {
          continue;
        }

        HashMap<String, TermsResponse.Term> termmap = entry.getValue();

        // loop through each term
        for (TermsResponse.Term tc : termlist) {
          String term = tc.getTerm();
          if (termmap.containsKey(term)) {
            TermsResponse.Term oldtc = termmap.get(term);
            oldtc.addFrequency(tc.getFrequency());
            oldtc.addTotalTermFreq(tc.getTotalTermFreq());
            termmap.put(term, oldtc);
          } else {
            termmap.put(term, tc);
          }
        }
      }
    }

    public NamedList<Object> buildResponse() {
      NamedList<Object> response = new SimpleOrderedMap<>();

      // determine whether we are sorting by index order or by count
      boolean sort = !TermsParams.TERMS_SORT_INDEX.equals(
          params.get(TermsParams.TERMS_SORT, TermsParams.TERMS_SORT_COUNT));

      if (params.get(TermsParams.TERMS_LIST) != null) {
        // always use lexical sort when TERMS_LIST is provided
        sort = false;
      }

      long freqmin = params.getLong(TermsParams.TERMS_MINCOUNT, 1);

      long freqmax = params.getLong(TermsParams.TERMS_MAXCOUNT, UNLIMITED_MAX_COUNT);
      if (freqmax < 0) {
        freqmax = Long.MAX_VALUE;
      }

      long limit = params.getLong(TermsParams.TERMS_LIMIT, 10);
      if (limit < 0) {
        limit = Long.MAX_VALUE;
      }

      // loop through each field we want terms from
      for (Map.Entry<String, HashMap<String, TermsResponse.Term>> entry : fieldmap.entrySet()) {
        NamedList<Object> fieldterms = new SimpleOrderedMap<>();
        TermsResponse.Term[] data = null;
        if (sort) {
          data = getCountSorted(entry.getValue());
        } else {
          data = getLexSorted(entry.getValue());
        }

        boolean includeTotalTermFreq = params.getBool(TermsParams.TERMS_TTF, false);
        // loop through each term until we hit the limit
        int cnt = 0;
        for (TermsResponse.Term tc : data) {
          if (tc.getFrequency() >= freqmin && tc.getFrequency() <= freqmax) {
            addTermToNamedList(fieldterms, tc.getTerm(), tc.getFrequency(),
                tc.getTotalTermFreq(), includeTotalTermFreq);
            cnt++;
          }

          if (cnt >= limit) {
            break;
          }
        }

        response.add(entry.getKey(), fieldterms);
      }

      return response;
    }

    // use <int> tags for smaller facet counts (better back compatibility)
    private static Number num(long val) {
      if (val < Integer.MAX_VALUE) return (int) val;
      else return val;
    }

    // based on code from facets
    public TermsResponse.Term[] getLexSorted(HashMap<String, TermsResponse.Term> data) {
      TermsResponse.Term[] arr = data.values().toArray(new TermsResponse.Term[data.size()]);
      Arrays.sort(arr, (o1, o2) -> o1.getTerm().compareTo(o2.getTerm()));
      return arr;
    }

    // based on code from facets
    public TermsResponse.Term[] getCountSorted(HashMap<String, TermsResponse.Term> data) {
      TermsResponse.Term[] arr = data.values().toArray(new TermsResponse.Term[data.size()]);
      Arrays.sort(arr, new TermCountComparator());
      return arr;
    }
  }
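
  // fetchTerms() below serves the terms.list case: e.g. terms.list=foo,bar (assumed values)
  // looks up only those terms and reports their document frequencies (plus total term
  // frequencies when terms.ttf=true) instead of enumerating the whole term dictionary.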
  private static void fetchTerms(SolrIndexSearcher indexSearcher, String[] fields, String termList,
      boolean includeTotalTermFreq, NamedList<Object> result) throws IOException {
    List<String> splitTermList = StrUtils.splitSmart(termList, ",", true);
    // Sort the terms once
    String[] splitTerms = splitTermList.toArray(new String[splitTermList.size()]);
    // Not a great idea to trim here, but it was in the original implementation
    for (int i = 0; i < splitTerms.length; i++) {
      splitTerms[i] = splitTerms[i].trim();
    }
    Arrays.sort(splitTerms);

    IndexReaderContext topReaderContext = indexSearcher.getTopReaderContext();
    for (String field : fields) {
      SchemaField sf = indexSearcher.getSchema().getField(field);
      FieldType fieldType = sf.getType();
      NamedList<Object> termsMap = new SimpleOrderedMap<>();

      if (fieldType.isPointField()) {
        for (String term : splitTerms) {
          Query q = fieldType.getFieldQuery(null, sf, term);
          int count = indexSearcher.getDocSet(q).size();
          termsMap.add(term, count);
        }
      } else {
        // Since splitTerms is already sorted, this array will also be sorted.
        // NOTE: this may not be true, it depends on readableToIndexed.
        Term[] terms = new Term[splitTerms.length];
        for (int i = 0; i < splitTerms.length; i++) {
          terms[i] = new Term(field, fieldType.readableToIndexed(splitTerms[i]));
        }

        TermStates[] termStates = new TermStates[terms.length];
        collectTermStates(topReaderContext, termStates, terms);

        for (int i = 0; i < terms.length; i++) {
          if (termStates[i] != null) {
            String outTerm = fieldType.indexedToReadable(terms[i].bytes().utf8ToString());
            int docFreq = termStates[i].docFreq();
            addTermToNamedList(termsMap, outTerm, docFreq, termStates[i].totalTermFreq(), includeTotalTermFreq);
          }
        }
      }

      result.add(field, termsMap);
    }
  }

  private static void collectTermStates(IndexReaderContext topReaderContext,
      TermStates[] contextArray, Term[] queryTerms) throws IOException {
    TermsEnum termsEnum = null;
    for (LeafReaderContext context : topReaderContext.leaves()) {
      for (int i = 0; i < queryTerms.length; i++) {
        Term term = queryTerms[i];
        final Terms terms = context.reader().terms(term.field());
        if (terms == null) {
          // field does not exist
          continue;
        }
        termsEnum = terms.iterator();
        assert termsEnum != null;

        if (termsEnum == TermsEnum.EMPTY) continue;

        TermStates termStates = contextArray[i];
        if (termsEnum.seekExact(term.bytes())) {
          if (termStates == null) {
            termStates = new TermStates(topReaderContext);
            contextArray[i] = termStates;
          }
          termStates.accumulateStatistics(termsEnum.docFreq(), termsEnum.totalTermFreq());
        }
      }
    }
  }

  /**
   * Helper method to add a particular term to the terms response.
   */
  private static void addTermToNamedList(NamedList<Object> result, String term, long docFreq,
      long totalTermFreq, boolean includeTotalTermFreq) {
    if (includeTotalTermFreq) {
      NamedList<Long> termStats = new SimpleOrderedMap<>();
      termStats.add("df", docFreq);
      termStats.add("ttf", totalTermFreq);
      result.add(term, termStats);
    } else {
      result.add(term, TermsHelper.num(docFreq));
    }
  }

  /**
   * Comparator for {@link org.apache.solr.client.solrj.response.TermsResponse.Term} sorting.
   * This sorts terms by frequency in descending order, breaking ties by term in ascending order.
   */
  public static class TermCountComparator implements Comparator<TermsResponse.Term> {

    @Override
    public int compare(TermsResponse.Term o1, TermsResponse.Term o2) {
      long freq1 = o1.getFrequency();
      long freq2 = o2.getFrequency();

      if (freq2 < freq1) {
        return -1;
      } else if (freq1 < freq2) {
        return 1;
      } else {
        return o1.getTerm().compareTo(o2.getTerm());
      }
    }
  }

  private static void collectStats(SolrIndexSearcher searcher, NamedList<Number> stats) {
    int numDocs = searcher.getTopReaderContext().reader().numDocs();
    stats.add("numDocs", Long.valueOf(numDocs));
  }

  @Override
  public String getDescription() {
    return "A Component for working with Term Enumerators";
  }

  @Override
  public Category getCategory() {
    return Category.QUERY;
  }
}