// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.solr.spelling.SolrSpellChecker (Maven / Gradle / Ivy)
//
// There is a newer version of this artifact: 9.7.0
// (Show newest version)
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.spelling;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.core.WhitespaceAnalyzer;
import org.apache.lucene.search.spell.LevenshteinDistance;
import org.apache.lucene.search.spell.StringDistance;
import org.apache.lucene.search.spell.SuggestWord;
import org.apache.lucene.search.spell.SuggestWordQueue;
import org.apache.solr.client.solrj.response.SpellCheckResponse;
import org.apache.solr.common.params.SpellingParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.SolrCore;
import org.apache.solr.handler.component.ResponseBuilder;
import org.apache.solr.handler.component.ShardRequest;
import org.apache.solr.handler.component.SpellCheckMergeData;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * Refer to https://solr.apache.org/guide/solr/latest/query-guide/spell-checking.html
 * for more details.
 *
 * @since solr 1.3
 */
public abstract class SolrSpellChecker {
  public static final String DICTIONARY_NAME = "name";
  public static final String DEFAULT_DICTIONARY_NAME = "default";
  public static final String FIELD = "field";
  public static final String FIELD_TYPE = "fieldType";

  /** Dictionary name */
  protected String name;

  protected Analyzer analyzer;
  protected String field;
  protected String fieldTypeName;

  public String init(NamedList config, SolrCore core) {
    name = (String) config.get(DICTIONARY_NAME);
    if (name == null) {
      name = DEFAULT_DICTIONARY_NAME;
    }
    field = (String) config.get(FIELD);
    IndexSchema schema = core.getLatestSchema();
    if (field != null && schema.getFieldTypeNoEx(field) != null) {
      analyzer = schema.getFieldType(field).getQueryAnalyzer();
    }
    fieldTypeName = (String) config.get(FIELD_TYPE);
    if (schema.getFieldTypes().containsKey(fieldTypeName)) {
      FieldType fieldType = schema.getFieldTypes().get(fieldTypeName);
      analyzer = fieldType.getQueryAnalyzer();
    }
    if (analyzer == null) {
      analyzer = new WhitespaceAnalyzer();
    }
    return name;
  }

  /** modify the shard request to be used in a distributed environment. */
  public void modifyRequest(ResponseBuilder rb, ShardRequest sreq) {
    /* No-Op */
  }

  /** Integrate spelling suggestions from the various shards in a distributed environment. */
  public SpellingResult mergeSuggestions(
      SpellCheckMergeData mergeData, int numSug, int count, boolean extendedResults) {
    float min = 0.5f;
    try {
      min = getAccuracy();
    } catch (UnsupportedOperationException uoe) {
      // just use .5 as a default
    }

    StringDistance sd = null;
    try {
      sd = getStringDistance() == null ? new LevenshteinDistance() : getStringDistance();
    } catch (UnsupportedOperationException uoe) {
      sd = new LevenshteinDistance();
    }

    SpellingResult result = new SpellingResult();
    for (Map.Entry> entry : mergeData.origVsSuggested.entrySet()) {
      String original = entry.getKey();

      // Only use this suggestion if all shards reported it as misspelled,
      // unless it was not a term original to the user's query
      // (WordBreakSolrSpellChecker can add new terms to the response, and we want to keep these)
      Integer numShards = mergeData.origVsShards.get(original);
      if (numShards < mergeData.totalNumberShardResponses
          && mergeData.isOriginalToQuery(original)) {
        continue;
      }

      HashSet suggested = entry.getValue();
      SuggestWordQueue sugQueue = new SuggestWordQueue(numSug);
      for (String suggestion : suggested) {
        SuggestWord sug = mergeData.suggestedVsWord.get(suggestion);
        sug.score = sd.getDistance(original, sug.string);
        if (sug.score < min) continue;
        sugQueue.insertWithOverflow(sug);
        if (sugQueue.size() == numSug) {
          // if queue full, maintain the minScore score
          min = sugQueue.top().score;
        }
      }

      // create token
      SpellCheckResponse.Suggestion suggestion = mergeData.origVsSuggestion.get(original);
      Token token = new Token(original, suggestion.getStartOffset(), suggestion.getEndOffset());

      // get top 'count' suggestions out of 'sugQueue.size()' candidates
      SuggestWord[] suggestions = new SuggestWord[Math.min(count, sugQueue.size())];
      // skip the first sugQueue.size() - count elements
      for (int k = 0; k < sugQueue.size() - count; k++) sugQueue.pop();
      // now collect the top 'count' responses
      for (int k = Math.min(count, sugQueue.size()) - 1; k >= 0; k--) {
        suggestions[k] = sugQueue.pop();
      }

      if (extendedResults) {
        Integer o = mergeData.origVsFreq.get(original);
        if (o != null) result.addFrequency(token, o);
        for (SuggestWord word : suggestions) result.add(token, word.string, word.freq);
      } else {
        List words = new ArrayList<>(sugQueue.size());
        for (SuggestWord word : suggestions) words.add(word.string);
        result.add(token, words);
      }
    }
    return result;
  }

  public Analyzer getQueryAnalyzer() {
    return analyzer;
  }

  public String getDictionaryName() {
    return name;
  }

  /**
   * Reloads the index. Useful if an external process is responsible for building the spell checker.
   *
   * @throws IOException If there is a low-level I/O error.
   */
  public abstract void reload(SolrCore core, SolrIndexSearcher searcher) throws IOException;

  /**
   * (re)Builds the spelling index. May be a NOOP if the implementation doesn't require building, or
   * can't be rebuilt.
   */
  public abstract void build(SolrCore core, SolrIndexSearcher searcher) throws IOException;

  /**
   * Get the value of {@link SpellingParams#SPELLCHECK_ACCURACY} if supported. Otherwise throws
   * UnsupportedOperationException.
   */
  protected float getAccuracy() {
    throw new UnsupportedOperationException();
  }

  /** Get the distance implementation used by this spellchecker, or NULL if not applicable. */
  protected StringDistance getStringDistance() {
    throw new UnsupportedOperationException();
  }

  /**
   * Get suggestions for the given query. Tokenizes the query using a field appropriate Analyzer.
   * The {@link SpellingResult#getSuggestions()} suggestions must be ordered by best suggestion
   * first.
   *
   * @param options The {@link SpellingOptions} to use
   * @return The {@link SpellingResult} suggestions
   * @throws IOException if there is an error producing suggestions
   */
  public abstract SpellingResult getSuggestions(SpellingOptions options) throws IOException;

  public boolean isSuggestionsMayOverlap() {
    return false;
  }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy