org.apache.lucene.search.suggest.document.CompletionAnalyzer Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-suggest Show documentation
Apache Lucene (module: suggest)
There is a newer version: 10.0.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.AnalyzerWrapper;
import org.apache.lucene.analysis.TokenStreamToAutomaton;
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;

/**
 * An {@link AnalyzerWrapper} around an {@link org.apache.lucene.analysis.Analyzer} that adds
 * completion-only tuning (e.g. preserving token separators and preserving position increments
 * while the token stream is converted to an automaton).
 *
 * <p>Can be used to index {@link SuggestField} and {@link ContextSuggestField}, and as a query
 * analyzer for {@link PrefixCompletionQuery} and {@link FuzzyCompletionQuery}.
 *
 * <p>NOTE: In most cases, the index and query analyzers should use the same values for {@link
 * #preservePositionIncrements()} and {@link #preserveSep()}.
 *
 * @lucene.experimental
 * @since 5.1.0
 */
public final class CompletionAnalyzer extends AnalyzerWrapper {

  /**
   * The hole character, as inserted by {@link org.apache.lucene.analysis.TokenStreamToAutomaton};
   * an alias for {@link TokenStreamToAutomaton#HOLE}.
   */
  static final int HOLE_CHARACTER = TokenStreamToAutomaton.HOLE;

  /** The analyzer being wrapped; returned for every field name. */
  private final Analyzer wrapped;

  /**
   * Whether separation between tokens is preserved when converting to an automaton.
   *
   * <p>Defaults to {@link ConcatenateGraphFilter#DEFAULT_PRESERVE_SEP} when not given explicitly.
   */
  private final boolean preserveSep;

  /**
   * Whether position increments of tokens are preserved when converting to an automaton.
   *
   * <p>Defaults to {@link ConcatenateGraphFilter#DEFAULT_PRESERVE_POSITION_INCREMENTS} when not
   * given explicitly.
   */
  private final boolean preservePositionIncrements;

  /**
   * The maximum number of graph expansions of a completion automaton.
   *
   * <p>Defaults to {@link ConcatenateGraphFilter#DEFAULT_MAX_GRAPH_EXPANSIONS} when not given
   * explicitly.
   */
  private final int maxGraphExpansions;

  /**
   * Wraps {@code analyzer} using the default settings for token separation, position increments
   * and maximum graph expansions.
   *
   * @param analyzer analyzer whose token stream is to be converted to an automaton
   * @see #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)
   */
  public CompletionAnalyzer(Analyzer analyzer) {
    // Chains through the three-argument form, which in turn supplies the default expansion limit.
    this(
        analyzer,
        ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP,
        ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS);
  }

  /**
   * Wraps {@code analyzer} with an explicit limit on graph expansions, using the default settings
   * for token separation and position increments.
   *
   * @param analyzer analyzer whose token stream is to be converted to an automaton
   * @param maxGraphExpansions maximum number of graph expansions of a completion automaton
   * @see #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)
   */
  public CompletionAnalyzer(Analyzer analyzer, int maxGraphExpansions) {
    this(
        analyzer,
        ConcatenateGraphFilter.DEFAULT_PRESERVE_SEP,
        ConcatenateGraphFilter.DEFAULT_PRESERVE_POSITION_INCREMENTS,
        maxGraphExpansions);
  }

  /**
   * Wraps {@code analyzer} with explicit separator and position-increment settings, using the
   * default maximum number of graph expansions.
   *
   * @param analyzer analyzer whose token stream is to be converted to an automaton
   * @param preserveSep preserve separation between tokens when converting to an automaton
   * @param preservePositionIncrements preserve position increments for tokens when converting to
   *     an automaton
   * @see #CompletionAnalyzer(org.apache.lucene.analysis.Analyzer, boolean, boolean, int)
   */
  public CompletionAnalyzer(
      Analyzer analyzer, boolean preserveSep, boolean preservePositionIncrements) {
    this(
        analyzer,
        preserveSep,
        preservePositionIncrements,
        ConcatenateGraphFilter.DEFAULT_MAX_GRAPH_EXPANSIONS);
  }

  /**
   * Wraps an analyzer so that its output token stream is converted to an automaton. All other
   * constructors delegate here.
   *
   * @param analyzer analyzer whose token stream is to be converted to an automaton
   * @param preserveSep preserve separation between tokens when converting to an automaton
   * @param preservePositionIncrements preserve position increments for tokens when converting to
   *     an automaton
   * @param maxGraphExpansions maximum number of graph expansions of a completion automaton
   */
  public CompletionAnalyzer(
      Analyzer analyzer,
      boolean preserveSep,
      boolean preservePositionIncrements,
      int maxGraphExpansions) {
    super(PER_FIELD_REUSE_STRATEGY);
    this.wrapped = analyzer;
    this.maxGraphExpansions = maxGraphExpansions;
    this.preservePositionIncrements = preservePositionIncrements;
    this.preserveSep = preserveSep;
  }

  /**
   * Returns true if separation between tokens is preserved when converting the token stream to an
   * automaton.
   */
  public boolean preserveSep() {
    return preserveSep;
  }

  /**
   * Returns true if position increments are preserved when converting the token stream to an
   * automaton.
   */
  public boolean preservePositionIncrements() {
    return preservePositionIncrements;
  }

  /** Returns the single wrapped analyzer; {@code fieldName} is not consulted. */
  @Override
  protected Analyzer getWrappedAnalyzer(String fieldName) {
    return wrapped;
  }

  /**
   * Keeps the source of the wrapped analyzer's components and replaces their token stream with a
   * {@link CompletionTokenStream} configured from this analyzer's settings.
   */
  @Override
  protected TokenStreamComponents wrapComponents(
      String fieldName, TokenStreamComponents components) {
    final CompletionTokenStream completionStream =
        new CompletionTokenStream(
            components.getTokenStream(),
            preserveSep,
            preservePositionIncrements,
            maxGraphExpansions);
    return new TokenStreamComponents(components.getSource(), completionStream);
  }
}