org.apache.lucene.search.suggest.document.ContextSuggestField Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-suggest Show documentation
Apache Lucene (module: suggest)
There is a newer version: 10.0.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * {@link SuggestField} which additionally takes in a set of contexts. Example usage of adding a
 * suggestion with contexts is as follows:
 *
 * <pre class="prettyprint">
 *  document.add(
 *   new ContextSuggestField(name, "suggestion", 4, "context1", "context2"));
 * </pre>
 *
 * Use {@link ContextQuery} to boost and/or filter suggestions at query-time. Use {@link
 * PrefixCompletionQuery}, {@link RegexCompletionQuery} or {@link FuzzyCompletionQuery} if context
 * boost/filtering are not needed.
 *
 * @lucene.experimental
 */
public class ContextSuggestField extends SuggestField {

  /** Separator used between context value and the suggest field value */
  public static final int CONTEXT_SEPARATOR = '\u001D';

  /** Field type marker returned by {@link #type()}. */
  static final byte TYPE = 1;

  /** Distinct contexts supplied at construction time; never {@code null}, possibly empty. */
  private final Set<CharSequence> contexts;

  /**
   * Creates a context-enabled suggest field.
   *
   * <p>Only {@code value} is checked for the reserved {@link #CONTEXT_SEPARATOR} character here;
   * the contexts are checked later, in {@link #wrapTokenStream(TokenStream)}, because sub-classes
   * may supply different contexts through {@link #contexts()}.
   *
   * @param name field name
   * @param value field value to get suggestion on
   * @param weight field weight
   * @param contexts associated contexts; {@code null} is treated as "no contexts" and duplicates
   *     are collapsed
   * @throws IllegalArgumentException if either the name or value is null, if value is an empty
   *     string, if the weight is negative, if value contains any reserved characters
   */
  public ContextSuggestField(String name, String value, int weight, CharSequence... contexts) {
    super(name, value, weight);
    validate(value);
    Collection<CharSequence> contextColl =
        contexts != null ? Arrays.asList(contexts) : Collections.emptyList();
    this.contexts = new HashSet<>(contextColl);
  }

  /**
   * Expert: Sub-classes can inject contexts at index-time
   *
   * @return the contexts to prefix the suggestion with; this implementation returns the contexts
   *     passed to the constructor
   */
  protected Iterable<CharSequence> contexts() {
    return contexts;
  }

  /**
   * Wraps {@code stream} so that every context returned by {@link #contexts()} is emitted as a
   * prefix token (context followed by {@link #CONTEXT_SEPARATOR}) ahead of the original tokens.
   *
   * @param stream token stream produced for the field value
   * @return a {@link CompletionTokenStream} reading from the context-prefixing filter
   * @throws IllegalArgumentException if any context contains the reserved separator character
   */
  @Override
  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
    final Iterable<CharSequence> contexts = contexts();
    for (CharSequence context : contexts) {
      validate(context);
    }
    if (stream instanceof CompletionTokenStream) {
      // TODO this is awkward; is there a better way avoiding re-creating the chain?
      // The prefix filter has to sit below the completion stream, so unwrap the existing one and
      // rebuild it with the same settings on top of the filter.
      final CompletionTokenStream existing = (CompletionTokenStream) stream;
      final PrefixTokenFilter prefixTokenFilter =
          new PrefixTokenFilter(existing.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts);
      return new CompletionTokenStream(
          prefixTokenFilter,
          existing.preserveSep,
          existing.preservePositionIncrements,
          existing.maxGraphExpansions);
    }
    return new CompletionTokenStream(
        new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts));
  }

  @Override
  protected byte type() {
    return TYPE;
  }

  /**
   * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and emits a set of prefix tokens
   * ahead of it. Each prefix token is the prefix followed by the separator character. The first
   * prefix token gets a position increment of 1 and every further prefix token an increment of 0,
   * so all prefixes share one position. If there are no prefixes at all, a single token holding
   * only the separator is emitted before the wrapped stream's tokens.
   */
  private static final class PrefixTokenFilter extends TokenFilter {

    private final char separator;
    private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class);
    private final PositionIncrementAttribute posAttr =
        addAttribute(PositionIncrementAttribute.class);
    private final Iterable<CharSequence> prefixes;

    /** Iterator over {@link #prefixes}; {@code null} until the first token is requested. */
    private Iterator<CharSequence> currentPrefix;

    /**
     * Create a new {@link PrefixTokenFilter}
     *
     * @param input {@link TokenStream} to wrap
     * @param separator Character used separate prefixes from other tokens
     * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes
     */
    public PrefixTokenFilter(
        TokenStream input, char separator, Iterable<CharSequence> prefixes) {
      super(input);
      this.prefixes = prefixes;
      this.currentPrefix = null;
      this.separator = separator;
    }

    @Override
    public boolean incrementToken() throws IOException {
      if (currentPrefix == null) {
        // First call after reset(): start iterating; the first prefix token opens a position.
        currentPrefix = prefixes.iterator();
        posAttr.setPositionIncrement(1);
      } else if (currentPrefix.hasNext()) {
        // Remaining prefixes are stacked on the same position as the first one.
        posAttr.setPositionIncrement(0);
      } else {
        // All prefixes have been emitted: hand over to the wrapped stream.
        return input.incrementToken();
      }
      termAttr.setEmpty();
      // hasNext() is false here only on the first call with an empty prefix set; in that case
      // the token consists of the separator alone.
      if (currentPrefix.hasNext()) {
        termAttr.append(currentPrefix.next());
      }
      termAttr.append(separator);
      return true;
    }

    @Override
    public void reset() throws IOException {
      super.reset();
      // Forget the iteration state so the prefixes are emitted again on the next pass.
      currentPrefix = null;
    }
  }

  /**
   * Rejects values that contain the reserved {@link #CONTEXT_SEPARATOR} character.
   *
   * @param value suggestion value or context to check
   * @throws IllegalArgumentException if the separator occurs anywhere in {@code value}
   */
  private static void validate(final CharSequence value) {
    for (int i = 0; i < value.length(); i++) {
      if (CONTEXT_SEPARATOR == value.charAt(i)) {
        throw new IllegalArgumentException(
            "Illegal value ["
                + value
                + "] UTF-16 codepoint [0x"
                + Integer.toHexString(value.charAt(i))
                + "] at position "
                + i
                + " is a reserved character");
      }
    }
  }
}