All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.search.suggest.document.ContextSuggestField Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;

/**
 * {@link SuggestField} which additionally takes in a set of contexts. Example usage of adding a
 * suggestion with contexts is as follows:
 *
 * 
 *  document.add(
 *   new ContextSuggestField(name, "suggestion", Arrays.asList("context1", "context2"),  4));
 * 
* * Use {@link ContextQuery} to boost and/or filter suggestions at query-time. Use {@link * PrefixCompletionQuery}, {@link RegexCompletionQuery} or {@link FuzzyCompletionQuery} if context * boost/filtering are not needed. * * @lucene.experimental */ public class ContextSuggestField extends SuggestField { /** Separator used between context value and the suggest field value */ public static final int CONTEXT_SEPARATOR = '\u001D'; static final byte TYPE = 1; private final Set contexts; /** * Creates a context-enabled suggest field * * @param name field name * @param value field value to get suggestion on * @param weight field weight * @param contexts associated contexts * @throws IllegalArgumentException if either the name or value is null, if value is an empty * string, if the weight is negative, if value or contexts contains any reserved characters */ public ContextSuggestField(String name, String value, int weight, CharSequence... contexts) { super(name, value, weight); validate(value); Collection contextColl = contexts != null ? Arrays.asList(contexts) : Collections.emptyList(); this.contexts = new HashSet<>(contextColl); } /** Expert: Sub-classes can inject contexts at index-time */ protected Iterable contexts() { return contexts; } @Override protected CompletionTokenStream wrapTokenStream(TokenStream stream) { final Iterable contexts = contexts(); for (CharSequence context : contexts) { validate(context); } CompletionTokenStream completionTokenStream; if (stream instanceof CompletionTokenStream) { // TODO this is awkward; is there a better way avoiding re-creating the chain? 
completionTokenStream = (CompletionTokenStream) stream; PrefixTokenFilter prefixTokenFilter = new PrefixTokenFilter( completionTokenStream.inputTokenStream, (char) CONTEXT_SEPARATOR, contexts); completionTokenStream = new CompletionTokenStream( prefixTokenFilter, completionTokenStream.preserveSep, completionTokenStream.preservePositionIncrements, completionTokenStream.maxGraphExpansions); } else { completionTokenStream = new CompletionTokenStream( new PrefixTokenFilter(stream, (char) CONTEXT_SEPARATOR, contexts)); } return completionTokenStream; } @Override protected byte type() { return TYPE; } /** * The {@link PrefixTokenFilter} wraps a {@link TokenStream} and adds a set prefixes ahead. The * position attribute will not be incremented for the prefixes. */ private static final class PrefixTokenFilter extends TokenFilter { private final char separator; private final CharTermAttribute termAttr = addAttribute(CharTermAttribute.class); private final PositionIncrementAttribute posAttr = addAttribute(PositionIncrementAttribute.class); private final Iterable prefixes; private Iterator currentPrefix; /** * Create a new {@link PrefixTokenFilter} * * @param input {@link TokenStream} to wrap * @param separator Character used separate prefixes from other tokens * @param prefixes {@link Iterable} of {@link CharSequence} which keeps all prefixes */ public PrefixTokenFilter(TokenStream input, char separator, Iterable prefixes) { super(input); this.prefixes = prefixes; this.currentPrefix = null; this.separator = separator; } @Override public boolean incrementToken() throws IOException { if (currentPrefix != null) { if (!currentPrefix.hasNext()) { return input.incrementToken(); } else { posAttr.setPositionIncrement(0); } } else { currentPrefix = prefixes.iterator(); termAttr.setEmpty(); posAttr.setPositionIncrement(1); } termAttr.setEmpty(); if (currentPrefix.hasNext()) { termAttr.append(currentPrefix.next()); } termAttr.append(separator); return true; } @Override public void 
reset() throws IOException { super.reset(); currentPrefix = null; } } private void validate(final CharSequence value) { for (int i = 0; i < value.length(); i++) { if (CONTEXT_SEPARATOR == value.charAt(i)) { throw new IllegalArgumentException( "Illegal value [" + value + "] UTF-16 codepoint [0x" + Integer.toHexString((int) value.charAt(i)) + "] at position " + i + " is a reserved character"); } } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy