
org.apache.lucene.search.suggest.document.SuggestField

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest.document;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.ConcatenateGraphFilter;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.FieldType;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.store.OutputStreamDataOutput;
import org.apache.lucene.util.BytesRef;

/**
 * Field that indexes a string value and a weight as a weighted completion against a named
 * suggester. Field is tokenized, not stored and stores documents, frequencies and positions. Field
 * can be used to provide near real time document suggestions.
 *
 *
 * <p>Besides the usual {@link org.apache.lucene.analysis.Analyzer}s, {@link CompletionAnalyzer} can
 * be used to tune suggest field only parameters (e.g. preserving token separators, preserving
 * position increments when converting the token stream to an automaton)
 *
 * <p>Example indexing usage:
 *
 * <pre>
 * document.add(new SuggestField(name, "suggestion", 4));
 * </pre>
 *
 * <p>To perform document suggestions based on this field, use {@link
 * SuggestIndexSearcher#suggest(CompletionQuery, int, boolean)}
 *
 * @lucene.experimental
 */
public class SuggestField extends Field {

  /** Default field type for suggest field */
  public static final FieldType FIELD_TYPE = new FieldType();

  static {
    FIELD_TYPE.setTokenized(true);
    FIELD_TYPE.setStored(false);
    FIELD_TYPE.setStoreTermVectors(false);
    FIELD_TYPE.setOmitNorms(false);
    FIELD_TYPE.setIndexOptions(IndexOptions.DOCS_AND_FREQS_AND_POSITIONS);
    FIELD_TYPE.freeze();
  }

  static final byte TYPE = 0;

  private final BytesRef surfaceForm;
  private final int weight;

  /**
   * Creates a {@link SuggestField}
   *
   * @param name field name
   * @param value field value to get suggestions on
   * @param weight field weight
   * @throws IllegalArgumentException if either the name or value is null, if value is an empty
   *     string, if the weight is negative, if value contains any reserved characters
   */
  public SuggestField(String name, String value, int weight) {
    super(name, value, FIELD_TYPE);
    if (weight < 0) {
      throw new IllegalArgumentException("weight must be >= 0");
    }
    if (value.length() == 0) {
      throw new IllegalArgumentException("value must have a length > 0");
    }
    for (int i = 0; i < value.length(); i++) {
      if (isReserved(value.charAt(i))) {
        throw new IllegalArgumentException(
            "Illegal input ["
                + value
                + "] UTF-16 codepoint [0x"
                + Integer.toHexString((int) value.charAt(i))
                + "] at position "
                + i
                + " is a reserved character");
      }
    }
    this.surfaceForm = new BytesRef(value);
    this.weight = weight;
  }

  @Override
  public TokenStream tokenStream(Analyzer analyzer, TokenStream reuse) {
    CompletionTokenStream completionStream = wrapTokenStream(super.tokenStream(analyzer, reuse));
    completionStream.setPayload(buildSuggestPayload());
    return completionStream;
  }

  /**
   * Wraps a stream with a CompletionTokenStream.
   *
   * <p>Subclasses can override this method to change the indexing pipeline.
   */
  protected CompletionTokenStream wrapTokenStream(TokenStream stream) {
    if (stream instanceof CompletionTokenStream) {
      return (CompletionTokenStream) stream;
    } else {
      return new CompletionTokenStream(stream);
    }
  }

  /** Returns a byte to denote the type of the field */
  protected byte type() {
    return TYPE;
  }

  private BytesRef buildSuggestPayload() {
    ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
    try (OutputStreamDataOutput output = new OutputStreamDataOutput(byteArrayOutputStream)) {
      output.writeVInt(surfaceForm.length);
      output.writeBytes(surfaceForm.bytes, surfaceForm.offset, surfaceForm.length);
      output.writeVInt(weight + 1);
      output.writeByte(type());
    } catch (IOException e) {
      throw new RuntimeException(e); // not possible, it's a ByteArrayOutputStream!
    }
    return new BytesRef(byteArrayOutputStream.toByteArray());
  }

  private boolean isReserved(char c) {
    switch (c) {
      case ConcatenateGraphFilter.SEP_LABEL:
      case CompletionAnalyzer.HOLE_CHARACTER:
      case NRTSuggesterBuilder.END_BYTE:
        return true;
      default:
        return false;
    }
  }
}
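
The class javadoc shows only the single indexing line. The sketch below (not part of this source file) shows one way a SuggestField is typically indexed and then queried through SuggestIndexSearcher. The field name "suggest", the use of ByteBuffersDirectory, and the concrete codec and completion postings format classes (Lucene99Codec, Completion99PostingsFormat) are assumptions tied to a Lucene 9.9-era release; substitute the classes that match the version actually in use. The key point is that a suggest field must be written with a completion postings format, which is why the codec overrides the format for that one field; a CompletionAnalyzer could additionally wrap the analyzer to tune separator and position-increment handling.

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.codecs.PostingsFormat;
import org.apache.lucene.codecs.lucene99.Lucene99Codec;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.suggest.document.Completion99PostingsFormat;
import org.apache.lucene.search.suggest.document.PrefixCompletionQuery;
import org.apache.lucene.search.suggest.document.SuggestField;
import org.apache.lucene.search.suggest.document.SuggestIndexSearcher;
import org.apache.lucene.search.suggest.document.TopSuggestDocs;
import org.apache.lucene.store.ByteBuffersDirectory;
import org.apache.lucene.store.Directory;

public class SuggestFieldExample {
  public static void main(String[] args) throws Exception {
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // Suggest fields must be indexed with a completion postings format, so the
    // codec is overridden for the "suggest" field only. The concrete class names
    // here are version-dependent assumptions (Lucene 9.9-era).
    IndexWriterConfig config = new IndexWriterConfig(analyzer);
    config.setCodec(
        new Lucene99Codec() {
          @Override
          public PostingsFormat getPostingsFormatForField(String field) {
            if ("suggest".equals(field)) {
              return new Completion99PostingsFormat();
            }
            return super.getPostingsFormatForField(field);
          }
        });

    try (Directory dir = new ByteBuffersDirectory();
        IndexWriter writer = new IndexWriter(dir, config)) {
      // Indexing usage from the class javadoc: a surface form plus a weight used for ranking.
      Document document = new Document();
      document.add(new SuggestField("suggest", "suggestion", 4));
      writer.addDocument(document);
      writer.commit();

      try (DirectoryReader reader = DirectoryReader.open(writer)) {
        SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
        PrefixCompletionQuery query =
            new PrefixCompletionQuery(analyzer, new Term("suggest", "sugg"));
        // Top 5 suggestions; the boolean asks the searcher to skip duplicate surface forms,
        // matching the suggest(CompletionQuery, int, boolean) signature cited in the javadoc.
        TopSuggestDocs suggestions = searcher.suggest(query, 5, true);
        for (TopSuggestDocs.SuggestScoreDoc hit : suggestions.scoreLookupDocs()) {
          System.out.println(hit.key + " (score derived from weight: " + hit.score + ")");
        }
      }
    }
  }
}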