All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.search.suggest.phrase.PhraseSuggestionBuilder Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */
package org.elasticsearch.search.suggest.phrase;

import org.apache.lucene.analysis.Analyzer;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.Version;
import org.elasticsearch.common.ParsingException;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.lucene.BytesRefs;
import org.elasticsearch.index.analysis.AnalyzerComponentsProvider;
import org.elasticsearch.index.analysis.IndexAnalyzers;
import org.elasticsearch.index.analysis.NamedAnalyzer;
import org.elasticsearch.index.analysis.ShingleTokenFilterFactory;
import org.elasticsearch.index.analysis.TokenFilterFactory;
import org.elasticsearch.index.query.SearchExecutionContext;
import org.elasticsearch.script.Script;
import org.elasticsearch.script.ScriptType;
import org.elasticsearch.script.TemplateScript;
import org.elasticsearch.search.suggest.SuggestionBuilder;
import org.elasticsearch.search.suggest.SuggestionSearchContext.SuggestionContext;
import org.elasticsearch.search.suggest.phrase.PhraseSuggestionContext.DirectCandidateGenerator;
import org.elasticsearch.xcontent.ParseField;
import org.elasticsearch.xcontent.ToXContentObject;
import org.elasticsearch.xcontent.XContentBuilder;
import org.elasticsearch.xcontent.XContentParser;
import org.elasticsearch.xcontent.XContentParser.Token;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Objects;
import java.util.Set;

/**
 * Defines the actual suggest command for phrase suggestions ( {@code phrase}).
 */
public class PhraseSuggestionBuilder extends SuggestionBuilder {

    public static final String SUGGESTION_NAME = "phrase";

    protected static final ParseField MAXERRORS_FIELD = new ParseField("max_errors");
    protected static final ParseField RWE_LIKELIHOOD_FIELD = new ParseField("real_word_error_likelihood");
    protected static final ParseField SEPARATOR_FIELD = new ParseField("separator");
    protected static final ParseField CONFIDENCE_FIELD = new ParseField("confidence");
    protected static final ParseField GRAMSIZE_FIELD = new ParseField("gram_size");
    protected static final ParseField SMOOTHING_MODEL_FIELD = new ParseField("smoothing");
    protected static final ParseField FORCE_UNIGRAM_FIELD = new ParseField("force_unigrams");
    protected static final ParseField TOKEN_LIMIT_FIELD = new ParseField("token_limit");
    protected static final ParseField HIGHLIGHT_FIELD = new ParseField("highlight");
    protected static final ParseField PRE_TAG_FIELD = new ParseField("pre_tag");
    protected static final ParseField POST_TAG_FIELD = new ParseField("post_tag");
    protected static final ParseField COLLATE_FIELD = new ParseField("collate");
    protected static final ParseField COLLATE_QUERY_FIELD = new ParseField("query");
    protected static final ParseField COLLATE_QUERY_PARAMS = new ParseField("params");
    protected static final ParseField COLLATE_QUERY_PRUNE = new ParseField("prune");

    private float maxErrors = PhraseSuggestionContext.DEFAULT_MAX_ERRORS;
    private String separator = PhraseSuggestionContext.DEFAULT_SEPARATOR;
    private float realWordErrorLikelihood = PhraseSuggestionContext.DEFAULT_RWE_ERRORLIKELIHOOD;
    private float confidence = PhraseSuggestionContext.DEFAULT_CONFIDENCE;
    // gramSize needs to be optional although there is a default, if unset parser try to detect and use shingle size
    private Integer gramSize;
    private boolean forceUnigrams = PhraseSuggestionContext.DEFAULT_REQUIRE_UNIGRAM;
    private int tokenLimit = NoisyChannelSpellChecker.DEFAULT_TOKEN_LIMIT;
    private String preTag;
    private String postTag;
    private Script collateQuery;
    private Map collateParams;
    private boolean collatePrune = PhraseSuggestionContext.DEFAULT_COLLATE_PRUNE;
    private SmoothingModel model;
    private final Map> generators = new HashMap<>();

    public PhraseSuggestionBuilder(String field) {
        super(field);
    }

    /**
     * internal copy constructor that copies over all class fields except for the field which is
     * set to the one provided in the first argument
     */
    private PhraseSuggestionBuilder(String fieldname, PhraseSuggestionBuilder in) {
        super(fieldname, in);
        maxErrors = in.maxErrors;
        separator = in.separator;
        realWordErrorLikelihood = in.realWordErrorLikelihood;
        confidence = in.confidence;
        gramSize = in.gramSize;
        forceUnigrams = in.forceUnigrams;
        tokenLimit = in.tokenLimit;
        preTag = in.preTag;
        postTag = in.postTag;
        collateQuery = in.collateQuery;
        collateParams = in.collateParams;
        collatePrune = in.collatePrune;
        model = in.model;
        generators.putAll(in.generators);
    }

    /**
     * Read from a stream.
     */
    public PhraseSuggestionBuilder(StreamInput in) throws IOException {
        super(in);
        maxErrors = in.readFloat();
        realWordErrorLikelihood = in.readFloat();
        confidence = in.readFloat();
        gramSize = in.readOptionalVInt();
        model = in.readOptionalNamedWriteable(SmoothingModel.class);
        forceUnigrams = in.readBoolean();
        tokenLimit = in.readVInt();
        preTag = in.readOptionalString();
        postTag = in.readOptionalString();
        separator = in.readString();
        if (in.readBoolean()) {
            collateQuery = new Script(in);
        }
        collateParams = in.readMap();
        collatePrune = in.readOptionalBoolean();
        int generatorsEntries = in.readVInt();
        for (int i = 0; i < generatorsEntries; i++) {
            String type = in.readString();
            int numberOfGenerators = in.readVInt();
            List generatorsList = new ArrayList<>(numberOfGenerators);
            for (int g = 0; g < numberOfGenerators; g++) {
                DirectCandidateGeneratorBuilder generator = new DirectCandidateGeneratorBuilder(in);
                generatorsList.add(generator);
            }
            generators.put(type, generatorsList);
        }
    }

    @Override
    public void doWriteTo(StreamOutput out) throws IOException {
        out.writeFloat(maxErrors);
        out.writeFloat(realWordErrorLikelihood);
        out.writeFloat(confidence);
        out.writeOptionalVInt(gramSize);
        out.writeOptionalNamedWriteable(model);
        out.writeBoolean(forceUnigrams);
        out.writeVInt(tokenLimit);
        out.writeOptionalString(preTag);
        out.writeOptionalString(postTag);
        out.writeString(separator);
        if (collateQuery != null) {
            out.writeBoolean(true);
            collateQuery.writeTo(out);
        } else {
            out.writeBoolean(false);
        }
        out.writeMapWithConsistentOrder(collateParams);
        out.writeOptionalBoolean(collatePrune);
        out.writeVInt(this.generators.size());
        for (Entry> entry : this.generators.entrySet()) {
            out.writeString(entry.getKey());
            List generatorsList = entry.getValue();
            out.writeVInt(generatorsList.size());
            for (CandidateGenerator generator : generatorsList) {
                generator.writeTo(out);
            }
        }
    }

    /**
     * Sets the gram size for the n-gram model used for this suggester. The
     * default value is {@code 1} corresponding to {@code unigrams}. Use
     * {@code 2} for {@code bigrams} and {@code 3} for {@code trigrams}.
     */
    public PhraseSuggestionBuilder gramSize(int gramSize) {
        if (gramSize < 1) {
            throw new IllegalArgumentException("gramSize must be >= 1");
        }
        this.gramSize = gramSize;
        return this;
    }

    /**
     * get the {@link #gramSize(int)} parameter
     */
    public Integer gramSize() {
        return this.gramSize;
    }

    /**
     * Sets the maximum percentage of the terms that at most considered to be
     * misspellings in order to form a correction. This method accepts a float
     * value in the range [0..1) as a fraction of the actual query terms a
     * number {@code >=1} as an absolute number of query terms.
     *
     * The default is set to {@code 1.0} which corresponds to that only
     * corrections with at most 1 misspelled term are returned.
     */
    public PhraseSuggestionBuilder maxErrors(float maxErrors) {
        if (maxErrors <= 0.0) {
            throw new IllegalArgumentException("max_error must be > 0.0");
        }
        this.maxErrors = maxErrors;
        return this;
    }

    /**
     * get the maxErrors setting
     */
    public Float maxErrors() {
        return this.maxErrors;
    }

    /**
     * Sets the separator that is used to separate terms in the bigram field. If
     * not set the whitespace character is used as a separator.
     */
    public PhraseSuggestionBuilder separator(String separator) {
        Objects.requireNonNull(separator, "separator cannot be set to null");
        this.separator = separator;
        return this;
    }

    /**
     * get the separator that is used to separate terms in the bigram field.
     */
    public String separator() {
        return this.separator;
    }

    /**
     * Sets the likelihood of a term being a misspelled even if the term exists
     * in the dictionary. The default it {@code 0.95} corresponding to 5% or
     * the real words are misspelled.
     */
    public PhraseSuggestionBuilder realWordErrorLikelihood(float realWordErrorLikelihood) {
        if (realWordErrorLikelihood <= 0.0) {
            throw new IllegalArgumentException("real_word_error_likelihood must be > 0.0");
        }
        this.realWordErrorLikelihood = realWordErrorLikelihood;
        return this;
    }

    /**
     * get the {@link #realWordErrorLikelihood(float)} parameter
     */
    public Float realWordErrorLikelihood() {
        return this.realWordErrorLikelihood;
    }

    /**
     * Sets the confidence level for this suggester. The confidence level
     * defines a factor applied to the input phrases score which is used as a
     * threshold for other suggest candidates. Only candidates that score higher
     * than the threshold will be included in the result. For instance a
     * confidence level of {@code 1.0} will only return suggestions that score
     * higher than the input phrase. If set to {@code 0.0} the top N candidates
     * are returned. The default is {@code 1.0}
     */
    public PhraseSuggestionBuilder confidence(float confidence) {
        if (confidence < 0.0) {
            throw new IllegalArgumentException("confidence must be >= 0.0");
        }
        this.confidence = confidence;
        return this;
    }

    /**
     * get the {@link #confidence()} parameter
     */
    public Float confidence() {
        return this.confidence;
    }

    /**
     * Adds a {@link CandidateGenerator} to this suggester. The
     * {@link CandidateGenerator} is used to draw candidates for each individual
     * phrase term before the candidates are scored.
     */
    public PhraseSuggestionBuilder addCandidateGenerator(CandidateGenerator generator) {
        List list = this.generators.get(generator.getType());
        if (list == null) {
            list = new ArrayList<>();
            this.generators.put(generator.getType(), list);
        }
        list.add(generator);
        return this;
    }

    /**
     * Clear the candidate generators.
     */
    public PhraseSuggestionBuilder clearCandidateGenerators() {
        this.generators.clear();
        return this;
    }

    /**
     * get the candidate generators.
     */
    Map> getCandidateGenerators() {
        return this.generators;
    }

    /**
     * If set to true the phrase suggester will fail if the analyzer only
     * produces ngrams. the default it true.
     */
    public PhraseSuggestionBuilder forceUnigrams(boolean forceUnigrams) {
        this.forceUnigrams = forceUnigrams;
        return this;
    }

    /**
     * get the setting for {@link #forceUnigrams()}
     */
    public Boolean forceUnigrams() {
        return this.forceUnigrams;
    }

    /**
     * Sets an explicit smoothing model used for this suggester. The default is
     * {@link StupidBackoff}.
     */
    public PhraseSuggestionBuilder smoothingModel(SmoothingModel model) {
        this.model = model;
        return this;
    }

    /**
     * Gets the {@link SmoothingModel}
     */
    public SmoothingModel smoothingModel() {
        return this.model;
    }

    public PhraseSuggestionBuilder tokenLimit(int tokenLimit) {
        if (tokenLimit <= 0) {
            throw new IllegalArgumentException("token_limit must be >= 1");
        }
        this.tokenLimit = tokenLimit;
        return this;
    }

    /**
     * get the {@link #tokenLimit(int)} parameter
     */
    public Integer tokenLimit() {
        return this.tokenLimit;
    }

    /**
     * Setup highlighting for suggestions.  If this is called a highlight field
     * is returned with suggestions wrapping changed tokens with preTag and postTag.
     */
    public PhraseSuggestionBuilder highlight(String preTag, String postTag) {
        if ((preTag == null) != (postTag == null)) {
            throw new IllegalArgumentException("Pre and post tag must both be null or both not be null.");
        }
        this.preTag = preTag;
        this.postTag = postTag;
        return this;
    }

    /**
     * get the pre-tag for the highlighter set with {@link #highlight(String, String)}
     */
    public String preTag() {
        return this.preTag;
    }

    /**
     * get the post-tag for the highlighter set with {@link #highlight(String, String)}
     */
    public String postTag() {
        return this.postTag;
    }

    /**
     * Sets a query used for filtering out suggested phrases (collation).
     */
    public PhraseSuggestionBuilder collateQuery(String collateQuery) {
        this.collateQuery = new Script(ScriptType.INLINE, "mustache", collateQuery, Collections.emptyMap());
        return this;
    }

    /**
     * Sets a query used for filtering out suggested phrases (collation).
     */
    public PhraseSuggestionBuilder collateQuery(Script collateQueryTemplate) {
        this.collateQuery = collateQueryTemplate;
        return this;
    }

    /**
     * gets the query used for filtering out suggested phrases (collation).
     */
    public Script collateQuery() {
        return this.collateQuery;
    }

    /**
     * Adds additional parameters for collate scripts. Previously added parameters on the
     * same builder will be overwritten.
     */
    public PhraseSuggestionBuilder collateParams(Map collateParams) {
        Objects.requireNonNull(collateParams, "collate parameters cannot be null.");
        this.collateParams = new HashMap<>(collateParams);
        return this;
    }

    /**
     * gets additional params for collate script
     */
    public Map collateParams() {
        return this.collateParams;
    }

    /**
     * Sets whether to prune suggestions after collation
     */
    public PhraseSuggestionBuilder collatePrune(boolean collatePrune) {
        this.collatePrune = collatePrune;
        return this;
    }

    /**
     * Gets whether to prune suggestions after collation
     */
    public Boolean collatePrune() {
        return this.collatePrune;
    }

    @Override
    public XContentBuilder innerToXContent(XContentBuilder builder, Params params) throws IOException {
        builder.field(RWE_LIKELIHOOD_FIELD.getPreferredName(), realWordErrorLikelihood);
        builder.field(CONFIDENCE_FIELD.getPreferredName(), confidence);
        builder.field(SEPARATOR_FIELD.getPreferredName(), separator);
        builder.field(MAXERRORS_FIELD.getPreferredName(), maxErrors);
        if (gramSize != null) {
            builder.field(GRAMSIZE_FIELD.getPreferredName(), gramSize);
        }
        builder.field(FORCE_UNIGRAM_FIELD.getPreferredName(), forceUnigrams);
        builder.field(TOKEN_LIMIT_FIELD.getPreferredName(), tokenLimit);
        if (generators.isEmpty() == false) {
            Set>> entrySet = generators.entrySet();
            for (Entry> entry : entrySet) {
                builder.startArray(entry.getKey());
                for (CandidateGenerator generator : entry.getValue()) {
                    generator.toXContent(builder, params);
                }
                builder.endArray();
            }
        }
        if (model != null) {
            builder.startObject(SMOOTHING_MODEL_FIELD.getPreferredName());
            model.toXContent(builder, params);
            builder.endObject();
        }
        if (preTag != null) {
            builder.startObject(HIGHLIGHT_FIELD.getPreferredName());
            builder.field(PRE_TAG_FIELD.getPreferredName(), preTag);
            builder.field(POST_TAG_FIELD.getPreferredName(), postTag);
            builder.endObject();
        }
        if (collateQuery != null) {
            builder.startObject(COLLATE_FIELD.getPreferredName());
            builder.field(COLLATE_QUERY_FIELD.getPreferredName(), collateQuery);
            if (collateParams != null) {
                builder.field(COLLATE_QUERY_PARAMS.getPreferredName(), collateParams);
            }
            builder.field(COLLATE_QUERY_PRUNE.getPreferredName(), collatePrune);
            builder.endObject();
        }
        return builder;
    }

    public static PhraseSuggestionBuilder fromXContent(XContentParser parser) throws IOException {
        PhraseSuggestionBuilder tmpSuggestion = new PhraseSuggestionBuilder("_na_");
        XContentParser.Token token;
        String currentFieldName = null;
        String fieldname = null;
        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (token == XContentParser.Token.FIELD_NAME) {
                currentFieldName = parser.currentName();
            } else if (token.isValue()) {
                if (SuggestionBuilder.ANALYZER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.analyzer(parser.text());
                } else if (SuggestionBuilder.FIELDNAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    fieldname = parser.text();
                } else if (SuggestionBuilder.SIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.size(parser.intValue());
                } else if (SuggestionBuilder.SHARDSIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.shardSize(parser.intValue());
                } else if (PhraseSuggestionBuilder.RWE_LIKELIHOOD_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.realWordErrorLikelihood(parser.floatValue());
                } else if (PhraseSuggestionBuilder.CONFIDENCE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.confidence(parser.floatValue());
                } else if (PhraseSuggestionBuilder.SEPARATOR_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.separator(parser.text());
                } else if (PhraseSuggestionBuilder.MAXERRORS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.maxErrors(parser.floatValue());
                } else if (PhraseSuggestionBuilder.GRAMSIZE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.gramSize(parser.intValue());
                } else if (PhraseSuggestionBuilder.FORCE_UNIGRAM_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.forceUnigrams(parser.booleanValue());
                } else if (PhraseSuggestionBuilder.TOKEN_LIMIT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    tmpSuggestion.tokenLimit(parser.intValue());
                } else {
                    throw new ParsingException(
                        parser.getTokenLocation(),
                        "suggester[phrase] doesn't support field [" + currentFieldName + "]"
                    );
                }
            } else if (token == Token.START_ARRAY) {
                if (DirectCandidateGeneratorBuilder.DIRECT_GENERATOR_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    // for now we only have a single type of generators
                    while ((token = parser.nextToken()) == Token.START_OBJECT) {
                        tmpSuggestion.addCandidateGenerator(DirectCandidateGeneratorBuilder.PARSER.apply(parser, null));
                    }
                } else {
                    throw new ParsingException(
                        parser.getTokenLocation(),
                        "suggester[phrase]  doesn't support array field [" + currentFieldName + "]"
                    );
                }
            } else if (token == Token.START_OBJECT) {
                if (PhraseSuggestionBuilder.SMOOTHING_MODEL_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    ensureNoSmoothing(tmpSuggestion);
                    tmpSuggestion.smoothingModel(SmoothingModel.fromXContent(parser));
                } else if (PhraseSuggestionBuilder.HIGHLIGHT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    String preTag = null;
                    String postTag = null;
                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                        if (token == XContentParser.Token.FIELD_NAME) {
                            currentFieldName = parser.currentName();
                        } else if (token.isValue()) {
                            if (PhraseSuggestionBuilder.PRE_TAG_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                                preTag = parser.text();
                            } else if (PhraseSuggestionBuilder.POST_TAG_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                                postTag = parser.text();
                            } else {
                                throw new ParsingException(
                                    parser.getTokenLocation(),
                                    "suggester[phrase][highlight] doesn't support field [" + currentFieldName + "]"
                                );
                            }
                        }
                    }
                    tmpSuggestion.highlight(preTag, postTag);
                } else if (PhraseSuggestionBuilder.COLLATE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                    while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                        if (token == XContentParser.Token.FIELD_NAME) {
                            currentFieldName = parser.currentName();
                        } else if (PhraseSuggestionBuilder.COLLATE_QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) {
                            if (tmpSuggestion.collateQuery() != null) {
                                throw new ParsingException(
                                    parser.getTokenLocation(),
                                    "suggester[phrase][collate] query already set, doesn't support additional [" + currentFieldName + "]"
                                );
                            }
                            Script template = Script.parse(parser, Script.DEFAULT_TEMPLATE_LANG);
                            tmpSuggestion.collateQuery(template);
                        } else if (PhraseSuggestionBuilder.COLLATE_QUERY_PARAMS.match(currentFieldName, parser.getDeprecationHandler())) {
                            tmpSuggestion.collateParams(parser.map());
                        } else if (PhraseSuggestionBuilder.COLLATE_QUERY_PRUNE.match(currentFieldName, parser.getDeprecationHandler())) {
                            if (parser.isBooleanValue()) {
                                tmpSuggestion.collatePrune(parser.booleanValue());
                            } else {
                                throw new ParsingException(
                                    parser.getTokenLocation(),
                                    "suggester[phrase][collate] prune must be either 'true' or 'false'"
                                );
                            }
                        } else {
                            throw new ParsingException(
                                parser.getTokenLocation(),
                                "suggester[phrase][collate] doesn't support field [" + currentFieldName + "]"
                            );
                        }
                    }
                } else {
                    throw new ParsingException(
                        parser.getTokenLocation(),
                        "suggester[phrase]  doesn't support array field [" + currentFieldName + "]"
                    );
                }
            } else {
                throw new ParsingException(parser.getTokenLocation(), "suggester[phrase] doesn't support field [" + currentFieldName + "]");
            }
        }

        // now we should have field name, check and copy fields over to the suggestion builder we return
        if (fieldname == null) {
            throw new ElasticsearchParseException("the required field option [" + FIELDNAME_FIELD.getPreferredName() + "] is missing");
        }
        return new PhraseSuggestionBuilder(fieldname, tmpSuggestion);
    }

    @Override
    public SuggestionContext build(SearchExecutionContext context) throws IOException {
        PhraseSuggestionContext suggestionContext = new PhraseSuggestionContext(context);
        // copy over common settings to each suggestion builder
        populateCommonFields(context, suggestionContext);
        suggestionContext.setSeparator(BytesRefs.toBytesRef(this.separator));
        suggestionContext.setRealWordErrorLikelihood(this.realWordErrorLikelihood);
        suggestionContext.setConfidence(this.confidence);
        suggestionContext.setMaxErrors(this.maxErrors);
        suggestionContext.setRequireUnigram(this.forceUnigrams);
        suggestionContext.setTokenLimit(this.tokenLimit);
        suggestionContext.setPreTag(BytesRefs.toBytesRef(this.preTag));
        suggestionContext.setPostTag(BytesRefs.toBytesRef(this.postTag));

        if (this.gramSize != null) {
            suggestionContext.setGramSize(this.gramSize);
        }

        for (List candidateGenerators : this.generators.values()) {
            for (CandidateGenerator candidateGenerator : candidateGenerators) {
                suggestionContext.addGenerator(candidateGenerator.build(context.getIndexAnalyzers()));
            }
        }

        if (this.model != null) {
            suggestionContext.setModel(this.model.buildWordScorerFactory());
        }

        if (this.collateQuery != null) {
            TemplateScript.Factory scriptFactory = context.compile(this.collateQuery, TemplateScript.CONTEXT);
            suggestionContext.setCollateQueryScript(scriptFactory);
            if (this.collateParams != null) {
                suggestionContext.setCollateScriptParams(this.collateParams);
            }
            suggestionContext.setCollatePrune(this.collatePrune);
        }

        if (this.gramSize == null || suggestionContext.generators().isEmpty()) {
            final ShingleTokenFilterFactory.Factory shingleFilterFactory = getShingleFilterFactory(suggestionContext.getAnalyzer());
            if (this.gramSize == null) {
                // try to detect the shingle size
                if (shingleFilterFactory != null) {
                    suggestionContext.setGramSize(shingleFilterFactory.getMaxShingleSize());
                    if (suggestionContext.getAnalyzer() == null
                        && shingleFilterFactory.getMinShingleSize() > 1
                        && shingleFilterFactory.getOutputUnigrams() == false) {
                        throw new IllegalArgumentException(
                            "The default analyzer for field: ["
                                + suggestionContext.getField()
                                + "] doesn't emit unigrams. If this is intentional try to set the analyzer explicitly"
                        );
                    }
                }
            }
            if (suggestionContext.generators().isEmpty()) {
                if (shingleFilterFactory != null
                    && shingleFilterFactory.getMinShingleSize() > 1
                    && shingleFilterFactory.getOutputUnigrams() == false
                    && suggestionContext.getRequireUnigram()) {
                    throw new IllegalArgumentException(
                        "The default candidate generator for phrase suggest can't operate on field: ["
                            + suggestionContext.getField()
                            + "] since it doesn't emit unigrams. "
                            + "If this is intentional try to set the candidate generator field explicitly"
                    );
                }
                // use a default generator on the same field
                DirectCandidateGenerator generator = new DirectCandidateGenerator();
                generator.setField(suggestionContext.getField());
                suggestionContext.addGenerator(generator);
            }
        }
        return suggestionContext;
    }

    private static ShingleTokenFilterFactory.Factory getShingleFilterFactory(Analyzer analyzer) {
        if (analyzer instanceof NamedAnalyzer) {
            analyzer = ((NamedAnalyzer) analyzer).analyzer();
        }
        if (analyzer instanceof AnalyzerComponentsProvider) {
            final TokenFilterFactory[] tokenFilters = ((AnalyzerComponentsProvider) analyzer).getComponents().getTokenFilters();
            for (TokenFilterFactory tokenFilterFactory : tokenFilters) {
                if (tokenFilterFactory instanceof ShingleTokenFilterFactory) {
                    return ((ShingleTokenFilterFactory) tokenFilterFactory).getInnerFactory();
                } else if (tokenFilterFactory instanceof ShingleTokenFilterFactory.Factory) {
                    return (ShingleTokenFilterFactory.Factory) tokenFilterFactory;
                }
            }
        }
        return null;
    }

    private static void ensureNoSmoothing(PhraseSuggestionBuilder suggestion) {
        if (suggestion.smoothingModel() != null) {
            throw new IllegalArgumentException("only one smoothing model supported");
        }
    }

    @Override
    public String getWriteableName() {
        return SUGGESTION_NAME;
    }

    @Override
    public Version getMinimalSupportedVersion() {
        return Version.V_EMPTY;
    }

    @Override
    protected boolean doEquals(PhraseSuggestionBuilder other) {
        return Objects.equals(maxErrors, other.maxErrors)
            && Objects.equals(separator, other.separator)
            && Objects.equals(realWordErrorLikelihood, other.realWordErrorLikelihood)
            && Objects.equals(confidence, other.confidence)
            && Objects.equals(generators, other.generators)
            && Objects.equals(gramSize, other.gramSize)
            && Objects.equals(model, other.model)
            && Objects.equals(forceUnigrams, other.forceUnigrams)
            && Objects.equals(tokenLimit, other.tokenLimit)
            && Objects.equals(preTag, other.preTag)
            && Objects.equals(postTag, other.postTag)
            && Objects.equals(collateQuery, other.collateQuery)
            && Objects.equals(collateParams, other.collateParams)
            && Objects.equals(collatePrune, other.collatePrune);
    }

    @Override
    protected int doHashCode() {
        return Objects.hash(
            maxErrors,
            separator,
            realWordErrorLikelihood,
            confidence,
            generators,
            gramSize,
            model,
            forceUnigrams,
            tokenLimit,
            preTag,
            postTag,
            collateQuery,
            collateParams,
            collatePrune
        );
    }

    /**
     * {@link CandidateGenerator} interface.
     */
    public interface CandidateGenerator extends Writeable, ToXContentObject {
        String getType();

        PhraseSuggestionContext.DirectCandidateGenerator build(IndexAnalyzers indexAnalyzers) throws IOException;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy