All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.opensearch.index.query.MatchQueryBuilder Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.index.query;

import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.Query;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.common.xcontent.LoggingDeprecationHandler;
import org.opensearch.core.ParseField;
import org.opensearch.core.common.ParsingException;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.query.support.QueryParsers;
import org.opensearch.index.search.MatchQuery;
import org.opensearch.index.search.MatchQuery.ZeroTermsQuery;

import java.io.IOException;
import java.util.Objects;

/**
 * Match query is a query that analyzes the text and constructs a query as the
 * result of the analysis.
 *
 * @opensearch.internal
 */
public class MatchQueryBuilder extends AbstractQueryBuilder {

    private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, "
        + "the [match] query can skip block of documents efficiently if the total number of hits is not tracked";

    public static final ParseField ZERO_TERMS_QUERY_FIELD = new ParseField("zero_terms_query");
    /**
     * @deprecated Since max_optimization optimization landed in 7.0, normal MatchQuery
     *             will achieve the same result without any configuration.
     */
    @Deprecated
    public static final ParseField CUTOFF_FREQUENCY_FIELD = new ParseField("cutoff_frequency").withAllDeprecated(
        CUTOFF_FREQUENCY_DEPRECATION_MSG
    );
    public static final ParseField LENIENT_FIELD = new ParseField("lenient");
    public static final ParseField FUZZY_TRANSPOSITIONS_FIELD = new ParseField("fuzzy_transpositions");
    public static final ParseField FUZZY_REWRITE_FIELD = new ParseField("fuzzy_rewrite");
    public static final ParseField MINIMUM_SHOULD_MATCH_FIELD = new ParseField("minimum_should_match");
    public static final ParseField OPERATOR_FIELD = new ParseField("operator");
    public static final ParseField MAX_EXPANSIONS_FIELD = new ParseField("max_expansions");
    public static final ParseField PREFIX_LENGTH_FIELD = new ParseField("prefix_length");
    public static final ParseField ANALYZER_FIELD = new ParseField("analyzer");
    public static final ParseField QUERY_FIELD = new ParseField("query");
    public static final ParseField GENERATE_SYNONYMS_PHRASE_QUERY = new ParseField("auto_generate_synonyms_phrase_query");

    /** The name for the match query */
    public static final String NAME = "match";

    /** The default mode terms are combined in a match query */
    public static final Operator DEFAULT_OPERATOR = Operator.OR;

    private final String fieldName;

    private final Object value;

    private Operator operator = DEFAULT_OPERATOR;

    private String analyzer;

    private Fuzziness fuzziness = null;

    private int prefixLength = FuzzyQuery.defaultPrefixLength;

    private int maxExpansions = FuzzyQuery.defaultMaxExpansions;

    private boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions;

    private String minimumShouldMatch;

    private String fuzzyRewrite = null;

    private boolean lenient = MatchQuery.DEFAULT_LENIENCY;

    private MatchQuery.ZeroTermsQuery zeroTermsQuery = MatchQuery.DEFAULT_ZERO_TERMS_QUERY;

    private Float cutoffFrequency = null;

    private boolean autoGenerateSynonymsPhraseQuery = true;

    /**
     * Constructs a new match query.
     */
    public MatchQueryBuilder(String fieldName, Object value) {
        if (fieldName == null) {
            throw new IllegalArgumentException("[" + NAME + "] requires fieldName");
        }
        if (value == null) {
            throw new IllegalArgumentException("[" + NAME + "] requires query value");
        }
        this.fieldName = fieldName;
        this.value = value;
    }

    /**
     * Read from a stream.
     */
    public MatchQueryBuilder(StreamInput in) throws IOException {
        super(in);
        fieldName = in.readString();
        value = in.readGenericValue();
        operator = Operator.readFromStream(in);
        prefixLength = in.readVInt();
        maxExpansions = in.readVInt();
        fuzzyTranspositions = in.readBoolean();
        lenient = in.readBoolean();
        zeroTermsQuery = MatchQuery.ZeroTermsQuery.readFromStream(in);
        // optional fields
        analyzer = in.readOptionalString();
        minimumShouldMatch = in.readOptionalString();
        fuzzyRewrite = in.readOptionalString();
        fuzziness = in.readOptionalWriteable(Fuzziness::new);
        cutoffFrequency = in.readOptionalFloat();
        autoGenerateSynonymsPhraseQuery = in.readBoolean();
    }

    @Override
    protected void doWriteTo(StreamOutput out) throws IOException {
        out.writeString(fieldName);
        out.writeGenericValue(value);
        operator.writeTo(out);
        out.writeVInt(prefixLength);
        out.writeVInt(maxExpansions);
        out.writeBoolean(fuzzyTranspositions);
        out.writeBoolean(lenient);
        zeroTermsQuery.writeTo(out);
        // optional fields
        out.writeOptionalString(analyzer);
        out.writeOptionalString(minimumShouldMatch);
        out.writeOptionalString(fuzzyRewrite);
        out.writeOptionalWriteable(fuzziness);
        out.writeOptionalFloat(cutoffFrequency);
        out.writeBoolean(autoGenerateSynonymsPhraseQuery);
    }

    /** Returns the field name used in this query. */
    public String fieldName() {
        return this.fieldName;
    }

    /** Returns the value used in this query. */
    public Object value() {
        return this.value;
    }

    /** Sets the operator to use when using a boolean query. Defaults to {@code OR}. */
    public MatchQueryBuilder operator(Operator operator) {
        if (operator == null) {
            throw new IllegalArgumentException("[" + NAME + "] requires operator to be non-null");
        }
        this.operator = operator;
        return this;
    }

    /** Returns the operator to use in a boolean query.*/
    public Operator operator() {
        return this.operator;
    }

    /**
     * Explicitly set the analyzer to use. Defaults to use explicit mapping config for the field, or, if not
     * set, the default search analyzer.
     */
    public MatchQueryBuilder analyzer(String analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /** Get the analyzer to use, if previously set, otherwise {@code null} */
    public String analyzer() {
        return this.analyzer;
    }

    @Deprecated
    /* Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
    public MatchQueryBuilder fuzziness(Object fuzziness) {
        this.fuzziness = Fuzziness.build(fuzziness);
        return this;
    }

    /** Sets the fuzziness used when evaluated to a fuzzy query type. Defaults to "AUTO". */
    public MatchQueryBuilder fuzziness(Fuzziness fuzziness) {
        this.fuzziness = fuzziness;
        return this;
    }

    /**  Gets the fuzziness used when evaluated to a fuzzy query type. */
    public Fuzziness fuzziness() {
        return this.fuzziness;
    }

    /**
     * Sets the length of a length of common (non-fuzzy) prefix for fuzzy match queries
     * @param prefixLength non-negative length of prefix
     * @throws IllegalArgumentException in case the prefix is negative
     */
    public MatchQueryBuilder prefixLength(int prefixLength) {
        if (prefixLength < 0) {
            throw new IllegalArgumentException("[" + NAME + "] requires prefix length to be non-negative.");
        }
        this.prefixLength = prefixLength;
        return this;
    }

    /**
     * Gets the length of a length of common (non-fuzzy) prefix for fuzzy match queries
     */
    public int prefixLength() {
        return this.prefixLength;
    }

    /**
     * When using fuzzy or prefix type query, the number of term expansions to use.
     */
    public MatchQueryBuilder maxExpansions(int maxExpansions) {
        if (maxExpansions <= 0) {
            throw new IllegalArgumentException("[" + NAME + "] requires maxExpansions to be positive.");
        }
        this.maxExpansions = maxExpansions;
        return this;
    }

    /**
     * Get the (optional) number of term expansions when using fuzzy or prefix type query.
     */
    public int maxExpansions() {
        return this.maxExpansions;
    }

    /**
     * Set a cutoff value in [0..1] (or absolute number >=1) representing the
     * maximum threshold of a terms document frequency to be considered a low
     * frequency term.
     *
     * @deprecated see {@link MatchQueryBuilder#CUTOFF_FREQUENCY_FIELD} for more details
     */
    @Deprecated
    public MatchQueryBuilder cutoffFrequency(float cutoff) {
        this.cutoffFrequency = cutoff;
        return this;
    }

    /** Gets the optional cutoff value, can be {@code null} if not set previously */
    public Float cutoffFrequency() {
        return this.cutoffFrequency;
    }

    /** Sets optional minimumShouldMatch value to apply to the query */
    public MatchQueryBuilder minimumShouldMatch(String minimumShouldMatch) {
        this.minimumShouldMatch = minimumShouldMatch;
        return this;
    }

    /** Gets the minimumShouldMatch value */
    public String minimumShouldMatch() {
        return this.minimumShouldMatch;
    }

    /** Sets the fuzzy_rewrite parameter controlling how the fuzzy query will get rewritten */
    public MatchQueryBuilder fuzzyRewrite(String fuzzyRewrite) {
        this.fuzzyRewrite = fuzzyRewrite;
        return this;
    }

    /**
     * Get the fuzzy_rewrite parameter
     * @see #fuzzyRewrite(String)
     */
    public String fuzzyRewrite() {
        return this.fuzzyRewrite;
    }

    /**
     * Sets whether transpositions are supported in fuzzy queries.

* The default metric used by fuzzy queries to determine a match is the Damerau-Levenshtein * distance formula which supports transpositions. Setting transposition to false will * switch to classic Levenshtein distance.
* If not set, Damerau-Levenshtein distance metric will be used. */ public MatchQueryBuilder fuzzyTranspositions(boolean fuzzyTranspositions) { this.fuzzyTranspositions = fuzzyTranspositions; return this; } /** Gets the fuzzy query transposition setting. */ public boolean fuzzyTranspositions() { return this.fuzzyTranspositions; } /** * Sets whether format based failures will be ignored. */ public MatchQueryBuilder lenient(boolean lenient) { this.lenient = lenient; return this; } /** * Gets leniency setting that controls if format based failures will be ignored. */ public boolean lenient() { return this.lenient; } /** * Sets query to use in case no query terms are available, e.g. after analysis removed them. * Defaults to {@link MatchQuery.ZeroTermsQuery#NONE}, but can be set to * {@link MatchQuery.ZeroTermsQuery#ALL} instead. */ public MatchQueryBuilder zeroTermsQuery(MatchQuery.ZeroTermsQuery zeroTermsQuery) { if (zeroTermsQuery == null) { throw new IllegalArgumentException("[" + NAME + "] requires zeroTermsQuery to be non-null"); } this.zeroTermsQuery = zeroTermsQuery; return this; } /** * Returns the setting for handling zero terms queries. */ public MatchQuery.ZeroTermsQuery zeroTermsQuery() { return this.zeroTermsQuery; } public MatchQueryBuilder autoGenerateSynonymsPhraseQuery(boolean enable) { this.autoGenerateSynonymsPhraseQuery = enable; return this; } /** * Whether phrase queries should be automatically generated for multi terms synonyms. * Defaults to {@code true}. */ public boolean autoGenerateSynonymsPhraseQuery() { return autoGenerateSynonymsPhraseQuery; } @Override public void doXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(NAME); builder.startObject(fieldName); builder.field(QUERY_FIELD.getPreferredName(), value); builder.field(OPERATOR_FIELD.getPreferredName(), operator.toString()); if (analyzer != null) { builder.field(ANALYZER_FIELD.getPreferredName(), analyzer); } if (fuzziness != null) { fuzziness.toXContent(builder, params); } builder.field(PREFIX_LENGTH_FIELD.getPreferredName(), prefixLength); builder.field(MAX_EXPANSIONS_FIELD.getPreferredName(), maxExpansions); if (minimumShouldMatch != null) { builder.field(MINIMUM_SHOULD_MATCH_FIELD.getPreferredName(), minimumShouldMatch); } if (fuzzyRewrite != null) { builder.field(FUZZY_REWRITE_FIELD.getPreferredName(), fuzzyRewrite); } // LUCENE 4 UPGRADE we need to document this & test this builder.field(FUZZY_TRANSPOSITIONS_FIELD.getPreferredName(), fuzzyTranspositions); builder.field(LENIENT_FIELD.getPreferredName(), lenient); builder.field(ZERO_TERMS_QUERY_FIELD.getPreferredName(), zeroTermsQuery.toString()); if (cutoffFrequency != null) { builder.field(CUTOFF_FREQUENCY_FIELD.getPreferredName(), cutoffFrequency); } builder.field(GENERATE_SYNONYMS_PHRASE_QUERY.getPreferredName(), autoGenerateSynonymsPhraseQuery); printBoostAndQueryName(builder); builder.endObject(); builder.endObject(); } @Override protected Query doToQuery(QueryShardContext context) throws IOException { // validate context specific fields if (analyzer != null && context.getIndexAnalyzers().get(analyzer) == null) { throw new QueryShardException(context, "[" + NAME + "] analyzer [" + analyzer + "] not found"); } MatchQuery matchQuery = new MatchQuery(context); matchQuery.setOccur(operator.toBooleanClauseOccur()); if (analyzer != null) { matchQuery.setAnalyzer(analyzer); } matchQuery.setFuzziness(fuzziness); matchQuery.setFuzzyPrefixLength(prefixLength); matchQuery.setMaxExpansions(maxExpansions); matchQuery.setTranspositions(fuzzyTranspositions); matchQuery.setFuzzyRewriteMethod(QueryParsers.parseRewriteMethod(fuzzyRewrite, null, LoggingDeprecationHandler.INSTANCE)); matchQuery.setLenient(lenient); matchQuery.setCommonTermsCutoff(cutoffFrequency); matchQuery.setZeroTermsQuery(zeroTermsQuery); matchQuery.setAutoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); Query query = matchQuery.parse(MatchQuery.Type.BOOLEAN, fieldName, value); return Queries.maybeApplyMinimumShouldMatch(query, minimumShouldMatch); } @Override protected boolean doEquals(MatchQueryBuilder other) { return Objects.equals(fieldName, other.fieldName) && Objects.equals(value, other.value) && Objects.equals(operator, other.operator) && Objects.equals(analyzer, other.analyzer) && Objects.equals(fuzziness, other.fuzziness) && Objects.equals(prefixLength, other.prefixLength) && Objects.equals(maxExpansions, other.maxExpansions) && Objects.equals(minimumShouldMatch, other.minimumShouldMatch) && Objects.equals(fuzzyRewrite, other.fuzzyRewrite) && Objects.equals(lenient, other.lenient) && Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions) && Objects.equals(zeroTermsQuery, other.zeroTermsQuery) && Objects.equals(cutoffFrequency, other.cutoffFrequency) && Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery); } @Override protected int doHashCode() { return Objects.hash( fieldName, value, operator, analyzer, fuzziness, prefixLength, maxExpansions, minimumShouldMatch, fuzzyRewrite, lenient, fuzzyTranspositions, zeroTermsQuery, cutoffFrequency, autoGenerateSynonymsPhraseQuery ); } @Override public String getWriteableName() { return NAME; } public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOException { String fieldName = null; Object value = null; float boost = AbstractQueryBuilder.DEFAULT_BOOST; String minimumShouldMatch = null; String analyzer = null; Operator operator = MatchQueryBuilder.DEFAULT_OPERATOR; Fuzziness fuzziness = null; int prefixLength = FuzzyQuery.defaultPrefixLength; int maxExpansion = FuzzyQuery.defaultMaxExpansions; boolean fuzzyTranspositions = FuzzyQuery.defaultTranspositions; String fuzzyRewrite = null; boolean lenient = MatchQuery.DEFAULT_LENIENCY; Float cutOffFrequency = null; ZeroTermsQuery zeroTermsQuery = MatchQuery.DEFAULT_ZERO_TERMS_QUERY; boolean autoGenerateSynonymsPhraseQuery = true; String queryName = null; String currentFieldName = null; XContentParser.Token token; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token == XContentParser.Token.START_OBJECT) { throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, currentFieldName); fieldName = currentFieldName; while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) { if (token == XContentParser.Token.FIELD_NAME) { currentFieldName = parser.currentName(); } else if (token.isValue()) { if (QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { value = parser.objectText(); } else if (ANALYZER_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { analyzer = parser.text(); } else if (AbstractQueryBuilder.BOOST_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { boost = parser.floatValue(); } else if (Fuzziness.FIELD.match(currentFieldName, parser.getDeprecationHandler())) { fuzziness = Fuzziness.parse(parser); } else if (PREFIX_LENGTH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { prefixLength = parser.intValue(); } else if (MAX_EXPANSIONS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { maxExpansion = parser.intValue(); } else if (OPERATOR_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { operator = Operator.fromString(parser.text()); } else if (MINIMUM_SHOULD_MATCH_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { minimumShouldMatch = parser.textOrNull(); } else if (FUZZY_REWRITE_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { fuzzyRewrite = parser.textOrNull(); } else if (FUZZY_TRANSPOSITIONS_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { fuzzyTranspositions = parser.booleanValue(); } else if (LENIENT_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { lenient = parser.booleanValue(); } else if (CUTOFF_FREQUENCY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { cutOffFrequency = parser.floatValue(); } else if (ZERO_TERMS_QUERY_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { String zeroTermsValue = parser.text(); if ("none".equalsIgnoreCase(zeroTermsValue)) { zeroTermsQuery = MatchQuery.ZeroTermsQuery.NONE; } else if ("all".equalsIgnoreCase(zeroTermsValue)) { zeroTermsQuery = MatchQuery.ZeroTermsQuery.ALL; } else { throw new ParsingException( parser.getTokenLocation(), "Unsupported zero_terms_query value [" + zeroTermsValue + "]" ); } } else if (AbstractQueryBuilder.NAME_FIELD.match(currentFieldName, parser.getDeprecationHandler())) { queryName = parser.text(); } else if (GENERATE_SYNONYMS_PHRASE_QUERY.match(currentFieldName, parser.getDeprecationHandler())) { autoGenerateSynonymsPhraseQuery = parser.booleanValue(); } else { throw new ParsingException( parser.getTokenLocation(), "[" + NAME + "] query does not support [" + currentFieldName + "]" ); } } else { throw new ParsingException( parser.getTokenLocation(), "[" + NAME + "] unknown token [" + token + "] after [" + currentFieldName + "]" ); } } } else { throwParsingExceptionOnMultipleFields(NAME, parser.getTokenLocation(), fieldName, parser.currentName()); fieldName = parser.currentName(); value = parser.objectText(); } } if (value == null) { throw new ParsingException(parser.getTokenLocation(), "No text specified for text query"); } MatchQueryBuilder matchQuery = new MatchQueryBuilder(fieldName, value); matchQuery.operator(operator); matchQuery.analyzer(analyzer); matchQuery.minimumShouldMatch(minimumShouldMatch); matchQuery.fuzziness(fuzziness); matchQuery.fuzzyRewrite(fuzzyRewrite); matchQuery.prefixLength(prefixLength); matchQuery.fuzzyTranspositions(fuzzyTranspositions); matchQuery.maxExpansions(maxExpansion); matchQuery.lenient(lenient); if (cutOffFrequency != null) { matchQuery.cutoffFrequency(cutOffFrequency); } matchQuery.zeroTermsQuery(zeroTermsQuery); matchQuery.autoGenerateSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery); matchQuery.queryName(queryName); matchQuery.boost(boost); return matchQuery; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy