org.opensearch.index.search.SimpleQueryStringQueryParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearch Show documentation
OpenSearch subproject :server
There is a newer version: 2.18.0
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.index.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.simple.SimpleQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchNoDocsQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.SynonymQuery;
import org.apache.lucene.util.BytesRef;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.query.AbstractQueryBuilder;
import org.opensearch.index.query.MultiMatchQueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.SimpleQueryStringBuilder;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Objects;

import static org.opensearch.common.lucene.search.Queries.newUnmappedFieldQuery;

/**
 * Wrapper class for Lucene's SimpleQueryStringQueryParser that allows us to redefine
 * different types of queries.
 *
 * @opensearch.internal
 */
public class SimpleQueryStringQueryParser extends SimpleQueryParser {

    private final Settings settings;
    private QueryShardContext context;
    private final MultiMatchQuery queryBuilder;

    /** Creates a new parser with custom flags used to enable/disable certain features. */
    public SimpleQueryStringQueryParser(Map weights, int flags, Settings settings, QueryShardContext context) {
        this(null, weights, flags, settings, context);
    }

    /** Creates a new parser with custom flags used to enable/disable certain features. */
    public SimpleQueryStringQueryParser(
        Analyzer analyzer,
        Map weights,
        int flags,
        Settings settings,
        QueryShardContext context
    ) {
        super(analyzer, weights, flags);
        this.settings = settings;
        this.context = context;
        this.queryBuilder = new MultiMatchQuery(context);
        this.queryBuilder.setAutoGenerateSynonymsPhraseQuery(settings.autoGenerateSynonymsPhraseQuery());
        this.queryBuilder.setLenient(settings.lenient());
        this.queryBuilder.setZeroTermsQuery(MatchQuery.ZeroTermsQuery.NULL);
        if (analyzer != null) {
            this.queryBuilder.setAnalyzer(analyzer);
        }
    }

    private Analyzer getAnalyzer(MappedFieldType ft) {
        if (getAnalyzer() != null) {
            return analyzer;
        }
        return ft.getTextSearchInfo().getSearchAnalyzer();
    }

    /**
     * Rethrow the runtime exception, unless the lenient flag has been set, returns {@link MatchNoDocsQuery}
     */
    private Query rethrowUnlessLenient(RuntimeException e) {
        if (settings.lenient()) {
            return Queries.newMatchNoDocsQuery("failed query, caused by " + e.getMessage());
        }
        throw e;
    }

    @Override
    public void setDefaultOperator(BooleanClause.Occur operator) {
        super.setDefaultOperator(operator);
        queryBuilder.setOccur(operator);
    }

    @Override
    protected Query newTermQuery(Term term, float boost) {
        MappedFieldType ft = context.fieldMapper(term.field());
        if (ft == null) {
            return newUnmappedFieldQuery(term.field());
        }
        return ft.termQuery(term.bytes(), context);
    }

    @Override
    public Query newDefaultQuery(String text) {
        try {
            return queryBuilder.parse(MultiMatchQueryBuilder.Type.MOST_FIELDS, weights, text, null);
        } catch (IOException e) {
            return rethrowUnlessLenient(new IllegalStateException(e.getMessage()));
        }
    }

    @Override
    public Query newFuzzyQuery(String text, int fuzziness) {
        List disjuncts = new ArrayList<>();
        for (Map.Entry entry : weights.entrySet()) {
            final String fieldName = entry.getKey();
            final MappedFieldType ft = context.fieldMapper(fieldName);
            if (ft == null) {
                disjuncts.add(newUnmappedFieldQuery(fieldName));
                continue;
            }
            try {
                final BytesRef term = getAnalyzer(ft).normalize(fieldName, text);
                Query query = ft.fuzzyQuery(
                    term,
                    Fuzziness.fromEdits(fuzziness),
                    settings.fuzzyPrefixLength,
                    settings.fuzzyMaxExpansions,
                    settings.fuzzyTranspositions,
                    context
                );
                disjuncts.add(wrapWithBoost(query, entry.getValue()));
            } catch (RuntimeException e) {
                disjuncts.add(rethrowUnlessLenient(e));
            }
        }
        if (disjuncts.size() == 1) {
            return disjuncts.get(0);
        }
        return new DisjunctionMaxQuery(disjuncts, 1.0f);
    }

    @Override
    public Query newPhraseQuery(String text, int slop) {
        try {
            queryBuilder.setPhraseSlop(slop);
            Map phraseWeights;
            if (settings.quoteFieldSuffix() != null) {
                phraseWeights = QueryParserHelper.resolveMappingFields(context, weights, settings.quoteFieldSuffix());
            } else {
                phraseWeights = weights;
            }
            return queryBuilder.parse(MultiMatchQueryBuilder.Type.PHRASE, phraseWeights, text, null);
        } catch (IOException e) {
            return rethrowUnlessLenient(new IllegalStateException(e.getMessage()));
        } finally {
            queryBuilder.setPhraseSlop(0);
        }
    }

    @Override
    public Query newPrefixQuery(String text) {
        List disjuncts = new ArrayList<>();
        for (Map.Entry entry : weights.entrySet()) {
            final String fieldName = entry.getKey();
            final MappedFieldType ft = context.fieldMapper(fieldName);
            if (ft == null) {
                disjuncts.add(newUnmappedFieldQuery(fieldName));
                continue;
            }
            try {
                if (settings.analyzeWildcard()) {
                    Query analyzedQuery = newPossiblyAnalyzedQuery(fieldName, text, getAnalyzer(ft));
                    if (analyzedQuery != null) {
                        disjuncts.add(wrapWithBoost(analyzedQuery, entry.getValue()));
                    }
                } else {
                    BytesRef term = getAnalyzer(ft).normalize(fieldName, text);
                    Query query = ft.prefixQuery(term.utf8ToString(), null, context);
                    disjuncts.add(wrapWithBoost(query, entry.getValue()));
                }
            } catch (RuntimeException e) {
                disjuncts.add(rethrowUnlessLenient(e));
            }
        }
        if (disjuncts.size() == 1) {
            return disjuncts.get(0);
        }
        return new DisjunctionMaxQuery(disjuncts, 1.0f);
    }

    private static Query wrapWithBoost(Query query, float boost) {
        if (query instanceof MatchNoDocsQuery) {
            return query;
        }
        if (boost != AbstractQueryBuilder.DEFAULT_BOOST) {
            return new BoostQuery(query, boost);
        }
        return query;
    }

    /**
     * Analyze the given string using its analyzer, constructing either a
     * {@code PrefixQuery} or a {@code BooleanQuery} made up
     * of {@code TermQuery}s and {@code PrefixQuery}s
     */
    private Query newPossiblyAnalyzedQuery(String field, String termStr, Analyzer analyzer) {
        List> tlist = new ArrayList<>();
        try (TokenStream source = analyzer.tokenStream(field, termStr)) {
            source.reset();
            List currentPos = new ArrayList<>();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            PositionIncrementAttribute posAtt = source.addAttribute(PositionIncrementAttribute.class);

            try {
                boolean hasMoreTokens = source.incrementToken();
                while (hasMoreTokens) {
                    if (currentPos.isEmpty() == false && posAtt.getPositionIncrement() > 0) {
                        tlist.add(currentPos);
                        currentPos = new ArrayList<>();
                    }
                    final BytesRef term = analyzer.normalize(field, termAtt.toString());
                    currentPos.add(term);
                    hasMoreTokens = source.incrementToken();
                }
                if (currentPos.isEmpty() == false) {
                    tlist.add(currentPos);
                }
            } catch (IOException e) {
                // ignore
                // TODO: we should not ignore the exception and return a prefix query with the original term ?
            }
        } catch (IOException e) {
            // Bail on any exceptions, going with a regular prefix query
            return new PrefixQuery(new Term(field, termStr));
        }

        if (tlist.size() == 0) {
            return null;
        }

        if (tlist.size() == 1 && tlist.get(0).size() == 1) {
            return new PrefixQuery(new Term(field, tlist.get(0).get(0)));
        }

        // build a boolean query with prefix on the last position only.
        BooleanQuery.Builder builder = new BooleanQuery.Builder();
        for (int pos = 0; pos < tlist.size(); pos++) {
            List plist = tlist.get(pos);
            boolean isLastPos = (pos == tlist.size() - 1);
            Query posQuery;
            if (plist.size() == 1) {
                if (isLastPos) {
                    posQuery = new PrefixQuery(new Term(field, plist.get(0)));
                } else {
                    posQuery = newTermQuery(new Term(field, plist.get(0)), BoostAttribute.DEFAULT_BOOST);
                }
            } else if (isLastPos == false) {
                // build a synonym query for terms in the same position.
                SynonymQuery.Builder sb = new SynonymQuery.Builder(field);
                for (BytesRef bytesRef : plist) {
                    sb.addTerm(new Term(field, bytesRef));

                }
                posQuery = sb.build();
            } else {
                BooleanQuery.Builder innerBuilder = new BooleanQuery.Builder();
                for (BytesRef token : plist) {
                    innerBuilder.add(new BooleanClause(new PrefixQuery(new Term(field, token)), BooleanClause.Occur.SHOULD));
                }
                posQuery = innerBuilder.build();
            }
            builder.add(new BooleanClause(posQuery, getDefaultOperator()));
        }
        return builder.build();
    }

    /**
     * Class encapsulating the settings for the SimpleQueryString query, with
     * their default values
     *
     * @opensearch.internal
     */
    public static class Settings {
        /** Specifies whether lenient query parsing should be used. */
        private boolean lenient = SimpleQueryStringBuilder.DEFAULT_LENIENT;
        /** Specifies whether wildcards should be analyzed. */
        private boolean analyzeWildcard = SimpleQueryStringBuilder.DEFAULT_ANALYZE_WILDCARD;
        /** Specifies a suffix, if any, to apply to field names for phrase matching. */
        private String quoteFieldSuffix = null;
        /** Whether phrase queries should be automatically generated for multi terms synonyms. */
        private boolean autoGenerateSynonymsPhraseQuery = true;
        /** Prefix length in fuzzy queries.*/
        private int fuzzyPrefixLength = SimpleQueryStringBuilder.DEFAULT_FUZZY_PREFIX_LENGTH;
        /** The number of terms fuzzy queries will expand to.*/
        private int fuzzyMaxExpansions = SimpleQueryStringBuilder.DEFAULT_FUZZY_MAX_EXPANSIONS;
        /** Whether transpositions are supported in fuzzy queries.*/
        private boolean fuzzyTranspositions = SimpleQueryStringBuilder.DEFAULT_FUZZY_TRANSPOSITIONS;

        /**
         * Generates default {@link Settings} object (uses ROOT locale, does
         * lowercase terms, no lenient parsing, no wildcard analysis).
         * */
        public Settings() {}

        public Settings(Settings other) {
            this.lenient = other.lenient;
            this.analyzeWildcard = other.analyzeWildcard;
            this.quoteFieldSuffix = other.quoteFieldSuffix;
            this.autoGenerateSynonymsPhraseQuery = other.autoGenerateSynonymsPhraseQuery;
            this.fuzzyPrefixLength = other.fuzzyPrefixLength;
            this.fuzzyMaxExpansions = other.fuzzyMaxExpansions;
            this.fuzzyTranspositions = other.fuzzyTranspositions;
        }

        /** Specifies whether to use lenient parsing, defaults to false. */
        public void lenient(boolean lenient) {
            this.lenient = lenient;
        }

        /** Returns whether to use lenient parsing. */
        public boolean lenient() {
            return this.lenient;
        }

        /** Specifies whether to analyze wildcards. Defaults to false if unset. */
        public void analyzeWildcard(boolean analyzeWildcard) {
            this.analyzeWildcard = analyzeWildcard;
        }

        /** Returns whether to analyze wildcards. */
        public boolean analyzeWildcard() {
            return analyzeWildcard;
        }

        /**
         * Set the suffix to append to field names for phrase matching.
         */
        public void quoteFieldSuffix(String suffix) {
            this.quoteFieldSuffix = suffix;
        }

        /**
         * Return the suffix to append for phrase matching, or {@code null} if
         * no suffix should be appended.
         */
        public String quoteFieldSuffix() {
            return quoteFieldSuffix;
        }

        public void autoGenerateSynonymsPhraseQuery(boolean value) {
            this.autoGenerateSynonymsPhraseQuery = value;
        }

        /**
         * Whether phrase queries should be automatically generated for multi terms synonyms.
         * Defaults to {@code true}.
         */
        public boolean autoGenerateSynonymsPhraseQuery() {
            return autoGenerateSynonymsPhraseQuery;
        }

        public int fuzzyPrefixLength() {
            return fuzzyPrefixLength;
        }

        public void fuzzyPrefixLength(int fuzzyPrefixLength) {
            this.fuzzyPrefixLength = fuzzyPrefixLength;
        }

        public int fuzzyMaxExpansions() {
            return fuzzyMaxExpansions;
        }

        public void fuzzyMaxExpansions(int fuzzyMaxExpansions) {
            this.fuzzyMaxExpansions = fuzzyMaxExpansions;
        }

        public boolean fuzzyTranspositions() {
            return fuzzyTranspositions;
        }

        public void fuzzyTranspositions(boolean fuzzyTranspositions) {
            this.fuzzyTranspositions = fuzzyTranspositions;
        }

        @Override
        public int hashCode() {
            return Objects.hash(
                lenient,
                analyzeWildcard,
                quoteFieldSuffix,
                autoGenerateSynonymsPhraseQuery,
                fuzzyPrefixLength,
                fuzzyMaxExpansions,
                fuzzyTranspositions
            );
        }

        @Override
        public boolean equals(Object obj) {
            if (this == obj) {
                return true;
            }
            if (obj == null || getClass() != obj.getClass()) {
                return false;
            }
            Settings other = (Settings) obj;
            return Objects.equals(lenient, other.lenient)
                && Objects.equals(analyzeWildcard, other.analyzeWildcard)
                && Objects.equals(quoteFieldSuffix, other.quoteFieldSuffix)
                && Objects.equals(autoGenerateSynonymsPhraseQuery, other.autoGenerateSynonymsPhraseQuery)
                && Objects.equals(fuzzyPrefixLength, other.fuzzyPrefixLength)
                && Objects.equals(fuzzyMaxExpansions, other.fuzzyMaxExpansions)
                && Objects.equals(fuzzyTranspositions, other.fuzzyTranspositions);
        }
    }
}