All downloads are free. Search and download functionality uses the official Maven repository.

org.opensearch.index.search.MatchQuery Maven / Gradle / Ivy

There is a newer version: 2.18.0
Show newest version
/*
 * SPDX-License-Identifier: Apache-2.0
 *
 * The OpenSearch Contributors require contributions made to
 * this file be licensed under the Apache-2.0 license or a
 * compatible open source license.
 */

/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

/*
 * Modifications Copyright OpenSearch Contributors. See
 * GitHub history for details.
 */

package org.opensearch.index.search;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.CachingTokenFilter;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.miscellaneous.DisableGraphAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.analysis.tokenattributes.TermToBytesRefAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.queries.spans.SpanMultiTermQueryWrapper;
import org.apache.lucene.queries.spans.SpanNearQuery;
import org.apache.lucene.queries.spans.SpanOrQuery;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.queries.spans.SpanTermQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostAttribute;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.graph.GraphTokenStreamFiniteStrings;
import org.opensearch.OpenSearchException;
import org.opensearch.common.lucene.Lucene;
import org.opensearch.common.lucene.search.Queries;
import org.opensearch.common.lucene.search.SpanBooleanQueryRewriteWithMaxClause;
import org.opensearch.common.unit.Fuzziness;
import org.opensearch.core.common.io.stream.StreamInput;
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.common.io.stream.Writeable;
import org.opensearch.index.mapper.KeywordFieldMapper;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.MatchOnlyTextFieldMapper;
import org.opensearch.index.mapper.TextFieldMapper;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.index.query.support.QueryParsers;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Set;
import java.util.function.Supplier;

import static org.opensearch.common.lucene.search.Queries.newLenientFieldQuery;
import static org.opensearch.common.lucene.search.Queries.newUnmappedFieldQuery;

/**
 * Foundation match query
 *
 * @opensearch.internal
 */
public class MatchQuery {

    /**
     * The flavour of match query to build from the analyzed text.
     * <p>
     * Every constant carries an explicit numeric id; that id (not the JVM enum ordinal)
     * is what {@link #writeTo(StreamOutput)} emits and {@link #readFromStream(StreamInput)} expects.
     *
     * @opensearch.internal
     */
    public enum Type implements Writeable {
        /**
         * The text is analyzed and terms are added to a boolean query.
         */
        BOOLEAN(0),
        /**
         * The text is analyzed and used as a phrase query.
         */
        PHRASE(1),
        /**
         * The text is analyzed and used in a phrase query, with the last term acting as a prefix.
         */
        PHRASE_PREFIX(2),
        /**
         * The text is analyzed, terms are added to a boolean query with the last term acting as a prefix.
         */
        BOOLEAN_PREFIX(3);

        /** Explicit wire id of this constant. */
        private final int ordinal;

        Type(int ordinal) {
            this.ordinal = ordinal;
        }

        /**
         * Reads a variable-length int from the stream and maps it back to the constant with that id.
         *
         * @param in stream to read the id from
         * @return the constant whose id matches the value read
         * @throws IOException if reading from the stream fails
         * @throws OpenSearchException if no constant has the id that was read
         */
        public static Type readFromStream(StreamInput in) throws IOException {
            final int serialized = in.readVInt();
            return Arrays.stream(values())
                .filter(candidate -> candidate.ordinal == serialized)
                .findFirst()
                .orElseThrow(() -> new OpenSearchException("unknown serialized type [" + serialized + "]"));
        }

        /**
         * Writes this constant's id to the stream as a variable-length int.
         */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeVInt(ordinal);
        }
    }

    /**
     * Selects which query {@code zeroTermsQuery()} produces when analysis yields no query.
     * <p>
     * Every constant carries an explicit numeric id that is used for stream serialization.
     *
     * @opensearch.internal
     */
    public enum ZeroTermsQuery implements Writeable {
        NONE(0),
        ALL(1),
        // this is used internally to make sure that query_string and simple_query_string
        // ignores query part that removes all tokens.
        NULL(2);

        /** Explicit wire id of this constant. */
        private final int ordinal;

        ZeroTermsQuery(int ordinal) {
            this.ordinal = ordinal;
        }

        /**
         * Reads a variable-length int from the stream and maps it back to the constant with that id.
         *
         * @param in stream to read the id from
         * @return the constant whose id matches the value read
         * @throws IOException if reading from the stream fails
         * @throws OpenSearchException if no constant has the id that was read
         */
        public static ZeroTermsQuery readFromStream(StreamInput in) throws IOException {
            final int serialized = in.readVInt();
            return Arrays.stream(values())
                .filter(candidate -> candidate.ordinal == serialized)
                .findFirst()
                .orElseThrow(() -> new OpenSearchException("unknown serialized type [" + serialized + "]"));
        }

        /**
         * Writes this constant's id to the stream as a variable-length int.
         */
        @Override
        public void writeTo(StreamOutput out) throws IOException {
            out.writeVInt(ordinal);
        }
    }

    /** Initial value of {@link #phraseSlop}. */
    public static final int DEFAULT_PHRASE_SLOP = 0;

    /** Initial value of {@link #lenient}. */
    public static final boolean DEFAULT_LENIENCY = false;

    /** Initial value of {@link #zeroTermsQuery}. */
    public static final ZeroTermsQuery DEFAULT_ZERO_TERMS_QUERY = ZeroTermsQuery.NONE;

    /** Shard context used to resolve field types and analyzers and passed to the field type's query factories. */
    protected final QueryShardContext context;

    /** Explicit analyzer override; while {@code null}, {@code getAnalyzer} asks the context for the field's search analyzer. */
    protected Analyzer analyzer;

    /** Operator handed to the boolean, boolean-prefix and phrase-prefix query construction. */
    protected BooleanClause.Occur occur = BooleanClause.Occur.SHOULD;

    /** Forwarded to the query builder and to the field type's phrase query factories. */
    protected boolean enablePositionIncrements = true;

    /** Slop passed to phrase and phrase-prefix query construction. */
    protected int phraseSlop = DEFAULT_PHRASE_SLOP;

    /** When non-null, single-term queries are created through the field type's fuzzy query factory. */
    protected Fuzziness fuzziness = null;

    /** Prefix length passed to the fuzzy query factory. */
    protected int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;

    /** Maximum expansions passed to the fuzzy and phrase-prefix query factories. */
    protected int maxExpansions = FuzzyQuery.defaultMaxExpansions;

    /** Rewrite method passed to span prefix queries; replaced whenever {@code setMaxExpansions} is called. */
    protected SpanMultiTermQueryWrapper.SpanRewriteMethod spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(
        FuzzyQuery.defaultMaxExpansions,
        false
    );

    /** Transpositions flag passed to the fuzzy query factory. */
    protected boolean transpositions = FuzzyQuery.defaultTranspositions;

    /** Rewrite method applied to {@link FuzzyQuery} instances created for fuzzy terms. */
    protected MultiTermQuery.RewriteMethod fuzzyRewriteMethod;

    /** When {@code true}, selected runtime failures during query creation are turned into lenient field queries instead of being rethrown. */
    protected boolean lenient = DEFAULT_LENIENCY;

    /** Chooses the query returned by {@code zeroTermsQuery()} when query construction yields {@code null}. */
    protected ZeroTermsQuery zeroTermsQuery = DEFAULT_ZERO_TERMS_QUERY;

    /** When non-null, {@link Type#BOOLEAN} queries are routed through the common-terms query construction. */
    protected Float commonTermsCutoff = null;

    /** Requested setting for auto-generated synonym phrase queries; the builder only honours it for fields with positions. */
    protected boolean autoGenerateSynonymsPhraseQuery = true;

    /**
     * Creates a match query helper bound to the given shard context; all other settings keep
     * their field defaults until changed through the setters.
     *
     * @param context shard context stored for later field and analyzer lookups
     */
    public MatchQuery(QueryShardContext context) {
        this.context = context;
    }

    /**
     * Selects the analyzer override by name, resolving it against the index analyzers
     * exposed by the context's mapper service.
     * <p>
     * The lookup result is validated before it is stored, so a failed lookup leaves any
     * previously configured analyzer untouched instead of resetting it to {@code null}.
     *
     * @param analyzerName name of the analyzer to look up
     * @throws IllegalArgumentException if no analyzer is found under {@code analyzerName}
     */
    public void setAnalyzer(String analyzerName) {
        final Analyzer resolved = context.getMapperService().getIndexAnalyzers().get(analyzerName);
        if (resolved == null) {
            throw new IllegalArgumentException("No analyzer found for [" + analyzerName + "]");
        }
        this.analyzer = resolved;
    }

    /**
     * Sets the analyzer override directly. Passing {@code null} makes {@code getAnalyzer}
     * fall back to the analyzer the context reports for the field.
     *
     * @param analyzer analyzer to use for every field, or {@code null} for the per-field default
     */
    public void setAnalyzer(Analyzer analyzer) {
        this.analyzer = analyzer;
    }

    /**
     * Sets the operator handed to boolean, boolean-prefix and phrase-prefix query construction.
     *
     * @param occur clause occurrence to use
     */
    public void setOccur(BooleanClause.Occur occur) {
        this.occur = occur;
    }

    /**
     * Sets the common-terms cutoff. While it is non-null, {@link Type#BOOLEAN} queries are
     * built through the common-terms path in {@code parseInternal}.
     *
     * @param cutoff maximum term frequency for the common-terms query, or {@code null} to disable it
     * @deprecated See {@link MatchQueryBuilder#setCommonTermsCutoff(Float)} for more details
     */
    @Deprecated
    public void setCommonTermsCutoff(Float cutoff) {
        this.commonTermsCutoff = cutoff;
    }

    /**
     * Sets the position-increments flag that {@code parse} forwards to the query builder and
     * that the phrase analysis passes to the field type.
     *
     * @param enablePositionIncrements new flag value
     */
    public void setEnablePositionIncrements(boolean enablePositionIncrements) {
        this.enablePositionIncrements = enablePositionIncrements;
    }

    /**
     * Sets the slop used when building phrase and phrase-prefix queries.
     *
     * @param phraseSlop slop value to pass to phrase query construction
     */
    public void setPhraseSlop(int phraseSlop) {
        this.phraseSlop = phraseSlop;
    }

    /**
     * Sets the fuzziness. A non-null value makes term queries go through the field type's
     * fuzzy query factory; {@code null} keeps plain term queries.
     *
     * @param fuzziness fuzziness to apply, or {@code null} for exact terms
     */
    public void setFuzziness(Fuzziness fuzziness) {
        this.fuzziness = fuzziness;
    }

    /**
     * Sets the prefix length passed to the fuzzy query factory.
     *
     * @param fuzzyPrefixLength prefix length for fuzzy queries
     */
    public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
        this.fuzzyPrefixLength = fuzzyPrefixLength;
    }

    /**
     * Sets the maximum number of expansions and rebuilds the span rewrite method so that it
     * is created with the same limit.
     *
     * @param maxExpansions expansion limit for fuzzy, phrase-prefix and span-prefix queries
     */
    public void setMaxExpansions(int maxExpansions) {
        this.maxExpansions = maxExpansions;
        // keep the span rewrite method in step with the new limit
        this.spanRewriteMethod = new SpanBooleanQueryRewriteWithMaxClause(maxExpansions, false);
    }

    /**
     * Sets the transpositions flag passed to the fuzzy query factory.
     *
     * @param transpositions new flag value
     */
    public void setTranspositions(boolean transpositions) {
        this.transpositions = transpositions;
    }

    /**
     * Sets the rewrite method that is applied to {@link FuzzyQuery} instances created for fuzzy terms.
     *
     * @param fuzzyRewriteMethod rewrite method to hand to {@code QueryParsers.setRewriteMethod}
     */
    public void setFuzzyRewriteMethod(MultiTermQuery.RewriteMethod fuzzyRewriteMethod) {
        this.fuzzyRewriteMethod = fuzzyRewriteMethod;
    }

    /**
     * Sets leniency. When enabled, the runtime failures caught during term, prefix and phrase
     * query creation are converted into lenient field queries instead of being rethrown.
     *
     * @param lenient new flag value
     */
    public void setLenient(boolean lenient) {
        this.lenient = lenient;
    }

    /**
     * Sets which query {@code zeroTermsQuery()} returns when query construction produces {@code null}.
     *
     * @param zeroTermsQuery behaviour to apply
     */
    public void setZeroTermsQuery(ZeroTermsQuery zeroTermsQuery) {
        this.zeroTermsQuery = zeroTermsQuery;
    }

    /**
     * Sets the requested value for auto-generating phrase queries for multi-term synonyms.
     * The query builder only applies it when the field's text search info reports positions.
     *
     * @param enabled new flag value
     */
    public void setAutoGenerateSynonymsPhraseQuery(boolean enabled) {
        this.autoGenerateSynonymsPhraseQuery = enabled;
    }

    /**
     * Builds the query for matching {@code value} against {@code fieldName}.
     * <p>
     * An unmapped field yields the dedicated unmapped-field query. Otherwise the analyzer is
     * chosen (the quote analyzer for the phrase types) and, when it is the keyword analyzer and
     * the type is not {@link Type#PHRASE_PREFIX}, a term or prefix query is returned directly
     * without further analysis. All other cases are delegated to {@code parseInternal}.
     *
     * @param type kind of match query to build
     * @param fieldName field name or alias as given in the request
     * @param value value to match; converted with {@code toString()}
     * @return the resulting query; may be {@code null} if {@code parseInternal} returns {@code null}
     * @throws IOException declared for query construction
     */
    public Query parse(Type type, String fieldName, Object value) throws IOException {
        final MappedFieldType fieldType = context.fieldMapper(fieldName);
        if (fieldType == null) {
            return newUnmappedFieldQuery(fieldName);
        }
        final Set<String> fields = context.simpleMatchToIndexNames(fieldName);
        if (fields.contains(fieldName)) {
            assert fields.size() == 1;
            // this field is a concrete field or an alias so we use the
            // field type name directly
            fieldName = fieldType.name();
        }

        Analyzer analyzer = getAnalyzer(fieldType, type == Type.PHRASE || type == Type.PHRASE_PREFIX);
        assert analyzer != null;

        MatchQueryBuilder builder = new MatchQueryBuilder(analyzer, fieldType, enablePositionIncrements, autoGenerateSynonymsPhraseQuery);

        /*
         * If a keyword analyzer is used, we know that further analysis isn't
         * needed and can immediately return a term query. If the query is a bool
         * prefix query and the field type supports prefix queries, we return
         * a prefix query instead
         */
        if (analyzer == Lucene.KEYWORD_ANALYZER && type != Type.PHRASE_PREFIX) {
            final Term term = new Term(fieldName, value.toString());
            if (type == Type.BOOLEAN_PREFIX
                && (fieldType instanceof TextFieldMapper.TextFieldType || fieldType instanceof KeywordFieldMapper.KeywordFieldType)) {
                return builder.newPrefixQuery(term);
            } else {
                return builder.newTermQuery(term, BoostAttribute.DEFAULT_BOOST);
            }
        }

        return parseInternal(type, fieldName, builder, value);
    }

    /**
     * Dispatches on the match type to the matching builder method and substitutes the
     * zero-terms query when the builder produced nothing.
     *
     * @param type kind of match query to build
     * @param fieldName resolved field name
     * @param builder query builder configured for the field
     * @param value value to match; converted with {@code toString()}
     * @return the built query, or the result of {@code zeroTermsQuery()} if the builder returned {@code null}
     * @throws IOException declared for query construction
     * @throws IllegalStateException if {@code type} is not one of the handled constants
     */
    protected final Query parseInternal(Type type, String fieldName, MatchQueryBuilder builder, Object value) throws IOException {
        Query built;
        switch (type) {
            case BOOLEAN: {
                final String text = value.toString();
                // a configured cutoff routes the boolean query through the common-terms path
                built = commonTermsCutoff == null
                    ? builder.createBooleanQuery(fieldName, text, occur)
                    : createCommonTermsQuery(builder, fieldName, text, occur, occur, commonTermsCutoff);
                break;
            }
            case BOOLEAN_PREFIX:
                built = builder.createBooleanPrefixQuery(fieldName, value.toString(), occur);
                break;
            case PHRASE:
                built = builder.createPhraseQuery(fieldName, value.toString(), phraseSlop);
                break;
            case PHRASE_PREFIX:
                built = builder.createPhrasePrefixQuery(fieldName, value.toString(), phraseSlop);
                break;
            default:
                throw new IllegalStateException("No type found for [" + type + "]");
        }

        if (built == null) {
            return zeroTermsQuery();
        }
        return built;
    }

    /**
     * Builds a boolean query for the text and, if the result really is a {@link BooleanQuery},
     * attempts to convert it into a common-terms query.
     *
     * @param builder query builder configured for the field
     * @param field field name
     * @param queryText text to analyze
     * @param highFreqOccur operator for high-frequency terms
     * @param lowFreqOccur operator for low-frequency terms; also used to build the boolean query
     * @param maxTermFrequency cutoff passed to the common-terms query
     * @return the converted query, or whatever the builder returned (possibly {@code null}) if it is not a boolean query
     */
    private Query createCommonTermsQuery(
        MatchQueryBuilder builder,
        String field,
        String queryText,
        Occur highFreqOccur,
        Occur lowFreqOccur,
        float maxTermFrequency
    ) {
        final Query candidate = builder.createBooleanQuery(field, queryText, lowFreqOccur);
        // instanceof is false for null, so a null result falls through unchanged
        if (candidate instanceof BooleanQuery) {
            return boolToExtendedCommonTermsQuery((BooleanQuery) candidate, highFreqOccur, lowFreqOccur, maxTermFrequency);
        }
        return candidate;
    }

    /**
     * Converts a boolean query made only of term clauses into an {@link ExtendedCommonTermsQuery}.
     *
     * @param bq boolean query to convert
     * @param highFreqOccur operator for high-frequency terms
     * @param lowFreqOccur operator for low-frequency terms
     * @param maxTermFrequency cutoff passed to the common-terms query
     * @return the common-terms query, or {@code bq} itself as soon as a non-term clause is encountered
     */
    private Query boolToExtendedCommonTermsQuery(BooleanQuery bq, Occur highFreqOccur, Occur lowFreqOccur, float maxTermFrequency) {
        final ExtendedCommonTermsQuery commonTerms = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency);
        for (final BooleanClause clause : bq.clauses()) {
            final Query inner = clause.getQuery();
            if (inner instanceof TermQuery) {
                commonTerms.add(((TermQuery) inner).getTerm());
            } else {
                // anything other than a plain term cannot be expressed; keep the original query
                return bq;
            }
        }
        return commonTerms;
    }

    /**
     * Returns the analyzer to use for the field: the explicit override if one was set,
     * otherwise the analyzer the context reports for the field.
     *
     * @param fieldType field being queried
     * @param quoted whether to ask the context for the search-quote analyzer instead of the search analyzer
     * @return the analyzer to use
     */
    protected Analyzer getAnalyzer(MappedFieldType fieldType, boolean quoted) {
        if (analyzer != null) {
            return analyzer;
        }
        if (quoted) {
            return context.getSearchQuoteAnalyzer(fieldType);
        }
        return context.getSearchAnalyzer(fieldType);
    }

    /**
     * Produces the replacement query selected by the {@code zeroTermsQuery} setting.
     *
     * @return a match-all query for {@code ALL}, a match-no-docs query for {@code NONE},
     *         or {@code null} for {@code NULL}
     * @throws IllegalStateException if the setting is not one of the handled constants
     */
    protected Query zeroTermsQuery() {
        switch (zeroTermsQuery) {
            case ALL:
                return Queries.newMatchAllQuery();
            case NONE:
                return Queries.newMatchNoDocsQuery("Matching no documents because no terms present");
            case NULL:
                return null;
            default:
                throw new IllegalStateException("unknown zeroTermsQuery " + zeroTermsQuery);
        }
    }

    class MatchQueryBuilder extends QueryBuilder {
        private final MappedFieldType fieldType;

        /**
         * Creates a new QueryBuilder using the given analyzer.
         */
        MatchQueryBuilder(
            Analyzer analyzer,
            MappedFieldType fieldType,
            boolean enablePositionIncrements,
            boolean autoGenerateSynonymsPhraseQuery
        ) {
            super(analyzer);
            this.fieldType = fieldType;
            setEnablePositionIncrements(enablePositionIncrements);
            if (fieldType.getTextSearchInfo().hasPositions()) {
                setAutoGenerateMultiTermSynonymsPhraseQuery(autoGenerateSynonymsPhraseQuery);
            } else {
                setAutoGenerateMultiTermSynonymsPhraseQuery(false);
            }
        }

        /**
         * Maps the parent builder's quoted/unquoted distinction onto {@link Type#PHRASE} and
         * {@link Type#BOOLEAN} and delegates to {@code createQuery}. The {@code analyzer}
         * argument is not used by this override.
         */
        @Override
        protected Query createFieldQuery(
            Analyzer analyzer,
            BooleanClause.Occur operator,
            String field,
            String queryText,
            boolean quoted,
            int slop
        ) {
            assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;
            final Type matchType;
            if (quoted) {
                matchType = Type.PHRASE;
            } else {
                matchType = Type.BOOLEAN;
            }
            return createQuery(field, queryText, matchType, operator, slop);
        }

        /**
         * Creates a phrase prefix query from the query text.
         * <p>
         * Delegates to {@code createQuery} with {@link Type#PHRASE_PREFIX}; the operator is the
         * {@code occur} value in scope here, since this method takes no operator parameter.
         *
         * @param field field name
         * @param queryText text to be passed to the analyzer
         * @param slop phrase slop forwarded to {@code createQuery}
         * @return {@code PrefixQuery}, {@code MultiPhrasePrefixQuery}, based on the analysis of {@code queryText}
         */
        protected Query createPhrasePrefixQuery(String field, String queryText, int slop) {
            return createQuery(field, queryText, Type.PHRASE_PREFIX, occur, slop);
        }

        /**
         * Creates a boolean prefix query from the query text.
         * <p>
         * Delegates to {@code createQuery} with {@link Type#BOOLEAN_PREFIX} and a slop of zero.
         *
         * @param field field name
         * @param queryText text to be passed to the analyzer
         * @param occur operator forwarded to {@code createQuery}
         * @return {@code PrefixQuery}, {@code BooleanQuery}, based on the analysis of {@code queryText}
         */
        protected Query createBooleanPrefixQuery(String field, String queryText, BooleanClause.Occur occur) {
            return createQuery(field, queryText, Type.BOOLEAN_PREFIX, occur, 0);
        }

        /**
         * Builds a query from an already-opened token stream.
         * <p>
         * The stream is wrapped in a caching filter and read once to count tokens and positions
         * and to detect synonyms (zero position increment) and graph tokens (position length
         * above one while graph queries are enabled). The cached stream is then handed to the
         * analysis method that fits the match type and those findings.
         *
         * @param source token stream to consume
         * @param type kind of match query to build
         * @param operator boolean operator, either {@code SHOULD} or {@code MUST}
         * @param field field name
         * @param phraseSlop slop for the phrase-based branches
         * @return the built query, or {@code null} if there is no term attribute or no token
         * @throws RuntimeException wrapping any {@link IOException} raised while reading the stream
         */
        private Query createFieldQuery(TokenStream source, Type type, BooleanClause.Occur operator, String field, int phraseSlop) {
            assert operator == BooleanClause.Occur.SHOULD || operator == BooleanClause.Occur.MUST;

            try (CachingTokenFilter cached = new CachingTokenFilter(source)) {
                final TermToBytesRefAttribute termAttribute = cached.getAttribute(TermToBytesRefAttribute.class);
                final PositionIncrementAttribute incrementAttribute = cached.addAttribute(PositionIncrementAttribute.class);
                final PositionLengthAttribute lengthAttribute = cached.addAttribute(PositionLengthAttribute.class);
                if (termAttribute == null) {
                    return null;
                }

                // Pass 1: scan the whole stream, gathering the facts the dispatch below needs.
                int tokens = 0;
                int positions = 0;
                boolean synonyms = false;
                boolean graph = false;

                cached.reset();
                while (cached.incrementToken()) {
                    tokens++;
                    final int increment = incrementAttribute.getPositionIncrement();
                    if (increment == 0) {
                        // a token stacked on the previous position is a synonym
                        synonyms = true;
                    } else {
                        positions += increment;
                    }
                    if (lengthAttribute.getPositionLength() > 1 && enableGraphQueries) {
                        graph = true;
                    }
                }

                // Pass 2: choose the query shape; each analysis method re-reads the cached stream.
                if (tokens == 0) {
                    return null;
                }

                final boolean phrasePrefix = type == Type.PHRASE_PREFIX;
                final boolean booleanPrefix = type == Type.BOOLEAN_PREFIX;

                if (tokens == 1) {
                    // single term
                    return phrasePrefix
                        ? analyzePhrasePrefix(field, cached, phraseSlop, positions)
                        : analyzeTerm(field, cached, booleanPrefix);
                }
                if (graph) {
                    return type == Type.PHRASE || phrasePrefix
                        ? analyzeGraphPhrase(cached, field, type, phraseSlop)
                        : analyzeGraphBoolean(field, cached, operator, booleanPrefix);
                }
                if (type == Type.PHRASE && positions > 1) {
                    // synonyms require the multi-phrase form, otherwise a simple phrase suffices
                    return synonyms ? analyzeMultiPhrase(field, cached, phraseSlop) : analyzePhrase(field, cached, phraseSlop);
                }
                if (phrasePrefix) {
                    return analyzePhrasePrefix(field, cached, phraseSlop, positions);
                }
                // boolean: one position means synonyms only, otherwise several positions
                return positions == 1 ? analyzeBoolean(field, cached) : analyzeMultiBoolean(field, cached, operator, booleanPrefix);
            } catch (IOException e) {
                throw new RuntimeException("Error analyzing query text", e);
            }
        }

        /**
         * Analyzes the query text with the builder's analyzer and builds the query for the
         * resulting token stream.
         * <p>
         * If the stream carries a {@link DisableGraphAttribute}, graph queries are switched off
         * for the duration of the call. Afterwards the graph-query flag is put back to the value
         * it had on entry, rather than being forced to {@code true}, so a builder whose graph
         * queries were already disabled stays disabled.
         *
         * @param field field name
         * @param queryText text to be passed to the analyzer
         * @param type kind of match query to build
         * @param operator boolean operator
         * @param phraseSlop slop for the phrase-based branches
         * @return the built query, possibly {@code null}
         * @throws RuntimeException wrapping any {@link IOException} raised while analyzing
         */
        private Query createQuery(String field, String queryText, Type type, BooleanClause.Occur operator, int phraseSlop) {
            // remember the caller-visible setting so it can be restored exactly
            final boolean graphQueriesOnEntry = enableGraphQueries;
            // Use the analyzer to get all the tokens, and then build an appropriate
            // query based on the analysis chain.
            try (TokenStream source = analyzer.tokenStream(field, queryText)) {
                if (source.hasAttribute(DisableGraphAttribute.class)) {
                    /*
                     * A {@link TokenFilter} in this {@link TokenStream} disabled the graph analysis to avoid
                     * paths explosion. See {@link org.opensearch.index.analysis.ShingleTokenFilterFactory} for details.
                     */
                    setEnableGraphQueries(false);
                }
                try {
                    return createFieldQuery(source, type, operator, field, phraseSlop);
                } finally {
                    setEnableGraphQueries(graphQueriesOnEntry);
                }
            } catch (IOException e) {
                throw new RuntimeException("Error analyzing query text", e);
            }
        }

        /**
         * Turns terms into one span query: each term becomes a span term query, or a span
         * prefix query when {@code isPrefix} is set.
         *
         * @param terms terms to convert
         * @param isPrefix whether each term should be treated as a prefix
         * @return the single span query when exactly one term is given, otherwise a {@link SpanOrQuery} over all of them
         */
        private SpanQuery newSpanQuery(Term[] terms, boolean isPrefix) {
            final SpanQuery[] perTerm = new SpanQuery[terms.length];
            for (int idx = 0; idx < perTerm.length; idx++) {
                final Term term = terms[idx];
                if (isPrefix) {
                    perTerm[idx] = fieldType.spanPrefixQuery(term.text(), spanRewriteMethod, context);
                } else {
                    perTerm[idx] = new SpanTermQuery(term);
                }
            }
            // a lone term needs no OR wrapper
            return perTerm.length == 1 ? perTerm[0] : new SpanOrQuery(perTerm);
        }

        /**
         * Builds an ordered span-near query from the tokens of a stream, inserting gaps where
         * the position increment is larger than one. Only the last token may become a prefix query.
         *
         * @param in token stream to consume; it is read from its current state
         * @param field field name
         * @param isPrefix whether the last token should be a span prefix query
         * @return the only clause if the built query has exactly one, otherwise the span-near query;
         *         {@code null} if the stream has no term attribute
         * @throws IOException if reading the stream fails
         */
        private SpanQuery createSpanQuery(TokenStream in, String field, boolean isPrefix) throws IOException {
            final TermToBytesRefAttribute termAttribute = in.getAttribute(TermToBytesRefAttribute.class);
            final PositionIncrementAttribute incrementAttribute = in.getAttribute(PositionIncrementAttribute.class);
            if (termAttribute == null) {
                return null;
            }

            final SpanNearQuery.Builder near = new SpanNearQuery.Builder(field, true);
            // The clause for a token is added one step late so the final token can be handled differently.
            Term pending = null;
            while (in.incrementToken()) {
                final int increment = incrementAttribute.getPositionIncrement();
                if (increment > 1) {
                    near.addGap(increment - 1);
                }
                if (pending != null) {
                    near.addClause(new SpanTermQuery(pending));
                }
                pending = new Term(field, termAttribute.getBytesRef());
            }
            if (pending != null) {
                if (isPrefix) {
                    near.addClause(fieldType.spanPrefixQuery(pending.text(), spanRewriteMethod, context));
                } else {
                    near.addClause(new SpanTermQuery(pending));
                }
            }

            final SpanNearQuery result = near.build();
            final SpanQuery[] clauses = result.getClauses();
            return clauses.length == 1 ? clauses[0] : result;
        }

        /**
         * Creates the query for a single term through the field type: a fuzzy query when
         * fuzziness is configured, otherwise a plain term query. The {@code boost} argument is
         * not used by this override.
         * <p>
         * A {@link FuzzyQuery} result additionally gets the configured fuzzy rewrite method.
         *
         * @param term term to query
         * @param boost ignored
         * @return the term or fuzzy query, or a lenient field query if creation failed and leniency is on
         * @throws RuntimeException rethrown from the field type when leniency is off
         */
        @Override
        protected Query newTermQuery(Term term, float boost) {
            try {
                if (fuzziness == null) {
                    return fieldType.termQuery(term.bytes(), context);
                }
                final Query fuzzy = fieldType.fuzzyQuery(term.text(), fuzziness, fuzzyPrefixLength, maxExpansions, transpositions, context);
                if (fuzzy instanceof FuzzyQuery) {
                    QueryParsers.setRewriteMethod((FuzzyQuery) fuzzy, fuzzyRewriteMethod);
                }
                return fuzzy;
            } catch (RuntimeException e) {
                if (lenient) {
                    return newLenientFieldQuery(fieldType.name(), e);
                }
                throw e;
            }
        }

        /**
         * Builds a new prefix query instance through the field type, passing no rewrite method.
         *
         * @param term term whose text is used as the prefix
         * @return the prefix query, or a lenient field query if creation failed and leniency is on
         * @throws RuntimeException rethrown from the field type when leniency is off
         */
        protected Query newPrefixQuery(Term term) {
            try {
                return fieldType.prefixQuery(term.text(), null, context);
            } catch (RuntimeException e) {
                if (lenient == false) {
                    throw e;
                }
                return newLenientFieldQuery(term.field(), e);
            }
        }

        /**
         * Builds the query for a stream that holds exactly one token.
         * <p>
         * A prefix query is produced only when {@code isPrefix} is set and the token's end offset
         * equals the end offset reported after {@code stream.end()}; otherwise a term query is built.
         *
         * @param field field name
         * @param stream token stream, reset and read here
         * @param isPrefix whether the token may be turned into a prefix query
         * @return a prefix or term query for the token
         * @throws IOException if reading the stream fails
         * @throws AssertionError if the stream yields no token
         */
        private Query analyzeTerm(String field, TokenStream stream, boolean isPrefix) throws IOException {
            final TermToBytesRefAttribute termAttribute = stream.getAttribute(TermToBytesRefAttribute.class);
            final OffsetAttribute offsets = stream.addAttribute(OffsetAttribute.class);

            stream.reset();
            if (stream.incrementToken() == false) {
                throw new AssertionError();
            }
            final Term onlyTerm = new Term(field, termAttribute.getBytesRef());
            final int tokenEnd = offsets.endOffset();
            // after end() the attribute holds the stream's final offset
            stream.end();

            if (isPrefix && tokenEnd == offsets.endOffset()) {
                return newPrefixQuery(onlyTerm);
            }
            return newTermQuery(onlyTerm, BoostAttribute.DEFAULT_BOOST);
        }

        /**
         * Adds one clause for the terms collected at a single position: nothing for an empty
         * list, a term or prefix query for a single term, and a synonym query for several terms.
         *
         * @param q boolean query builder receiving the clause
         * @param field field name
         * @param current terms that share one position
         * @param operator occurrence for the added clause
         * @param isPrefix whether a single term should become a prefix query; ignored for synonyms
         */
        private void add(BooleanQuery.Builder q, String field, List<Term> current, BooleanClause.Occur operator, boolean isPrefix) {
            if (current.isEmpty()) {
                return;
            }
            if (current.size() == 1) {
                if (isPrefix) {
                    q.add(newPrefixQuery(current.get(0)), operator);
                } else {
                    q.add(newTermQuery(current.get(0), BoostAttribute.DEFAULT_BOOST), operator);
                }
            } else {
                // We don't apply prefix on synonyms
                final TermAndBoost[] termAndBoosts = current.stream()
                    .map(t -> new TermAndBoost(t.bytes(), BoostAttribute.DEFAULT_BOOST))
                    .toArray(TermAndBoost[]::new);
                q.add(newSynonymQuery(field, termAndBoosts), operator);
            }
        }

        /**
         * Builds a boolean query from a stream that spans several positions. Terms sharing a
         * position are grouped and added as one clause; only the last group may become a prefix
         * query, and only when the last token's end offset equals the end offset reported after
         * {@code stream.end()}.
         *
         * @param field field name
         * @param stream token stream, reset and read here
         * @param operator occurrence for every clause
         * @param isPrefix whether the last position may be turned into a prefix query
         * @return the assembled boolean query
         * @throws IOException if reading the stream fails
         */
        private Query analyzeMultiBoolean(String field, TokenStream stream, BooleanClause.Occur operator, boolean isPrefix)
            throws IOException {
            BooleanQuery.Builder q = newBooleanQuery();
            List<Term> currentQuery = new ArrayList<>();

            TermToBytesRefAttribute termAtt = stream.getAttribute(TermToBytesRefAttribute.class);
            PositionIncrementAttribute posIncrAtt = stream.getAttribute(PositionIncrementAttribute.class);
            OffsetAttribute offsetAtt = stream.addAttribute(OffsetAttribute.class);

            stream.reset();
            int lastOffset = 0;
            while (stream.incrementToken()) {
                if (posIncrAtt.getPositionIncrement() != 0) {
                    // a new position starts: flush the terms gathered for the previous one
                    add(q, field, currentQuery, operator, false);
                    currentQuery.clear();
                }
                currentQuery.add(new Term(field, termAtt.getBytesRef()));
                lastOffset = offsetAtt.endOffset();
            }
            stream.end();
            add(q, field, currentQuery, operator, isPrefix && lastOffset == offsetAtt.endOffset());
            return q.build();
        }

        /**
         * Builds a simple phrase query through the field type after checking the field for positions.
         *
         * @return the phrase query, or a lenient field query if an {@link IllegalArgumentException}
         *         or {@link IllegalStateException} occurred and leniency is on
         */
        @Override
        protected Query analyzePhrase(String field, TokenStream stream, int slop) throws IOException {
            try {
                checkForPositions(field);
                return fieldType.phraseQuery(stream, slop, enablePositionIncrements, context);
            } catch (IllegalArgumentException | IllegalStateException e) {
                if (lenient == false) {
                    throw e;
                }
                return newLenientFieldQuery(field, e);
            }
        }

        /**
         * Builds a multi-phrase query through the field type after checking the field for positions.
         *
         * @return the multi-phrase query, or a lenient field query if an {@link IllegalArgumentException}
         *         or {@link IllegalStateException} occurred and leniency is on
         */
        @Override
        protected Query analyzeMultiPhrase(String field, TokenStream stream, int slop) throws IOException {
            try {
                checkForPositions(field);
                return fieldType.multiPhraseQuery(stream, slop, enablePositionIncrements, context);
            } catch (IllegalArgumentException | IllegalStateException e) {
                if (lenient == false) {
                    throw e;
                }
                return newLenientFieldQuery(field, e);
            }
        }

        /**
         * Builds a phrase-prefix query through the field type. The positions check is only
         * performed when the stream covers more than one position.
         *
         * @param field field name
         * @param stream token stream handed to the field type
         * @param slop phrase slop
         * @param positionCount number of positions counted in the stream
         * @return the phrase-prefix query, or a lenient field query if an {@link IllegalArgumentException}
         *         or {@link IllegalStateException} occurred and leniency is on
         * @throws IOException declared for query construction
         */
        private Query analyzePhrasePrefix(String field, TokenStream stream, int slop, int positionCount) throws IOException {
            try {
                final boolean spansSeveralPositions = positionCount > 1;
                if (spansSeveralPositions) {
                    checkForPositions(field);
                }
                return fieldType.phrasePrefixQuery(stream, slop, maxExpansions, context);
            } catch (IllegalArgumentException | IllegalStateException e) {
                if (lenient == false) {
                    throw e;
                }
                return newLenientFieldQuery(field, e);
            }
        }

        /**
         * Builds a boolean query from a graph token stream. The graph is walked from one
         * articulation point to the next; each segment contributes one clause:
         * <ul>
         * <li>a segment with side paths becomes a graph synonym query over the queries built
         * from each of its finite strings;</li>
         * <li>a segment with a single term becomes a term query, or a prefix query for the final
         * segment when {@code isPrefix} is set;</li>
         * <li>a segment with several terms becomes a synonym query.</li>
         * </ul>
         *
         * @param field field name
         * @param source token stream, reset and read here
         * @param operator occurrence for every clause
         * @param isPrefix whether the final segment may be turned into a prefix query
         * @return the assembled boolean query; empty if the graph has no finite strings
         * @throws IOException if reading the stream fails
         */
        private Query analyzeGraphBoolean(String field, TokenStream source, BooleanClause.Occur operator, boolean isPrefix)
            throws IOException {
            source.reset();
            GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
            BooleanQuery.Builder builder = new BooleanQuery.Builder();
            int[] articulationPoints = graph.articulationPoints();
            /*
              check if the GraphTokenStreamFiniteStrings graph is empty
              return empty BooleanQuery result
              (nothing in this method modifies the graph, so checking once up front is enough)
             */
            if (graph.getFiniteStrings().hasNext() == false) {
                return builder.build();
            }
            int lastState = 0;
            for (int i = 0; i <= articulationPoints.length; i++) {
                final int start = lastState;
                // -1 marks the final segment, which has no closing articulation point
                final int end = i < articulationPoints.length ? articulationPoints[i] : -1;
                lastState = end;
                final boolean usePrefix = isPrefix && end == -1;
                final Query queryPos;
                if (graph.hasSidePath(start)) {
                    final Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
                    final Iterator<Query> queries = new Iterator<Query>() {
                        @Override
                        public boolean hasNext() {
                            return it.hasNext();
                        }

                        @Override
                        public Query next() {
                            TokenStream ts = it.next();
                            final Type type;
                            if (getAutoGenerateMultiTermSynonymsPhraseQuery()) {
                                type = usePrefix ? Type.PHRASE_PREFIX : Type.PHRASE;
                            } else {
                                type = Type.BOOLEAN;
                            }
                            return createFieldQuery(ts, type, BooleanClause.Occur.MUST, field, 0);
                        }
                    };
                    queryPos = newGraphSynonymQuery(queries);
                } else {
                    Term[] terms = graph.getTerms(field, start);
                    assert terms.length > 0;
                    if (terms.length == 1) {
                        queryPos = usePrefix ? newPrefixQuery(terms[0]) : newTermQuery(terms[0], BoostAttribute.DEFAULT_BOOST);
                    } else {
                        // We don't apply prefix on synonyms
                        final TermAndBoost[] termAndBoosts = Arrays.stream(terms)
                            .map(t -> new TermAndBoost(t.bytes(), BoostAttribute.DEFAULT_BOOST))
                            .toArray(TermAndBoost[]::new);
                        queryPos = newSynonymQuery(field, termAndBoosts);
                    }
                }
                if (queryPos != null) {
                    builder.add(queryPos, operator);
                }
            }
            return builder.build();
        }

        /**
         * Builds a phrase-style query from a token stream whose tokens form a graph.
         *
         * <p>Two strategies are used:
         * <ul>
         *   <li>When {@code phraseSlop} is positive, every finite string of the graph is turned into its own
         *       field query via {@code createFieldQuery} (using {@code slop}) and the non-null results are
         *       added as {@code SHOULD} clauses of a boolean query.</li>
         *   <li>Otherwise the articulation points of the graph are visited in order, one span query is built
         *       per segment, and the segments are joined in an ordered {@link SpanNearQuery} with slop 0.</li>
         * </ul>
         *
         * @param source token stream to analyze; it is reset before being consumed
         * @param field  name of the field the generated queries target
         * @param type   either {@code Type.PHRASE} or {@code Type.PHRASE_PREFIX} (enforced by assertion)
         * @param slop   slop forwarded to {@code createFieldQuery} in the boolean (sloppy) strategy
         * @return in the sloppy strategy, the built boolean query (which may hold no clauses); otherwise
         *         {@code null} when no span clause was produced, the single span clause when exactly one was
         *         produced, or a {@link SpanNearQuery} over all of them
         * @throws IOException propagated from handling of the token stream
         * @throws BooleanQuery.TooManyClauses if the number of alternatives for a segment, the number of terms
         *         at a position, or the number of collected span clauses reaches
         *         {@code BooleanQuery.getMaxClauseCount()}
         */
        private Query analyzeGraphPhrase(TokenStream source, String field, Type type, int slop) throws IOException {
            assert type == Type.PHRASE_PREFIX || type == Type.PHRASE;

            source.reset();
            GraphTokenStreamFiniteStrings graph = new GraphTokenStreamFiniteStrings(source);
            // NOTE(review): this guard reads the enclosing-scope phraseSlop while the queries below are built
            // with the slop argument — confirm the two are meant to differ.
            if (phraseSlop > 0) {
                /*
                 * Creates a boolean query from the graph token stream by extracting all the finite strings from the graph
                 * and using them to create phrase queries with the appropriate slop.
                 */
                BooleanQuery.Builder builder = new BooleanQuery.Builder();
                Iterator<TokenStream> it = graph.getFiniteStrings();
                while (it.hasNext()) {
                    Query query = createFieldQuery(it.next(), type, BooleanClause.Occur.MUST, field, slop);
                    if (query != null) {
                        builder.add(query, BooleanClause.Occur.SHOULD);
                    }
                }
                return builder.build();
            }

            /*
             * Creates a span near (phrase) query from a graph token stream.
             * The articulation points of the graph are visited in order and the queries
             * created at each point are merged in the returned near query.
             */
            List<SpanQuery> clauses = new ArrayList<>();
            int[] articulationPoints = graph.articulationPoints();
            int lastState = 0;
            int maxClauseCount = BooleanQuery.getMaxClauseCount();
            // One extra iteration past the last articulation point handles the final segment (end == -1).
            for (int i = 0; i <= articulationPoints.length; i++) {
                int start = lastState;
                int end = -1;
                if (i < articulationPoints.length) {
                    end = articulationPoints[i];
                }
                lastState = end;
                final SpanQuery queryPos;
                // Prefix matching is only requested for the last segment of a PHRASE_PREFIX query.
                boolean usePrefix = end == -1 && type == Type.PHRASE_PREFIX;
                if (graph.hasSidePath(start)) {
                    // Several paths lead from start to end: build one span query per path and OR them together.
                    List<SpanQuery> queries = new ArrayList<>();
                    Iterator<TokenStream> it = graph.getFiniteStrings(start, end);
                    while (it.hasNext()) {
                        TokenStream ts = it.next();
                        SpanQuery q = createSpanQuery(ts, field, usePrefix);
                        if (q != null) {
                            if (queries.size() >= maxClauseCount) {
                                throw new BooleanQuery.TooManyClauses();
                            }
                            queries.add(q);
                        }
                    }
                    queryPos = queries.isEmpty() ? null : new SpanOrQuery(queries.toArray(new SpanQuery[0]));
                } else {
                    // No side path: all terms at this state are handed to newSpanQuery together.
                    Term[] terms = graph.getTerms(field, start);
                    assert terms.length > 0;
                    if (terms.length >= maxClauseCount) {
                        throw new BooleanQuery.TooManyClauses();
                    }
                    queryPos = newSpanQuery(terms, usePrefix);
                }

                if (queryPos != null) {
                    if (clauses.size() >= maxClauseCount) {
                        throw new BooleanQuery.TooManyClauses();
                    }
                    clauses.add(queryPos);
                }
            }

            if (clauses.isEmpty()) {
                return null;
            } else if (clauses.size() == 1) {
                return clauses.get(0);
            } else {
                // Ordered near query with slop 0 over the per-segment span clauses.
                return new SpanNearQuery(clauses.toArray(new SpanQuery[0]), 0, true);
            }
        }

        /**
         * Rejects phrase queries against a field that reports no position data.
         *
         * <p>The check passes silently when the field type's text search info reports positions, and also
         * when the field type is a {@code MatchOnlyTextFieldMapper.MatchOnlyTextFieldType}
         * (presumably that type supports phrase matching by other means — confirm against the mapper).
         *
         * @param field field name, used only in the error message
         * @throws IllegalStateException if the field has no positions and is not a match-only text field
         */
        private void checkForPositions(String field) {
            if (fieldType.getTextSearchInfo().hasPositions()) {
                return;
            }
            if (fieldType instanceof MatchOnlyTextFieldMapper.MatchOnlyTextFieldType) {
                return;
            }
            final String message = "field:[" + field + "] was indexed without position data; cannot run PhraseQuery";
            throw new IllegalStateException(message);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy