All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.query.CommonTermsQueryParser Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.ExtendedCommonTermsQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRefBuilder;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;

import java.io.IOException;

/**
 *
 */
public class CommonTermsQueryParser implements QueryParser {

    public static final String NAME = "common";

    static final float DEFAULT_MAX_TERM_DOC_FREQ = 0.01f;

    static final Occur DEFAULT_HIGH_FREQ_OCCUR = Occur.SHOULD;

    static final Occur DEFAULT_LOW_FREQ_OCCUR = Occur.SHOULD;

    static final boolean DEFAULT_DISABLE_COORD = true;


    @Inject
    public CommonTermsQueryParser() {
    }

    @Override
    public String[] names() {
        return new String[] { NAME };
    }

    @Override
    public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
        XContentParser parser = parseContext.parser();
        XContentParser.Token token = parser.nextToken();
        if (token != XContentParser.Token.FIELD_NAME) {
            throw new QueryParsingException(parseContext, "[common] query malformed, no field");
        }
        String fieldName = parser.currentName();
        Object value = null;
        float boost = 1.0f;
        String queryAnalyzer = null;
        String lowFreqMinimumShouldMatch = null;
        String highFreqMinimumShouldMatch = null;
        boolean disableCoord = DEFAULT_DISABLE_COORD;
        Occur highFreqOccur = DEFAULT_HIGH_FREQ_OCCUR;
        Occur lowFreqOccur = DEFAULT_LOW_FREQ_OCCUR;
        float maxTermFrequency = DEFAULT_MAX_TERM_DOC_FREQ;
        String queryName = null;
        token = parser.nextToken();
        if (token == XContentParser.Token.START_OBJECT) {
            String currentFieldName = null;
            while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                if (token == XContentParser.Token.FIELD_NAME) {
                    currentFieldName = parser.currentName();
                } else if (token == XContentParser.Token.START_OBJECT) {
                    if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                        String innerFieldName = null;
                        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
                            if (token == XContentParser.Token.FIELD_NAME) {
                                innerFieldName = parser.currentName();
                            } else if (token.isValue()) {
                                if ("low_freq".equals(innerFieldName) || "lowFreq".equals(innerFieldName)) {
                                    lowFreqMinimumShouldMatch = parser.text();
                                } else if ("high_freq".equals(innerFieldName) || "highFreq".equals(innerFieldName)) {
                                    highFreqMinimumShouldMatch = parser.text();
                                } else {
                                    throw new QueryParsingException(parseContext, "[common] query does not support [" + innerFieldName
                                            + "] for [" + currentFieldName + "]");
                                }
                            }
                        }
                    } else {
                        throw new QueryParsingException(parseContext, "[common] query does not support [" + currentFieldName + "]");
                    }
                } else if (token.isValue()) {
                    if ("query".equals(currentFieldName)) {
                        value = parser.objectText();
                    } else if ("analyzer".equals(currentFieldName)) {
                        String analyzer = parser.text();
                        if (parseContext.analysisService().analyzer(analyzer) == null) {
                            throw new QueryParsingException(parseContext, "[common] analyzer [" + parser.text() + "] not found");
                        }
                        queryAnalyzer = analyzer;
                    } else if ("disable_coord".equals(currentFieldName) || "disableCoord".equals(currentFieldName)) {
                        disableCoord = parser.booleanValue();
                    } else if ("boost".equals(currentFieldName)) {
                        boost = parser.floatValue();
                    } else if ("high_freq_operator".equals(currentFieldName) || "highFreqOperator".equals(currentFieldName)) {
                        String op = parser.text();
                        if ("or".equalsIgnoreCase(op)) {
                            highFreqOccur = BooleanClause.Occur.SHOULD;
                        } else if ("and".equalsIgnoreCase(op)) {
                            highFreqOccur = BooleanClause.Occur.MUST;
                        } else {
                            throw new QueryParsingException(parseContext,
                                    "[common] query requires operator to be either 'and' or 'or', not [" + op + "]");
                        }
                    } else if ("low_freq_operator".equals(currentFieldName) || "lowFreqOperator".equals(currentFieldName)) {
                        String op = parser.text();
                        if ("or".equalsIgnoreCase(op)) {
                            lowFreqOccur = BooleanClause.Occur.SHOULD;
                        } else if ("and".equalsIgnoreCase(op)) {
                            lowFreqOccur = BooleanClause.Occur.MUST;
                        } else {
                            throw new QueryParsingException(parseContext,
                                    "[common] query requires operator to be either 'and' or 'or', not [" + op + "]");
                        }
                    } else if ("minimum_should_match".equals(currentFieldName) || "minimumShouldMatch".equals(currentFieldName)) {
                        lowFreqMinimumShouldMatch = parser.text();
                    } else if ("cutoff_frequency".equals(currentFieldName)) {
                        maxTermFrequency = parser.floatValue();
                    } else if ("_name".equals(currentFieldName)) {
                        queryName = parser.text();
                    } else {
                        throw new QueryParsingException(parseContext, "[common] query does not support [" + currentFieldName + "]");
                    }
                }
            }
            parser.nextToken();
        } else {
            value = parser.objectText();
            // move to the next token
            token = parser.nextToken();
            if (token != XContentParser.Token.END_OBJECT) {
                throw new QueryParsingException(
                        parseContext,
                        "[common] query parsed in simplified form, with direct field name, but included more options than just the field name, possibly use its 'options' form, with 'query' element?");
            }
        }

        if (value == null) {
            throw new QueryParsingException(parseContext, "No text specified for text query");
        }
        String field;
        MappedFieldType fieldType = parseContext.fieldMapper(fieldName);
        if (fieldType != null) {
            field = fieldType.names().indexName();
        } else {
            field = fieldName;
        }

        Analyzer analyzer = null;
        if (queryAnalyzer == null) {
            if (fieldType != null) {
                analyzer = fieldType.searchAnalyzer();
            }
            if (analyzer == null && fieldType != null) {
                analyzer = parseContext.getSearchAnalyzer(fieldType);
            }
            if (analyzer == null) {
                analyzer = parseContext.mapperService().searchAnalyzer();
            }
        } else {
            analyzer = parseContext.mapperService().analysisService().analyzer(queryAnalyzer);
            if (analyzer == null) {
                throw new IllegalArgumentException("No analyzer found for [" + queryAnalyzer + "]");
            }
        }

        ExtendedCommonTermsQuery commonsQuery = new ExtendedCommonTermsQuery(highFreqOccur, lowFreqOccur, maxTermFrequency, disableCoord, fieldType);
        commonsQuery.setBoost(boost);
        Query query = parseQueryString(commonsQuery, value.toString(), field, parseContext, analyzer, lowFreqMinimumShouldMatch, highFreqMinimumShouldMatch);
        if (queryName != null) {
            parseContext.addNamedQuery(queryName, query);
        }
        return query;
    }


    private final Query parseQueryString(ExtendedCommonTermsQuery query, String queryString, String field, QueryParseContext parseContext,
            Analyzer analyzer, String lowFreqMinimumShouldMatch, String highFreqMinimumShouldMatch) throws IOException {
        // Logic similar to QueryParser#getFieldQuery
        int count = 0;
        try (TokenStream source = analyzer.tokenStream(field, queryString.toString())) {
            source.reset();
            CharTermAttribute termAtt = source.addAttribute(CharTermAttribute.class);
            BytesRefBuilder builder = new BytesRefBuilder();
            while (source.incrementToken()) {
                // UTF-8
                builder.copyChars(termAtt);
                query.add(new Term(field, builder.toBytesRef()));
                count++;
            }
        }

        if (count == 0) {
            return null;
        }
        query.setLowFreqMinimumNumberShouldMatch(lowFreqMinimumShouldMatch);
        query.setHighFreqMinimumNumberShouldMatch(highFreqMinimumShouldMatch);
        return query;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy