All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.index.query.CommonTermsQueryBuilder Maven / Gradle / Ivy

There is a newer version: 8.14.1
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.index.query;

import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.similarities.Similarity;
import org.elasticsearch.common.xcontent.XContentBuilder;

import java.io.IOException;

/**
 * CommonTermsQuery query is a query that executes high-frequency terms in a
 * optional sub-query to prevent slow queries due to "common" terms like
 * stopwords. This query basically builds 2 queries off the {@code #add(Term)
 * added} terms where low-frequency terms are added to a required boolean clause
 * and high-frequency terms are added to an optional boolean clause. The
 * optional clause is only executed if the required "low-frequency' clause
 * matches. Scores produced by this query will be slightly different to plain
 * {@link BooleanQuery} scorer mainly due to differences in the
 * {@link Similarity#coord(int,int) number of leave queries} in the required
 * boolean clause. In the most cases high-frequency terms are unlikely to
 * significantly contribute to the document score unless at least one of the
 * low-frequency terms are matched such that this query can improve query
 * execution times significantly if applicable.
 */
public class CommonTermsQueryBuilder extends QueryBuilder implements BoostableQueryBuilder {

    public static enum Operator {
        OR, AND
    }

    private final String name;

    private final Object text;

    private Operator highFreqOperator = null;

    private Operator lowFreqOperator = null;

    private String analyzer = null;

    private Float boost = null;

    private String lowFreqMinimumShouldMatch = null;

    private String highFreqMinimumShouldMatch = null;

    private Boolean disableCoord = null;

    private Float cutoffFrequency = null;

    private String queryName;

    /**
     * Constructs a new common terms query.
     */
    public CommonTermsQueryBuilder(String name, Object text) {
        if (name == null) {
            throw new IllegalArgumentException("Field name must not be null");
        }
        if (text == null) {
            throw new IllegalArgumentException("Query must not be null");
        }
        this.text = text;
        this.name = name;
    }

    /**
     * Sets the operator to use for terms with a high document frequency
     * (greater than or equal to {@link #cutoffFrequency(float)}. Defaults to
     * AND.
     */
    public CommonTermsQueryBuilder highFreqOperator(Operator operator) {
        this.highFreqOperator = operator;
        return this;
    }

    /**
     * Sets the operator to use for terms with a low document frequency (less
     * than {@link #cutoffFrequency(float)}. Defaults to AND.
     */
    public CommonTermsQueryBuilder lowFreqOperator(Operator operator) {
        this.lowFreqOperator = operator;
        return this;
    }

    /**
     * Explicitly set the analyzer to use. Defaults to use explicit mapping
     * config for the field, or, if not set, the default search analyzer.
     */
    public CommonTermsQueryBuilder analyzer(String analyzer) {
        this.analyzer = analyzer;
        return this;
    }

    /**
     * Set the boost to apply to the query.
     */
    @Override
    public CommonTermsQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Sets the cutoff document frequency for high / low frequent terms. A value
     * in [0..1] (or absolute number >=1) representing the maximum threshold of
     * a terms document frequency to be considered a low frequency term.
     * Defaults to
     * {@value CommonTermsQueryParser#DEFAULT_MAX_TERM_DOC_FREQ}
     */
    public CommonTermsQueryBuilder cutoffFrequency(float cutoffFrequency) {
        this.cutoffFrequency = cutoffFrequency;
        return this;
    }

    /**
     * Sets the minimum number of high frequent query terms that need to match in order to
     * produce a hit when there are no low frequen terms.
     */
    public CommonTermsQueryBuilder highFreqMinimumShouldMatch(String highFreqMinimumShouldMatch) {
        this.highFreqMinimumShouldMatch = highFreqMinimumShouldMatch;
        return this;
    }

    /**
     * Sets the minimum number of low frequent query terms that need to match in order to
     * produce a hit.
     */
    public CommonTermsQueryBuilder lowFreqMinimumShouldMatch(String lowFreqMinimumShouldMatch) {
        this.lowFreqMinimumShouldMatch = lowFreqMinimumShouldMatch;
        return this;
    }
    
    public CommonTermsQueryBuilder disableCoord(boolean disableCoord) {
        this.disableCoord = disableCoord;
        return this;
    }

    /**
     * Sets the query name for the filter that can be used when searching for matched_filters per hit.
     */
    public CommonTermsQueryBuilder queryName(String queryName) {
        this.queryName = queryName;
        return this;
    }

    @Override
    public void doXContent(XContentBuilder builder, Params params) throws IOException {
        builder.startObject(CommonTermsQueryParser.NAME);
        builder.startObject(name);

        builder.field("query", text);
        if (disableCoord != null) {
            builder.field("disable_coord", disableCoord);
        }
        if (highFreqOperator != null) {
            builder.field("high_freq_operator", highFreqOperator.toString());
        }
        if (lowFreqOperator != null) {
            builder.field("low_freq_operator", lowFreqOperator.toString());
        }
        if (analyzer != null) {
            builder.field("analyzer", analyzer);
        }
        if (boost != null) {
            builder.field("boost", boost);
        }
        if (cutoffFrequency != null) {
            builder.field("cutoff_frequency", cutoffFrequency);
        }
        if (lowFreqMinimumShouldMatch != null || highFreqMinimumShouldMatch != null) {
            builder.startObject("minimum_should_match");
            if (lowFreqMinimumShouldMatch != null) {
                builder.field("low_freq", lowFreqMinimumShouldMatch);
            }
            if (highFreqMinimumShouldMatch != null) {
                builder.field("high_freq", highFreqMinimumShouldMatch);
            }
            builder.endObject();
        }
        if (queryName != null) {
            builder.field("_name", queryName);
        }

        builder.endObject();
        builder.endObject();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy