// org.elasticsearch.index.query.xcontent.MoreLikeThisQueryBuilder (Maven / Gradle / Ivy)
// Show all versions of elasticsearch / Show documentation
/*
* Licensed to Elastic Search and Shay Banon under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. Elastic Search licenses this
* file to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query.xcontent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilderException;
import java.io.IOException;
/**
 * A more like this query that finds documents that are "like" the text provided through
 * {@link #likeText(String)}, which is checked against the fields the query is constructed with.
 *
 * <p>Every optional setting starts out "unset" ({@code -1} for numbers, {@code null} for arrays).
 * Unset settings are left out of the generated content entirely. Because {@code -1} is the
 * sentinel, explicitly passing {@code -1} to a numeric setter is indistinguishable from never
 * calling it.
 *
 * @author kimchy (shay.banon)
 */
public class MoreLikeThisQueryBuilder extends BaseQueryBuilder {

    /** Sentinel marking a numeric option as "not set"; such options are not written out. */
    private static final int UNSET = -1;

    private final String[] fields;

    private String likeText;

    private float percentTermsToMatch = UNSET;

    private int minTermFreq = UNSET;

    private int maxQueryTerms = UNSET;

    private String[] stopWords = null;

    private int minDocFreq = UNSET;

    private int maxDocFreq = UNSET;

    private int minWordLen = UNSET;

    private int maxWordLen = UNSET;

    private float boostTerms = UNSET;

    private float boost = UNSET;

    /**
     * Constructs a new more like this query which uses the "_all" field.
     * No "fields" array is written to the generated content in this case.
     */
    public MoreLikeThisQueryBuilder() {
        this.fields = null;
    }

    /**
     * Sets the field names that will be used when generating the 'More Like This' query.
     *
     * @param fields the field names that will be used when generating the 'More Like This' query.
     */
    public MoreLikeThisQueryBuilder(String... fields) {
        this.fields = fields;
    }

    /**
     * The text to use in order to find documents that are "like" this. Required: generating the
     * query content fails if it was never set.
     *
     * @param likeText the text to compare documents against
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder likeText(String likeText) {
        this.likeText = likeText;
        return this;
    }

    /**
     * The percentage of terms to match. Defaults to 0.3.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder percentTermsToMatch(float percentTermsToMatch) {
        this.percentTermsToMatch = percentTermsToMatch;
        return this;
    }

    /**
     * The frequency below which terms will be ignored in the source doc. The default
     * frequency is 2.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder minTermFreq(int minTermFreq) {
        this.minTermFreq = minTermFreq;
        return this;
    }

    /**
     * Sets the maximum number of query terms that will be included in any generated query.
     * Defaults to 25.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder maxQueryTerms(int maxQueryTerms) {
        this.maxQueryTerms = maxQueryTerms;
        return this;
    }

    /**
     * Set the set of stopwords.
     *
     * <p>Any word in this set is considered "uninteresting" and ignored. Even if your Analyzer allows stopwords, you
     * might want to tell the MoreLikeThis code to ignore them, as for the purposes of document similarity it seems
     * reasonable to assume that "a stop word is never interesting".
     *
     * <p>A {@code null} or empty array results in no "stop_words" entry being written.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder stopWords(String... stopWords) {
        this.stopWords = stopWords;
        return this;
    }

    /**
     * Sets the frequency at which words will be ignored which do not occur in at least this
     * many docs. Defaults to 5.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder minDocFreq(int minDocFreq) {
        this.minDocFreq = minDocFreq;
        return this;
    }

    /**
     * Set the maximum frequency in which words may still appear. Words that appear
     * in more than this many docs will be ignored. Defaults to unbounded.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder maxDocFreq(int maxDocFreq) {
        this.maxDocFreq = maxDocFreq;
        return this;
    }

    /**
     * Sets the minimum word length below which words will be ignored. Defaults
     * to 0.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder minWordLen(int minWordLen) {
        this.minWordLen = minWordLen;
        return this;
    }

    /**
     * Sets the maximum word length above which words will be ignored. Defaults to
     * unbounded (0).
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder maxWordLen(int maxWordLen) {
        this.maxWordLen = maxWordLen;
        return this;
    }

    /**
     * Sets the boost factor to use when boosting terms. Defaults to 1.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder boostTerms(float boostTerms) {
        this.boostTerms = boostTerms;
        return this;
    }

    /**
     * Sets the value written as the "boost" entry of the generated query. If never set
     * (or set to {@code -1}), no "boost" entry is written.
     *
     * @return this builder, for chaining
     */
    public MoreLikeThisQueryBuilder boost(float boost) {
        this.boost = boost;
        return this;
    }

    /**
     * Writes this query as an object named {@code MoreLikeThisQueryParser.NAME} containing
     * the optional "fields" array, the mandatory "like_text", and every option that was set.
     *
     * @param builder the content builder to write into
     * @param params  serialization parameters (not consulted by this implementation)
     * @throws IOException           if the builder fails to write
     * @throws QueryBuilderException if {@link #likeText(String)} was never provided
     */
    @Override protected void doXContent(XContentBuilder builder, Params params) throws IOException {
        // Validate before touching the builder so a failure never leaves a
        // half-written (unclosed) object behind in the caller's content.
        if (likeText == null) {
            throw new QueryBuilderException("moreLikeThis requires 'likeText' to be provided");
        }
        builder.startObject(MoreLikeThisQueryParser.NAME);
        if (fields != null) {
            writeStringArray(builder, "fields", fields);
        }
        builder.field("like_text", likeText);
        if (percentTermsToMatch != UNSET) {
            builder.field("percent_terms_to_match", percentTermsToMatch);
        }
        if (minTermFreq != UNSET) {
            builder.field("min_term_freq", minTermFreq);
        }
        if (maxQueryTerms != UNSET) {
            builder.field("max_query_terms", maxQueryTerms);
        }
        if (stopWords != null && stopWords.length > 0) {
            writeStringArray(builder, "stop_words", stopWords);
        }
        if (minDocFreq != UNSET) {
            builder.field("min_doc_freq", minDocFreq);
        }
        if (maxDocFreq != UNSET) {
            builder.field("max_doc_freq", maxDocFreq);
        }
        if (minWordLen != UNSET) {
            builder.field("min_word_len", minWordLen);
        }
        if (maxWordLen != UNSET) {
            builder.field("max_word_len", maxWordLen);
        }
        if (boostTerms != UNSET) {
            builder.field("boost_terms", boostTerms);
        }
        if (boost != UNSET) {
            builder.field("boost", boost);
        }
        builder.endObject();
    }

    /** Writes {@code values} as a named array of string values, in order. */
    private static void writeStringArray(XContentBuilder builder, String name, String[] values) throws IOException {
        builder.startArray(name);
        for (String value : values) {
            builder.value(value);
        }
        builder.endArray();
    }
}