org.elasticsearch.index.query.SimpleQueryStringParser Maven / Gradle / Ivy
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.index.query;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.inject.Inject;
import org.elasticsearch.common.lucene.search.Queries;
import org.elasticsearch.common.regex.Regex;
import org.elasticsearch.common.util.LocaleUtils;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.index.mapper.MappedFieldType;
import java.io.IOException;
import java.util.Collections;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
/**
* SimpleQueryStringParser is a query parser that acts similar to a query_string
* query, but won't throw exceptions for any weird string syntax. It supports
* the following:
*
* - '{@code +}' specifies {@code AND} operation: token1+token2
*
- '{@code |}' specifies {@code OR} operation: token1|token2
*
- '{@code -}' negates a single token: -token0
*
- '{@code "}' creates phrases of terms: "term1 term2 ..."
*
- '{@code *}' at the end of terms specifies prefix query: term*
*
- '{@code (}' and '{@code)}' specifies precedence: token1 + (token2 | token3)
*
- '{@code ~}N' at the end of terms specifies fuzzy query: term~1
*
- '{@code ~}N' at the end of phrases specifies near/slop query: "term1 term2"~5
*
*
* See: {@link SimpleQueryParser} for more information.
*
* This query supports these options:
*
* Required:
* {@code query} - query text to be converted into other queries
*
* Optional:
* {@code analyzer} - anaylzer to be used for analyzing tokens to determine
* which kind of query they should be converted into, defaults to "standard"
* {@code default_operator} - default operator for boolean queries, defaults
* to OR
* {@code fields} - fields to search, defaults to _all if not set, allows
* boosting a field with ^n
*/
public class SimpleQueryStringParser implements QueryParser {
public static final String NAME = "simple_query_string";
@Inject
public SimpleQueryStringParser() {
}
@Override
public String[] names() {
return new String[]{NAME};
}
@Override
public Query parse(QueryParseContext parseContext) throws IOException, QueryParsingException {
XContentParser parser = parseContext.parser();
String currentFieldName = null;
String queryBody = null;
float boost = 1.0f;
String queryName = null;
String minimumShouldMatch = null;
Map fieldsAndWeights = null;
BooleanClause.Occur defaultOperator = null;
Analyzer analyzer = null;
int flags = -1;
SimpleQueryParser.Settings sqsSettings = new SimpleQueryParser.Settings();
XContentParser.Token token;
while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
if (token == XContentParser.Token.FIELD_NAME) {
currentFieldName = parser.currentName();
} else if (token == XContentParser.Token.START_ARRAY) {
if ("fields".equals(currentFieldName)) {
while ((token = parser.nextToken()) != XContentParser.Token.END_ARRAY) {
String fField = null;
float fBoost = 1;
char[] text = parser.textCharacters();
int end = parser.textOffset() + parser.textLength();
for (int i = parser.textOffset(); i < end; i++) {
if (text[i] == '^') {
int relativeLocation = i - parser.textOffset();
fField = new String(text, parser.textOffset(), relativeLocation);
fBoost = Float.parseFloat(new String(text, i + 1, parser.textLength() - relativeLocation - 1));
break;
}
}
if (fField == null) {
fField = parser.text();
}
if (fieldsAndWeights == null) {
fieldsAndWeights = new HashMap<>();
}
if (Regex.isSimpleMatchPattern(fField)) {
for (String fieldName : parseContext.mapperService().simpleMatchToIndexNames(fField)) {
fieldsAndWeights.put(fieldName, fBoost);
}
} else {
MappedFieldType fieldType = parseContext.fieldMapper(fField);
if (fieldType != null) {
fieldsAndWeights.put(fieldType.names().indexName(), fBoost);
} else {
fieldsAndWeights.put(fField, fBoost);
}
}
}
} else {
throw new QueryParsingException(parseContext, "[" + NAME + "] query does not support [" + currentFieldName + "]");
}
} else if (token.isValue()) {
if ("query".equals(currentFieldName)) {
queryBody = parser.text();
} else if ("boost".equals(currentFieldName)) {
boost = parser.floatValue();
} else if ("analyzer".equals(currentFieldName)) {
analyzer = parseContext.analysisService().analyzer(parser.text());
if (analyzer == null) {
throw new QueryParsingException(parseContext, "[" + NAME + "] analyzer [" + parser.text() + "] not found");
}
} else if ("default_operator".equals(currentFieldName) || "defaultOperator".equals(currentFieldName)) {
String op = parser.text();
if ("or".equalsIgnoreCase(op)) {
defaultOperator = BooleanClause.Occur.SHOULD;
} else if ("and".equalsIgnoreCase(op)) {
defaultOperator = BooleanClause.Occur.MUST;
} else {
throw new QueryParsingException(parseContext, "[" + NAME + "] default operator [" + op + "] is not allowed");
}
} else if ("flags".equals(currentFieldName)) {
if (parser.currentToken() != XContentParser.Token.VALUE_NUMBER) {
// Possible options are:
// ALL, NONE, AND, OR, PREFIX, PHRASE, PRECEDENCE, ESCAPE, WHITESPACE, FUZZY, NEAR, SLOP
flags = SimpleQueryStringFlag.resolveFlags(parser.text());
} else {
flags = parser.intValue();
if (flags < 0) {
flags = SimpleQueryStringFlag.ALL.value();
}
}
} else if ("locale".equals(currentFieldName)) {
String localeStr = parser.text();
Locale locale = LocaleUtils.parse(localeStr);
sqsSettings.locale(locale);
} else if ("lowercase_expanded_terms".equals(currentFieldName)) {
sqsSettings.lowercaseExpandedTerms(parser.booleanValue());
} else if ("lenient".equals(currentFieldName)) {
sqsSettings.lenient(parser.booleanValue());
} else if ("analyze_wildcard".equals(currentFieldName)) {
sqsSettings.analyzeWildcard(parser.booleanValue());
} else if ("_name".equals(currentFieldName)) {
queryName = parser.text();
} else if ("minimum_should_match".equals(currentFieldName)) {
minimumShouldMatch = parser.textOrNull();
} else {
throw new QueryParsingException(parseContext, "[" + NAME + "] unsupported field [" + parser.currentName() + "]");
}
}
}
// Query text is required
if (queryBody == null) {
throw new QueryParsingException(parseContext, "[" + NAME + "] query text missing");
}
// Use standard analyzer by default
if (analyzer == null) {
analyzer = parseContext.mapperService().searchAnalyzer();
}
if (fieldsAndWeights == null) {
fieldsAndWeights = Collections.singletonMap(parseContext.defaultField(), 1.0F);
}
SimpleQueryParser sqp = new SimpleQueryParser(analyzer, fieldsAndWeights, flags, sqsSettings);
if (defaultOperator != null) {
sqp.setDefaultOperator(defaultOperator);
}
Query query = sqp.parse(queryBody);
if (queryName != null) {
parseContext.addNamedQuery(queryName, query);
}
// If the coordination factor is disabled on a boolean query we don't apply the minimum should match.
// This is done to make sure that the minimum_should_match doesn't get applied when there is only one word
// and multiple variations of the same word in the query (synonyms for instance).
if (minimumShouldMatch != null && query instanceof BooleanQuery && !((BooleanQuery) query).isCoordDisabled()) {
query = Queries.applyMinimumShouldMatch((BooleanQuery) query, minimumShouldMatch);
}
if (query != null) {
query.setBoost(boost * query.getBoost());
}
return query;
}
}