// org.apache.lucene.queryparser.classic.MultiFieldQueryParser (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.queryparser.classic;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
/**
 * A {@link QueryParser} which constructs queries to search multiple fields.
 *
 * <p>Whenever a clause of the query string carries no explicit field, the clause is expanded across
 * every configured field and the per-field results are combined as optional ({@code SHOULD})
 * clauses. Optional per-field boosts are applied to the per-field queries.
 */
public class MultiFieldQueryParser extends QueryParser {
  /** Fields searched when a clause does not name a field explicitly. */
  protected String[] fields;

  /** Optional map from field name to boost factor; {@code null} when no boosts were supplied. */
  protected Map<String, Float> boosts;

  /**
   * Creates a MultiFieldQueryParser. Allows passing of a map from field name to the boost to apply
   * to the queries generated for that field.
   *
   * <p>It will, when {@code parse(String query)} is called, construct a query like this (assuming
   * the query consists of two terms and you specify the two fields {@code title} and {@code
   * body}):
   *
   * <pre>{@code
   * (title:term1 body:term1) (title:term2 body:term2)
   * }</pre>
   *
   * <p>When {@code setDefaultOperator(AND_OPERATOR)} is set, the result will be:
   *
   * <pre>{@code
   * +(title:term1 body:term1) +(title:term2 body:term2)
   * }</pre>
   *
   * <p>When you pass a boost ({@code title=>5 body=>10}) you can get
   *
   * <pre>{@code
   * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0)
   * }</pre>
   *
   * <p>In other words, all the query's terms must appear, but it doesn't matter in what fields
   * they appear.
   *
   * @param fields fields to search when a clause has no explicit field
   * @param analyzer analyzer handed to the underlying {@link QueryParser}
   * @param boosts map from field name to boost; fields without an entry are not boosted
   */
  public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map<String, Float> boosts) {
    this(fields, analyzer);
    this.boosts = boosts;
  }

  /**
   * Creates a MultiFieldQueryParser.
   *
   * <p>It will, when {@code parse(String query)} is called, construct a query like this (assuming
   * the query consists of two terms and you specify the two fields {@code title} and {@code
   * body}):
   *
   * <pre>{@code
   * (title:term1 body:term1) (title:term2 body:term2)
   * }</pre>
   *
   * <p>When {@code setDefaultOperator(AND_OPERATOR)} is set, the result will be:
   *
   * <pre>{@code
   * +(title:term1 body:term1) +(title:term2 body:term2)
   * }</pre>
   *
   * <p>In other words, all the query's terms must appear, but it doesn't matter in what fields
   * they appear.
   *
   * @param fields fields to search when a clause has no explicit field
   * @param analyzer analyzer handed to the underlying {@link QueryParser}
   */
  public MultiFieldQueryParser(String[] fields, Analyzer analyzer) {
    // The default field is null on purpose: a null field is what triggers multi-field expansion
    // in the overrides below.
    super(null, analyzer);
    this.fields = fields;
  }

  /**
   * Builds a field query with the given slop. The text is always handed to the superclass as
   * quoted.
   *
   * <p>With a {@code null} field the text is expanded over every configured field; each per-field
   * query receives its boost (if any) and then the slop, and the results are combined through
   * {@link #getMultiFieldQuery(List)}.
   *
   * @param field explicit field, or {@code null} to search all configured fields
   * @param queryText text to analyze
   * @param slop slop to set on phrase queries
   * @return the resulting query, or {@code null} when no field produced a query (happens for
   *     stopwords)
   * @throws ParseException if the superclass fails to build a query
   */
  @Override
  protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
    if (field != null) {
      return applySlop(super.getFieldQuery(field, queryText, true), slop);
    }

    List<Query> clauses = new ArrayList<>();
    for (String currentField : fields) {
      Query q = super.getFieldQuery(currentField, queryText, true);
      if (q != null) {
        // Boost first, then slop: applySlop unwraps a BoostQuery to reach the query inside it.
        clauses.add(applySlop(applyBoost(q, currentField), slop));
      }
    }
    if (clauses.isEmpty()) { // happens for stopwords
      return null;
    }
    return getMultiFieldQuery(clauses);
  }

  /**
   * Returns a query equivalent to {@code q} but carrying the given slop.
   *
   * <p>A {@link PhraseQuery} is rebuilt term by term with the new slop, a {@link MultiPhraseQuery}
   * is rebuilt only when its slop differs, and a {@link BoostQuery} is unwrapped, processed
   * recursively and re-wrapped with the same boost. Any other query (including {@code null}) is
   * returned unchanged.
   */
  private Query applySlop(Query q, int slop) {
    if (q instanceof PhraseQuery) {
      PhraseQuery pq = (PhraseQuery) q;
      PhraseQuery.Builder builder = new PhraseQuery.Builder();
      builder.setSlop(slop);
      org.apache.lucene.index.Term[] terms = pq.getTerms();
      int[] positions = pq.getPositions();
      for (int i = 0; i < terms.length; ++i) {
        builder.add(terms[i], positions[i]);
      }
      return builder.build();
    }
    if (q instanceof MultiPhraseQuery) {
      MultiPhraseQuery mpq = (MultiPhraseQuery) q;
      if (slop != mpq.getSlop()) {
        return new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
      }
      return q;
    }
    if (q instanceof BoostQuery) {
      BoostQuery boosted = (BoostQuery) q;
      return new BoostQuery(applySlop(boosted.getQuery(), slop), boosted.getBoost());
    }
    return q;
  }

  /**
   * Wraps {@code q} in a {@link BoostQuery} when a boost is configured for {@code field};
   * otherwise returns {@code q} unchanged.
   */
  private Query applyBoost(Query q, String field) {
    if (boosts != null) {
      Float boost = boosts.get(field);
      if (boost != null) {
        return new BoostQuery(q, boost);
      }
    }
    return q;
  }

  /**
   * Builds a field query for the given text.
   *
   * <p>With an explicit field the superclass result is returned as is (no boost is applied). With
   * a {@code null} field, a query is built for every configured field. When a per-field query is a
   * {@link BooleanQuery}, its clauses are treated as one entry per term; any other query counts as
   * a single term. Entries at the same position are then grouped across fields, with boosts
   * applied. If more than one term position exists, each group becomes its own {@link
   * BooleanQuery} of {@code SHOULD} clauses; otherwise the entries are used directly. The result is
   * combined through {@link #getMultiFieldQuery(List)}.
   *
   * @param field explicit field, or {@code null} to search all configured fields
   * @param queryText text to analyze
   * @param quoted whether the text was quoted in the query string
   * @return the resulting query, or {@code null} when no field produced a query (happens for
   *     stopwords)
   * @throws ParseException if the superclass fails to build a query
   */
  @Override
  protected Query getFieldQuery(String field, String queryText, boolean quoted)
      throws ParseException {
    if (field != null) {
      return super.getFieldQuery(field, queryText, quoted);
    }

    // First pass: one query per field, remembering the largest number of term positions seen.
    Query[] fieldQueries = new Query[fields.length];
    int maxTerms = 0;
    for (int i = 0; i < fields.length; i++) {
      Query q = super.getFieldQuery(fields[i], queryText, quoted);
      if (q != null) {
        int termCount = (q instanceof BooleanQuery) ? ((BooleanQuery) q).clauses().size() : 1;
        maxTerms = Math.max(maxTerms, termCount);
        fieldQueries[i] = q;
      }
    }

    // Second pass: regroup by term position so each term is searched across all fields.
    List<Query> clauses = new ArrayList<>();
    for (int termNum = 0; termNum < maxTerms; termNum++) {
      List<Query> termClauses = new ArrayList<>();
      for (int i = 0; i < fields.length; i++) {
        if (fieldQueries[i] == null) {
          continue;
        }
        Query q = termClauseAt(fieldQueries[i], termNum);
        if (q != null) {
          termClauses.add(applyBoost(q, fields[i]));
        }
      }
      if (maxTerms > 1) {
        if (!termClauses.isEmpty()) {
          BooleanQuery.Builder builder = newBooleanQuery();
          for (Query termClause : termClauses) {
            builder.add(termClause, BooleanClause.Occur.SHOULD);
          }
          clauses.add(builder.build());
        }
      } else {
        clauses.addAll(termClauses);
      }
    }
    if (clauses.isEmpty()) { // happens for stopwords
      return null;
    }
    return getMultiFieldQuery(clauses);
  }

  /**
   * Returns the part of a per-field query that belongs to term position {@code termNum}: the
   * clause at that index for a {@link BooleanQuery}, the query itself at position 0 for any other
   * query (e.g. TermQuery-s), or {@code null} when there is nothing at that position.
   */
  private static Query termClauseAt(Query fieldQuery, int termNum) {
    if (fieldQuery instanceof BooleanQuery) {
      List<BooleanClause> nestedClauses = ((BooleanQuery) fieldQuery).clauses();
      return termNum < nestedClauses.size() ? nestedClauses.get(termNum).getQuery() : null;
    }
    return termNum == 0 ? fieldQuery : null;
  }

  /**
   * Builds a fuzzy query. With a {@code null} field, one query per configured field is built by
   * calling this method again with each field and the results are combined through {@link
   * #getMultiFieldQuery(List)}; otherwise the superclass query is returned with the field's boost
   * applied.
   */
  @Override
  protected Query getFuzzyQuery(String field, String termStr, float minSimilarity)
      throws ParseException {
    if (field == null) {
      List<Query> clauses = new ArrayList<>();
      for (String currentField : fields) {
        clauses.add(getFuzzyQuery(currentField, termStr, minSimilarity));
      }
      return getMultiFieldQuery(clauses);
    }
    return applyBoost(super.getFuzzyQuery(field, termStr, minSimilarity), field);
  }

  /**
   * Builds a prefix query. With a {@code null} field, one query per configured field is built by
   * calling this method again with each field and the results are combined through {@link
   * #getMultiFieldQuery(List)}; otherwise the superclass query is returned with the field's boost
   * applied.
   */
  @Override
  protected Query getPrefixQuery(String field, String termStr) throws ParseException {
    if (field == null) {
      List<Query> clauses = new ArrayList<>();
      for (String currentField : fields) {
        clauses.add(getPrefixQuery(currentField, termStr));
      }
      return getMultiFieldQuery(clauses);
    }
    return applyBoost(super.getPrefixQuery(field, termStr), field);
  }

  /**
   * Builds a wildcard query. With a {@code null} field, one query per configured field is built by
   * calling this method again with each field and the results are combined through {@link
   * #getMultiFieldQuery(List)}; otherwise the superclass query is returned with the field's boost
   * applied.
   */
  @Override
  protected Query getWildcardQuery(String field, String termStr) throws ParseException {
    if (field == null) {
      List<Query> clauses = new ArrayList<>();
      for (String currentField : fields) {
        clauses.add(getWildcardQuery(currentField, termStr));
      }
      return getMultiFieldQuery(clauses);
    }
    return applyBoost(super.getWildcardQuery(field, termStr), field);
  }

  /**
   * Builds a range query. With a {@code null} field, one query per configured field is built by
   * calling this method again with each field and the results are combined through {@link
   * #getMultiFieldQuery(List)}; otherwise the superclass query is returned with the field's boost
   * applied.
   */
  @Override
  protected Query getRangeQuery(
      String field, String part1, String part2, boolean startInclusive, boolean endInclusive)
      throws ParseException {
    if (field == null) {
      List<Query> clauses = new ArrayList<>();
      for (String currentField : fields) {
        clauses.add(getRangeQuery(currentField, part1, part2, startInclusive, endInclusive));
      }
      return getMultiFieldQuery(clauses);
    }
    return applyBoost(
        super.getRangeQuery(field, part1, part2, startInclusive, endInclusive), field);
  }

  /**
   * Builds a regexp query. With a {@code null} field, one query per configured field is built by
   * calling this method again with each field and the results are combined through {@link
   * #getMultiFieldQuery(List)}; otherwise the superclass query is returned with the field's boost
   * applied.
   */
  @Override
  protected Query getRegexpQuery(String field, String termStr) throws ParseException {
    if (field == null) {
      List<Query> clauses = new ArrayList<>();
      for (String currentField : fields) {
        clauses.add(getRegexpQuery(currentField, termStr));
      }
      return getMultiFieldQuery(clauses);
    }
    return applyBoost(super.getRegexpQuery(field, termStr), field);
  }

  /**
   * Creates a multifield query: a boolean query holding every given query as a {@code SHOULD}
   * clause.
   *
   * @param queries per-field queries to combine
   * @return the combined query, or {@code null} when {@code queries} is empty
   * @throws ParseException declared for overriding implementations; not thrown here
   */
  // TODO: investigate more general approach by default, e.g. DisjunctionMaxQuery?
  protected Query getMultiFieldQuery(List<Query> queries) throws ParseException {
    if (queries.isEmpty()) {
      return null; // all clause words were filtered away by the analyzer.
    }
    BooleanQuery.Builder query = newBooleanQuery();
    for (Query sub : queries) {
      query.add(sub, BooleanClause.Occur.SHOULD);
    }
    return query.build();
  }

  /**
   * Tells whether a parsed query is worth adding to a combined query: it must be non-null and, if
   * it is a {@link BooleanQuery}, it must contain at least one clause.
   */
  private static boolean hasClauses(Query q) {
    if (q == null) { // q never null, just being defensive
      return false;
    }
    return !(q instanceof BooleanQuery) || !((BooleanQuery) q).clauses().isEmpty();
  }

  /**
   * Parses a query which searches on the fields specified.
   *
   * <p>If x fields are specified, this effectively constructs:
   *
   * <pre>{@code
   * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
   * }</pre>
   *
   * @param queries Query strings to parse
   * @param fields Fields to search on
   * @param analyzer Analyzer to use
   * @return a boolean query with one {@code SHOULD} clause per non-empty parsed query
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the queries array differs from the length of
   *     the fields array
   */
  public static Query parse(String[] queries, String[] fields, Analyzer analyzer)
      throws ParseException {
    if (queries.length != fields.length) {
      throw new IllegalArgumentException("queries.length != fields.length");
    }
    BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
    for (int i = 0; i < fields.length; i++) {
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(queries[i]);
      if (hasClauses(q)) {
        bQuery.add(q, BooleanClause.Occur.SHOULD);
      }
    }
    return bQuery.build();
  }

  /**
   * Parses a query, searching on the fields specified. Use this if you need to specify certain
   * fields as required, and others as prohibited.
   *
   * <p>Usage:
   *
   * <pre>{@code
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                 BooleanClause.Occur.MUST,
   *                 BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
   * }</pre>
   *
   * <p>The code above would construct a query:
   *
   * <pre>{@code
   * (filename:query) +(contents:query) -(description:query)
   * }</pre>
   *
   * @param query Query string to parse
   * @param fields Fields to search on
   * @param flags Flags describing the fields
   * @param analyzer Analyzer to use
   * @return a boolean query with one clause per field whose parsed query is non-empty, each added
   *     with the matching flag
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the fields array differs from the length of
   *     the flags array
   */
  public static Query parse(
      String query, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
      throws ParseException {
    if (fields.length != flags.length) {
      throw new IllegalArgumentException("fields.length != flags.length");
    }
    BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
    for (int i = 0; i < fields.length; i++) {
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(query);
      if (hasClauses(q)) {
        bQuery.add(q, flags[i]);
      }
    }
    return bQuery.build();
  }

  /**
   * Parses a query, searching on the fields specified. Use this if you need to specify certain
   * fields as required, and others as prohibited.
   *
   * <p>Usage:
   *
   * <pre>{@code
   * String[] query = {"query1", "query2", "query3"};
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                 BooleanClause.Occur.MUST,
   *                 BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
   * }</pre>
   *
   * <p>The code above would construct a query:
   *
   * <pre>{@code
   * (filename:query1) +(contents:query2) -(description:query3)
   * }</pre>
   *
   * @param queries Query strings to parse
   * @param fields Fields to search on
   * @param flags Flags describing the fields
   * @param analyzer Analyzer to use
   * @return a boolean query with one clause per field whose parsed query is non-empty, each added
   *     with the matching flag
   * @throws ParseException if query parsing fails
   * @throws IllegalArgumentException if the length of the queries, fields, and flags array differ
   */
  public static Query parse(
      String[] queries, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer)
      throws ParseException {
    if (queries.length != fields.length || queries.length != flags.length) {
      throw new IllegalArgumentException(
          "queries, fields, and flags array have different length");
    }
    BooleanQuery.Builder bQuery = new BooleanQuery.Builder();
    for (int i = 0; i < fields.length; i++) {
      QueryParser qp = new QueryParser(fields[i], analyzer);
      Query q = qp.parse(queries[i]);
      if (hasClauses(q)) {
        bQuery.add(q, flags[i]);
      }
    }
    return bQuery.build();
  }
}