All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.lucene.queryparser.classic.MultiFieldQueryParser Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queryparser.classic;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

/** A QueryParser which constructs queries to search multiple fields. */
public class MultiFieldQueryParser extends QueryParser {
  protected String[] fields;
  protected Map boosts;

  /**
   * Creates a MultiFieldQueryParser. Allows passing of a map with term to Boost, and the boost to
   * apply to each term.
   *
   * 

It will, when parse(String query) is called, construct a query like this (assuming the query * consists of two terms and you specify the two fields title and body): * * (title:term1 body:term1) (title:term2 body:term2) * * *

When setDefaultOperator(AND_OPERATOR) is set, the result will be: * +(title:term1 body:term1) +(title:term2 body:term2) * * *

When you pass a boost (title=>5 body=>10) you can get * +(title:term1^5.0 body:term1^10.0) +(title:term2^5.0 body:term2^10.0) * * *

In other words, all the query's terms must appear, but it doesn't matter in what fields they * appear. */ public MultiFieldQueryParser(String[] fields, Analyzer analyzer, Map boosts) { this(fields, analyzer); this.boosts = boosts; } /** * Creates a MultiFieldQueryParser. * *

It will, when parse(String query) is called, construct a query like this (assuming the query * consists of two terms and you specify the two fields title and body): * * (title:term1 body:term1) (title:term2 body:term2) * * *

When setDefaultOperator(AND_OPERATOR) is set, the result will be: * +(title:term1 body:term1) +(title:term2 body:term2) * * *

In other words, all the query's terms must appear, but it doesn't matter in what fields they * appear. */ public MultiFieldQueryParser(String[] fields, Analyzer analyzer) { super(null, analyzer); this.fields = fields; } @Override protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException { if (field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { Query q = super.getFieldQuery(fields[i], queryText, true); if (q != null) { // If the user passes a map of boosts if (boosts != null) { // Get the boost from the map and apply them Float boost = boosts.get(fields[i]); if (boost != null) { q = new BoostQuery(q, boost.floatValue()); } } q = applySlop(q, slop); clauses.add(q); } } if (clauses.size() == 0) // happens for stopwords return null; return getMultiFieldQuery(clauses); } Query q = super.getFieldQuery(field, queryText, true); q = applySlop(q, slop); return q; } private Query applySlop(Query q, int slop) { if (q instanceof PhraseQuery) { PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.setSlop(slop); PhraseQuery pq = (PhraseQuery) q; org.apache.lucene.index.Term[] terms = pq.getTerms(); int[] positions = pq.getPositions(); for (int i = 0; i < terms.length; ++i) { builder.add(terms[i], positions[i]); } q = builder.build(); } else if (q instanceof MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery) q; if (slop != mpq.getSlop()) { q = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); } } else if (q instanceof BoostQuery) { Query subQuery = ((BoostQuery) q).getQuery(); subQuery = applySlop(subQuery, slop); q = new BoostQuery(subQuery, ((BoostQuery) q).getBoost()); } return q; } private Query applyBoost(Query q, String field) { if (boosts != null) { Float boost = boosts.get(field); if (boost != null) { q = new BoostQuery(q, boost); } } return q; } @Override protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { if (field 
== null) { List clauses = new ArrayList<>(); Query[] fieldQueries = new Query[fields.length]; int maxTerms = 0; for (int i = 0; i < fields.length; i++) { Query q = super.getFieldQuery(fields[i], queryText, quoted); if (q != null) { if (q instanceof BooleanQuery) { maxTerms = Math.max(maxTerms, ((BooleanQuery) q).clauses().size()); } else { maxTerms = Math.max(1, maxTerms); } fieldQueries[i] = q; } } for (int termNum = 0; termNum < maxTerms; termNum++) { List termClauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { if (fieldQueries[i] != null) { Query q = null; if (fieldQueries[i] instanceof BooleanQuery) { List nestedClauses = ((BooleanQuery) fieldQueries[i]).clauses(); if (termNum < nestedClauses.size()) { q = nestedClauses.get(termNum).getQuery(); } } else if (termNum == 0) { // e.g. TermQuery-s q = fieldQueries[i]; } if (q != null) { if (boosts != null) { // Get the boost from the map and apply them Float boost = boosts.get(fields[i]); if (boost != null) { q = new BoostQuery(q, boost); } } termClauses.add(q); } } } if (maxTerms > 1) { if (termClauses.size() > 0) { BooleanQuery.Builder builder = newBooleanQuery(); for (Query termClause : termClauses) { builder.add(termClause, BooleanClause.Occur.SHOULD); } clauses.add(builder.build()); } } else { clauses.addAll(termClauses); } } if (clauses.size() == 0) // happens for stopwords return null; return getMultiFieldQuery(clauses); } Query q = super.getFieldQuery(field, queryText, quoted); return q; } @Override protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { if (field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { clauses.add(getFuzzyQuery(fields[i], termStr, minSimilarity)); } return getMultiFieldQuery(clauses); } Query q = super.getFuzzyQuery(field, termStr, minSimilarity); return applyBoost(q, field); } @Override protected Query getPrefixQuery(String field, String termStr) throws ParseException { if 
(field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { clauses.add(getPrefixQuery(fields[i], termStr)); } return getMultiFieldQuery(clauses); } Query q = super.getPrefixQuery(field, termStr); return applyBoost(q, field); } @Override protected Query getWildcardQuery(String field, String termStr) throws ParseException { if (field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { clauses.add(getWildcardQuery(fields[i], termStr)); } return getMultiFieldQuery(clauses); } Query q = super.getWildcardQuery(field, termStr); return applyBoost(q, field); } @Override protected Query getRangeQuery( String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { if (field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { clauses.add(getRangeQuery(fields[i], part1, part2, startInclusive, endInclusive)); } return getMultiFieldQuery(clauses); } Query q = super.getRangeQuery(field, part1, part2, startInclusive, endInclusive); return applyBoost(q, field); } @Override protected Query getRegexpQuery(String field, String termStr) throws ParseException { if (field == null) { List clauses = new ArrayList<>(); for (int i = 0; i < fields.length; i++) { clauses.add(getRegexpQuery(fields[i], termStr)); } return getMultiFieldQuery(clauses); } Query q = super.getRegexpQuery(field, termStr); return applyBoost(q, field); } /** Creates a multifield query */ // TODO: investigate more general approach by default, e.g. DisjunctionMaxQuery? protected Query getMultiFieldQuery(List queries) throws ParseException { if (queries.isEmpty()) { return null; // all clause words were filtered away by the analyzer. } BooleanQuery.Builder query = newBooleanQuery(); for (Query sub : queries) { query.add(sub, BooleanClause.Occur.SHOULD); } return query.build(); } /** * Parses a query which searches on the fields specified. * *

If x fields are specified, this effectively constructs: * *

   * 
   * (field1:query1) (field2:query2) (field3:query3)...(fieldx:queryx)
   * 
   * 
* * @param queries Queries strings to parse * @param fields Fields to search on * @param analyzer Analyzer to use * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the queries array differs from the length of * the fields array */ public static Query parse(String[] queries, String[] fields, Analyzer analyzer) throws ParseException { if (queries.length != fields.length) throw new IllegalArgumentException("queries.length != fields.length"); BooleanQuery.Builder bQuery = new BooleanQuery.Builder(); for (int i = 0; i < fields.length; i++) { QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(queries[i]); if (q != null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery) q).clauses().size() > 0)) { bQuery.add(q, BooleanClause.Occur.SHOULD); } } return bQuery.build(); } /** * Parses a query, searching on the fields specified. Use this if you need to specify certain * fields as required, and others as prohibited. * *

Usage: * *

   * 
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                BooleanClause.Occur.MUST,
   *                BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse("query", fields, flags, analyzer);
   * 
   * 
* *

The code above would construct a query: * *

   * 
   * (filename:query) +(contents:query) -(description:query)
   * 
   * 
* * @param query Query string to parse * @param fields Fields to search on * @param flags Flags describing the fields * @param analyzer Analyzer to use * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the fields array differs from the length of * the flags array */ public static Query parse( String query, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { if (fields.length != flags.length) throw new IllegalArgumentException("fields.length != flags.length"); BooleanQuery.Builder bQuery = new BooleanQuery.Builder(); for (int i = 0; i < fields.length; i++) { QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(query); if (q != null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery) q).clauses().size() > 0)) { bQuery.add(q, flags[i]); } } return bQuery.build(); } /** * Parses a query, searching on the fields specified. Use this if you need to specify certain * fields as required, and others as prohibited. * *

Usage: * *

   * 
   * String[] query = {"query1", "query2", "query3"};
   * String[] fields = {"filename", "contents", "description"};
   * BooleanClause.Occur[] flags = {BooleanClause.Occur.SHOULD,
   *                BooleanClause.Occur.MUST,
   *                BooleanClause.Occur.MUST_NOT};
   * MultiFieldQueryParser.parse(query, fields, flags, analyzer);
   * 
   * 
* *

The code above would construct a query: * *

   * 
   * (filename:query1) +(contents:query2) -(description:query3)
   * 
   * 
* * @param queries Queries string to parse * @param fields Fields to search on * @param flags Flags describing the fields * @param analyzer Analyzer to use * @throws ParseException if query parsing fails * @throws IllegalArgumentException if the length of the queries, fields, and flags array differ */ public static Query parse( String[] queries, String[] fields, BooleanClause.Occur[] flags, Analyzer analyzer) throws ParseException { if (!(queries.length == fields.length && queries.length == flags.length)) throw new IllegalArgumentException( "queries, fields, and flags array have have different length"); BooleanQuery.Builder bQuery = new BooleanQuery.Builder(); for (int i = 0; i < fields.length; i++) { QueryParser qp = new QueryParser(fields[i], analyzer); Query q = qp.parse(queries[i]); if (q != null && // q never null, just being defensive (!(q instanceof BooleanQuery) || ((BooleanQuery) q).clauses().size() > 0)) { bQuery.add(q, flags[i]); } } return bQuery.build(); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy