org.apache.lucene.queryparser.flexible.standard.StandardQueryParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-queryparser Show documentation
Lucene QueryParsers module
There is a newer version: 9.11.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queryparser.flexible.standard;

import java.util.Locale;
import java.util.Map;
import java.util.TimeZone;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.document.DateTools.Resolution;
import org.apache.lucene.queryparser.flexible.core.QueryNodeException;
import org.apache.lucene.queryparser.flexible.core.QueryParserHelper;
import org.apache.lucene.queryparser.flexible.core.config.QueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.builders.StandardQueryTreeBuilder;
import org.apache.lucene.queryparser.flexible.standard.config.FuzzyConfig;
import org.apache.lucene.queryparser.flexible.standard.config.PointsConfig;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.ConfigurationKeys;
import org.apache.lucene.queryparser.flexible.standard.config.StandardQueryConfigHandler.Operator;
import org.apache.lucene.queryparser.flexible.standard.parser.StandardSyntaxParser;
import org.apache.lucene.queryparser.flexible.standard.processors.StandardQueryNodeProcessorPipeline;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.MultiTermQuery;
import org.apache.lucene.search.Query;

/**
 * The {@link StandardQueryParser} is a pre-assembled query parser that supports most features of
 * the {@linkplain org.apache.lucene.queryparser.classic.QueryParser classic Lucene query parser},
 * allows dynamic configuration of some of its features (like multi-field expansion or wildcard
 * query restrictions) and adds support for new query types and expressions.
 *
 * The {@link StandardQueryParser} is an extension of the {@link QueryParserHelper} with
 * reasonable defaults for syntax tree parsing ({@link StandardSyntaxParser}), node processor
 * pipeline ({@link StandardQueryNodeProcessorPipeline}) and node tree to {@link Query} builder
 * ({@link StandardQueryTreeBuilder}).
 *
 * 
Typical usage, including configuration tweaks:
 *
 * 
{@code
 * StandardQueryParser qpHelper = new StandardQueryParser();
 * StandardQueryConfigHandler config =  qpHelper.getQueryConfigHandler();
 * config.setAllowLeadingWildcard(true);
 * config.setAnalyzer(new WhitespaceAnalyzer());
 * Query query = qpHelper.parse("apache AND lucene", "defaultField");
 * }
 *
 * Supported query syntax
 *
 * Standard query parser borrows most of its syntax from the {@linkplain
 * org.apache.lucene.queryparser.classic classic query parser} but adds more features and
 * expressions on top of that syntax.
 *
 * 
A query consists of clauses, field specifications, grouping and Boolean operators and
 * interval functions. We will discuss them in order.
 *
 * 
Basic clauses
 *
 * A query must contain one or more clauses. A clause can be a literal term, a phrase, a wildcard
 * expression or another supported expression (such as a regular expression or a fuzzy term).
 *
 * 
The following are some examples of simple one-clause queries:
 *
 * 

 *   test
 *       selects documents containing the word test (term clause).
 *   
"test equipment"
 *       phrase search; selects documents containing the phrase test equipment (phrase
 *       clause).
 *   
"test failure"~4
 *       proximity search; selects documents containing the words test and
 *       failure within 4 words (positions) from each other. The provided "proximity" is
 *       technically translated into "edit distance" (maximum number of atomic word-moving
 *       operations required to transform the document's phrase into the query phrase).
 *   
tes*
 *       prefix wildcard matching; selects documents containing words starting with tes,
 *       such as: test, testing or testable.
 *   
/.est(s|ing)/
 *       documents containing words matching the provided regular expression, such as
 *       resting or nests.
 *   
nest~2
 *       fuzzy term matching; documents containing words within 2-edits distance (2 additions,
 *       removals or replacements of a letter) from nest, such as test,
 *       net or rests.
 * 
 *
 * Field specifications
 *
 * Most clauses can be prefixed by a field name and a colon: the clause will then apply to that
 * field only. If the field specification is omitted, the query parser will expand the clause over
 * all fields specified by a call to {@link StandardQueryParser#setMultiFields(CharSequence[])} or
 * will use the default field provided in the call to {@link #parse(String, String)}.
 *
 * 
The following are some examples of field-prefixed clauses:
 *
 * 

 *   title:test
 *       documents containing test in the title field.
 *   
title:(die OR hard)
 *       documents containing die or hard in the title field.
 * 
 *
 * Boolean operators and grouping
 *
 * You can combine clauses using Boolean AND, OR and NOT operators to form more complex
 * expressions, for example:
 *
 * 

 *   test AND results
 *       selects documents containing both the word test and the word results.
 *   
test OR suite OR results
 *       selects documents with at least one of test, suite or
 *       results.
 *   
title:test AND NOT title:complete
 *       selects documents containing test and not containing complete in the
 *       title field.
 *   
title:test AND (pass* OR fail*)
 *       grouping; use parentheses to specify the precedence of terms in a Boolean clause. Query
 *       will match documents containing test in the title field and a word
 *       starting with pass or fail in the default search fields.
 *   
title:(pass fail skip)
 *       shorthand notation; documents containing at least one of pass, fail or
 *       skip in the title field.
 *   
title:(+test +"result unknown")
 *       shorthand notation; documents containing both test and result unknown
 *       in the title field.
 * 
 *
 * Note the Boolean operators must be written in all caps, otherwise they are parsed as regular
 * terms.
 *
 * 
Range operators
 *
 * To search for ranges of textual or numeric values, use square or curly brackets, for example:
 *
 * 

 *   name:[Jones TO Smith]
 *       inclusive range; selects documents whose name
 *        field has any value between Jones and Smith, including
 *       boundaries.
 *   
score:{2.5 TO 7.3}
 *       exclusive range; selects documents whose score field is between 2.5 and
 *       7.3, excluding boundaries.
 *   
score:{2.5 TO *]
 *       one-sided range; selects documents whose score field is larger than 2.5.
 * 
 *
 * Term boosting
 *
 * Terms, quoted terms, term range expressions and grouped clauses can have a floating-point
 * weight boost applied to them to increase their score relative to other clauses. For
 * example:
 *
 * 

 *   jones^2 OR smith^0.5
 *       prioritize documents with jones term over matches on the smith
 *       term.
 *   
field:(a OR b NOT c)^2.5 OR field:d
 *       apply the boost to a sub-query.
 * 
 *
 * Special character escaping
 *
 * Most search terms can be put in double quotes making special-character escaping not necessary.
 * If the search term contains the quote character (or cannot be quoted for some reason), any
 * character can be quoted with a backslash. For example:
 *
 * 

 *   \:\(quoted\+term\)\:
 *       a single search term (quoted+term): with escape sequences. An alternative
 *       quoted form would be simpler: ":(quoted+term):"
 *       .
 * 
 *
 * Minimum-should-match constraint for Boolean disjunction groups
 *
 * A minimum-should-match operator can be applied to a disjunction Boolean query (a query with
 * only "OR"-subclauses) and forces the query to match documents with at least the provided number
 * of these subclauses. For example:
 *
 * 

 *   (blue crab fish)@2
 *       matches all documents with at least two terms from the set [blue, crab, fish] (in any
 *       order).
 *   
((yellow OR blue) crab fish)@2
 *       sub-clauses of a Boolean query can themselves be complex queries; here the
 *       min-should-match selects documents that match at least two of the provided three
 *       sub-clauses.
 * 
 *
 * Interval function clauses
 *
 * Interval functions are a powerful tool to express search needs in terms of one or more
 * contiguous fragments of text and their relationship to one another. All interval clauses start
 * with the {@code fn:} prefix (possibly prefixed by a field specification). For example:
 *
 * 

 *   fn:ordered(quick brown fox)
 *       matches all documents (in the default field or in multi-field expansion) with at least
 *       one ordered sequence of quick, 
 *       brown and fox terms.
 *   
title:fn:maxwidth(5 fn:atLeast(2 quick brown fox))
 *       matches all documents in the title
 *        field where at least two of the three terms (quick, 
 *        brown and fox) occur within five positions of each other.
 * 
 *
 * Please refer to the {@linkplain org.apache.lucene.queryparser.flexible.standard.nodes.intervalfn
 * interval functions package} for more information on which functions are available and how they
 * work.
 *
 * @see StandardQueryParser
 * @see StandardQueryConfigHandler
 * @see StandardSyntaxParser
 * @see StandardQueryNodeProcessorPipeline
 * @see StandardQueryTreeBuilder
 */
public class StandardQueryParser extends QueryParserHelper
    implements CommonQueryParserConfiguration {

  /**
   * Constructs a {@link StandardQueryParser} object wired with a {@link
   * StandardQueryConfigHandler}, a {@link StandardSyntaxParser}, a {@link
   * StandardQueryNodeProcessorPipeline} and a {@link StandardQueryTreeBuilder}. Position
   * increments are enabled on the new instance.
   */
  public StandardQueryParser() {
    super(
        new StandardQueryConfigHandler(),
        new StandardSyntaxParser(),
        new StandardQueryNodeProcessorPipeline(null),
        new StandardQueryTreeBuilder());
    setEnablePositionIncrements(true);
  }

  /**
   * Constructs a {@link StandardQueryParser} object and sets an {@link Analyzer} to it. The same
   * as:
   *
   * <pre>{@code
   * StandardQueryParser qp = new StandardQueryParser();
   * qp.setAnalyzer(analyzer);
   * }</pre>
   *
   * @param analyzer the analyzer to be used by this query parser helper
   */
  public StandardQueryParser(Analyzer analyzer) {
    this();

    this.setAnalyzer(analyzer);
  }

  /**
   * Returns a short diagnostic representation of this parser that embeds the string form of its
   * query configuration handler.
   */
  @Override
  public String toString() {
    return "<StandardQueryParser config=\"" + getQueryConfigHandler() + "\"/>";
  }

  /**
   * Overrides {@link QueryParserHelper#parse(String, String)} so it casts the return object to
   * {@link Query}. For more reference about this method, check {@link
   * QueryParserHelper#parse(String, String)}.
   *
   * @param query the query string
   * @param defaultField the default field used by the text parser
   * @return the object built from the query
   * @throws QueryNodeException if something wrong happens along the three phases
   */
  @Override
  public Query parse(String query, String defaultField) throws QueryNodeException {
    return (Query) super.parse(query, defaultField);
  }

  /**
   * Gets implicit operator setting, which will be either {@link Operator#AND} or {@link
   * Operator#OR}.
   */
  public StandardQueryConfigHandler.Operator getDefaultOperator() {
    return getQueryConfigHandler().get(ConfigurationKeys.DEFAULT_OPERATOR);
  }

  /**
   * Sets the boolean operator of the QueryParser. In default mode ({@link Operator#OR}) terms
   * without any modifiers are considered optional: for example {@code capital of Hungary} is
   * equal to {@code capital OR of OR Hungary}.
   *
   * <p>In {@link Operator#AND} mode terms are considered to be in conjunction: the above
   * mentioned query is parsed as {@code capital AND of AND Hungary}.
   *
   * @param operator the implicit operator to store in the configuration
   */
  public void setDefaultOperator(StandardQueryConfigHandler.Operator operator) {
    getQueryConfigHandler().set(ConfigurationKeys.DEFAULT_OPERATOR, operator);
  }

  /**
   * Set to {@code true} to allow leading wildcard characters.
   *
   * <p>When set, {@code *} or {@code ?} are allowed as the first character of a PrefixQuery and
   * WildcardQuery. Note that this can produce very slow queries on big indexes.
   *
   * <p>Default: false.
   */
  @Override
  public void setAllowLeadingWildcard(boolean allowLeadingWildcard) {
    getQueryConfigHandler().set(ConfigurationKeys.ALLOW_LEADING_WILDCARD, allowLeadingWildcard);
  }

  /**
   * Set to {@code true} to enable position increments in result query.
   *
   * <p>When set, result phrase and multi-phrase queries will be aware of position increments.
   * Useful when e.g. a StopFilter increases the position increment of the token that follows an
   * omitted token.
   *
   * <p>The no-argument constructor of this class enables this setting; {@link
   * #getEnablePositionIncrements()} reports {@code false} only when no value is stored in the
   * configuration.
   */
  @Override
  public void setEnablePositionIncrements(boolean enabled) {
    getQueryConfigHandler().set(ConfigurationKeys.ENABLE_POSITION_INCREMENTS, enabled);
  }

  /**
   * Returns whether position increments are enabled; {@code false} if the setting is absent.
   *
   * @see #setEnablePositionIncrements(boolean)
   */
  @Override
  public boolean getEnablePositionIncrements() {
    Boolean enabled = getQueryConfigHandler().get(ConfigurationKeys.ENABLE_POSITION_INCREMENTS);

    // A missing configuration entry is treated as "disabled".
    return enabled != null && enabled;
  }

  /**
   * Stores the rewrite method under {@link ConfigurationKeys#MULTI_TERM_REWRITE_METHOD}.
   *
   * @param method the rewrite method to store
   */
  @Override
  public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) {
    getQueryConfigHandler().set(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD, method);
  }

  /**
   * Returns the rewrite method stored under {@link ConfigurationKeys#MULTI_TERM_REWRITE_METHOD}.
   *
   * @see #setMultiTermRewriteMethod(org.apache.lucene.search.MultiTermQuery.RewriteMethod)
   */
  @Override
  public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() {
    return getQueryConfigHandler().get(ConfigurationKeys.MULTI_TERM_REWRITE_METHOD);
  }

  /**
   * Set the fields a query should be expanded to when the field is null. A {@code null} argument
   * is stored as an empty array.
   *
   * @param fields the fields used to expand the query
   */
  public void setMultiFields(CharSequence[] fields) {
    CharSequence[] effectiveFields = (fields == null) ? new CharSequence[0] : fields;

    getQueryConfigHandler().set(ConfigurationKeys.MULTI_FIELDS, effectiveFields);
  }

  /**
   * Returns the fields used to expand the query when the field for a certain query is null.
   *
   * @return the fields used to expand the query
   */
  public CharSequence[] getMultiFields() {
    return getQueryConfigHandler().get(ConfigurationKeys.MULTI_FIELDS);
  }

  /**
   * Set the prefix length for fuzzy queries. A {@link FuzzyConfig} is created and stored in the
   * configuration if none is present yet.
   *
   * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
   */
  @Override
  public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
    getOrCreateFuzzyConfig().setPrefixLength(fuzzyPrefixLength);
  }

  /**
   * Stores the per-field points configuration under {@link ConfigurationKeys#POINTS_CONFIG_MAP}.
   *
   * @param pointsConfigMap a map from field name to its {@link PointsConfig}
   */
  public void setPointsConfigMap(Map<String, PointsConfig> pointsConfigMap) {
    getQueryConfigHandler().set(ConfigurationKeys.POINTS_CONFIG_MAP, pointsConfigMap);
  }

  /**
   * Returns the map stored under {@link ConfigurationKeys#POINTS_CONFIG_MAP}.
   *
   * @return the field name to {@link PointsConfig} map
   */
  public Map<String, PointsConfig> getPointsConfigMap() {
    return getQueryConfigHandler().get(ConfigurationKeys.POINTS_CONFIG_MAP);
  }

  /** Set locale used by date range parsing. */
  @Override
  public void setLocale(Locale locale) {
    getQueryConfigHandler().set(ConfigurationKeys.LOCALE, locale);
  }

  /** Returns current locale, allowing access by subclasses. */
  @Override
  public Locale getLocale() {
    return getQueryConfigHandler().get(ConfigurationKeys.LOCALE);
  }

  /**
   * Stores the time zone under {@link ConfigurationKeys#TIMEZONE}.
   *
   * @param timeZone the time zone to store
   */
  @Override
  public void setTimeZone(TimeZone timeZone) {
    getQueryConfigHandler().set(ConfigurationKeys.TIMEZONE, timeZone);
  }

  /** Returns the time zone stored under {@link ConfigurationKeys#TIMEZONE}. */
  @Override
  public TimeZone getTimeZone() {
    return getQueryConfigHandler().get(ConfigurationKeys.TIMEZONE);
  }

  /**
   * Sets the default slop for phrases. If zero, then exact phrase matches are required. Default
   * value is zero.
   */
  @Override
  public void setPhraseSlop(int defaultPhraseSlop) {
    getQueryConfigHandler().set(ConfigurationKeys.PHRASE_SLOP, defaultPhraseSlop);
  }

  /**
   * Stores the analyzer under {@link ConfigurationKeys#ANALYZER}.
   *
   * @param analyzer the analyzer to be used by this query parser helper
   */
  public void setAnalyzer(Analyzer analyzer) {
    getQueryConfigHandler().set(ConfigurationKeys.ANALYZER, analyzer);
  }

  /** Returns the analyzer stored under {@link ConfigurationKeys#ANALYZER}. */
  @Override
  public Analyzer getAnalyzer() {
    return getQueryConfigHandler().get(ConfigurationKeys.ANALYZER);
  }

  /**
   * Returns whether leading wildcards are allowed; {@code false} if the setting is absent.
   *
   * @see #setAllowLeadingWildcard(boolean)
   */
  @Override
  public boolean getAllowLeadingWildcard() {
    Boolean allowed = getQueryConfigHandler().get(ConfigurationKeys.ALLOW_LEADING_WILDCARD);

    // A missing configuration entry is treated as "not allowed".
    return allowed != null && allowed;
  }

  /**
   * Get the minimal similarity for fuzzy queries. Falls back to {@link
   * FuzzyQuery#defaultMaxEdits} when no {@link FuzzyConfig} is stored in the configuration.
   */
  @Override
  public float getFuzzyMinSim() {
    FuzzyConfig fuzzyConfig = getQueryConfigHandler().get(ConfigurationKeys.FUZZY_CONFIG);

    if (fuzzyConfig == null) {
      return FuzzyQuery.defaultMaxEdits;
    }
    return fuzzyConfig.getMinSimilarity();
  }

  /**
   * Get the prefix length for fuzzy queries. Falls back to {@link
   * FuzzyQuery#defaultPrefixLength} when no {@link FuzzyConfig} is stored in the configuration.
   *
   * @return Returns the fuzzyPrefixLength.
   */
  @Override
  public int getFuzzyPrefixLength() {
    FuzzyConfig fuzzyConfig = getQueryConfigHandler().get(ConfigurationKeys.FUZZY_CONFIG);

    if (fuzzyConfig == null) {
      return FuzzyQuery.defaultPrefixLength;
    }
    return fuzzyConfig.getPrefixLength();
  }

  /** Gets the default slop for phrases; zero if no slop is stored in the configuration. */
  @Override
  public int getPhraseSlop() {
    Integer phraseSlop = getQueryConfigHandler().get(ConfigurationKeys.PHRASE_SLOP);

    return (phraseSlop == null) ? 0 : phraseSlop;
  }

  /**
   * Set the minimum similarity for fuzzy queries. Default is defined on {@link
   * FuzzyQuery#defaultMaxEdits}. A {@link FuzzyConfig} is created and stored in the configuration
   * if none is present yet.
   */
  @Override
  public void setFuzzyMinSim(float fuzzyMinSim) {
    getOrCreateFuzzyConfig().setMinSimilarity(fuzzyMinSim);
  }

  /**
   * Sets the boost used for each field.
   *
   * @param boosts a collection that maps a field to its boost
   */
  public void setFieldsBoost(Map<String, Float> boosts) {
    getQueryConfigHandler().set(ConfigurationKeys.FIELD_BOOST_MAP, boosts);
  }

  /**
   * Returns the field to boost map used to set boost for each field.
   *
   * @return the field to boost map
   */
  public Map<String, Float> getFieldsBoost() {
    return getQueryConfigHandler().get(ConfigurationKeys.FIELD_BOOST_MAP);
  }

  /**
   * Sets the default {@link Resolution} used for certain field when no {@link Resolution} is
   * defined for this field.
   *
   * @param dateResolution the default {@link Resolution}
   */
  @Override
  public void setDateResolution(DateTools.Resolution dateResolution) {
    getQueryConfigHandler().set(ConfigurationKeys.DATE_RESOLUTION, dateResolution);
  }

  /**
   * Returns the default {@link Resolution} used for certain field when no {@link Resolution} is
   * defined for this field.
   *
   * @return the default {@link Resolution}
   */
  public DateTools.Resolution getDateResolution() {
    return getQueryConfigHandler().get(ConfigurationKeys.DATE_RESOLUTION);
  }

  /**
   * Returns the field to {@link Resolution} map used to normalize each date field.
   *
   * @return the field to {@link Resolution} map
   */
  public Map<CharSequence, DateTools.Resolution> getDateResolutionMap() {
    return getQueryConfigHandler().get(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP);
  }

  /**
   * Sets the {@link Resolution} used for each field.
   *
   * @param dateRes a collection that maps a field to its {@link Resolution}
   */
  public void setDateResolutionMap(Map<CharSequence, DateTools.Resolution> dateRes) {
    getQueryConfigHandler().set(ConfigurationKeys.FIELD_DATE_RESOLUTION_MAP, dateRes);
  }

  /**
   * Returns the {@link FuzzyConfig} stored in the configuration, creating and registering a new
   * one first if the configuration holds none.
   */
  private FuzzyConfig getOrCreateFuzzyConfig() {
    QueryConfigHandler config = getQueryConfigHandler();
    FuzzyConfig fuzzyConfig = config.get(ConfigurationKeys.FUZZY_CONFIG);

    if (fuzzyConfig == null) {
      fuzzyConfig = new FuzzyConfig();
      config.set(ConfigurationKeys.FUZZY_CONFIG, fuzzyConfig);
    }

    return fuzzyConfig;
  }
}