All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.queryparser.classic.QueryParser.jj Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(QueryParser)

package org.apache.lucene.queryparser.classic;

import java.io.StringReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermRangeQuery;

/**
 * This class is generated by JavaCC.  The most important method is
 * {@link #parse(String)}.
 *
 * The syntax for query strings is as follows:
 * A Query is a series of clauses.
 * A clause may be prefixed by:
 * <ul>
 * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
 * that the clause is required or prohibited respectively; or
 * <li> a term followed by a colon, indicating the field to be searched.
 * This enables one to construct queries which search multiple fields.
 * </ul>
 *
 * A clause may be either:
 * <ul>
 * <li> a term, indicating all the documents that contain this term; or
 * <li> a nested query, enclosed in parentheses.  Note that this may be used
 * with a <code>+</code>/<code>-</code> prefix to require any of a set of
 * terms.
 * </ul>
 *
 * Thus, in BNF, the query grammar is:
 * <pre>
 *   Query  ::= ( Clause )*
 *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
 * </pre>
 *

 * <p>
 * Examples of appropriately formatted queries can be found in the query syntax
 * documentation.
 * </p>
 *

 * <p>
 * In {@link TermRangeQuery}s, QueryParser tries to detect date values, e.g.
 * <code>date:[6/1/2005 TO 6/4/2005]</code> produces a range query that searches
 * for "date" fields between 2005-06-01 and 2005-06-04. Note that the format
 * of the accepted input depends on {@link #setLocale(Locale) the locale}.
 * A {@link org.apache.lucene.document.DateTools.Resolution} has to be set,
 * if you want to use {@link DateTools} for date conversion.
 * </p>
 *

 * <p>
 * The date resolution that shall be used for RangeQueries can be set
 * using {@link #setDateResolution(DateTools.Resolution)}
 * or {@link #setDateResolution(String, DateTools.Resolution)}. The former
 * sets the default date resolution for all fields, whereas the latter can
 * be used to set field specific date resolutions. Field specific date
 * resolutions take, if set, precedence over the default date resolution.
 * </p>
 *

 * <p>
 * If you don't use {@link DateTools} in your index, you can create your own
 * query parser that extends QueryParser and overrides
 * {@link #getRangeQuery(String, String, String, boolean, boolean)} to
 * use a different method for date conversion.
 * </p>
 *

 * <p>Note that QueryParser is not thread-safe.</p>
 *

NOTE: there is a new QueryParser in contrib, which matches * the same syntax as this class, but is more modular, * enabling substantial customization to how a query is created. */ public class QueryParser extends QueryParserBase { /** The default operator for parsing queries. * Use {@link QueryParserBase#setDefaultOperator} to change it. */ static public enum Operator { OR, AND } /** default split on whitespace behavior */ public static final boolean DEFAULT_SPLIT_ON_WHITESPACE = false; /** Create a query parser. * @param f the default field for query terms. * @param a used to find terms in the query text. */ public QueryParser(String f, Analyzer a) { this(new FastCharStream(new StringReader(""))); init(f, a); } /** * Set to true if phrase queries will be automatically generated * when the analyzer returns more than one term from whitespace * delimited text. * NOTE: this behavior may not be suitable for all languages. *

* Set to false if phrase queries should only be generated when * surrounded by double quotes. *

* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true * is disallowed. See LUCENE-7533. */ @Override public void setAutoGeneratePhraseQueries(boolean value) { if (splitOnWhitespace == false && value == true) { throw new IllegalArgumentException ("setAutoGeneratePhraseQueries(true) is disallowed when getSplitOnWhitespace() == false"); } this.autoGeneratePhraseQueries = value; } /** * @see #setSplitOnWhitespace(boolean) */ public boolean getSplitOnWhitespace() { return splitOnWhitespace; } /** * Whether query text should be split on whitespace prior to analysis. * Default is {@value #DEFAULT_SPLIT_ON_WHITESPACE}. *

* The combination splitOnWhitespace=false and autoGeneratePhraseQueries=true * is disallowed. See LUCENE-7533. */ public void setSplitOnWhitespace(boolean splitOnWhitespace) { if (splitOnWhitespace == false && getAutoGeneratePhraseQueries() == true) { throw new IllegalArgumentException ("setSplitOnWhitespace(false) is disallowed when getAutoGeneratePhraseQueries() == true"); } this.splitOnWhitespace = splitOnWhitespace; } private boolean splitOnWhitespace = DEFAULT_SPLIT_ON_WHITESPACE; private static Set disallowedPostMultiTerm = new HashSet(Arrays.asList(COLON, STAR, FUZZY_SLOP, CARAT, AND, OR)); private static boolean allowedPostMultiTerm(int tokenKind) { return disallowedPostMultiTerm.contains(tokenKind) == false; } } PARSER_END(QueryParser) /* ***************** */ /* Token Definitions */ /* ***************** */ <*> TOKEN : { <#_NUM_CHAR: ["0"-"9"] > | <#_ESCAPED_CHAR: "\\" ~[] > // every character that follows a backslash is considered as an escaped character | <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "\u3000", "+", "-", "!", "(", ")", ":", "^", "[", "]", "\"", "{", "}", "~", "*", "?", "\\", "/" ] | <_ESCAPED_CHAR> ) > | <#_TERM_CHAR: ( <_TERM_START_CHAR> | "-" | "+" ) > | <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" | "\u3000") > | <#_QUOTED_CHAR: ( ~[ "\"", "\\" ] | <_ESCAPED_CHAR> ) > } SKIP : { < <_WHITESPACE>> } TOKEN : { | | | | | > | | | | | : Boost | )* "\""> | (<_TERM_CHAR>)* > | )+ (( "." (<_NUM_CHAR>)+ )? (<_TERM_CHAR>)*) | (<_TERM_CHAR>)*) > | (<_TERM_CHAR>)* "*" ) > | | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* > | | : Range | : Range } TOKEN : { )+ ( "." (<_NUM_CHAR>)+ )? 
> : DEFAULT } TOKEN : { | : DEFAULT | : DEFAULT | | } // * Query ::= ( Clause )* // * Clause ::= ["+", "-"] [ ":"] ( | "(" Query ")" ) int Conjunction() : { int ret = CONJ_NONE; } { [ { ret = CONJ_AND; } | { ret = CONJ_OR; } ] { return ret; } } int Modifiers() : { int ret = MOD_NONE; } { [ { ret = MOD_REQ; } | { ret = MOD_NOT; } | { ret = MOD_NOT; } ] { return ret; } } // This makes sure that there is no garbage after the query string Query TopLevelQuery(String field) : { Query q; } { q=Query(field) { return q; } } Query Query(String field) : { List clauses = new ArrayList(); Query q, firstQuery=null; int conj, mods; } { ( LOOKAHEAD(2) firstQuery=MultiTerm(field, clauses) | mods=Modifiers() q=Clause(field) { addClause(clauses, CONJ_NONE, mods, q); if (mods == MOD_NONE) { firstQuery = q; } } ) ( LOOKAHEAD(2) MultiTerm(field, clauses) | conj=Conjunction() mods=Modifiers() q=Clause(field) { addClause(clauses, conj, mods, q); } )* { if (clauses.size() == 1 && firstQuery != null) { return firstQuery; } else { return getBooleanQuery(clauses); } } } Query Clause(String field) : { Query q; Token fieldToken=null, boost=null; } { [ LOOKAHEAD(2) ( fieldToken= {field=discardEscapeChar(fieldToken.image);} | {field="*";} ) ] ( q=Term(field) | q=Query(field) [ boost= ] ) { return handleBoost(q, boost); } } Query Term(String field) : { Token term, boost=null, fuzzySlop=null, goop1, goop2; boolean prefix = false; boolean wildcard = false; boolean fuzzy = false; boolean regexp = false; boolean startInc=false; boolean endInc=false; Query q; } { ( ( term= | term= { wildcard=true; } | term= { prefix=true; } | term= { wildcard=true; } | term= { regexp=true; } | term= | term= { term.image = term.image.substring(0,1); } ) [ boost= [ fuzzySlop= { fuzzy=true; } ] | fuzzySlop= { fuzzy=true; } [ boost= ] ] { q = handleBareTokenQuery(field, term, fuzzySlop, prefix, wildcard, fuzzy, regexp); } | ( { startInc = true; } | ) ( goop1= | goop1= | goop1= ) ( ) ( goop2= | goop2= | goop2= ) ( { endInc 
= true; } | ) [ boost= ] { boolean startOpen=false; boolean endOpen=false; if (goop1.kind == RANGE_QUOTED) { goop1.image = goop1.image.substring(1, goop1.image.length()-1); } else if ("*".equals(goop1.image)) { startOpen=true; } if (goop2.kind == RANGE_QUOTED) { goop2.image = goop2.image.substring(1, goop2.image.length()-1); } else if ("*".equals(goop2.image)) { endOpen=true; } q = getRangeQuery(field, startOpen ? null : discardEscapeChar(goop1.image), endOpen ? null : discardEscapeChar(goop2.image), startInc, endInc); } | term= [ boost= [ fuzzySlop= { fuzzy=true; } ] | fuzzySlop= { fuzzy=true; } [ boost= ] ] { q = handleQuotedTerm(field, term, fuzzySlop); } ) { return handleBoost(q, boost); } } /** Returns the first query if splitOnWhitespace=true or otherwise the entire produced query */ Query MultiTerm(String field, List clauses) : { Token text, whitespace, followingText; Query firstQuery = null; } { text= { if (splitOnWhitespace) { firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false); addClause(clauses, CONJ_NONE, MOD_NONE, firstQuery); } } // Both lookaheads are required; the first lookahead vets the first following term and the second lookahead vets the rest LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) ( LOOKAHEAD({ getToken(1).kind == TERM && allowedPostMultiTerm(getToken(2).kind) }) followingText= { if (splitOnWhitespace) { Query q = getFieldQuery(field, discardEscapeChar(followingText.image), false); addClause(clauses, CONJ_NONE, MOD_NONE, q); } else { // build up the text to send to analysis text.image += " " + followingText.image; } } )+ { if (splitOnWhitespace == false) { firstQuery = getFieldQuery(field, discardEscapeChar(text.image), false); addMultiTermClauses(clauses, firstQuery); } return firstQuery; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy