/* org.apache.lucene.queryparser.surround.parser.QueryParser.jj Maven / Gradle / Ivy */
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
/* Surround query language parser */
/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */
/* JavaCC generation options for this grammar. */
options {
/* Generate instance (non-static) parser members, so each QueryParser object
 * carries its own parsing state. */
STATIC=false;
/* Ask for Java unicode escape processing in the input stream.
 * NOTE(review): per the JavaCC documentation this option is ignored when
 * USER_CHAR_STREAM is true, as it is here - confirm whether it is still needed. */
JAVA_UNICODE_ESCAPE=true;
/* The token manager reads from a user-supplied CharStream implementation;
 * the parser class below passes FastCharStream instances. */
USER_CHAR_STREAM=true;
}
PARSER_BEGIN(QueryParser)
package org.apache.lucene.queryparser.surround.parser;
import java.util.ArrayList;
import java.util.List;
import java.io.StringReader;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.queryparser.surround.query.SrndQuery;
import org.apache.lucene.queryparser.surround.query.FieldsQuery;
import org.apache.lucene.queryparser.surround.query.OrQuery;
import org.apache.lucene.queryparser.surround.query.AndQuery;
import org.apache.lucene.queryparser.surround.query.NotQuery;
import org.apache.lucene.queryparser.surround.query.DistanceQuery;
import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;
/**
 * This class is generated by JavaCC. The only method that clients should need
 * to call is {@link #parse parse()}.
 *
 * <p>This parser generates queries that make use of position information
 * (Span queries). It provides positional operators ({@code w} and
 * {@code n}) that accept a numeric distance, as well as boolean
 * operators ({@code and}, {@code or}, and {@code not}),
 * wildcards ({@code *} and {@code ?}), quoting (with
 * {@code "}), and boosting (via {@code ^}).
 *
 * <p>The operators (W, N, AND, OR, NOT) can be expressed lower-cased or
 * upper-cased, and the non-unary operators (everything but NOT) support
 * both infix {@code (a AND b AND c)} and prefix {@code AND(a, b, c)}
 * notation.
 *
 * <p>The W and N operators express a positional relationship among their
 * operands. W is ordered, and N is unordered. The distance is 1 by
 * default, meaning the operands are adjacent, or may be provided as a
 * prefix from 2-99. So, for example, 3W(a, b) means that terms a and b
 * must appear within three positions of each other, or in other words, up
 * to two terms may appear between a and b.
 */
public class QueryParser {
  /** Minimum number of characters that must precede the truncator in a prefix term. */
  static final int MINIMUM_PREFIX_LENGTH = 3;

  /** Minimum number of non-wildcard characters required in a truncated term. */
  static final int MINIMUM_CHARS_IN_TRUNC = 3;

  /** Prefix of the error message raised for terms that are truncated too liberally. */
  static final String TRUNCATION_ERROR_MESSAGE = "Too unrestrictive truncation: ";

  /** Prefix of the error message raised for boost values that cannot be used. */
  static final String BOOST_ERROR_MESSAGE = "Cannot handle boost value: ";

  /* CHECKME: These should be the same as for the tokenizer. How? */
  /** Wildcard character standing for any number of characters. */
  static final char TRUNCATOR = '*';

  /** Wildcard character standing for precisely one character. */
  static final char ANY_CHAR = '?';

  /** Character separating a field name from the query it applies to. */
  static final char FIELD_OPERATOR = ':';

  /**
   * Parses a surround query string using a freshly created parser.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException if the text is not a valid surround query
   */
  public static SrndQuery parse(String query) throws ParseException {
    QueryParser parser = new QueryParser();
    return parser.parse2(query);
  }

  /** Creates a parser over empty input; {@link #parse2(String)} re-initializes it per query. */
  public QueryParser() {
    this(new FastCharStream(new StringReader("")));
  }

  /**
   * Re-initializes this parser on the given text and parses it.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException if the text cannot be tokenized or parsed; a tokenizer failure is
   *     reported with the originating {@code TokenMgrError} attached as the cause
   */
  public SrndQuery parse2(String query) throws ParseException {
    ReInit(new FastCharStream(new StringReader(query)));
    try {
      return TopSrndQuery();
    } catch (TokenMgrError tme) {
      // Keep the lexical error as the cause so its stack trace is not lost.
      ParseException pe = new ParseException(tme.getMessage());
      pe.initCause(tme);
      throw pe;
    }
  }

  /**
   * Wraps a query so that it applies to the given field names.
   *
   * @param q the query to restrict
   * @param fieldNames the field names, in the order they were written
   * @return a {@code FieldsQuery} built with {@link #FIELD_OPERATOR}
   */
  protected SrndQuery getFieldsQuery(
      SrndQuery q, ArrayList<String> fieldNames) {
    /* FIXME: check acceptable subquery: at least one subquery should not be
     * a fields query.
     */
    return new FieldsQuery(q, fieldNames, FIELD_OPERATOR);
  }

  /**
   * Builds an OR query.
   *
   * @param queries the operands
   * @param infix whether the operator was written in infix notation
   * @param orToken the operator token; its image is kept as the operator name
   */
  protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
    return new OrQuery(queries, infix, orToken.image);
  }

  /**
   * Builds an AND query.
   *
   * @param queries the operands
   * @param infix whether the operator was written in infix notation
   * @param andToken the operator token; its image is kept as the operator name
   */
  protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
    return new AndQuery(queries, infix, andToken.image);
  }

  /**
   * Builds a NOT query.
   *
   * @param queries the operands
   * @param notToken the operator token; its image is kept as the operator name
   */
  protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
    return new NotQuery(queries, notToken.image);
  }

  /**
   * Extracts the distance from a distance operator image.
   *
   * @param distanceOp the operator text, e.g. {@code W}, {@code 2W} or {@code 3N}
   * @return 1 for a bare one-character operator, otherwise the leading number
   */
  protected static int getOpDistance(String distanceOp) {
    /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
    return distanceOp.length() == 1
        ? 1
        : Integer.parseInt(distanceOp.substring(0, distanceOp.length() - 1));
  }

  /**
   * Verifies that all subqueries of a distance query are allowed there.
   *
   * @param distq the distance query to check
   * @param opName the operator text, used in the error message
   * @throws ParseException if {@code distq} reports a subquery that is not allowed
   */
  protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
      throws ParseException {
    String m = distq.distanceSubQueryNotAllowed();
    if (m != null) {
      throw new ParseException("Operator " + opName + ": " + m);
    }
  }

  /**
   * Builds a distance (W or N) query and validates its subqueries.
   *
   * @param queries the operands
   * @param infix whether the operator was written in infix notation
   * @param dToken the operator token; its image supplies both the distance and the name
   * @param ordered {@code true} for W (ordered), {@code false} for N (unordered)
   * @throws ParseException if a subquery is not allowed in a distance query
   */
  protected SrndQuery getDistanceQuery(
      List<SrndQuery> queries,
      boolean infix,
      Token dToken,
      boolean ordered) throws ParseException {
    DistanceQuery dq = new DistanceQuery(queries,
                                         infix,
                                         getOpDistance(dToken.image),
                                         dToken.image,
                                         ordered);
    checkDistanceSubQueries(dq, dToken.image);
    return dq;
  }

  /**
   * Builds a query for a single term.
   *
   * @param term the term text, without surrounding quotes
   * @param quoted whether the term was written between quotes
   */
  protected SrndQuery getTermQuery(
      String term, boolean quoted) {
    return new SrndTermQuery(term, quoted);
  }

  /**
   * Tells whether a term ending in the truncator has a long enough prefix.
   *
   * @param suffixed the term text including its one trailing truncator character
   * @return {@code true} if at least {@link #MINIMUM_PREFIX_LENGTH} characters precede it
   */
  protected boolean allowedSuffix(String suffixed) {
    return (suffixed.length() - 1) >= MINIMUM_PREFIX_LENGTH;
  }

  /**
   * Builds a prefix query.
   *
   * @param prefix the prefix text, without the trailing truncator
   * @param quoted whether the prefix was written between quotes
   */
  protected SrndQuery getPrefixQuery(
      String prefix, boolean quoted) {
    return new SrndPrefixQuery(prefix, quoted, TRUNCATOR);
  }

  /**
   * Tells whether a truncated term contains enough non-wildcard characters.
   *
   * @param truncated the term text, including its wildcard characters
   * @return {@code true} if at least {@link #MINIMUM_CHARS_IN_TRUNC} characters are neither
   *     {@link #TRUNCATOR} nor {@link #ANY_CHAR}
   */
  protected boolean allowedTruncation(String truncated) {
    /* At least 3 normal characters needed. */
    int nrNormalChars = 0;
    for (int i = 0; i < truncated.length(); i++) {
      char c = truncated.charAt(i);
      if ((c != TRUNCATOR) && (c != ANY_CHAR)) {
        nrNormalChars++;
      }
    }
    return nrNormalChars >= MINIMUM_CHARS_IN_TRUNC;
  }

  /**
   * Builds a truncation (wildcard) query.
   *
   * @param truncated the term text, including its wildcard characters
   */
  protected SrndQuery getTruncQuery(String truncated) {
    return new SrndTruncQuery(truncated, TRUNCATOR, ANY_CHAR);
  }
}
PARSER_END(QueryParser)
/* ***************** */
/* Token Definitions */
/* ***************** */
/* Private (#-prefixed) helper regular expressions, declared for all lexical
 * states (<*>). They are building blocks referenced from other token
 * definitions and do not produce tokens themselves. */
<*> TOKEN : {
<#_NUM_CHAR: ["0"-"9"] >
| <#_TERM_CHAR: /* everything except whitespace and operators */
( ~[ " ", "\t", "\n", "\r",
",", "?", "*", "(", ")", ":", "^", "\""]
) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
| <#_STAR: "*" > /* term truncation */
| <#_ONE_CHAR: "?" > /* precisely one character in a term */
/* 2..99 prefix for distance operators: the first alternative covers 2-9 and
 * 20-99, the second covers 10-19. */
| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
}
/* Whitespace between tokens is skipped in the DEFAULT lexical state. */
<DEFAULT> SKIP : {
  < <_WHITESPACE>>
}

/* Operator tokens (in increasing order of precedence): */
/* The keyword operators are listed before TERM so that, for equally long
 * matches, the operator wins over a plain term. */
<DEFAULT> TOKEN :
{
  <OR: "OR" | "or">
| <AND: "AND" | "and">
| <NOT: "NOT" | "not">
| <W: (<_DISTOP_NUM>)? ("W"|"w")>
| <N: (<_DISTOP_NUM>)? ("N"|"n")>
/* These are excluded in _TERM_CHAR: */
| <LPAREN: "(">
| <RPAREN: ")">
| <COMMA: ",">
| <COLON: ":">
/* A boost marker switches the lexer to the Boost state, where only a NUMBER is recognized. */
| <CARAT: "^"> : Boost
/* Literal non empty term between double quotes,
 * escape quoted quote or backslash by backslash.
 * Optionally truncated.
 */
| <TRUNCQUOTED: "\"" (~["\""])+ "\"" <_STAR>>
| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
| <TRUNCTERM: (<_TERM_CHAR>)+
              (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
              (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
  >
| <TERM: (<_TERM_CHAR>)+>
}

/* Boost value following "^"; after it the lexer returns to the DEFAULT state. */
<Boost> TOKEN : {
  <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
}
/* Entry production: a complete surround query followed by end of input. */
SrndQuery TopSrndQuery() : {
  SrndQuery q;
}{
  q = FieldsQuery()
  /* Require end of input so that trailing unparsed text is reported as a
   * ParseException instead of being silently ignored. */
  <EOF>
  {return q;}
}
/* A query optionally preceded by one or more field names. Without field
 * names the inner query is returned unchanged. */
SrndQuery FieldsQuery() : {
  SrndQuery q;
  ArrayList<String> fieldNames;
}{
  fieldNames = OptionalFields()
  q = OrQuery()
  {return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);}
}
/* Zero or more "name:" field prefixes. Returns the collected names in the
 * order written, or null when there are none. */
ArrayList<String> OptionalFields() : {
  Token fieldName;
  ArrayList<String> fieldNames = null;
}{
  ( LOOKAHEAD(2) // to the colon
    fieldName = <TERM>
    <COLON> {
      /* The list is created lazily so that "no fields" stays null. */
      if (fieldNames == null) {
        fieldNames = new ArrayList<String>();
      }
      fieldNames.add(fieldName.image);
    }
  )*
  {return fieldNames;}
}
/* Infix OR, the loosest binding operator: AndQuery (OR AndQuery)*.
 * A single operand is returned as is; otherwise all operands are combined
 * into one OR query. */
SrndQuery OrQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = AndQuery()
  ( oprt = <OR> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = AndQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);}
}
/* Infix AND: NotQuery (AND NotQuery)*.
 * A single operand is returned as is; otherwise all operands are combined
 * into one AND query. */
SrndQuery AndQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = NotQuery()
  ( oprt = <AND> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = NotQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getAndQuery(queries, true /* infix */, oprt);}
}
/* Infix NOT: NQuery (NOT NQuery)*.
 * A single operand is returned as is; otherwise all operands are combined
 * into one NOT query. */
SrndQuery NotQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = NQuery()
  ( oprt = <NOT> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = NQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getNotQuery(queries, oprt);}
}
/* Infix unordered distance operator: WQuery (N WQuery)*.
 * Each operator occurrence builds a new two-operand distance query whose
 * left operand is the result so far. */
SrndQuery NQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries;
  Token dt;
}{
  q = WQuery()
  ( dt = <N> {
      queries = new ArrayList<SrndQuery>();
      queries.add(q); /* left associative */
    }
    q = WQuery() {
      queries.add(q);
      q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */);
    }
  )*
  {return q;}
}
/* Infix ordered distance operator, the tightest binding infix operator:
 * PrimaryQuery (W PrimaryQuery)*.
 * Each operator occurrence builds a new two-operand distance query whose
 * left operand is the result so far. */
SrndQuery WQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries;
  Token wt;
}{
  q = PrimaryQuery()
  ( wt = <W> {
      queries = new ArrayList<SrndQuery>();
      queries.add(q); /* left associative */
    }
    q = PrimaryQuery() {
      queries.add(q);
      q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */);
    }
  )*
  {return q;}
}
SrndQuery PrimaryQuery() : { /* bracketed weighted query or weighted term */
  SrndQuery q;
}{
  /* The parentheses are required around the nested query: without them this
   * alternative would recurse into FieldsQuery without consuming any input. */
  ( <LPAREN> q = FieldsQuery() <RPAREN>
  | q = PrefixOperatorQuery()
  | q = SimpleTerm()
  )
  /* Any boosts that follow are applied to q in place. */
  OptionalWeights(q)
  {return q;}
}
/* Operator in prefix notation followed by a parenthesized operand list,
 * e.g. AND(a, b, c) or 3W(a, b). NOT has no prefix form. */
SrndQuery PrefixOperatorQuery() : {
  Token oprt;
  List<SrndQuery> queries;
}{
  ( oprt = <OR> /* prefix OR */
    queries = FieldsQueryList()
    {return getOrQuery(queries, false /* not infix */, oprt);}
  | oprt = <AND> /* prefix AND */
    queries = FieldsQueryList()
    {return getAndQuery(queries, false /* not infix */, oprt);}
  | oprt = <N> /* prefix N */
    queries = FieldsQueryList()
    {return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);}
  | oprt = <W> /* prefix W */
    queries = FieldsQueryList()
    {return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);}
  )
}
/* Parenthesized, comma separated operand list of a prefix operator.
 * At least two operands are required. */
List<SrndQuery> FieldsQueryList() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = new ArrayList<SrndQuery>();
}{
  <LPAREN>
  q = FieldsQuery() {queries.add(q);}
  (<COMMA> q = FieldsQuery() {queries.add(q);})+
  <RPAREN>
  {return queries;}
}
/* A single term: plain, quoted, prefix (trailing *), truncated (with * or ?
 * inside), or a quoted prefix. Truncations that leave too few fixed
 * characters are rejected with a ParseException. */
SrndQuery SimpleTerm() : {
  Token term;
}{
  ( term=<TERM>
    {return getTermQuery(term.image, false /* not quoted */);}
  | term=<QUOTED>
    /* Strip the surrounding quotes. */
    {return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);}
  | term=<SUFFIXTERM> { /* ending in * */
      if (! allowedSuffix(term.image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      /* Strip the trailing truncator. */
      return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */);
    }
  | term=<TRUNCTERM> { /* with at least one * or ? */
      if (! allowedTruncation(term.image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      return getTruncQuery(term.image);
    }
  | term=<TRUNCQUOTED> { /* eg. "9b-b,m"* */
      /* The image holds two quotes and the truncator besides the prefix itself. */
      if ((term.image.length() - 3) < MINIMUM_PREFIX_LENGTH) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      /* Strip the opening quote, the closing quote and the truncator. */
      return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */);
    }
  )
}
/* Zero or more "^number" boosts. Each boost multiplies the current weight of
 * q; a value that cannot be parsed or is not positive raises a ParseException. */
void OptionalWeights(SrndQuery q) : {
  Token weight=null;
}{
  ( <CARAT> weight=<NUMBER> {
      float f;
      try {
        f = Float.parseFloat(weight.image);
      } catch (Exception floatExc) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image + " (" + floatExc + ")");
      }
      if (f <= 0.0) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image);
      }
      q.setWeight(f * q.getWeight()); /* left associative, fwiw */
    }
  )*
}