All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.queryparser.surround.parser.QueryParser.jj Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* Surround query language parser */

/* Query language operators: OR, AND, NOT, W, N, (, ), ^, *, ?, " and comma */


options {
  STATIC=false;
  JAVA_UNICODE_ESCAPE=true;
  USER_CHAR_STREAM=true;
}

PARSER_BEGIN(QueryParser)

package org.apache.lucene.queryparser.surround.parser;

import java.util.ArrayList;
import java.util.List;
import java.io.StringReader;


import org.apache.lucene.analysis.TokenStream;

import org.apache.lucene.queryparser.surround.query.SrndQuery;
import org.apache.lucene.queryparser.surround.query.FieldsQuery;
import org.apache.lucene.queryparser.surround.query.OrQuery;
import org.apache.lucene.queryparser.surround.query.AndQuery;
import org.apache.lucene.queryparser.surround.query.NotQuery;
import org.apache.lucene.queryparser.surround.query.DistanceQuery;
import org.apache.lucene.queryparser.surround.query.SrndTermQuery;
import org.apache.lucene.queryparser.surround.query.SrndPrefixQuery;
import org.apache.lucene.queryparser.surround.query.SrndTruncQuery;

/**
 * This class is generated by JavaCC.  The only method that clients should need
 * to call is {@link #parse parse()}.
 *
 * <p>This parser generates queries that make use of position information
 * (Span queries). It provides positional operators (<code>w</code> and
 * <code>n</code>) that accept a numeric distance, as well as boolean
 * operators (<code>and</code>, <code>or</code>, and <code>not</code>),
 * wildcards (<code>*</code> and <code>?</code>), quoting (with
 * <code>"</code>), and boosting (via <code>^</code>).</p>
 *
 * <p>The operators (W, N, AND, OR, NOT) can be expressed lower-cased or
 * upper-cased, and the non-unary operators (everything but NOT) support
 * both infix <code>(a AND b AND c)</code> and prefix <code>AND(a, b,
 * c)</code> notation.</p>
 *
 * <p>The W and N operators express a positional relationship among their
 * operands.  W is ordered, and N is unordered.  The distance is 1 by
 * default, meaning the operands are adjacent, or may be provided as a
 * prefix from 2-99.  So, for example, 3W(a, b) means that terms a and b
 * must appear within three positions of each other, or in other words, up
 * to two terms may appear between a and b.</p>
 */
public class QueryParser {
  /** Minimum number of characters required in front of a trailing truncator. */
  static final int MINIMUM_PREFIX_LENGTH = 3;
  /** Minimum number of non-wildcard characters required in a truncated term. */
  static final int MINIMUM_CHARS_IN_TRUNC = 3;
  static final String TRUNCATION_ERROR_MESSAGE = "Too unrestrictive truncation: ";
  static final String BOOST_ERROR_MESSAGE = "Cannot handle boost value: ";

  /* CHECKME: These should be the same as for the tokenizer. How? */
  static final char TRUNCATOR = '*';
  static final char ANY_CHAR = '?';
  static final char FIELD_OPERATOR = ':';

  /**
   * Parses a surround query string with a freshly created parser.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException if the text cannot be tokenized or parsed
   */
  static public SrndQuery parse(String query) throws ParseException {
    QueryParser parser = new QueryParser();
    return parser.parse2(query);
  }

  /** Creates a parser over an empty input; {@link #parse2} re-initializes it per query. */
  public QueryParser() {
    this(new FastCharStream(new StringReader("")));
  }

  /**
   * Re-initializes this parser on the given text and parses it.
   *
   * @param query the query text
   * @return the parsed query
   * @throws ParseException if the text cannot be parsed; lexical errors
   *     ({@link TokenMgrError}) are converted into a ParseException carrying
   *     the same message, with the original error attached as its cause
   */
  public SrndQuery parse2(String query) throws ParseException {
    ReInit(new FastCharStream(new StringReader(query)));
    try {
      return TopSrndQuery();
    } catch (TokenMgrError tme) {
      /* ParseException has no cause-taking constructor, so chain explicitly. */
      ParseException pe = new ParseException(tme.getMessage());
      pe.initCause(tme);
      throw pe;
    }
  }

  /** Wraps a query so that it is restricted to the given field names. */
  protected SrndQuery getFieldsQuery(
      SrndQuery q, ArrayList<String> fieldNames) {
    /* FIXME: check acceptable subquery: at least one subquery should not be
     * a fields query.
     */
    return new FieldsQuery(q, fieldNames, FIELD_OPERATOR);
  }

  /** Builds an OR query; the operator token's image is kept as the operator name. */
  protected SrndQuery getOrQuery(List<SrndQuery> queries, boolean infix, Token orToken) {
    return new OrQuery(queries, infix, orToken.image);
  }

  /** Builds an AND query; the operator token's image is kept as the operator name. */
  protected SrndQuery getAndQuery(List<SrndQuery> queries, boolean infix, Token andToken) {
    return new AndQuery(queries, infix, andToken.image);
  }

  /** Builds a NOT query; the operator token's image is kept as the operator name. */
  protected SrndQuery getNotQuery(List<SrndQuery> queries, Token notToken) {
    return new NotQuery(queries, notToken.image);
  }

  /**
   * Extracts the distance from a distance operator image.
   *
   * @param distanceOp operator image such as {@code W}, {@code 2W} or {@code 3n}
   * @return 1 for a bare one-character operator, otherwise the numeric prefix
   */
  protected static int getOpDistance(String distanceOp) {
    /* W, 2W, 3W etc -> 1, 2 3, etc. Same for N, 2N ... */
    return distanceOp.length() == 1
      ? 1
      : Integer.parseInt(distanceOp.substring(0, distanceOp.length() - 1));
  }

  /**
   * Rejects a distance query whose subqueries are not allowed.
   *
   * @throws ParseException if {@code distq.distanceSubQueryNotAllowed()}
   *     returns a non-null message
   */
  protected static void checkDistanceSubQueries(DistanceQuery distq, String opName)
  throws ParseException {
    String m = distq.distanceSubQueryNotAllowed();
    if (m != null) {
      throw new ParseException("Operator " + opName + ": " + m);
    }
  }

  /**
   * Builds a W (ordered) or N (unordered) distance query and validates its subqueries.
   *
   * @throws ParseException if the subqueries are rejected by
   *     {@link #checkDistanceSubQueries}
   */
  protected SrndQuery getDistanceQuery(
        List<SrndQuery> queries,
        boolean infix,
        Token dToken,
        boolean ordered) throws ParseException {
    DistanceQuery dq = new DistanceQuery(queries,
                                         infix,
                                         getOpDistance(dToken.image),
                                         dToken.image,
                                         ordered);
    checkDistanceSubQueries(dq, dToken.image);
    return dq;
  }

  /** Builds a query for a single, possibly quoted, term. */
  protected SrndQuery getTermQuery(
        String term, boolean quoted) {
    return new SrndTermQuery(term, quoted);
  }

  /**
   * Checks a term that ends in the truncator.
   *
   * @param suffixed the token image including its final truncator character
   * @return true if at least {@link #MINIMUM_PREFIX_LENGTH} characters precede
   *     the final character
   */
  protected boolean allowedSuffix(String suffixed) {
    return (suffixed.length() - 1) >= MINIMUM_PREFIX_LENGTH;
  }

  /** Builds a prefix query; {@code prefix} must already have the truncator removed. */
  protected SrndQuery getPrefixQuery(
      String prefix, boolean quoted) {
    return new SrndPrefixQuery(prefix, quoted, TRUNCATOR);
  }

  /**
   * Checks a term containing wildcards.
   *
   * @return true if the term contains at least {@link #MINIMUM_CHARS_IN_TRUNC}
   *     characters that are neither {@link #TRUNCATOR} nor {@link #ANY_CHAR}
   */
  protected boolean allowedTruncation(String truncated) {
    /* At least 3 normal characters needed. */
    int nrNormalChars = 0;
    for (int i = 0; i < truncated.length(); i++) {
      char c = truncated.charAt(i);
      if ((c != TRUNCATOR) && (c != ANY_CHAR)) {
        nrNormalChars++;
      }
    }
    return nrNormalChars >= MINIMUM_CHARS_IN_TRUNC;
  }

  /** Builds a query for a term containing {@code *} and/or {@code ?} wildcards. */
  protected SrndQuery getTruncQuery(String truncated) {
    return new SrndTruncQuery(truncated, TRUNCATOR, ANY_CHAR);
  }
}

PARSER_END(QueryParser)

/* ***************** */
/* Token Definitions */
/* ***************** */

<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
| <#_TERM_CHAR: /* everything except whitespace and operators */
    ( ~[ " ", "\t", "\n", "\r",
         ",", "?", "*", "(", ")", ":", "^", "\""]
    ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r" ) >
| <#_STAR: "*" > /* term truncation */
| <#_ONE_CHAR: "?" > /* precisely one character in a term */
/* 2..99 prefix for distance operators */
| <#_DISTOP_NUM: ((["2"-"9"](["0"-"9"])?) | ("1" ["0"-"9"]))>
}

<Boost,DEFAULT> SKIP : {
  < <_WHITESPACE>>
}

/* Operator tokens (in increasing order of precedence): */
<DEFAULT> TOKEN :
{
  <OR: "OR" | "or">
| <AND: "AND" | "and">
| <NOT: "NOT" | "not">
| <W: (<_DISTOP_NUM>)? ("W"|"w")>
| <N: (<_DISTOP_NUM>)? ("N"|"n")>
/* These are excluded in _TERM_CHAR: */
| <LPAREN: "(">
| <RPAREN: ")">
| <COMMA: ",">
| <COLON: ":">
| <CARAT: "^"> : Boost
/* Literal non empty term between single quotes,
 * escape quoted quote or backslash by backslash.
 * Evt. truncated.
 */
| <TRUNCQUOTED: "\"" (~["\""])+ "\"" "*">
| <QUOTED: "\"" ( (~["\"", "\\"]) | ("\\" ["\\", "\""]))+ "\"">
| <SUFFIXTERM: (<_TERM_CHAR>)+ <_STAR>>
| <TRUNCTERM: (<_TERM_CHAR>)+
              (<_STAR> | <_ONE_CHAR> )+ /* at least one * or ? */
              (<_TERM_CHAR> | <_STAR> | <_ONE_CHAR> )*
  >
| <TERM: (<_TERM_CHAR>)+>
}

/* A boost value is only recognized directly after "^"; afterwards the
 * lexer switches back to the DEFAULT state.
 */
<Boost> TOKEN : {
  <NUMBER: (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )?> : DEFAULT
}


/* Entry point: a complete fields query followed by end of input. */
SrndQuery TopSrndQuery() : {
  SrndQuery q;
}{
  q = FieldsQuery()
  <EOF>
  {return q;}
}

/* An OR-level query, optionally restricted by one or more "field:" prefixes. */
SrndQuery FieldsQuery() : {
  SrndQuery q;
  ArrayList<String> fieldNames;
}{
  fieldNames = OptionalFields()
  q = OrQuery()
  {return (fieldNames == null) ? q : getFieldsQuery(q, fieldNames);}
}

/* Zero or more "field:" prefixes; returns null when there are none. */
ArrayList<String> OptionalFields() : {
  Token fieldName;
  ArrayList<String> fieldNames = null;
}{
  ( LOOKAHEAD(2) // to the colon
    fieldName = <TERM>
    <COLON> {
      if (fieldNames == null) {
        fieldNames = new ArrayList<String>();
      }
      fieldNames.add(fieldName.image);
    }
  )*
  {return fieldNames;}
}

/* Infix OR: a single operand is returned unwrapped. */
SrndQuery OrQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = AndQuery()
  ( oprt = <OR> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = AndQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getOrQuery(queries, true /* infix */, oprt);}
}

/* Infix AND: a single operand is returned unwrapped. */
SrndQuery AndQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = NotQuery()
  ( oprt = <AND> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = NotQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getAndQuery(queries, true /* infix */, oprt);}
}

/* Infix NOT: a single operand is returned unwrapped. */
SrndQuery NotQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = null;
  Token oprt = null;
}{
  q = NQuery()
  ( oprt = <NOT> { /* keep only last used operator */
      if (queries == null) {
        queries = new ArrayList<SrndQuery>();
        queries.add(q);
      }
    }
    q = NQuery() {
      queries.add(q);
    }
  )*
  {return (queries == null) ? q : getNotQuery(queries, oprt);}
}

/* Infix N (unordered distance); each operator builds a new two-operand query. */
SrndQuery NQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries;
  Token dt;
}{
  q = WQuery()
  ( dt = <N> {
      queries = new ArrayList<SrndQuery>();
      queries.add(q); /* left associative */
    }
    q = WQuery() {
      queries.add(q);
      q = getDistanceQuery(queries, true /* infix */, dt, false /* not ordered */);
    }
  )*
  {return q;}
}

/* Infix W (ordered distance); each operator builds a new two-operand query. */
SrndQuery WQuery() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries;
  Token wt;
}{
  q = PrimaryQuery()
  ( wt = <W> {
      queries = new ArrayList<SrndQuery>();
      queries.add(q); /* left associative */
    }
    q = PrimaryQuery() {
      queries.add(q);
      q = getDistanceQuery(queries, true /* infix */, wt, true /* ordered */);
    }
  )*
  {return q;}
}

SrndQuery PrimaryQuery() : { /* bracketed weighted query or weighted term */
  SrndQuery q;
}{
  ( <LPAREN> q = FieldsQuery() <RPAREN>
  | q = PrefixOperatorQuery()
  | q = SimpleTerm()
  )
  OptionalWeights(q)
  {return q;}
}

/* Prefix notation: OPERATOR(q1, q2, ...). */
SrndQuery PrefixOperatorQuery() : {
  Token oprt;
  List<SrndQuery> queries;
}{
  ( oprt = <OR> /* prefix OR */
    queries = FieldsQueryList()
    {return getOrQuery(queries, false /* not infix */, oprt);}
  | oprt = <AND> /* prefix AND */
    queries = FieldsQueryList()
    {return getAndQuery(queries, false /* not infix */, oprt);}
  | oprt = <N> /* prefix N */
    queries = FieldsQueryList()
    {return getDistanceQuery(queries, false /* not infix */, oprt, false /* not ordered */);}
  | oprt = <W> /* prefix W */
    queries = FieldsQueryList()
    {return getDistanceQuery(queries, false /* not infix */, oprt, true /* ordered */);}
  )
}

/* Parenthesized, comma separated list of at least two fields queries. */
List<SrndQuery> FieldsQueryList() : {
  SrndQuery q;
  ArrayList<SrndQuery> queries = new ArrayList<SrndQuery>();
}{
  <LPAREN>
  q = FieldsQuery() {queries.add(q);}
  (<COMMA> q = FieldsQuery() {queries.add(q);})+
  <RPAREN>
  {return queries;}
}

/* A plain, quoted, suffix-truncated or wildcard term. */
SrndQuery SimpleTerm() : {
  Token term;
}{
  ( term=<TERM>
    {return getTermQuery(term.image, false /* not quoted */);}
  | term=<QUOTED>
    /* strip the surrounding quotes */
    {return getTermQuery(term.image.substring(1, term.image.length()-1), true /* quoted */);}
  | term=<SUFFIXTERM> { /* ending in * */
      if (! allowedSuffix(term.image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      return getPrefixQuery(term.image.substring(0, term.image.length()-1), false /* not quoted */);
    }
  | term=<TRUNCTERM> { /* with at least one * or ? */
      if (! allowedTruncation(term.image)) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      return getTruncQuery(term.image);
    }
  | term=<TRUNCQUOTED> { /* eg. "9b-b,m"* */
      /* the image carries two quotes and the trailing *, hence the 3 */
      if ((term.image.length() - 3) < MINIMUM_PREFIX_LENGTH) {
        throw new ParseException(TRUNCATION_ERROR_MESSAGE + term.image);
      }
      return getPrefixQuery(term.image.substring(1, term.image.length()-2), true /* quoted */);
    }
  )
}

/* Zero or more "^number" boosts; each one multiplies the weight of q. */
void OptionalWeights(SrndQuery q) : {
  Token weight=null;
}{
  ( <CARAT> weight=<NUMBER> {
      float f;
      try {
        f = Float.parseFloat(weight.image);
      } catch (Exception floatExc) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image + " (" + floatExc + ")");
      }
      if (f <= 0.0) {
        throw new ParseException(BOOST_ERROR_MESSAGE + weight.image);
      }
      q.setWeight(f * q.getWeight()); /* left associative, fwiw */
    }
  )*
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy