org.apache.solr.search.QueryParsing Maven / Gradle / Ivy
The newest version!
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FunctionQuery;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;
/**
* Collection of static utilities useful for query parsing.
*
* @version $Id: QueryParsing.java 831175 2009-10-30 01:01:27Z markrmiller $
*/
public class QueryParsing {
/** Name of the SolrParam used to override the QueryParser "default operator". */
public static final String OP = "q.op";
/** Local-param key: value of this parameter. */
public static final String V = "v";
/** Local-param key: field that a query or command pertains to. */
public static final String F = "f";
/** Local-param key: type of this query or command. */
public static final String TYPE = "type";
/** Local-param key: default type for any direct subqueries. */
public static final String DEFTYPE = "defType";
/** Prefix that opens a local-params section at the start of a query string. */
public static final String LOCALPARAM_START = "{!";
/** Character that closes a local-params section. */
public static final char LOCALPARAM_END = '}';
/** Pseudo field name recognised by parseSort as a sort on the document id (SortField.DOC). */
public static final String DOCID = "_docid_";
/**
* Helper utility for parsing a query using the Lucene QueryParser syntax.
* @param qs query expression in standard Lucene syntax
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, IndexSchema schema) {
  // No explicit default field is given here: hand null to the three-argument overload.
  final String noDefaultField = null;
  return parseQuery(qs, noDefaultField, schema);
}
/**
* Helper utility for parsing a query using the Lucene QueryParser syntax.
* @param qs query expression in standard Lucene syntax
* @param defaultField default field used for unqualified search terms in the query expression
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
  final Query parsed;
  try {
    parsed = schema.getSolrQueryParser(defaultField).parse(qs);
  } catch (ParseException parseFailure) {
    // Syntax errors are logged, then reported to the caller as a BAD_REQUEST.
    SolrCore.log(parseFailure);
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing Lucene query", parseFailure);
  }
  if (SolrCore.log.isTraceEnabled()) {
    SolrCore.log.trace("After QueryParser:" + parsed);
  }
  return parsed;
}
/**
* Helper utility for parsing a query using the Lucene QueryParser syntax.
* @param qs query expression in standard Lucene syntax
* @param defaultField default field used for unqualified search terms in the query expression
* @param params used to determine the default operator, overriding the schema specified operator
* @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
*/
public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
  try {
    SolrQueryParser parser = schema.getSolrQueryParser(defaultField);
    // params is optional (as in parseLocalParams): with no params, or no q.op entry,
    // the parser keeps whatever default operator it was created with.
    String opParam = (params == null) ? null : params.get(OP);
    if (opParam != null) {
      // Only the exact string "AND" selects AND; any other value selects OR.
      parser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR);
    }
    Query query = parser.parse(qs);
    if (SolrCore.log.isTraceEnabled()) {
      SolrCore.log.trace("After QueryParser:" + query);
    }
    return query;
  } catch (ParseException e) {
    // Syntax errors are logged, then reported to the caller as a BAD_REQUEST.
    SolrCore.log(e);
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Query parsing error: " + e.getMessage(), e);
  }
}
// note to self: something needs to detect infinite recursion when parsing queries
/**
 * Parses a local-params section ("{!key=value ...}") located at <code>start</code> in <code>txt</code>.
 *
 * Each entry is stored in <code>target</code> (when non-null). A bare word such as
 * <code>{!func}</code> is stored as <code>type=func</code>. A value may be quoted,
 * unquoted (ended by whitespace or '}', no escaping), or <code>$name</code>, which is
 * looked up in <code>params</code> (the value is null when <code>params</code> is null).
 *
 * @param txt    the full string being parsed
 * @param start  offset in txt at which local params may begin
 * @param target map receiving the parsed name/value pairs; may be null to only skip the section
 * @param params source for <code>$name</code> dereferencing; may be null
 * @return the offset just past the closing '}', or <code>start</code> unchanged if txt does
 *         not have "{!" at that offset
 * @throws ParseException if an identifier is missing or an unquoted value is not terminated
 */
static int parseLocalParams(String txt, int start, Map target, SolrParams params) throws ParseException {
  if (!txt.startsWith(LOCALPARAM_START, start)) return start;
  StrParser p = new StrParser(txt, start, txt.length());
  p.pos += LOCALPARAM_START.length(); // skip over "{!"
  for (;;) {
    // NOTE(review): detection of a missing '}' at end of input is left to
    // StrParser.peek()/getId(), whose bodies are not visible here -- confirm.
    char ch = p.peek();
    if (ch == LOCALPARAM_END) {
      return p.pos + 1;
    }
    String id = p.getId();
    if (id.length() == 0) {
      throw new ParseException("Expected identifier or '}' parsing local params '" + txt + "'");
    }
    String val = null;
    ch = p.peek();
    if (ch != '=') {
      // single word... treat {!func} as type=func for easy lookup
      val = id;
      id = TYPE;
    } else {
      // saw equals, so read value
      p.pos++;
      ch = p.peek();
      if (ch == '\"' || ch == '\'') {
        val = p.getQuotedString();
      } else if (ch == '$') {
        p.pos++;
        // dereference parameter
        String pname = p.getId();
        if (params != null) {
          val = params.get(pname);
        }
      } else {
        // read unquoted literal ended by whitespace or '}'
        // there is no escaping.
        int valStart = p.pos;
        for (;;) {
          if (p.pos >= p.end) {
            throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt + "'");
          }
          char c = p.val.charAt(p.pos);
          if (c == LOCALPARAM_END || Character.isWhitespace(c)) {
            val = p.val.substring(valStart, p.pos);
            break;
          }
          p.pos++;
        }
      }
    }
    if (target != null) target.put(id, val);
  }
}
/**
* "foo" returns null
* "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
* "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
*/
public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException {
  // Strings without a leading "{!" carry no local params at all.
  if (txt == null || !txt.startsWith(LOCALPARAM_START)) {
    return null;
  }
  // Typed map: the values are read back as String below and handed to MapSolrParams.
  Map<String,String> localParams = new HashMap<String,String>();
  int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);
  String val;
  if (start >= txt.length()) {
    // if the rest of the string is empty, check for "v" to provide the value
    val = localParams.get(V);
    if (val == null) {
      val = "";
    }
  } else {
    // Everything after the closing '}' is the value.
    val = txt.substring(start);
  }
  localParams.put(V, val);
  return new MapSolrParams(localParams);
}
/** Separator between the individual sort clauses of a sort specification; compiled once and never reassigned. */
private static final Pattern sortSep = Pattern.compile(",");
/**
* Returns null if the sortSpec is the standard sort desc.
*
* <p>
* The form of the sort specification string currently parsed is:
* </p>
* <pre>
* SortSpec ::= SingleSort [, SingleSort]*
* SingleSort ::= &lt;fieldname&gt; SortDirection
* SortDirection ::= top | desc | bottom | asc
* </pre>
* Examples:
* <pre>
* score desc #normal sort by score (will return null)
* weight bottom #sort by weight ascending
* weight desc #sort by weight descending
* height desc,weight desc #sort by height descending, and use weight descending to break any ties
* height desc,weight asc #sort by height descending, using weight ascending as a tiebreaker
* </pre>
*/
public static Sort parseSort(String sortSpec, IndexSchema schema) {
if (sortSpec==null || sortSpec.length()==0) return null;
String[] parts = sortSep.split(sortSpec.trim());
if (parts.length == 0) return null;
SortField[] lst = new SortField[parts.length];
for( int i=0; i 0 ) {
String order = part.substring( idx+1 ).trim();
if( "desc".equals( order ) || "top".equals(order) ) {
top = true;
}
else if ("asc".equals(order) || "bottom".equals(order)) {
top = false;
}
else {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown sort order: "+order);
}
part = part.substring( 0, idx ).trim();
}
else {
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Missing sort order." );
}
if( "score".equals(part) ) {
if (top) {
// If there is only one thing in the list, just do the regular thing...
if( parts.length == 1 ) {
return null; // do normal scoring...
}
lst[i] = SortField.FIELD_SCORE;
}
else {
lst[i] = new SortField(null, SortField.SCORE, true);
}
} else if (DOCID.equals(part)) {
lst[i] = new SortField(null, SortField.DOC, top);
}
else {
// getField could throw an exception if the name isn't found
SchemaField f = null;
try{
f = schema.getField(part);
}
catch( SolrException e ){
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined field: "+part, e );
}
if (f == null || !f.indexed()){
throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on unindexed field: "+part );
}
lst[i] = f.getType().getSortField(f,top);
}
}
return new Sort(lst);
}
///////////////////////////
///////////////////////////
///////////////////////////
/**
 * Appends "name:" to <code>out</code>, inserting an "(UNKNOWN FIELD name)" marker before
 * the colon when the schema has no type for the field.
 *
 * @return the field type found in the schema, or null when there is none
 */
static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
  final FieldType resolvedType = schema.getFieldTypeNoEx(name);
  out.append(name);
  final boolean unknownField = (resolvedType == null);
  if (unknownField) {
    out.append("(UNKNOWN FIELD " + name + ')');
  }
  out.append(':');
  return resolvedType;
}
/**
 * Appends a field value to <code>out</code>. With a field type the indexed form is
 * converted via <code>indexedToReadable</code>; without one the value is written as-is.
 * If the conversion fails, an "EXCEPTION(val=...)" marker is written instead.
 */
static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
  if (ft == null) {
    // No type information: emit the raw value.
    out.append(val);
    return;
  }
  try {
    out.append(ft.indexedToReadable(val));
  } catch (Exception conversionFailure) {
    out.append("EXCEPTION(val=");
    out.append(val);
    out.append(")");
  }
}
/** @see #toString(Query,IndexSchema) */
public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
  // Cleared by the branches that delegate to Query.toString(), presumably because that
  // output already carries the boost -- confirm against the Lucene version in use.
  boolean writeBoost = true;
  if (query instanceof TermQuery) {
    TermQuery q = (TermQuery) query;
    Term t = q.getTerm();
    FieldType ft = writeFieldName(t.field(), schema, out, flags);
    writeFieldVal(t.text(), ft, out, flags);
  } else if (query instanceof TermRangeQuery) {
    TermRangeQuery q = (TermRangeQuery) query;
    String fname = q.getField();
    FieldType ft = writeFieldName(fname, schema, out, flags);
    out.append(q.includesLower() ? '[' : '{');
    String lt = q.getLowerTerm();
    String ut = q.getUpperTerm();
    // A missing bound is written as '*'.
    if (lt == null) {
      out.append('*');
    } else {
      writeFieldVal(lt, ft, out, flags);
    }
    out.append(" TO ");
    if (ut == null) {
      out.append('*');
    } else {
      writeFieldVal(ut, ft, out, flags);
    }
    out.append(q.includesUpper() ? ']' : '}');
  } else if (query instanceof NumericRangeQuery) {
    NumericRangeQuery q = (NumericRangeQuery) query;
    String fname = q.getField();
    // Called for the "name:" output; numeric bounds are written with Number.toString().
    writeFieldName(fname, schema, out, flags);
    out.append(q.includesMin() ? '[' : '{');
    Number lt = q.getMin();
    Number ut = q.getMax();
    if (lt == null) {
      out.append('*');
    } else {
      out.append(lt.toString());
    }
    out.append(" TO ");
    if (ut == null) {
      out.append('*');
    } else {
      out.append(ut.toString());
    }
    out.append(q.includesMax() ? ']' : '}');
  } else if (query instanceof BooleanQuery) {
    BooleanQuery q = (BooleanQuery) query;
    // Parenthesise the clause list when a boost or "~n" suffix will follow it.
    boolean needParens = false;
    if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) {
      needParens = true;
    }
    if (needParens) {
      out.append('(');
    }
    boolean first = true;
    // The element type must be stated explicitly so the clauses can be iterated as BooleanClause.
    for (BooleanClause c : (List<BooleanClause>) q.clauses()) {
      if (!first) {
        out.append(' ');
      } else {
        first = false;
      }
      if (c.isProhibited()) {
        out.append('-');
      } else if (c.isRequired()) {
        out.append('+');
      }
      Query subQuery = c.getQuery();
      boolean wrapQuery = false;
      // TODO: may need to put parens around other types
      // of queries too, depending on future syntax.
      if (subQuery instanceof BooleanQuery) {
        wrapQuery = true;
      }
      if (wrapQuery) {
        out.append('(');
      }
      toString(subQuery, schema, out, flags);
      if (wrapQuery) {
        out.append(')');
      }
    }
    if (needParens) {
      out.append(')');
    }
    if (q.getMinimumNumberShouldMatch() > 0) {
      out.append('~');
      out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
    }
  } else if (query instanceof PrefixQuery) {
    PrefixQuery q = (PrefixQuery) query;
    Term prefix = q.getPrefix();
    // Only the "name:" output is needed; the prefix text is written raw.
    writeFieldName(prefix.field(), schema, out, flags);
    out.append(prefix.text());
    out.append('*');
  } else if (query instanceof ConstantScorePrefixQuery) {
    ConstantScorePrefixQuery q = (ConstantScorePrefixQuery) query;
    Term prefix = q.getPrefix();
    writeFieldName(prefix.field(), schema, out, flags);
    out.append(prefix.text());
    out.append('*');
  } else if (query instanceof WildcardQuery) {
    out.append(query.toString());
    writeBoost = false;
  } else if (query instanceof FuzzyQuery) {
    out.append(query.toString());
    writeBoost = false;
  } else if (query instanceof ConstantScoreQuery) {
    out.append(query.toString());
    writeBoost = false;
  } else {
    // Unknown query type: fall back to ClassName(toString()).
    out.append(query.getClass().getSimpleName()
        + '(' + query.toString() + ')');
    writeBoost = false;
  }
  if (writeBoost && query.getBoost() != 1.0f) {
    out.append("^");
    out.append(Float.toString(query.getBoost()));
  }
}
/**
* Formats a Query for debugging, using the IndexSchema to make
* complex field types readable.
*
* <p>
* The benefit of using this method instead of calling
* {@code Query.toString} directly is that it knows about the data
* types of each field, so any field which is encoded in a particularly
* complex way is still readable. The downside is that it only knows
* about built in Query types, and will not be able to format custom
* Query classes.
* </p>
*/
public static String toString(Query query, IndexSchema schema) {
  final StringBuilder buffer = new StringBuilder();
  try {
    // Flags value 0 is passed through to the Appendable-based overload.
    toString(query, schema, buffer, 0);
  } catch (Exception failure) {
    // Any failure is rethrown unchecked, with the original as its cause.
    throw new RuntimeException(failure);
  }
  return buffer.toString();
}
/**
* Simple class to help with parsing a string
* Note: This API is experimental and may change in non backward-compatible ways in the future
*/
public static class StrParser {
String val;
int pos;
int end;
public StrParser(String val) {
this(val,0,val.length());
}
public StrParser(String val, int start, int end) {
this.val = val;
this.pos = start;
this.end = end;
}
void eatws() {
while (pos='0' && ch<='9')
|| ch=='+' || ch=='-'
|| ch=='.' || ch=='e' || ch=='E'
) {
pos++;
arr[i]=ch;
} else {
break;
}
}
return Float.parseFloat(new String(arr,0,i));
}
String getId() throws ParseException {
eatws();
int id_start=pos;
if (pos=end) {
throw new ParseException("Missing end quote for string at pos " + (val_start-1) + " str='"+val+"'");
}
char ch = val.charAt(pos);
if (ch=='\\') {
ch = pos