All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.solr.search.QueryParsing Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FunctionQuery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Collection of static utilities useful for query parsing.
 *
 * @version $Id: QueryParsing.java 831175 2009-10-30 01:01:27Z markrmiller $
 */
public class QueryParsing {
  public static final String OP = "q.op";  // the SolrParam used to override the QueryParser "default operator"
  public static final String V = "v";      // value of this parameter
  public static final String F = "f";      // field that a query or command pertains to
  public static final String TYPE = "type";// type of this query or command
  public static final String DEFTYPE = "defType"; // default type for any direct subqueries
  public static final String LOCALPARAM_START = "{!";
  public static final char LOCALPARAM_END = '}';
  public static final String DOCID = "_docid_";

  /**
   * Parses a query written in the Lucene QueryParser syntax without naming a default field.
   * <p>
   * This is a convenience overload: it forwards to
   * {@link #parseQuery(String, String, IndexSchema)} with a <code>null</code> default field.
   *
   * @param qs query expression in standard Lucene syntax
   * @param schema schema that supplies the query parser used to parse <code>qs</code>
   * @return the parsed query
   */
  public static Query parseQuery(String qs, IndexSchema schema) {
    // No explicit default field: the parser obtained from the schema decides what to use.
    final String noDefaultField = null;
    return parseQuery(qs, noDefaultField, schema);
  }

  /**
   * Parses a query written in the Lucene QueryParser syntax.
   * <p>
   * The parser is obtained from the schema via <code>getSolrQueryParser(defaultField)</code>.
   * The resulting query is written to the core log at trace level when trace logging is enabled.
   *
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param schema schema that supplies the query parser used to parse <code>qs</code>
   * @return the parsed query
   * @throws SolrException with error code BAD_REQUEST (wrapping the original
   *         ParseException) when <code>qs</code> cannot be parsed
   */
  public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
    final Query parsed;
    try {
      parsed = schema.getSolrQueryParser(defaultField).parse(qs);
    } catch (ParseException parseFailure) {
      // Record the failure, then surface it to the caller as a client error.
      SolrCore.log(parseFailure);
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error parsing Lucene query", parseFailure);
    }

    if (SolrCore.log.isTraceEnabled()) {
      SolrCore.log.trace("After QueryParser:" + parsed);
    }
    return parsed;
  }

  /**
   * Parses a query written in the Lucene QueryParser syntax, letting request parameters
   * override the parser's default operator.
   * <p>
   * If <code>params</code> holds a value for {@link #OP}, the default operator is set to AND
   * when that value is exactly <code>"AND"</code> (case-sensitive) and to OR for any other
   * value. If <code>params</code> is <code>null</code> or has no {@link #OP} value, the parser
   * obtained from the schema is used as is.
   *
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param params used to determine the default operator, overriding the schema specified
   *        operator; may be <code>null</code>, in which case nothing is overridden
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   * @return the parsed query
   * @throws SolrException with error code BAD_REQUEST (wrapping the original
   *         ParseException) when <code>qs</code> cannot be parsed
   */
  public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
    try {
      SolrQueryParser parser = schema.getSolrQueryParser(defaultField);

      // params is optional (parseLocalParams in this class also tolerates null params);
      // without it there is simply no operator override.
      String opParam = (params == null) ? null : params.get(OP);
      if (opParam != null) {
        parser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR);
      }
      Query query = parser.parse(qs);

      if (SolrCore.log.isTraceEnabled() ) {
        SolrCore.log.trace("After QueryParser:" + query);
      }

      return query;

    } catch (ParseException e) {
      SolrCore.log(e);
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Query parsing error: " + e.getMessage(),e);
    }
  }


  // note to self: something needs to detect infinite recursion when parsing queries
  /**
   * Parses a local-params section of the form <code>{!key=value key2=value2 ...}</code> that
   * begins at <code>start</code> in <code>txt</code>, storing each key/value pair in
   * <code>target</code>.
   * <p>
   * Supported entries:
   * <ul>
   *   <li>a bare word, e.g. <code>{!func}</code>, stored as <code>type=func</code></li>
   *   <li><code>key="quoted"</code> or <code>key='quoted'</code>, read with
   *       <code>StrParser.getQuotedString()</code></li>
   *   <li><code>key=$name</code>, stored as <code>params.get(name)</code>, or as
   *       <code>null</code> when <code>params</code> is <code>null</code></li>
   *   <li><code>key=literal</code>, where the literal ends at whitespace or <code>'}'</code>
   *       and supports no escaping</li>
   * </ul>
   *
   * @param txt the text containing the local params
   * @param start offset in <code>txt</code> at which the local params are expected to begin
   * @param target receives the parsed key/value pairs; may be <code>null</code> to only
   *        compute the end offset
   * @param params request parameters used to resolve <code>$name</code> references; may be
   *        <code>null</code>
   * @return the offset just past the closing <code>'}'</code>, or <code>start</code> unchanged
   *         if <code>txt</code> does not have <code>"{!"</code> at <code>start</code>
   * @throws ParseException if the closing <code>'}'</code> is missing, an identifier is
   *         expected but not found, or an unquoted value is not terminated
   */
  static int parseLocalParams(String txt, int start, Map<String,String> target, SolrParams params) throws ParseException {
    if (!txt.startsWith(LOCALPARAM_START, start)) return start;
    StrParser p = new StrParser(txt, start, txt.length());
    p.pos += LOCALPARAM_START.length(); // skip over "{!"

    for (;;) {
      // NOTE(review): peek() is assumed to leave p.pos on the character it returns
      // (the return below relies on that) -- confirm against StrParser.
      char ch = p.peek();
      if (ch == LOCALPARAM_END) {
        return p.pos + 1;
      }
      if (p.pos >= p.end) {
        // Input is exhausted and no closing brace was seen.
        throw new ParseException("Missing '}' parsing local params '" + txt + "'");
      }

      String id = p.getId();
      if (id.length() == 0) {
        throw new ParseException("Expected identifier or '}' parsing local params '" + txt + "'");
      }
      String val = null;

      ch = p.peek();
      if (ch != '=') {
        // single word... treat {!func} as type=func for easy lookup
        val = id;
        id = TYPE;
      } else {
        // saw equals, so read value
        p.pos++;
        ch = p.peek();
        if (ch == '\"' || ch == '\'') {
          val = p.getQuotedString();
        } else if (ch == '$') {
          p.pos++;
          // dereference parameter; val stays null when no params were supplied
          String pname = p.getId();
          if (params != null) {
            val = params.get(pname);
          }
        } else {
          // read unquoted literal ended by whitespace or '}'
          // there is no escaping.
          int valStart = p.pos;
          for (;;) {
            if (p.pos >= p.end) {
              throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt +"'");
            }
            char c = p.val.charAt(p.pos);
            if (c == LOCALPARAM_END || Character.isWhitespace(c)) {
              val = p.val.substring(valStart, p.pos);
              break;
            }
            p.pos++;
          }
        }
      }
      if (target != null) target.put(id, val);
    }
  }

  /**
   * Extracts the local params from a string that starts with <code>"{!"</code>.
   * <p>
   * The returned params always contain a value for {@link #V}: the text that follows the
   * closing <code>'}'</code> when there is any (this replaces an explicit <code>v</code>
   * local param), otherwise the explicit <code>v</code> local param, otherwise the empty
   * string.
   * <p>
   * Examples:
   * <pre>
   *  "foo" returns null
   *  "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
   *  "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
   * </pre>
   *
   * @param txt the text to inspect; may be <code>null</code>
   * @param params request parameters used to resolve <code>$name</code> references; may be
   *        <code>null</code>
   * @return the local params, or <code>null</code> if <code>txt</code> is <code>null</code>
   *         or does not start with <code>"{!"</code>
   * @throws ParseException if the local params section is malformed
   */
  public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException {
    if (txt == null || !txt.startsWith(LOCALPARAM_START)) {
      return null;
    }
    // Typed map: with a raw Map the String assignment from get(V) below does not compile.
    Map<String,String> localParams = new HashMap<String,String>();
    int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);

    String val;
    if (start >= txt.length()) {
      // if the rest of the string is empty, check for "v" to provide the value
      val = localParams.get(V);
      if (val == null) {
        val = "";
      }
    } else {
      val = txt.substring(start);
    }
    localParams.put(V, val);
    return new MapSolrParams(localParams);
  }


  // Separator used by parseSort to split a sort specification into its individual sort clauses.
  private static Pattern sortSep = Pattern.compile(",");

  /**
   * Returns null if the sortSpec is the standard sort desc.
   *
   * 

   * <p>
   * The form of the sort specification string currently parsed is:
   * </p>
   * <pre>
   * SortSpec ::= SingleSort [, SingleSort]*
   * SingleSort ::= &lt;fieldname&gt; SortDirection
   * SortDirection ::= top | desc | bottom | asc
   * </pre>
   * Examples:
   * <pre>
   *   score desc               #normal sort by score (will return null)
   *   weight bottom            #sort by weight ascending 
   *   weight desc              #sort by weight descending
   *   height desc,weight desc  #sort by height descending, and use weight descending to break any ties
   *   height desc,weight asc   #sort by height descending, using weight ascending as a tiebreaker
   * </pre>
* */ public static Sort parseSort(String sortSpec, IndexSchema schema) { if (sortSpec==null || sortSpec.length()==0) return null; String[] parts = sortSep.split(sortSpec.trim()); if (parts.length == 0) return null; SortField[] lst = new SortField[parts.length]; for( int i=0; i 0 ) { String order = part.substring( idx+1 ).trim(); if( "desc".equals( order ) || "top".equals(order) ) { top = true; } else if ("asc".equals(order) || "bottom".equals(order)) { top = false; } else { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown sort order: "+order); } part = part.substring( 0, idx ).trim(); } else { throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Missing sort order." ); } if( "score".equals(part) ) { if (top) { // If there is only one thing in the list, just do the regular thing... if( parts.length == 1 ) { return null; // do normal scoring... } lst[i] = SortField.FIELD_SCORE; } else { lst[i] = new SortField(null, SortField.SCORE, true); } } else if (DOCID.equals(part)) { lst[i] = new SortField(null, SortField.DOC, top); } else { // getField could throw an exception if the name isn't found SchemaField f = null; try{ f = schema.getField(part); } catch( SolrException e ){ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined field: "+part, e ); } if (f == null || !f.indexed()){ throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on unindexed field: "+part ); } lst[i] = f.getType().getSortField(f,top); } } return new Sort(lst); } /////////////////////////// /////////////////////////// /////////////////////////// static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException { FieldType ft = null; ft = schema.getFieldTypeNoEx(name); out.append(name); if (ft==null) { out.append("(UNKNOWN FIELD "+name+')'); } out.append(':'); return ft; } static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException 
{ if (ft!=null) { try { out.append(ft.indexedToReadable(val)); } catch (Exception e) { out.append("EXCEPTION(val="); out.append(val); out.append(")"); } } else { out.append(val); } } /** @see #toString(Query,IndexSchema) */ public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException { boolean writeBoost=true; if (query instanceof TermQuery) { TermQuery q = (TermQuery)query; Term t = q.getTerm(); FieldType ft = writeFieldName(t.field(), schema, out, flags); writeFieldVal(t.text(), ft, out, flags); } else if (query instanceof TermRangeQuery) { TermRangeQuery q = (TermRangeQuery)query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append( q.includesLower() ? '[' : '{' ); String lt = q.getLowerTerm(); String ut = q.getUpperTerm(); if (lt==null) { out.append('*'); } else { writeFieldVal(lt, ft, out, flags); } out.append(" TO "); if (ut==null) { out.append('*'); } else { writeFieldVal(ut, ft, out, flags); } out.append( q.includesUpper() ? ']' : '}' ); } else if (query instanceof NumericRangeQuery) { NumericRangeQuery q = (NumericRangeQuery)query; String fname = q.getField(); FieldType ft = writeFieldName(fname, schema, out, flags); out.append( q.includesMin() ? '[' : '{' ); Number lt = q.getMin(); Number ut = q.getMax(); if (lt==null) { out.append('*'); } else { out.append(lt.toString()); } out.append(" TO "); if (ut==null) { out.append('*'); } else { out.append(ut.toString()); } out.append( q.includesMax() ? 
']' : '}' ); } else if (query instanceof BooleanQuery) { BooleanQuery q = (BooleanQuery)query; boolean needParens=false; if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) { needParens=true; } if (needParens) { out.append('('); } boolean first=true; for (BooleanClause c : (List)q.clauses()) { if (!first) { out.append(' '); } else { first=false; } if (c.isProhibited()) { out.append('-'); } else if (c.isRequired()) { out.append('+'); } Query subQuery = c.getQuery(); boolean wrapQuery=false; // TODO: may need to put parens around other types // of queries too, depending on future syntax. if (subQuery instanceof BooleanQuery) { wrapQuery=true; } if (wrapQuery) { out.append('('); } toString(subQuery, schema, out, flags); if (wrapQuery) { out.append(')'); } } if (needParens) { out.append(')'); } if (q.getMinimumNumberShouldMatch()>0) { out.append('~'); out.append(Integer.toString(q.getMinimumNumberShouldMatch())); } } else if (query instanceof PrefixQuery) { PrefixQuery q = (PrefixQuery)query; Term prefix = q.getPrefix(); FieldType ft = writeFieldName(prefix.field(), schema, out, flags); out.append(prefix.text()); out.append('*'); } else if (query instanceof ConstantScorePrefixQuery) { ConstantScorePrefixQuery q = (ConstantScorePrefixQuery)query; Term prefix = q.getPrefix(); FieldType ft = writeFieldName(prefix.field(), schema, out, flags); out.append(prefix.text()); out.append('*'); } else if (query instanceof WildcardQuery) { out.append(query.toString()); writeBoost=false; } else if (query instanceof FuzzyQuery) { out.append(query.toString()); writeBoost=false; } else if (query instanceof ConstantScoreQuery) { out.append(query.toString()); writeBoost=false; } else { out.append(query.getClass().getSimpleName() + '(' + query.toString() + ')' ); writeBoost=false; } if (writeBoost && query.getBoost() != 1.0f) { out.append("^"); out.append(Float.toString(query.getBoost())); } } /** * Formats a Query for debugging, using the IndexSchema to make * complex field 
types readable. * *

* The benefit of using this method instead of calling * Query.toString directly is that it knows about the data * types of each field, so any field which is encoded in a particularly * complex way is still readable. The downside is that it only knows * about built in Query types, and will not be able to format custom * Query classes. *

*/ public static String toString(Query query, IndexSchema schema) { try { StringBuilder sb = new StringBuilder(); toString(query, schema, sb, 0); return sb.toString(); } catch (Exception e) { throw new RuntimeException(e); } } /** * Simple class to help with parsing a string * Note: This API is experimental and may change in non backward-compatible ways in the future */ public static class StrParser { String val; int pos; int end; public StrParser(String val) { this(val,0,val.length()); } public StrParser(String val, int start, int end) { this.val = val; this.pos = start; this.end = end; } void eatws() { while (pos='0' && ch<='9') || ch=='+' || ch=='-' || ch=='.' || ch=='e' || ch=='E' ) { pos++; arr[i]=ch; } else { break; } } return Float.parseFloat(new String(arr,0,i)); } String getId() throws ParseException { eatws(); int id_start=pos; if (pos=end) { throw new ParseException("Missing end quote for string at pos " + (val_start-1) + " str='"+val+"'"); } char ch = val.charAt(pos); if (ch=='\\') { ch = pos




© 2015 - 2024 Weber Informatics LLC | Privacy Policy