org.apache.solr.search.QueryParsing Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr Server
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.*;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.params.MapSolrParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.core.SolrCore;
import org.apache.solr.request.LocalSolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.function.FunctionQuery;

import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Pattern;

/**
 * Collection of static utilities useful for query parsing.
 *
 * @version $Id: QueryParsing.java 831175 2009-10-30 01:01:27Z markrmiller $
 */
public class QueryParsing {
  public static final String OP = "q.op";  // the SolrParam used to override the QueryParser "default operator"
  public static final String V = "v";      // value of this parameter
  public static final String F = "f";      // field that a query or command pertains to
  public static final String TYPE = "type";// type of this query or command
  public static final String DEFTYPE = "defType"; // default type for any direct subqueries
  public static final String LOCALPARAM_START = "{!";
  public static final char LOCALPARAM_END = '}';
  public static final String DOCID = "_docid_";

  /** 
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, IndexSchema schema) {
    return parseQuery(qs, null, schema);
  }

  /** 
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, String defaultField, IndexSchema schema) {
    try {
      Query query = schema.getSolrQueryParser(defaultField).parse(qs);

      if (SolrCore.log.isTraceEnabled() ) {
        SolrCore.log.trace("After QueryParser:" + query);
      }

      return query;

    } catch (ParseException e) {
      SolrCore.log(e);
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Error parsing Lucene query",e);
    }
  }

  /**
   * Helper utility for parsing a query using the Lucene QueryParser syntax. 
   * @param qs query expression in standard Lucene syntax
   * @param defaultField default field used for unqualified search terms in the query expression
   * @param params used to determine the default operator, overriding the schema specified operator
   * @param schema used for default operator (overridden by params) and passed to the query parser for field format analysis information
   */
  public static Query parseQuery(String qs, String defaultField, SolrParams params, IndexSchema schema) {
    try {
      SolrQueryParser parser = schema.getSolrQueryParser(defaultField);
      String opParam = params.get(OP);
      if (opParam != null) {
        parser.setDefaultOperator("AND".equals(opParam) ? QueryParser.Operator.AND : QueryParser.Operator.OR);
      }
      Query query = parser.parse(qs);

      if (SolrCore.log.isTraceEnabled() ) {
        SolrCore.log.trace("After QueryParser:" + query);
      }

      return query;

    } catch (ParseException e) {
      SolrCore.log(e);
      throw new SolrException( SolrException.ErrorCode.BAD_REQUEST,"Query parsing error: " + e.getMessage(),e);
    }
  }


  // note to self: something needs to detect infinite recursion when parsing queries
  static int parseLocalParams(String txt, int start, Map target, SolrParams params) throws ParseException {
    int off=start;
    if (!txt.startsWith(LOCALPARAM_START,off)) return start;
    StrParser p = new StrParser(txt,start,txt.length());
    p.pos+=2; // skip over "{!"

    for(;;) {
      /*
      if (p.pos>=txt.length()) {
        throw new ParseException("Missing '}' parsing local params '" + txt + '"');
      }
      */
      char ch = p.peek();
      if (ch==LOCALPARAM_END) {
        return p.pos+1;
      }

      String id = p.getId();
      if (id.length()==0) {
        throw new ParseException("Expected identifier '}' parsing local params '" + txt + '"');

      }
      String val=null;

      ch = p.peek();
      if (ch!='=') {
        // single word... treat {!func} as type=func for easy lookup
        val = id;
        id = TYPE;
      } else {
        // saw equals, so read value
        p.pos++;
        ch = p.peek();
        if (ch=='\"' || ch=='\'') {
          val = p.getQuotedString();
        } else if (ch=='$') {
          p.pos++;
          // dereference parameter
          String pname = p.getId();
          if (params!=null) {
            val = params.get(pname);
          }
        } else {
          // read unquoted literal ended by whitespace or '}'
          // there is no escaping.
          int valStart = p.pos;
          for (;;) {
            if (p.pos >= p.end) {
              throw new ParseException("Missing end to unquoted value starting at " + valStart + " str='" + txt +"'");
            }
            char c = p.val.charAt(p.pos);
            if (c==LOCALPARAM_END || Character.isWhitespace(c)) {
              val = p.val.substring(valStart, p.pos);
              break;
            }
            p.pos++;
          }
        }
      }
      if (target != null) target.put(id,val);
    }
  }

  /**
   *  "foo" returns null
   *  "{!prefix f=myfield}yes" returns type="prefix",f="myfield",v="yes"
   *  "{!prefix f=myfield v=$p}" returns type="prefix",f="myfield",v=params.get("p")
   */
  public static SolrParams getLocalParams(String txt, SolrParams params) throws ParseException {
    if (txt==null || !txt.startsWith(LOCALPARAM_START)) {
      return null;      
    }
    Map localParams = new HashMap();
    int start = QueryParsing.parseLocalParams(txt, 0, localParams, params);

    String val;
    if (start >= txt.length()) {
      // if the rest of the string is empty, check for "v" to provide the value
      val = localParams.get(V);
      val = val==null ? "" : val;
    } else {
      val = txt.substring(start);
    }
    localParams.put(V,val);
    return new MapSolrParams(localParams);
  }


  private static Pattern sortSep = Pattern.compile(",");

  /**
   * Returns null if the sortSpec is the standard sort desc.
   *
   * 
   * The form of the sort specification string currently parsed is:
   * 
   * >
   * SortSpec ::= SingleSort [, SingleSort]*
   * SingleSort ::=  SortDirection
   * SortDirection ::= top | desc | bottom | asc
   * 
   * Examples:
   *    *   score desc               #normal sort by score (will return null)
   *   weight bottom            #sort by weight ascending 
   *   weight desc              #sort by weight descending
   *   height desc,weight desc  #sort by height descending, and use weight descending to break any ties
   *   height desc,weight asc   #sort by height descending, using weight ascending as a tiebreaker
   * 
   *
   */
  public static Sort parseSort(String sortSpec, IndexSchema schema) {
    if (sortSpec==null || sortSpec.length()==0) return null;

    String[] parts = sortSep.split(sortSpec.trim());
    if (parts.length == 0) return null;

    SortField[] lst = new SortField[parts.length];
    for( int i=0; i 0 ) {
        String order = part.substring( idx+1 ).trim();
      	if( "desc".equals( order ) || "top".equals(order) ) {
      	  top = true;
      	}
      	else if ("asc".equals(order) || "bottom".equals(order)) {
      	  top = false;
      	}
      	else {
      	  throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Unknown sort order: "+order);
      	}
      	part = part.substring( 0, idx ).trim();
      }
      else {
        throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "Missing sort order." );
      }
    	
      if( "score".equals(part) ) {
        if (top) {
          // If there is only one thing in the list, just do the regular thing...
          if( parts.length == 1 ) {
            return null; // do normal scoring...
          }
          lst[i] = SortField.FIELD_SCORE;
        }
        else {
          lst[i] = new SortField(null, SortField.SCORE, true);
        }
      } else if (DOCID.equals(part)) {
        lst[i] = new SortField(null, SortField.DOC, top);
      }
      else {
        // getField could throw an exception if the name isn't found
        SchemaField f = null;
        try{
          f = schema.getField(part);
        }
        catch( SolrException e ){
          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on undefined field: "+part, e );
        }
        if (f == null || !f.indexed()){
          throw new SolrException( SolrException.ErrorCode.BAD_REQUEST, "can not sort on unindexed field: "+part );
        }
        lst[i] = f.getType().getSortField(f,top);
      }
    }
    return new Sort(lst);
  }


  ///////////////////////////
  ///////////////////////////
  ///////////////////////////

  static FieldType writeFieldName(String name, IndexSchema schema, Appendable out, int flags) throws IOException {
    FieldType ft = null;
    ft = schema.getFieldTypeNoEx(name);
    out.append(name);
    if (ft==null) {
      out.append("(UNKNOWN FIELD "+name+')');
    }
    out.append(':');
    return ft;
  }

  static void writeFieldVal(String val, FieldType ft, Appendable out, int flags) throws IOException {
    if (ft!=null) {
      try {
        out.append(ft.indexedToReadable(val));
      } catch (Exception e) {
        out.append("EXCEPTION(val=");
        out.append(val);
        out.append(")");
      }
    } else {
      out.append(val);
    }
  }
  /** @see #toString(Query,IndexSchema) */
  public static void toString(Query query, IndexSchema schema, Appendable out, int flags) throws IOException {
    boolean writeBoost=true;

    if (query instanceof TermQuery) {
      TermQuery q = (TermQuery)query;
      Term t = q.getTerm();
      FieldType ft = writeFieldName(t.field(), schema, out, flags);
      writeFieldVal(t.text(), ft, out, flags);
    } else if (query instanceof TermRangeQuery) {
      TermRangeQuery q = (TermRangeQuery)query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append( q.includesLower() ? '[' : '{' );
      String lt = q.getLowerTerm();
      String ut = q.getUpperTerm();
      if (lt==null) {
        out.append('*');
      } else {
        writeFieldVal(lt, ft, out, flags);
      }

      out.append(" TO ");

      if (ut==null) {
        out.append('*');
      } else {
        writeFieldVal(ut, ft, out, flags);
      }

      out.append( q.includesUpper() ? ']' : '}' );
    } else if (query instanceof NumericRangeQuery) {
      NumericRangeQuery q = (NumericRangeQuery)query;
      String fname = q.getField();
      FieldType ft = writeFieldName(fname, schema, out, flags);
      out.append( q.includesMin() ? '[' : '{' );
      Number lt = q.getMin();
      Number ut = q.getMax();
      if (lt==null) {
        out.append('*');
      } else {
        out.append(lt.toString());
      }

      out.append(" TO ");

      if (ut==null) {
        out.append('*');
      } else {
        out.append(ut.toString());
      }

      out.append( q.includesMax() ? ']' : '}' );
    } else if (query instanceof BooleanQuery) {
      BooleanQuery q = (BooleanQuery)query;
      boolean needParens=false;

      if (q.getBoost() != 1.0 || q.getMinimumNumberShouldMatch() != 0) {
        needParens=true;
      }
      if (needParens) {
        out.append('(');
      }
      boolean first=true;
      for (BooleanClause c : (List)q.clauses()) {
        if (!first) {
          out.append(' ');
        } else {
          first=false;
        }

        if (c.isProhibited()) {
          out.append('-');
        } else if (c.isRequired()) {
          out.append('+');
        }
        Query subQuery = c.getQuery();
        boolean wrapQuery=false;

        // TODO: may need to put parens around other types
        // of queries too, depending on future syntax.
        if (subQuery instanceof BooleanQuery) {
          wrapQuery=true;
        }

        if (wrapQuery) {
          out.append('(');
        }

        toString(subQuery, schema, out, flags);

        if (wrapQuery) {
          out.append(')');
        }
      }

      if (needParens) {
        out.append(')');
      }
      if (q.getMinimumNumberShouldMatch()>0) {
        out.append('~');
        out.append(Integer.toString(q.getMinimumNumberShouldMatch()));
      }

    } else if (query instanceof PrefixQuery) {
      PrefixQuery q = (PrefixQuery)query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof ConstantScorePrefixQuery) {
      ConstantScorePrefixQuery q = (ConstantScorePrefixQuery)query;
      Term prefix = q.getPrefix();
      FieldType ft = writeFieldName(prefix.field(), schema, out, flags);
      out.append(prefix.text());
      out.append('*');
    } else if (query instanceof WildcardQuery) {
      out.append(query.toString());
      writeBoost=false;
    } else if (query instanceof FuzzyQuery) {
      out.append(query.toString());
      writeBoost=false;      
    } else if (query instanceof ConstantScoreQuery) {
      out.append(query.toString());
      writeBoost=false;
    } else {
      out.append(query.getClass().getSimpleName()
              + '(' + query.toString() + ')' );
      writeBoost=false;
    }

    if (writeBoost && query.getBoost() != 1.0f) {
      out.append("^");
      out.append(Float.toString(query.getBoost()));
    }

  }
  
  /**
   * Formats a Query for debugging, using the IndexSchema to make 
   * complex field types readable.
   *
   * 
   * The benefit of using this method instead of calling 
   * Query.toString directly is that it knows about the data
   * types of each field, so any field which is encoded in a particularly 
   * complex way is still readable. The downside is that it only knows 
   * about built in Query types, and will not be able to format custom 
   * Query classes.
   * 
   */
  public static String toString(Query query, IndexSchema schema) {
    try {
      StringBuilder sb = new StringBuilder();
      toString(query, schema, sb, 0);
      return sb.toString();
    } catch (Exception e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Simple class to help with parsing a string
   * Note: This API is experimental and may change in non backward-compatible ways in the future
   */
  public static class StrParser {
    String val;
    int pos;
    int end;

    public StrParser(String val) {
      this(val,0,val.length());
    }

    public StrParser(String val, int start, int end) {
      this.val = val;
      this.pos = start;
      this.end = end;
    }

    void eatws() {
      while (pos='0' && ch<='9')
             || ch=='+' || ch=='-'
             || ch=='.' || ch=='e' || ch=='E'
        ) {
          pos++;
          arr[i]=ch;
        } else {
          break;
        }
      }

      return Float.parseFloat(new String(arr,0,i));
    }

    String getId() throws ParseException {
      eatws();
      int id_start=pos;
      if (pos=end) {
          throw new ParseException("Missing end quote for string at pos " + (val_start-1) + " str='"+val+"'");
        }
        char ch = val.charAt(pos);
        if (ch=='\\') {
          ch = pos