All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.queryparser.classic.QueryParserBase Maven / Gradle / Ivy

There is a newer version: 10.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.queryparser.classic;

import java.io.StringReader;
import java.text.DateFormat;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.DateTools;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.QueryParser.Operator;
import org.apache.lucene.queryparser.flexible.standard.CommonQueryParserConfiguration;
import org.apache.lucene.search.*;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery.TooManyClauses;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.QueryBuilder;
import org.apache.lucene.util.automaton.RegExp;

import static org.apache.lucene.util.automaton.Operations.DEFAULT_DETERMINIZE_WORK_LIMIT;

/** This class is overridden by QueryParser in QueryParser.jj
 * and acts to separate the majority of the Java code from the .jj grammar file. 
 */
public abstract class QueryParserBase extends QueryBuilder implements CommonQueryParserConfiguration {

  static final int CONJ_NONE   = 0;
  static final int CONJ_AND    = 1;
  static final int CONJ_OR     = 2;

  static final int MOD_NONE    = 0;
  static final int MOD_NOT     = 10;
  static final int MOD_REQ     = 11;

  // make it possible to call setDefaultOperator() without accessing
  // the nested class:
  /** Alternative form of QueryParser.Operator.AND */
  public static final Operator AND_OPERATOR = Operator.AND;
  /** Alternative form of QueryParser.Operator.OR */
  public static final Operator OR_OPERATOR = Operator.OR;

  /** The actual operator that parser uses to combine query terms */
  Operator operator = OR_OPERATOR;

  MultiTermQuery.RewriteMethod multiTermRewriteMethod = MultiTermQuery.CONSTANT_SCORE_REWRITE;
  boolean allowLeadingWildcard = false;

  protected String field;
  int phraseSlop = 0;
  float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
  Locale locale = Locale.getDefault();
  TimeZone timeZone = TimeZone.getDefault();

  // the default date resolution
  DateTools.Resolution dateResolution = null;
  // maps field names to date resolutions
  Map fieldToDateResolution = null;

  boolean autoGeneratePhraseQueries;
  int determinizeWorkLimit = DEFAULT_DETERMINIZE_WORK_LIMIT;

  // So the generated QueryParser(CharStream) won't error out
  protected QueryParserBase() {
    super(null);
  }

  /** Initializes a query parser.  Called by the QueryParser constructor
   *  @param f  the default field for query terms.
   *  @param a   used to find terms in the query text.
   */
  public void init(String f, Analyzer a) {
    setAnalyzer(a);
    field = f;
    setAutoGeneratePhraseQueries(false);
  }

  // the generated parser will create these in QueryParser
  public abstract void ReInit(CharStream stream);
  public abstract Query TopLevelQuery(String field) throws ParseException;


  /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
   *  @param query  the query string to be parsed.
   *  @throws ParseException if the parsing fails
   */
  public Query parse(String query) throws ParseException {
    ReInit(new FastCharStream(new StringReader(query)));
    try {
      // TopLevelQuery is a Query followed by the end-of-input (EOF)
      Query res = TopLevelQuery(field);
      return res!=null ? res : newBooleanQuery().build();
    }
    catch (ParseException | TokenMgrError tme) {
      // rethrow to include the original query:
      ParseException e = new ParseException("Cannot parse '" +query+ "': " + tme.getMessage());
      e.initCause(tme);
      throw e;
    } catch (BooleanQuery.TooManyClauses tmc) {
      ParseException e = new ParseException("Cannot parse '" +query+ "': too many boolean clauses");
      e.initCause(tmc);
      throw e;
    }
  }

  /**
   * @return Returns the default field.
   */
  public String getField() {
    return field;
  }

  /**
   * @see #setAutoGeneratePhraseQueries(boolean)
   */
  public final boolean getAutoGeneratePhraseQueries() {
    return autoGeneratePhraseQueries;
  }

  /**
   * Set to true if phrase queries will be automatically generated
   * when the analyzer returns more than one term from whitespace
   * delimited text.
   * NOTE: this behavior may not be suitable for all languages.
   * 

* Set to false if phrase queries should only be generated when * surrounded by double quotes. */ public void setAutoGeneratePhraseQueries(boolean value) { this.autoGeneratePhraseQueries = value; } /** * Get the minimal similarity for fuzzy queries. */ @Override public float getFuzzyMinSim() { return fuzzyMinSim; } /** * Set the minimum similarity for fuzzy queries. * Default is 2f. */ @Override public void setFuzzyMinSim(float fuzzyMinSim) { this.fuzzyMinSim = fuzzyMinSim; } /** * Get the prefix length for fuzzy queries. * @return Returns the fuzzyPrefixLength. */ @Override public int getFuzzyPrefixLength() { return fuzzyPrefixLength; } /** * Set the prefix length for fuzzy queries. Default is 0. * @param fuzzyPrefixLength The fuzzyPrefixLength to set. */ @Override public void setFuzzyPrefixLength(int fuzzyPrefixLength) { this.fuzzyPrefixLength = fuzzyPrefixLength; } /** * Sets the default slop for phrases. If zero, then exact phrase matches * are required. Default value is zero. */ @Override public void setPhraseSlop(int phraseSlop) { this.phraseSlop = phraseSlop; } /** * Gets the default slop for phrases. */ @Override public int getPhraseSlop() { return phraseSlop; } /** * Set to true to allow leading wildcard characters. *

* When set, * or ? are allowed as * the first character of a PrefixQuery and WildcardQuery. * Note that this can produce very slow * queries on big indexes. *

* Default: false. */ @Override public void setAllowLeadingWildcard(boolean allowLeadingWildcard) { this.allowLeadingWildcard = allowLeadingWildcard; } /** * @see #setAllowLeadingWildcard(boolean) */ @Override public boolean getAllowLeadingWildcard() { return allowLeadingWildcard; } /** * Sets the boolean operator of the QueryParser. * In default mode (OR_OPERATOR) terms without any modifiers * are considered optional: for example capital of Hungary is equal to * capital OR of OR Hungary.
* In AND_OPERATOR mode terms are considered to be in conjunction: the * above mentioned query is parsed as capital AND of AND Hungary */ public void setDefaultOperator(Operator op) { this.operator = op; } /** * Gets implicit operator setting, which will be either AND_OPERATOR * or OR_OPERATOR. */ public Operator getDefaultOperator() { return operator; } /** * By default QueryParser uses {@link org.apache.lucene.search.MultiTermQuery#CONSTANT_SCORE_REWRITE} * when creating a {@link PrefixQuery}, {@link WildcardQuery} or {@link TermRangeQuery}. This implementation is generally preferable because it * a) Runs faster b) Does not have the scarcity of terms unduly influence score * c) avoids any {@link TooManyClauses} exception. * However, if your application really needs to use the * old-fashioned {@link BooleanQuery} expansion rewriting and the above * points are not relevant then use this to change * the rewrite method. */ @Override public void setMultiTermRewriteMethod(MultiTermQuery.RewriteMethod method) { multiTermRewriteMethod = method; } /** * @see #setMultiTermRewriteMethod */ @Override public MultiTermQuery.RewriteMethod getMultiTermRewriteMethod() { return multiTermRewriteMethod; } /** * Set locale used by date range parsing, lowercasing, and other * locale-sensitive operations. */ @Override public void setLocale(Locale locale) { this.locale = locale; } /** * Returns current locale, allowing access by subclasses. */ @Override public Locale getLocale() { return locale; } @Override public void setTimeZone(TimeZone timeZone) { this.timeZone = timeZone; } @Override public TimeZone getTimeZone() { return timeZone; } /** * Sets the default date resolution used by RangeQueries for fields for which no * specific date resolutions has been set. Field specific resolutions can be set * with {@link #setDateResolution(String, org.apache.lucene.document.DateTools.Resolution)}. * * @param dateResolution the default date resolution to set */ @Override public void setDateResolution(DateTools.Resolution dateResolution) { this.dateResolution = dateResolution; } /** * Sets the date resolution used by RangeQueries for a specific field. * * @param fieldName field for which the date resolution is to be set * @param dateResolution date resolution to set */ public void setDateResolution(String fieldName, DateTools.Resolution dateResolution) { if (fieldName == null) { throw new IllegalArgumentException("Field must not be null."); } if (fieldToDateResolution == null) { // lazily initialize HashMap fieldToDateResolution = new HashMap<>(); } fieldToDateResolution.put(fieldName, dateResolution); } /** * Returns the date resolution that is used by RangeQueries for the given field. * Returns null, if no default or field specific date resolution has been set * for the given field. * */ public DateTools.Resolution getDateResolution(String fieldName) { if (fieldName == null) { throw new IllegalArgumentException("Field must not be null."); } if (fieldToDateResolution == null) { // no field specific date resolutions set; return default date resolution instead return this.dateResolution; } DateTools.Resolution resolution = fieldToDateResolution.get(fieldName); if (resolution == null) { // no date resolutions set for the given field; return default date resolution instead resolution = this.dateResolution; } return resolution; } /** * @param determinizeWorkLimit the maximum effort that determinizing a regexp query can spend. If * the query requires more effort, a TooComplexToDeterminizeException is thrown. */ public void setDeterminizeWorkLimit(int determinizeWorkLimit) { this.determinizeWorkLimit = determinizeWorkLimit; } /** * @return the maximum effort that determinizing a regexp query can spend. If the query requires * more effort, a TooComplexToDeterminizeException is thrown. */ public int getDeterminizeWorkLimit() { return determinizeWorkLimit; } protected void addClause(List clauses, int conj, int mods, Query q) { boolean required, prohibited; // If this term is introduced by AND, make the preceding term required, // unless it's already prohibited if (clauses.size() > 0 && conj == CONJ_AND) { BooleanClause c = clauses.get(clauses.size()-1); if (!c.isProhibited()) clauses.set(clauses.size() - 1, new BooleanClause(c.getQuery(), Occur.MUST)); } if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) { // If this term is introduced by OR, make the preceding term optional, // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b) // notice if the input is a OR b, first term is parsed as required; without // this modification a OR b would parsed as +a OR b BooleanClause c = clauses.get(clauses.size()-1); if (!c.isProhibited()) clauses.set(clauses.size() - 1, new BooleanClause(c.getQuery(), Occur.SHOULD)); } // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) return; if (operator == OR_OPERATOR) { // We set REQUIRED if we're introduced by AND or +; PROHIBITED if // introduced by NOT or -; make sure not to set both. prohibited = (mods == MOD_NOT); required = (mods == MOD_REQ); if (conj == CONJ_AND && !prohibited) { required = true; } } else { // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED // if not PROHIBITED and not introduced by OR prohibited = (mods == MOD_NOT); required = (!prohibited && conj != CONJ_OR); } if (required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST)); else if (!required && !prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.SHOULD)); else if (!required && prohibited) clauses.add(newBooleanClause(q, BooleanClause.Occur.MUST_NOT)); else throw new RuntimeException("Clause cannot be both required and prohibited"); } /** * Adds clauses generated from analysis over text containing whitespace. * There are no operators, so the query's clauses can either be MUST (if the * default operator is AND) or SHOULD (default OR). * * If all of the clauses in the given Query are TermQuery-s, this method flattens the result * by adding the TermQuery-s individually to the output clause list; otherwise, the given Query * is added as a single clause including its nested clauses. */ protected void addMultiTermClauses(List clauses, Query q) { // We might have been passed a null query; the term might have been // filtered away by the analyzer. if (q == null) { return; } boolean allNestedTermQueries = false; if (q instanceof BooleanQuery) { allNestedTermQueries = true; for (BooleanClause clause : ((BooleanQuery)q).clauses()) { if ( ! (clause.getQuery() instanceof TermQuery)) { allNestedTermQueries = false; break; } } } if (allNestedTermQueries) { clauses.addAll(((BooleanQuery)q).clauses()); } else { BooleanClause.Occur occur = operator == OR_OPERATOR ? BooleanClause.Occur.SHOULD : BooleanClause.Occur.MUST; if (q instanceof BooleanQuery) { for (BooleanClause clause : ((BooleanQuery)q).clauses()) { clauses.add(newBooleanClause(clause.getQuery(), occur)); } } else { clauses.add(newBooleanClause(q, occur)); } } } /** * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException { return newFieldQuery(getAnalyzer(), field, queryText, quoted); } /** * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query newFieldQuery(Analyzer analyzer, String field, String queryText, boolean quoted) throws ParseException { BooleanClause.Occur occur = operator == Operator.AND ? BooleanClause.Occur.MUST : BooleanClause.Occur.SHOULD; return createFieldQuery(analyzer, occur, field, queryText, quoted || autoGeneratePhraseQueries, phraseSlop); } /** * Base implementation delegates to {@link #getFieldQuery(String,String,boolean)}. * This method may be overridden, for example, to return * a SpanNearQuery instead of a PhraseQuery. * * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException { Query query = getFieldQuery(field, queryText, true); if (query instanceof PhraseQuery) { query = addSlopToPhrase((PhraseQuery) query, slop); } else if (query instanceof MultiPhraseQuery) { MultiPhraseQuery mpq = (MultiPhraseQuery)query; if (slop != mpq.getSlop()) { query = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build(); } } return query; } /** * Rebuild a phrase query with a slop value */ private PhraseQuery addSlopToPhrase(PhraseQuery query, int slop) { PhraseQuery.Builder builder = new PhraseQuery.Builder(); builder.setSlop(slop); org.apache.lucene.index.Term[] terms = query.getTerms(); int[] positions = query.getPositions(); for (int i = 0; i < terms.length; ++i) { builder.add(terms[i], positions[i]); } return builder.build(); } protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException { DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale); df.setLenient(true); DateTools.Resolution resolution = getDateResolution(field); try { part1 = DateTools.dateToString(df.parse(part1), resolution); } catch (Exception e) { } try { Date d2 = df.parse(part2); if (endInclusive) { // The user can only specify the date, not the time, so make sure // the time is set to the latest possible time of that date to really // include all documents: Calendar cal = Calendar.getInstance(timeZone, locale); cal.setTime(d2); cal.set(Calendar.HOUR_OF_DAY, 23); cal.set(Calendar.MINUTE, 59); cal.set(Calendar.SECOND, 59); cal.set(Calendar.MILLISECOND, 999); d2 = cal.getTime(); } part2 = DateTools.dateToString(d2, resolution); } catch (Exception e) { } return newRangeQuery(field, part1, part2, startInclusive, endInclusive); } /** * Builds a new BooleanClause instance * @param q sub query * @param occur how this clause should occur when matching documents * @return new BooleanClause instance */ protected BooleanClause newBooleanClause(Query q, BooleanClause.Occur occur) { return new BooleanClause(q, occur); } /** * Builds a new PrefixQuery instance * @param prefix Prefix term * @return new PrefixQuery instance */ protected Query newPrefixQuery(Term prefix){ PrefixQuery query = new PrefixQuery(prefix); query.setRewriteMethod(multiTermRewriteMethod); return query; } /** * Builds a new RegexpQuery instance * @param regexp Regexp term * @return new RegexpQuery instance */ protected Query newRegexpQuery(Term regexp) { RegexpQuery query = new RegexpQuery(regexp, RegExp.ALL, determinizeWorkLimit); query.setRewriteMethod(multiTermRewriteMethod); return query; } /** * Builds a new FuzzyQuery instance * @param term Term * @param minimumSimilarity minimum similarity * @param prefixLength prefix length * @return new FuzzyQuery Instance */ protected Query newFuzzyQuery(Term term, float minimumSimilarity, int prefixLength) { // FuzzyQuery doesn't yet allow constant score rewrite String text = term.text(); int numEdits = FuzzyQuery.floatToEdits(minimumSimilarity, text.codePointCount(0, text.length())); return new FuzzyQuery(term,numEdits,prefixLength); } /** * Builds a new {@link TermRangeQuery} instance * @param field Field * @param part1 min * @param part2 max * @param startInclusive true if the start of the range is inclusive * @param endInclusive true if the end of the range is inclusive * @return new {@link TermRangeQuery} instance */ protected Query newRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) { final BytesRef start; final BytesRef end; if (part1 == null) { start = null; } else { start = getAnalyzer().normalize(field, part1); } if (part2 == null) { end = null; } else { end = getAnalyzer().normalize(field, part2); } final TermRangeQuery query = new TermRangeQuery(field, start, end, startInclusive, endInclusive); query.setRewriteMethod(multiTermRewriteMethod); return query; } /** * Builds a new MatchAllDocsQuery instance * @return new MatchAllDocsQuery instance */ protected Query newMatchAllDocsQuery() { return new MatchAllDocsQuery(); } /** * Builds a new WildcardQuery instance * @param t wildcard term * @return new WildcardQuery instance */ protected Query newWildcardQuery(Term t) { WildcardQuery query = new WildcardQuery(t, determinizeWorkLimit); query.setRewriteMethod(multiTermRewriteMethod); return query; } /** * Factory method for generating query, given a set of clauses. * By default creates a boolean query composed of clauses passed in. * * Can be overridden by extending classes, to modify query being * returned. * * @param clauses List that contains {@link org.apache.lucene.search.BooleanClause} instances * to join. * * @return Resulting {@link org.apache.lucene.search.Query} object. * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getBooleanQuery(List clauses) throws ParseException { if (clauses.size()==0) { return null; // all clause words were filtered away by the analyzer. } BooleanQuery.Builder query = newBooleanQuery(); for(final BooleanClause clause: clauses) { query.add(clause); } return query.build(); } /** * Factory method for generating a query. Called when parser * parses an input term token that contains one or more wildcard * characters (? and *), but is not a prefix term token (one * that has just a single * character at the end) *

* Depending on settings, prefix term may be lower-cased * automatically. It will not go through the default Analyzer, * however, since normal Analyzers are unlikely to work properly * with wildcard templates. *

* Can be overridden by extending classes, to provide custom handling for * wildcard queries, which may be necessary due to missing analyzer calls. * * @param field Name of the field query will use. * @param termStr Term token that contains one or more wild card * characters (? or *), but is not simple prefix term * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getWildcardQuery(String field, String termStr) throws ParseException { if ("*".equals(field)) { if ("*".equals(termStr)) return newMatchAllDocsQuery(); } if (!allowLeadingWildcard && (termStr.startsWith("*") || termStr.startsWith("?"))) throw new ParseException("'*' or '?' not allowed as first character in WildcardQuery"); Term t = new Term(field, analyzeWildcard(field, termStr)); return newWildcardQuery(t); } private static final Pattern WILDCARD_PATTERN = Pattern.compile("(\\\\.)|([?*]+)"); private BytesRef analyzeWildcard(String field, String termStr) { // best effort to not pass the wildcard characters and escaped characters through #normalize Matcher wildcardMatcher = WILDCARD_PATTERN.matcher(termStr); BytesRefBuilder sb = new BytesRefBuilder(); int last = 0; while (wildcardMatcher.find()){ if (wildcardMatcher.start() > 0) { String chunk = termStr.substring(last, wildcardMatcher.start()); BytesRef normalized = getAnalyzer().normalize(field, chunk); sb.append(normalized); } //append the matched group - without normalizing sb.append(new BytesRef(wildcardMatcher.group())); last = wildcardMatcher.end(); } if (last < termStr.length()){ String chunk = termStr.substring(last); BytesRef normalized = getAnalyzer().normalize(field, chunk); sb.append(normalized); } return sb.toBytesRef(); } /** * Factory method for generating a query. Called when parser * parses an input term token that contains a regular expression * query. *

* Depending on settings, pattern term may be lower-cased * automatically. It will not go through the default Analyzer, * however, since normal Analyzers are unlikely to work properly * with regular expression templates. *

* Can be overridden by extending classes, to provide custom handling for * regular expression queries, which may be necessary due to missing analyzer * calls. * * @param field Name of the field query will use. * @param termStr Term token that contains a regular expression * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getRegexpQuery(String field, String termStr) throws ParseException { // We need to pass the whole string to #normalize, which will not work with // custom attribute factories for the binary term impl, and may not work // with some analyzers BytesRef term = getAnalyzer().normalize(field, termStr); Term t = new Term(field, term); return newRegexpQuery(t); } /** * Factory method for generating a query (similar to * {@link #getWildcardQuery}). Called when parser parses an input term * token that uses prefix notation; that is, contains a single '*' wildcard * character as its last character. Since this is a special case * of generic wildcard term, and such a query can be optimized easily, * this usually results in a different query object. *

* Depending on settings, a prefix term may be lower-cased * automatically. It will not go through the default Analyzer, * however, since normal Analyzers are unlikely to work properly * with wildcard templates. *

* Can be overridden by extending classes, to provide custom handling for * wild card queries, which may be necessary due to missing analyzer calls. * * @param field Name of the field query will use. * @param termStr Term token to use for building term for the query * (without trailing '*' character!) * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getPrefixQuery(String field, String termStr) throws ParseException { if (!allowLeadingWildcard && termStr.startsWith("*")) throw new ParseException("'*' not allowed as first character in PrefixQuery"); BytesRef term = getAnalyzer().normalize(field, termStr); Term t = new Term(field, term); return newPrefixQuery(t); } /** * Factory method for generating a query (similar to * {@link #getWildcardQuery}). Called when parser parses * an input term token that has the fuzzy suffix (~) appended. * * @param field Name of the field query will use. * @param termStr Term token to use for building term for the query * * @return Resulting {@link org.apache.lucene.search.Query} built for the term * @exception org.apache.lucene.queryparser.classic.ParseException throw in overridden method to disallow */ protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException { BytesRef term = getAnalyzer().normalize(field, termStr); Term t = new Term(field, term); return newFuzzyQuery(t, minSimilarity, fuzzyPrefixLength); } // extracted from the .jj grammar Query handleBareTokenQuery(String qfield, Token term, Token fuzzySlop, boolean prefix, boolean wildcard, boolean fuzzy, boolean regexp) throws ParseException { Query q; String termImage=discardEscapeChar(term.image); if (wildcard) { q = getWildcardQuery(qfield, term.image); } else if (prefix) { q = getPrefixQuery(qfield, discardEscapeChar(term.image.substring (0, term.image.length()-1))); } else if (regexp) { q = getRegexpQuery(qfield, term.image.substring(1, term.image.length()-1)); } else if (fuzzy) { q = handleBareFuzzy(qfield, fuzzySlop, termImage); } else { q = getFieldQuery(qfield, termImage, false); } return q; } Query handleBareFuzzy(String qfield, Token fuzzySlop, String termImage) throws ParseException { Query q; float fms = fuzzyMinSim; try { fms = Float.parseFloat(fuzzySlop.image.substring(1)); } catch (Exception ignored) { } if(fms < 0.0f){ throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !"); } else if (fms >= 1.0f && fms != (int) fms) { throw new ParseException("Fractional edit distances are not allowed!"); } q = getFuzzyQuery(qfield, termImage, fms); return q; } // extracted from the .jj grammar Query handleQuotedTerm(String qfield, Token term, Token fuzzySlop) throws ParseException { int s = phraseSlop; // default if (fuzzySlop != null) { try { s = (int)Float.parseFloat(fuzzySlop.image.substring(1)); } catch (Exception ignored) { } } return getFieldQuery(qfield, discardEscapeChar(term.image.substring(1, term.image.length()-1)), s); } // extracted from the .jj grammar Query handleBoost(Query q, Token boost) { if (boost != null) { float f = (float) 1.0; try { f = Float.parseFloat(boost.image); } catch (Exception ignored) { /* Should this be handled somehow? (defaults to "no boost", if * boost number is invalid) */ } // avoid boosting null queries, such as those caused by stop words if (q != null) { q = new BoostQuery(q, f); } } return q; } /** * Returns a String where the escape char has been * removed, or kept only once if there was a double escape. * * Supports escaped unicode characters, e. g. translates * \\u0041 to A. * */ String discardEscapeChar(String input) throws ParseException { // Create char array to hold unescaped char sequence char[] output = new char[input.length()]; // The length of the output can be less than the input // due to discarded escape chars. This variable holds // the actual length of the output int length = 0; // We remember whether the last processed character was // an escape character boolean lastCharWasEscapeChar = false; // The multiplier the current unicode digit must be multiplied with. // E. g. the first digit must be multiplied with 16^3, the second with 16^2... int codePointMultiplier = 0; // Used to calculate the codepoint of the escaped unicode character int codePoint = 0; for (int i = 0; i < input.length(); i++) { char curChar = input.charAt(i); if (codePointMultiplier > 0) { codePoint += hexToInt(curChar) * codePointMultiplier; codePointMultiplier >>>= 4; if (codePointMultiplier == 0) { output[length++] = (char)codePoint; codePoint = 0; } } else if (lastCharWasEscapeChar) { if (curChar == 'u') { // found an escaped unicode character codePointMultiplier = 16 * 16 * 16; } else { // this character was escaped output[length] = curChar; length++; } lastCharWasEscapeChar = false; } else { if (curChar == '\\') { lastCharWasEscapeChar = true; } else { output[length] = curChar; length++; } } } if (codePointMultiplier > 0) { throw new ParseException("Truncated unicode escape sequence."); } if (lastCharWasEscapeChar) { throw new ParseException("Term can not end with escape character."); } return new String(output, 0, length); } /** Returns the numeric value of the hexadecimal character */ static final int hexToInt(char c) throws ParseException { if ('0' <= c && c <= '9') { return c - '0'; } else if ('a' <= c && c <= 'f'){ return c - 'a' + 10; } else if ('A' <= c && c <= 'F') { return c - 'A' + 10; } else { throw new ParseException("Non-hex character in Unicode escape sequence: " + c); } } /** * Returns a String where those characters that QueryParser * expects to be escaped are escaped by a preceding \. */ public static String escape(String s) { StringBuilder sb = new StringBuilder(); for (int i = 0; i < s.length(); i++) { char c = s.charAt(i); // These characters are part of the query syntax and must be escaped if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':' || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~' || c == '*' || c == '?' || c == '|' || c == '&' || c == '/') { sb.append('\\'); } sb.append(c); } return sb.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy