Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.search;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenFilterFactory;
import org.apache.lucene.analysis.core.StopFilterFactory;
import org.apache.lucene.index.Term;
import org.apache.lucene.queries.function.FunctionQuery;
import org.apache.lucene.queries.function.FunctionScoreQuery;
import org.apache.lucene.queries.function.ValueSource;
import org.apache.lucene.queries.function.valuesource.ProductFloatFunction;
import org.apache.lucene.queries.function.valuesource.QueryValueSource;
import org.apache.lucene.queries.spans.SpanQuery;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.solr.analysis.TokenizerChain;
import org.apache.solr.common.SolrException;
import org.apache.solr.common.SolrException.ErrorCode;
import org.apache.solr.common.params.DisMaxParams;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.common.util.CollectionUtil;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.common.util.StrUtils;
import org.apache.solr.parser.QueryParser;
import org.apache.solr.parser.SolrQueryParserBase.MagicFieldName;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.FieldType;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.search.ExtendedDismaxQParser.ExtendedSolrQueryParser.Alias;
import org.apache.solr.util.SolrPluginUtils;
/**
* Query parser that generates DisjunctionMaxQueries based on user configuration. See the
* "Extended DisMax (eDisMax) Query Parser" page of the Solr Reference Guide.
*/
public class ExtendedDismaxQParser extends QParser {
/**
 * A field we can't ever find in any schema, so we can safely tell DisjunctionMaxQueryParser to
 * use it as our defaultField, and map aliases from it to any field in our schema.
 *
 * <p>Declared {@code final}: it is a constant and must never be reassigned.
 */
private static final String IMPOSSIBLE_FIELD_NAME = "\uFFFC\uFFFC\uFFFC";
/** Empty subclass of {@link SolrPluginUtils} that serves only as a shorter name for it. */
private static class U extends SolrPluginUtils {
  // intentionally empty
}
/** shorten the class references for utilities */
private static interface DMP extends DisMaxParams {
/**
* User fields. The fields that can be used by the end user to create field-specific queries.
*/
public static String UF = "uf";
/**
* Lowercase Operators. If set to true, 'or' and 'and' will be considered OR and AND, otherwise
* lowercase operators will be considered terms to search for.
*/
public static String LOWERCASE_OPS = "lowercaseOperators";
/**
* Multiplicative boost. Boost functions which scores are going to be multiplied to the score of
* the main query (instead of just added, like with bf)
*/
public static String MULT_BOOST = "boost";
/** If set to true, stopwords are removed from the query. */
public static String STOPWORDS = "stopwords";
}
/** Parse configuration; created once in the constructor via {@code createConfiguration}. */
private ExtendedDismaxConfiguration config;

/** Query parsed from the user's query string by {@code parse()}; null if none was parsed. */
private Query parsedUserQuery;

/** Alternate query used by {@code parse()} when the user's query string is blank. */
private Query altUserQuery;

/** Boost queries collected by the most recent {@code parse()} call. */
private List<Query> boostQueries;

/** Set to true as soon as {@code parse()} has been entered. */
private boolean parsed = false;
/**
 * Creates the parser and immediately builds its configuration from the request.
 *
 * @param qstr the query string
 * @param localParams local params of the query
 * @param params request params
 * @param req the current request
 */
public ExtendedDismaxQParser(
    String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  super(qstr, localParams, params, req);
  // createConfiguration is protected, so the configuration class can be swapped by subclasses
  this.config = createConfiguration(qstr, localParams, params, req);
}
/**
 * Builds the complete query: the main user query (or the configured alternate query when the
 * query string is blank), plus optional phrase-field queries, boost queries and boost functions,
 * finally wrapped in multiplicative boosts if any are configured.
 *
 * @return the assembled query, or {@code null} when the query string is blank and no alternate
 *     query is configured
 * @throws SyntaxError if a sub query cannot be parsed
 */
@Override
public Query parse() throws SyntaxError {
  parsed = true;

  /* the main query we will execute; it is an artificial construct wrapping
   * the user query and all boosts
   */
  BooleanQuery.Builder query = new BooleanQuery.Builder();

  /* * * Main User Query * * */
  parsedUserQuery = null;
  String userQuery = getString();
  altUserQuery = null;
  if (StrUtils.isBlank(userQuery)) {
    // If no query is specified, we may have an alternate
    if (config.altQ != null) {
      QParser altQParser = subQuery(config.altQ, null);
      altUserQuery = altQParser.getQuery();
      query.add(altUserQuery, BooleanClause.Occur.MUST);
    } else {
      return null;
      // throw new SyntaxError("missing query string" );
    }
  } else {
    // There is a valid query string
    ExtendedSolrQueryParser up = createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME);
    up.addAlias(IMPOSSIBLE_FIELD_NAME, config.tiebreaker, config.queryFields);
    addAliasesFromRequest(up, config.tiebreaker);
    validateQueryFields(up);
    up.setPhraseSlop(config.qslop); // slop for explicit user phrase queries
    up.setAllowLeadingWildcard(true);
    up.setAllowSubQueryParsing(config.userFields.isAllowed(MagicFieldName.QUERY.field));

    // Defer escaping: it is only done if the plain lucene parse fails.
    // The clauses are needed for the sloppy phrase queries in any case.
    List<Clause> clauses = splitIntoClauses(userQuery, false);

    // Always rebuild mainUserQuery from clauses to catch modifications from splitIntoClauses.
    // This was necessary for userFields modifications to get propagated into the query.
    // Lower or mixed case operators are converted to uppercase here, but only for the lucene
    // query part and not for phrase query boosting, since some fields might not be case
    // insensitive. A regex is not used because it could change an AND or OR inside a phrase
    // query on a case sensitive field.
    String mainUserQuery = rebuildUserQuery(clauses, config.lowercaseOperators);

    // but always for unstructured implicit bqs created by getFieldQuery
    up.minShouldMatch = config.minShouldMatch;
    up.setSplitOnWhitespace(config.splitOnWhitespace);

    parsedUserQuery = parseOriginalQuery(up, mainUserQuery, clauses, config);
    if (parsedUserQuery == null) {
      // the raw query did not parse cleanly: retry with reserved characters escaped
      parsedUserQuery = parseEscapedQuery(up, escapeUserQuery(clauses), config);
    }
    query.add(parsedUserQuery, BooleanClause.Occur.MUST);
    addPhraseFieldQueries(query, clauses, config);
  }

  /* * * Boosting Query * * */
  // iterate a typed local so this loop does not rely on how the field is declared
  List<Query> parsedBoostQueries = getBoostQueries();
  boostQueries = parsedBoostQueries;
  for (Query f : parsedBoostQueries) {
    query.add(f, BooleanClause.Occur.SHOULD);
  }

  /* * * Boosting Functions * * */
  List<Query> boostFunctions = getBoostFunctions();
  for (Query f : boostFunctions) {
    query.add(f, BooleanClause.Occur.SHOULD);
  }

  //
  // create a boosted query (scores multiplied by boosts)
  //
  Query topQuery = QueryUtils.build(query, this);
  List<ValueSource> boosts = getMultiplicativeBoosts();
  if (boosts.size() > 1) {
    // several boosts: multiply them together first
    ValueSource prod = new ProductFloatFunction(boosts.toArray(new ValueSource[0]));
    topQuery = FunctionScoreQuery.boostByValue(topQuery, prod.asDoubleValuesSource());
  } else if (boosts.size() == 1) {
    topQuery = FunctionScoreQuery.boostByValue(topQuery, boosts.get(0).asDoubleValuesSource());
  }
  return topQuery;
}
/**
 * Validate query field names. Each must be explicitly defined in the schema or match a dynamic
 * field pattern. For a field alias, the source field(s) it represents are checked instead.
 *
 * @param up parser used
 * @throws SyntaxError for invalid field name
 */
protected void validateQueryFields(ExtendedSolrQueryParser up) throws SyntaxError {
  // collect the source (non-alias) fields behind every configured query field
  List<String> flds = new ArrayList<>(config.queryFields.keySet().size());
  for (String fieldName : config.queryFields.keySet()) {
    buildQueryFieldList(fieldName, up.getAlias(fieldName), flds, up);
  }
  checkFieldsInSchema(flds);
}
/**
 * Build list of source (non-alias) query field names. Recursive through aliases.
 *
 * @param fieldName query field name
 * @param alias field alias registered for {@code fieldName}, or null if it is a plain field
 * @param flds list of query field names; resolved names are appended to it
 * @param up parser used
 * @throws SyntaxError for invalid field name
 */
private void buildQueryFieldList(
    String fieldName, Alias alias, List<String> flds, ExtendedSolrQueryParser up)
    throws SyntaxError {
  if (null == alias) {
    // not an alias: this is a source field
    flds.add(fieldName);
    return;
  }
  // check for alias cycles before recursing into the alias' fields
  up.validateCyclicAliasing(fieldName);
  flds.addAll(getFieldsFromAlias(up, alias));
}
/**
 * Return list of source (non-alias) field names from an alias. Fields of the alias that are
 * themselves aliases are resolved recursively.
 *
 * @param up parser used
 * @param a field alias
 * @return list of source fields
 * @throws SyntaxError for invalid field name
 */
private List<String> getFieldsFromAlias(ExtendedSolrQueryParser up, Alias a) throws SyntaxError {
  List<String> lst = new ArrayList<>();
  for (String s : a.fields.keySet()) {
    buildQueryFieldList(s, up.getAlias(s), lst, up);
  }
  return lst;
}
/**
 * Verify that a field name exists in the schema, by looking it up with
 * {@code config.schema.getField}.
 *
 * @param fieldName source field name to verify
 * @throws SyntaxError if the schema lookup fails with a {@link SolrException}
 */
private void checkFieldInSchema(String fieldName) throws SyntaxError {
  try {
    // only the lookup's success matters; the returned field is not needed
    config.schema.getField(fieldName);
  } catch (SolrException lookupFailure) {
    final String message = "Query Field '" + fieldName + "' is not a valid field name";
    throw new SyntaxError(message, lookupFailure);
  }
}
/**
 * Verify a list of source field names, stopping at the first invalid one.
 *
 * @param flds list of source field names to verify
 * @throws SyntaxError for invalid field name
 */
private void checkFieldsInSchema(List<String> flds) throws SyntaxError {
  for (String fieldName : flds) {
    checkFieldInSchema(fieldName);
  }
}
/** Adds shingled phrase queries to all the fields specified in the pf, pf2 anf pf3 parameters */
protected void addPhraseFieldQueries(
BooleanQuery.Builder query, List clauses, ExtendedDismaxConfiguration config)
throws SyntaxError {
// sloppy phrase queries for proximity
List allPhraseFields = config.getAllPhraseFields();
if (allPhraseFields.size() > 0) {
// find non-field clauses
List normalClauses = new ArrayList<>(clauses.size());
for (Clause clause : clauses) {
if (clause.field != null || clause.isPhrase) continue;
// check for keywords "AND,OR,TO"
if (clause.isBareWord()) {
String s = clause.val;
// avoid putting explicit operators in the phrase query
if ("OR".equals(s) || "AND".equals(s) || "NOT".equals(s) || "TO".equals(s)) continue;
}
normalClauses.add(clause);
}
// create a map of {wordGram, [phraseField]}
final Map> phraseFieldsByWordGram = new HashMap<>();
for (FieldParams fieldParams : allPhraseFields) {
phraseFieldsByWordGram
.computeIfAbsent(fieldParams.getWordGrams(), k -> new ArrayList<>())
.add(fieldParams);
}
// for each {wordGram, [phraseField]} entry, create and add shingled field queries to the main
// user query
for (Map.Entry> phraseFieldsByWordGramEntry :
phraseFieldsByWordGram.entrySet()) {
// group the fields within this wordGram collection by their associated slop (it's possible
// that the same field appears multiple times for the same wordGram count but with different
// slop values. In this case, we should take the *sum* of those phrase queries, rather than
// the max across them).
final Map> phraseFieldsBySlop = new HashMap<>();
for (FieldParams fieldParams : phraseFieldsByWordGramEntry.getValue()) {
phraseFieldsBySlop
.computeIfAbsent(fieldParams.getSlop(), k -> new ArrayList<>())
.add(fieldParams);
}
for (Map.Entry> phraseFieldsBySlopEntry :
phraseFieldsBySlop.entrySet()) {
addShingledPhraseQueries(
query,
normalClauses,
phraseFieldsBySlopEntry.getValue(),
phraseFieldsByWordGramEntry.getKey(),
config.tiebreaker,
phraseFieldsBySlopEntry.getKey());
}
}
}
}
/**
 * Factory for the {@link ExtendedDismaxConfiguration} holding the parameters needed to parse the
 * query. Note that {@code qstr} is not passed on to the configuration.
 *
 * @param qstr the query string
 * @param localParams local params of the query
 * @param params request params
 * @param req the current request
 * @return a new configuration instance
 */
protected ExtendedDismaxConfiguration createConfiguration(
    String qstr, SolrParams localParams, SolrParams params, SolrQueryRequest req) {
  final ExtendedDismaxConfiguration configuration =
      new ExtendedDismaxConfiguration(localParams, params, req);
  return configuration;
}
/**
 * Factory for the {@link ExtendedSolrQueryParser} that is going to be used to parse the query.
 *
 * @param qParser the owning parser
 * @param field default field handed to the new parser
 * @return a new query parser instance
 */
protected ExtendedSolrQueryParser createEdismaxQueryParser(QParser qParser, String field) {
  final ExtendedSolrQueryParser queryParser = new ExtendedSolrQueryParser(qParser, field);
  return queryParser;
}
/**
 * Parses an escaped version of the user's query. This method is called in the event that the
 * original query encounters exceptions during parsing.
 *
 * @param up parser used
 * @param escapedUserQuery query that is parsed, should already be escaped so that no trivial
 *     parse errors are encountered
 * @param config Configuration options for this parse request
 * @return the resulting query; a {@link BooleanQuery} result is flattened and has the "min should
 *     match" rules from the config applied, any other query is returned as parsed
 * @throws SyntaxError if the escaped query still cannot be parsed
 * @see #parseOriginalQuery
 * @see SolrPluginUtils#flattenBooleanQuery
 */
protected Query parseEscapedQuery(
    ExtendedSolrQueryParser up, String escapedUserQuery, ExtendedDismaxConfiguration config)
    throws SyntaxError {
  final Query parsed = up.parse(escapedUserQuery);
  if (!(parsed instanceof BooleanQuery)) {
    // nothing to flatten, and mm only applies to boolean queries
    return parsed;
  }
  final BooleanQuery.Builder flattened = new BooleanQuery.Builder();
  SolrPluginUtils.flattenBooleanQuery(flattened, (BooleanQuery) parsed);
  SolrPluginUtils.setMinShouldMatch(flattened, config.minShouldMatch, config.mmAutoRelax);
  return QueryUtils.build(flattened, this);
}
/**
 * Parses the user's original query. This method attempts to cleanly parse the specified query
 * string using the specified parser; any Exceptions are ignored, resulting in null being
 * returned if no query had been produced before the failure.
 *
 * @param up parser used; its stop filter and {@code exceptions} settings are modified
 * @param mainUserQuery query string that is parsed
 * @param clauses used to dictate "min should match" logic
 * @param config Configuration options for this parse request
 * @return the resulting query with "min should match" rules applied as specified in the config,
 *     or null if nothing could be parsed
 * @see #parseEscapedQuery
 */
protected Query parseOriginalQuery(
    ExtendedSolrQueryParser up,
    String mainUserQuery,
    List<Clause> clauses,
    ExtendedDismaxConfiguration config) {
  Query query = null;
  try {
    up.setRemoveStopFilter(!config.stopwords);
    up.exceptions = true;
    query = up.parse(mainUserQuery);
    if (shouldRemoveStopFilter(config, query)) {
      // if the query was all stop words, remove none of them
      up.setRemoveStopFilter(true);
      query = up.parse(mainUserQuery);
    }
  } catch (Exception e) {
    // deliberately ignored: the caller reparses later after escaping reserved chars
    up.exceptions = false;
  }
  if (query == null) {
    return null;
  }

  // For correct lucene queries, turn off mm processing if no explicit mm spec was provided
  // and there were explicit operators (except for AND).
  if (query instanceof BooleanQuery) {
    // config.minShouldMatch holds the value of mm which MIGHT have come from the user,
    // but could also have been derived from q.op.
    String mmSpec = config.minShouldMatch;
    if (foundOperators(clauses, config.lowercaseOperators)) {
      // Use provided mm spec if present, otherwise turn off mm processing
      mmSpec = config.solrParams.get(DisMaxParams.MM, "0%");
    }
    query = SolrPluginUtils.setMinShouldMatch((BooleanQuery) query, mmSpec, config.mmAutoRelax);
  }
  return query;
}
/**
 * Determines if the query should be re-parsed with the stop filter removed.
 *
 * @param config Configuration options for this parse request
 * @param query the query produced by the first parse
 * @return true if stopword removal is enabled and the parsed query was empty, false in any other
 *     case
 */
protected boolean shouldRemoveStopFilter(ExtendedDismaxConfiguration config, Query query) {
  if (!config.stopwords) {
    return false;
  }
  return isEmpty(query);
}
/**
 * Rebuilds a query string from the clauses in a form meant to parse without errors: phrases and
 * the bare operators OR, AND and NOT are wrapped in double quotes. The must/must-not prefix and
 * the field prefix of each clause are kept, and fielded clauses get the default user field boost
 * appended, if one is configured.
 *
 * @param clauses the clauses of the user query
 * @return the escaped query string; every clause is followed by a single space
 */
private String escapeUserQuery(List<Clause> clauses) {
  StringBuilder sb = new StringBuilder();
  for (Clause clause : clauses) {
    String s = clause.val;
    // quote phrases, and operators so they are treated as plain terms
    boolean doQuote =
        clause.isPhrase || "OR".equals(s) || "AND".equals(s) || "NOT".equals(s);

    if (clause.must != 0) {
      sb.append(clause.must);
    }
    if (clause.field != null) {
      sb.append(clause.field);
      sb.append(':');
    }
    if (doQuote) {
      sb.append('"');
    }
    sb.append(s);
    if (doQuote) {
      sb.append('"');
    }
    if (clause.field != null) {
      // Add the default user field boost, if any
      Float boost = config.userFields.getBoost(clause.field);
      if (boost != null) sb.append("^").append(boost);
    }
    sb.append(' ');
  }
  return sb.toString();
}
/**
 * Returns true if at least one of the clauses is/has an explicit operator (except for AND).
 *
 * <p>Operators are a {@code +} or {@code -} prefix, or a bare-word {@code OR} / {@code NOT}.
 * When {@code lowercaseOperators} is set, {@code or} is matched case-insensitively, in line with
 * {@code rebuildUserQuery}, which uppercases lower and mixed case variants alike.
 *
 * @param clauses the clauses of the user query
 * @param lowercaseOperators whether lower/mixed case 'or' counts as an operator
 * @return true if an explicit operator other than AND was found
 */
private boolean foundOperators(List<Clause> clauses, boolean lowercaseOperators) {
  for (Clause clause : clauses) {
    if (clause.must == '+' || clause.must == '-') {
      return true;
    }
    if (clause.isBareWord()) {
      String s = clause.val;
      if ("OR".equals(s) || "NOT".equals(s)) {
        return true;
      }
      // mixed case ("Or") must count too, because it is rewritten to OR before parsing
      if (lowercaseOperators && "OR".equalsIgnoreCase(s)) {
        return true;
      }
    }
  }
  return false;
}
/**
 * Generates a query string from the raw clauses, uppercasing 'and' and 'or' as needed.
 *
 * @param clauses the clauses of the query string to be rebuilt
 * @param lowercaseOperators if true, lower and mixed case 'and' and 'or' clauses that are neither
 *     the first nor the last clause will be recognized as operators and uppercased in the final
 *     query string.
 * @return the generated query string; every clause is followed by a single space
 */
protected String rebuildUserQuery(List<Clause> clauses, boolean lowercaseOperators) {
  StringBuilder sb = new StringBuilder();
  for (int i = 0; i < clauses.size(); i++) {
    Clause clause = clauses.get(i);
    String s = clause.raw;
    // and and or won't be operators at the start or end
    if (lowercaseOperators && i > 0 && i + 1 < clauses.size()) {
      if ("AND".equalsIgnoreCase(s)) {
        s = "AND";
      } else if ("OR".equalsIgnoreCase(s)) {
        s = "OR";
      }
    }
    sb.append(s);
    sb.append(' ');
  }
  return sb.toString();
}
/**
 * Parses all multiplicative boosts. Each non-empty boost string is parsed as a function query;
 * if the result is not a {@link FunctionQuery}, it is wrapped in a {@link QueryValueSource}
 * with a default value of 1.0.
 *
 * @return the value sources of all configured boosts; empty if there are none
 * @throws SyntaxError if a boost cannot be parsed
 */
protected List<ValueSource> getMultiplicativeBoosts() throws SyntaxError {
  List<ValueSource> boosts = new ArrayList<>();
  if (config.hasMultiplicativeBoosts()) {
    for (String boostStr : config.multBoosts) {
      if (boostStr == null || boostStr.isEmpty()) continue;
      Query boost = subQuery(boostStr, FunctionQParserPlugin.NAME).getQuery();
      ValueSource vs;
      if (boost instanceof FunctionQuery) {
        vs = ((FunctionQuery) boost).getValueSource();
      } else {
        vs = new QueryValueSource(boost, 1.0f);
      }
      boosts.add(vs);
    }
  }
  return boosts;
}
/**
 * Parses all function queries. Every non-empty boost function string is split with
 * {@code SolrPluginUtils.parseFieldBoosts}; each key is parsed as a function query and wrapped
 * in a {@link BoostQuery} when a boost other than 1 is given for it.
 *
 * @return the parsed boost function queries; empty if there are none
 * @throws SyntaxError if a function cannot be parsed
 */
protected List<Query> getBoostFunctions() throws SyntaxError {
  List<Query> boostFunctions = new ArrayList<>();
  if (config.hasBoostFunctions()) {
    for (String boostFunc : config.boostFuncs) {
      if (null == boostFunc || boostFunc.isEmpty()) continue;
      Map<String, Float> ff = SolrPluginUtils.parseFieldBoosts(boostFunc);
      for (Map.Entry<String, Float> entry : ff.entrySet()) {
        Query fq = subQuery(entry.getKey(), FunctionQParserPlugin.NAME).getQuery();
        Float b = entry.getValue();
        if (null != b && b.floatValue() != 1f) {
          fq = new BoostQuery(fq, b);
        }
        boostFunctions.add(fq);
      }
    }
  }
  return boostFunctions;
}
/**
 * Parses all boost queries. Null or blank entries are skipped.
 *
 * @return the parsed boost queries; empty if there are none
 * @throws SyntaxError if a boost query cannot be parsed
 */
protected List<Query> getBoostQueries() throws SyntaxError {
  // named differently from the boostQueries field to avoid shadowing it
  List<Query> parsedBoosts = new ArrayList<>();
  if (config.hasBoostParams()) {
    for (String qs : config.boostParams) {
      // null guard keeps this in line with the other boost parsing methods
      if (qs == null || qs.trim().isEmpty()) continue;
      Query q = subQuery(qs, null).getQuery();
      parsedBoosts.add(q);
    }
  }
  return parsedBoosts;
}
/**
 * Extracts all the aliased fields from the request and adds them to {@code up}. An alias is
 * declared by a request parameter of the form {@code f.<name>.qf}, whose value is parsed as a
 * list of field boosts.
 *
 * <p>A parameter whose value yields no fields is skipped; it does not prevent the remaining
 * alias parameters from being registered.
 *
 * @param up parser that receives the aliases
 * @param tiebreaker tie breaker value passed along with every alias
 */
private void addAliasesFromRequest(ExtendedSolrQueryParser up, float tiebreaker) {
  Iterator<String> it = config.solrParams.getParameterNamesIterator();
  while (it.hasNext()) {
    String param = it.next();
    if (param.startsWith("f.") && param.endsWith(".qf")) {
      // Add the alias: strip the leading "f." and the trailing ".qf"
      String fname = param.substring(2, param.length() - 3);
      String qfReplacement = config.solrParams.get(param);
      Map<String, Float> parsedQf = SolrPluginUtils.parseFieldBoosts(qfReplacement);
      // skip only this empty alias; returning here would drop all later aliases
      if (parsedQf.isEmpty()) continue;
      up.addAlias(fname, tiebreaker, parsedQf);
    }
  }
}
/**
 * Modifies the main query by adding a new optional Query consisting of shingled phrase queries
 * across the specified clauses using the specified field =&gt; boost mappings.
 *
 * <p>Nothing is added when there are no fields, or when there are fewer clauses than
 * {@code shingleSize}.
 *
 * @param mainQuery Where the phrase boosting queries will be added
 * @param clauses Clauses that will be used to construct the phrases
 * @param fields Field =&gt; boost mappings for the phrase queries
 * @param shingleSize how big the phrases should be, 0 means a single phrase
 * @param tiebreaker tie breaker value for the DisjunctionMaxQueries
 * @param slop phrase slop set on the parser used for the phrase queries
 * @throws SyntaxError if the generated phrase query string cannot be parsed
 */
protected void addShingledPhraseQueries(
    final BooleanQuery.Builder mainQuery,
    final List<Clause> clauses,
    final Collection<FieldParams> fields,
    int shingleSize,
    final float tiebreaker,
    final int slop)
    throws SyntaxError {
  if (null == fields || fields.isEmpty() || null == clauses || clauses.size() < shingleSize)
    return;

  // a shingle size of 0 means one phrase spanning all clauses
  if (0 == shingleSize) shingleSize = clauses.size();
  final int lastClauseIndex = shingleSize - 1;

  // build one quoted phrase per window of shingleSize consecutive clauses
  StringBuilder userPhraseQuery = new StringBuilder();
  for (int i = 0; i < clauses.size() - lastClauseIndex; i++) {
    userPhraseQuery.append('"');
    for (int j = 0; j <= lastClauseIndex; j++) {
      userPhraseQuery.append(clauses.get(i + j).val);
      userPhraseQuery.append(' ');
    }
    userPhraseQuery.append('"');
    userPhraseQuery.append(' ');
  }

  /* for parsing sloppy phrases using DisjunctionMaxQueries */
  ExtendedSolrQueryParser pp = createEdismaxQueryParser(this, IMPOSSIBLE_FIELD_NAME);
  pp.addAlias(IMPOSSIBLE_FIELD_NAME, tiebreaker, getFieldBoosts(fields));
  pp.setPhraseSlop(slop);
  pp.setRemoveStopFilter(true); // remove stop filter and keep stopwords
  pp.setSplitOnWhitespace(config.splitOnWhitespace);

  /* :TODO: reevaluate using makeDismax=true vs false...
   *
   * The DismaxQueryParser always used DisjunctionMaxQueries for the
   * pf boost, for the same reasons it used them for the qf fields.
   * When Yonik first wrote the ExtendedDismaxQParserPlugin, he added
   * the "makeDismax=false" property to use BooleanQueries instead, but
   * when asked why his response was "I honestly don't recall" ...
   *
   * https://issues.apache.org/jira/browse/SOLR-1553?focusedCommentId=12793813#action_12793813
   *
   * so for now, we continue to use dismax style queries because it
   * seems the most logical and is back compatible, but we should
   * try to figure out what Yonik was thinking at the time (because he
   * rarely does things for no reason)
   */
  pp.makeDismax = true;

  // minClauseSize is independent of the shingleSize because of stop words
  // (if they are removed from the middle, so be it, but we need at least
  // two or there shouldn't be a boost)
  pp.minClauseSize = 2;

  // TODO: perhaps we shouldn't use synonyms either...
  Query phrase = pp.parse(userPhraseQuery.toString());
  if (phrase != null) {
    mainQuery.add(phrase, BooleanClause.Occur.SHOULD);
  }
}
/**
 * Collects the boost of every given field, keyed by field name. If a field name occurs more than
 * once, the boost of the last occurrence is kept.
 *
 * @param fields the fields to read name and boost from
 * @return a {fieldName, fieldBoost} map for the given fields.
 */
private Map<String, Float> getFieldBoosts(Collection<FieldParams> fields) {
  Map<String, Float> fieldBoostMap = CollectionUtil.newLinkedHashMap(fields.size());
  for (FieldParams field : fields) {
    fieldBoostMap.put(field.getField(), field.getBoost());
  }
  return fieldBoostMap;
}
/**
 * Returns the names of the configured query fields as the default highlight fields.
 *
 * @return a new array holding the keys of {@code config.queryFields}
 */
@Override
public String[] getDefaultHighlightFields() {
  final Set<String> queryFieldNames = config.queryFields.keySet();
  return queryFieldNames.toArray(new String[0]);
}
/**
 * Returns the query to use for highlighting, running {@link #parse()} first if that has not
 * happened yet.
 *
 * @return the parsed user query, or the alternate query when no user query was parsed
 * @throws SyntaxError if parsing fails
 */
@Override
public Query getHighlightQuery() throws SyntaxError {
  if (!parsed) {
    parse();
  }
  if (parsedUserQuery != null) {
    return parsedUserQuery;
  }
  return altUserQuery;
}
@Override
public void addDebugInfo(NamedList