All downloads are free. Search and download functionality uses the official Maven repository.

io.zulia.server.search.ZuliaMultiFieldQueryParser Maven / Gradle / Ivy

package io.zulia.server.search;

import io.zulia.server.config.ServerIndexConfig;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.DisjunctionMaxQuery;
import org.apache.lucene.search.MultiPhraseQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeSet;

/**
 * Query parser that expands terms carrying no explicit field across a list of default fields.
 * <p>
 * The per-field queries are combined either as a {@link BooleanQuery} of SHOULD clauses or, when
 * dismax has been enabled through {@link #enableDismax(float)}, as a {@link DisjunctionMaxQuery}.
 * Optional per-field boosts are supplied as part of the field specification
 * (see {@link #setDefaultFields(Collection)}).
 * <p>
 * Created by Matt Davis on 5/14/16.
 * Copied mostly from org.apache.lucene.queryparser.classic.MultiFieldQueryParser
 *
 * @author mdavis
 */
public class ZuliaMultiFieldQueryParser extends ZuliaQueryParser {

	/** Default fields a field-less term is expanded to; {@code null} after {@link #setDefaultField(String)}. */
	protected List<String> fields;
	/** Boost per field name; fields without an entry are not boosted. */
	protected Map<String, Float> boosts;
	/** Tie-breaker multiplier handed to {@link DisjunctionMaxQuery} when dismax is enabled. */
	private float dismaxTie = 0;
	private boolean dismax = false;

	public ZuliaMultiFieldQueryParser(Analyzer analyzer, ServerIndexConfig indexConfig) {
		super(analyzer, indexConfig);
	}

	/**
	 * Combines the per-field queries with a {@link DisjunctionMaxQuery} instead of a {@link BooleanQuery}.
	 *
	 * @param dismaxTie tie-breaker multiplier passed to the {@link DisjunctionMaxQuery}
	 */
	public void enableDismax(float dismaxTie) {
		this.dismaxTie = dismaxTie;
		this.dismax = true;
	}

	/** Reverts to combining the per-field queries with a {@link BooleanQuery} of SHOULD clauses. */
	public void disableDismax() {
		this.dismax = false;
	}

	/**
	 * Sets the fields searched when a query term has no explicit field.
	 * <p>
	 * Each entry is a field name, optionally followed by {@code ^boost} (for example {@code title^2.5}).
	 * A name containing {@code *} is expanded to every field reported by
	 * {@code indexConfig.getMatchingFields}, and the boost (if any) is applied to each of them.
	 *
	 * @param fields field specifications
	 * @throws IllegalArgumentException if the text after {@code ^} is not a valid float
	 */
	public void setDefaultFields(Collection<String> fields) {

		Map<String, Float> boostMap = new HashMap<>();
		Set<String> allFields = new TreeSet<>();
		for (String fieldSpec : fields) {

			String field = fieldSpec;
			Float boost = null;
			int caret = fieldSpec.indexOf('^');
			if (caret >= 0) {
				try {
					boost = Float.parseFloat(fieldSpec.substring(caret + 1));
				}
				catch (NumberFormatException e) {
					// report the complete original spec and keep the cause for diagnosis
					throw new IllegalArgumentException("Invalid queryText field boost <" + fieldSpec + ">", e);
				}
				field = fieldSpec.substring(0, caret);
			}

			if (field.contains("*")) {
				// NOTE(review): only '*' is translated; other regex metacharacters in the name (e.g. '.')
				// are passed through unescaped -- confirm this is what getMatchingFields expects
				String regex = field.replace("*", ".*");
				Set<String> fieldNames = indexConfig.getMatchingFields(regex);
				allFields.addAll(fieldNames);

				if (boost != null) {
					for (String f : fieldNames) {
						boostMap.put(f, boost);
					}
				}
			}
			else {
				allFields.add(field);
				if (boost != null) {
					boostMap.put(field, boost);
				}
			}
		}

		super.setDefaultField(null);
		this.fields = new ArrayList<>(allFields);
		this.boosts = boostMap;
	}

	/**
	 * Sets a single default field on the parent parser and clears the multi-field list.
	 */
	@Override
	public void setDefaultField(String field) {
		super.setDefaultField(field);
		this.fields = null;
	}

	/**
	 * Builds a (phrase) query with the given slop; when {@code field} is {@code null} one query per
	 * default field is built and the results are combined with {@link #getMultiFieldQuery(List)}.
	 *
	 * @return the query, or {@code null} if no field produced a query (e.g. stopwords only)
	 */
	@Override
	protected Query getFieldQuery(String field, String queryText, int slop) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				Query q = super.getFieldQuery(f, queryText, true);
				if (q != null) {
					// Slop has to be applied BEFORE boosting: applySlop only recognises (Multi)PhraseQuery,
					// so a phrase already wrapped in a BoostQuery would silently lose its slop.
					q = applySlop(q, slop);
					q = applyBoost(f, q);
					clauses.add(q);
				}
			}
			if (clauses.isEmpty()) { // happens for stopwords
				return null;
			}
			return getMultiFieldQuery(clauses);
		}
		Query q = super.getFieldQuery(field, queryText, true);
		return applySlop(q, slop);
	}

	/** Wraps {@code q} in a {@link BoostQuery} if a boost is configured for {@code field}; otherwise returns {@code q}. */
	private Query applyBoost(String field, Query q) {
		if (boosts != null) {
			Float boost = boosts.get(field);
			if (boost != null) {
				return new BoostQuery(q, boost);
			}
		}
		return q;
	}

	/**
	 * Returns a copy of {@code q} carrying the given slop when it is a {@link PhraseQuery} or
	 * {@link MultiPhraseQuery}; any other query (including {@code null}) is returned unchanged.
	 */
	private Query applySlop(Query q, int slop) {
		if (q instanceof PhraseQuery) {
			PhraseQuery.Builder builder = new PhraseQuery.Builder();
			builder.setSlop(slop);
			PhraseQuery pq = (PhraseQuery) q;
			org.apache.lucene.index.Term[] terms = pq.getTerms();
			int[] positions = pq.getPositions();
			for (int i = 0; i < terms.length; ++i) {
				builder.add(terms[i], positions[i]);
			}
			q = builder.build();
		}
		else if (q instanceof MultiPhraseQuery) {
			MultiPhraseQuery mpq = (MultiPhraseQuery) q;

			if (slop != mpq.getSlop()) {
				q = new MultiPhraseQuery.Builder(mpq).setSlop(slop).build();
			}
		}
		return q;
	}

	/**
	 * Builds a field query; when {@code field} is {@code null} the text is parsed once per default field.
	 * <p>
	 * If any field yields a multi-clause {@link BooleanQuery}, the clauses are regrouped by position:
	 * clause <i>n</i> of every field is OR-ed together into one SHOULD group, and the groups are then
	 * combined with {@link #getMultiFieldQuery(List)}.
	 *
	 * @return the query, or {@code null} if no field produced a query (e.g. stopwords only)
	 */
	@Override
	protected Query getFieldQuery(String field, String queryText, boolean quoted) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			Query[] fieldQueries = new Query[fields.size()];
			int maxTerms = 0;
			for (int i = 0; i < fields.size(); i++) {
				Query q = super.getFieldQuery(fields.get(i), queryText, quoted);
				if (q != null) {
					if (q instanceof BooleanQuery) {
						maxTerms = Math.max(maxTerms, ((BooleanQuery) q).clauses().size());
					}
					else {
						maxTerms = Math.max(1, maxTerms);
					}
					fieldQueries[i] = q;
				}
			}
			for (int termNum = 0; termNum < maxTerms; termNum++) {
				List<Query> termClauses = new ArrayList<>();
				for (int i = 0; i < fields.size(); i++) {
					if (fieldQueries[i] != null) {
						Query q = null;
						if (fieldQueries[i] instanceof BooleanQuery) {
							List<BooleanClause> nestedClauses = ((BooleanQuery) fieldQueries[i]).clauses();
							if (termNum < nestedClauses.size()) {
								q = nestedClauses.get(termNum).getQuery();
							}
						}
						else if (termNum == 0) { // e.g. TermQuery-s
							q = fieldQueries[i];
						}
						if (q != null) {
							termClauses.add(applyBoost(fields.get(i), q));
						}
					}
				}
				if (maxTerms > 1) {
					if (!termClauses.isEmpty()) {
						//mdavis - don't use super method because of min match
						BooleanQuery.Builder builder = new BooleanQuery.Builder();
						for (Query termClause : termClauses) {
							builder.add(termClause, BooleanClause.Occur.SHOULD);
						}
						clauses.add(builder.build());
					}
				}
				else {
					clauses.addAll(termClauses);
				}
			}
			if (clauses.isEmpty()) { // happens for stopwords
				return null;
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getFieldQuery(field, queryText, quoted);
	}

	/** Builds a fuzzy query; a {@code null} field fans out over all default fields. */
	@Override
	protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				clauses.add(getFuzzyQuery(f, termStr, minSimilarity));
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getFuzzyQuery(field, termStr, minSimilarity);
	}

	/** Builds a prefix query; a {@code null} field fans out over all default fields. */
	@Override
	protected Query getPrefixQuery(String field, String termStr) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				clauses.add(getPrefixQuery(f, termStr));
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getPrefixQuery(field, termStr);
	}

	/** Builds a wildcard query; a {@code null} field fans out over all default fields. */
	@Override
	protected Query getWildcardQuery(String field, String termStr) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				clauses.add(getWildcardQuery(f, termStr));
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getWildcardQuery(field, termStr);
	}

	/** Builds a range query; a {@code null} field fans out over all default fields. */
	@Override
	protected Query getRangeQuery(String field, String part1, String part2, boolean startInclusive, boolean endInclusive) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				clauses.add(getRangeQuery(f, part1, part2, startInclusive, endInclusive));
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getRangeQuery(field, part1, part2, startInclusive, endInclusive);
	}

	/** Builds a regexp query; a {@code null} field fans out over all default fields. */
	@Override
	protected Query getRegexpQuery(String field, String termStr) throws ParseException {
		if (field == null) {
			List<Query> clauses = new ArrayList<>();
			for (String f : fields) {
				clauses.add(getRegexpQuery(f, termStr));
			}
			return getMultiFieldQuery(clauses);
		}
		return super.getRegexpQuery(field, termStr);
	}

	/**
	 * Creates a multi-field query from the per-field queries.
	 *
	 * @param queries one query per field
	 * @return a {@link DisjunctionMaxQuery} when dismax is enabled, otherwise a {@link BooleanQuery}
	 *         of SHOULD clauses; {@code null} if {@code queries} is empty
	 */
	protected Query getMultiFieldQuery(List<Query> queries) throws ParseException {
		if (queries.isEmpty()) {
			return null; // all clause words were filtered away by the analyzer.
		}

		if (dismax) {
			return new DisjunctionMaxQuery(queries, dismaxTie);
		}
		else {
			//mdavis - don't use super method because of min match
			BooleanQuery.Builder query = new BooleanQuery.Builder();
			for (Query sub : queries) {
				query.add(sub, BooleanClause.Occur.SHOULD);
			}

			return query.build();
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy