All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pageseeder.flint.lucene.query.Question Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BoostQuery;
import org.apache.lucene.search.Query;
import org.pageseeder.flint.catalog.Catalog;
import org.pageseeder.flint.lucene.search.Fields;
import org.pageseeder.flint.lucene.search.Terms;
import org.pageseeder.flint.lucene.util.Beta;
import org.pageseeder.xmlwriter.XMLWritable;
import org.pageseeder.xmlwriter.XMLWriter;

import java.io.IOException;
import java.util.*;
import java.util.Map.Entry;

/**
 * A simple parameter to capture a search entered by a user.
 *
 * 

The question is what the user types in the input box and its scope can be a number of fields. * *

It acts on one or multiple fields, each field can have a different boost level. * *

Use the factory methods to create new question. * * @author Christophe Lauret (Weborganic) * @version 13 August 2010 */ @Beta public final class Question implements SearchParameter, XMLWritable { /** * If operators are supported in the question */ private final boolean _supportOperators; /** * If the default operator between terms is 'OR' or 'AND' */ private final boolean _defaultOperatorOR; /** * If wildcards '?' and '*' are supported */ private final boolean _supportWildcards; /** * The search value */ private final String _question; /** * The field names mapped to their boost value. */ private final Map _fields; /** * The computed query. */ private Query _query = null; // Constructors // ============================================================================================== /** * Creates a new question. * *

This is a low level API constructor; to ensure that this class works well, ensure that the * fields cannot be modified externally and that field names do not include empty strings. * * @param fields The fields to search mapped to their respective boost. * @param question The text entered by the user. * @param supportWildcards If wildcards ('?', '*') are supported in the question * @param supportOperators If operators (AND, OR) are supported in the question * @param defaultOperatorOR If the default operator between terms is 'OR' or 'AND' * * @throws NullPointerException If either argument is null. */ Question(Map fields, String question, boolean supportWildcards, boolean supportOperators, boolean defaultOperatorOR) throws NullPointerException { if (fields == null) throw new NullPointerException("fields"); if (question == null) throw new NullPointerException("question"); this._fields = fields; this._question = question; this._supportWildcards = supportWildcards; this._supportOperators = supportOperators; this._defaultOperatorOR = defaultOperatorOR; } // Methods // ============================================================================================== /** * Returns the list of fields this question applies to. * * @return the list of fields this question applies to. */ public Collection fields() { return this._fields.keySet(); } /** * Returns the underlying question string. * * @return the underlying questions string. */ public String question() { return this._question; } /** * Returns the boost value for the specified field. * * @param field the name of the field. * * @return the corresponding boost value. */ public float getBoost(String field) { Float boost = this._fields.get(field); return boost != null ? boost : 1.0f; } /** * A question is empty if either the question or the fields are empty. * * @return true if either the question or the fields are empty; * false if the question has a value and there is at least one field. */ @Override public boolean isEmpty() { return this._question.isEmpty() || this._fields.isEmpty(); } /** * Computes the query for this question. * *

This only needs to be done once. * * @param analyzer The analyser used by the underlying index. * @param catalog The catalog to know if a field is analyzed or not */ private void compute(Analyzer analyzer, Catalog catalog) { BooleanQuery.Builder query = new BooleanQuery.Builder(); if (this._supportOperators) { for (Entry e : this._fields.entrySet()) { Query q = catalog != null && !catalog.isTokenized(e.getKey()) && Queries.hasNoOperators(this._question) ? Queries.termQuery(e.getKey(), this._question, this._supportWildcards) : Queries.parseToQuery(e.getKey(), this._question, analyzer, this._defaultOperatorOR, this._supportWildcards); if (e.getValue() != 1f) q = new BoostQuery(q, e.getValue()); query.add(q, Occur.SHOULD); } } else { Occur with = this._defaultOperatorOR ? Occur.SHOULD : Occur.MUST; List values = Fields.toValues(this._question); for (Entry e : this._fields.entrySet()) { boolean analyzed = catalog == null || catalog.isTokenized(e.getKey()); BooleanQuery.Builder fieldQuery = new BooleanQuery.Builder(); for (String value : values) { if (!analyzed) { Query q = Queries.termQuery(e.getKey(), value, this._supportWildcards); if (e.getValue() != 1f) q = new BoostQuery(q, e.getValue()); fieldQuery.add(q, with); } else { for(Query q : Queries.toTermOrPhraseQueries(e.getKey(), value, this._supportWildcards, analyzer)) { if (e.getValue() != 1f) q = new BoostQuery(q, e.getValue()); fieldQuery.add(q, with); } } } query.add(fieldQuery.build(), Occur.SHOULD); } } this._query = query.build(); } /** * Computes the query for this question by removing any special character from the question and * closing quotation marks. * *

This method ignores any Lucene specific syntax by removing it from the input string. */ private void compute() { BooleanQuery.Builder query = new BooleanQuery.Builder(); if (this._supportOperators) { for (Entry e : this._fields.entrySet()) { Query q = Queries.parseToQuery(e.getKey(), this._question, null, false, this._supportWildcards); query.add(new BoostQuery(q, e.getValue()), Occur.SHOULD); } } else { List values = Fields.toValues(this._question); for (Entry e : this._fields.entrySet()) { BooleanQuery.Builder sub = new BooleanQuery.Builder(); for (String value : values) { Query q = Queries.toTermOrPhraseQuery(e.getKey(), value, this._supportWildcards); sub.add(new BoostQuery(q, e.getValue()), Occur.SHOULD); } query.add(sub.build(), Occur.SHOULD); } } this._query = query.build(); } /** * Returns a list of questions which are considered similar, that where one term was substituted * for a similar term. * Max size is set to 50. * * @param reader the reader to use to extract the similar (fuzzy) terms. * * @return a list of similar questions. * * @throws IOException If thrown by the reader while getting the fuzzy terms. */ public List similar(IndexReader reader) throws IOException { return similar(reader, 50); } /** * Returns a list of questions which are considered similar, that where one term was substituted * for a similar term. * * @param reader the reader to use to extract the similar (fuzzy) terms. * @param maxSize the maximum number of suggestions to return * * @return a list of similar questions. * * @throws IOException If thrown by the reader while getting the fuzzy terms. */ public List similar(IndexReader reader, int maxSize) throws IOException { List similar = new ArrayList<>(); // If the question contains a phrase, try removing the phrase if (this._question.indexOf('"') >= 0) { Question nophrase = new Builder().fields(this._fields).question(this._question.replace('"', ' ')).build(); nophrase.compute(); similar.add(nophrase); // No phrase, try substitution for each term } else { List values = Fields.toValues(this._question); big: for (String value : values) { Set fuzzy = new HashSet<>(); // collect fuzzy terms based on index for (String field : this._fields.keySet()) { List fuzzies = Terms.fuzzy(reader, new Term(field, value)); fuzzy.addAll(fuzzies); } // rewrite question for (String x : fuzzy) { Question q = new Builder().fields(this._fields).question(this._question.replaceAll("\\Q"+value+"\\E", x)).build(); q.compute(); similar.add(q); if (similar.size() >= maxSize) break big; } } } return similar; } /** * Returns this object as Lucene query instance. * * @see #isEmpty() * * @return this object as a Lucene query instance, or null if this query is empty. * @throws IllegalStateException if the query has not been computed before - should not happen if using factory * methods. */ @Override public Query toQuery() throws IllegalStateException { // Return null if empty if (isEmpty()) return null; // Query was not computed if (this._query == null) throw new IllegalStateException("Query has not been computed - call compute(Analyzer)"); return this._query; } /** * {@inheritDoc} */ @Override public void toXML(XMLWriter xml) throws IOException { xml.openElement("question", true); // indicate whether this search term is empty xml.attribute("is-empty", Boolean.toString(isEmpty())); xml.attribute("operators", this._supportOperators ? "true" : "false"); xml.attribute("wildcards", this._supportWildcards ? "true" : "false"); xml.attribute("default-operator", this._defaultOperatorOR ? "or" : "and"); if (this._query != null) { xml.attribute("query", this._query.toString()); } // details of the question for (Entry field : this._fields.entrySet()) { xml.openElement("field"); Float boost = field.getValue(); xml.attribute("boost", boost != null? boost.toString() : "1.0"); xml.writeText(field.getKey()); xml.closeElement(); } xml.element("text", this._question); xml.closeElement(); } /** * {@inheritDoc} */ @Override public String toString() { return this._question + " in " + this._fields.entrySet(); } public static class Builder { private boolean supportOperators = true; private boolean defaultOperatorOR = true; private boolean supportWildcards = false; private String question = null; private Map fields = null; private Analyzer analyzer = null; private Catalog catalog = null; public Builder field(String fieldname) { this.fields = Fields.asBoostMap(Fields.filterNames(Collections.singletonList(fieldname))); return this; } public Builder fields(List fs) { this.fields = Fields.asBoostMap(Fields.filterNames(fs)); return this; } public Builder fields(Map fs) { this.fields = fs; return this; } public Builder question(String q) { this.question = q; return this; } public Builder supportWildcards(boolean support) { this.supportWildcards = support; return this; } public Builder supportOperators(boolean support) { this.supportOperators = support; return this; } public Builder defaultOperatorOR(boolean or) { this.defaultOperatorOR = or; return this; } public Builder analyzerAndCatalog(Analyzer a, Catalog c) { this.analyzer = a; this.catalog = c; return this; } public Question build() { if (this.fields == null) throw new IllegalStateException("Missing fields"); if (this.question == null) throw new IllegalStateException("Missing question"); Question q = new Question(this.fields, this.question, this.supportWildcards, this.supportOperators, this.defaultOperatorOR); if (this.analyzer == null && this.catalog == null) q.compute(); else q.compute(this.analyzer, this.catalog); return q; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy