All Downloads are FREE. Search and download functionalities are using the official Maven repository.

lux.query.parser.LuxQueryParser Maven / Gradle / Ivy

There is a newer version: 1.1.0
Show newest version
package lux.query.parser;

import lux.index.FieldRole;
import lux.index.IndexConfiguration;
import lux.index.field.FieldDefinition;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.ext.ExtendableQueryParser;
import org.apache.lucene.queryparser.ext.Extensions;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.spans.SpanNearQuery;
import org.apache.lucene.search.spans.SpanOrQuery;
import org.apache.lucene.search.spans.SpanQuery;
import org.apache.lucene.search.spans.SpanTermQuery;
import org.apache.lucene.util.Version;

/**
 * A Lucene query parser extension that supports query terms of the form:
 *
 * <blockquote><code>[node]&lt;[nodeName]:[term]</code></blockquote>
 *
 * <p>In which nodeName is either empty, an unqualified element name, a prefixed element name
 * (ie a QName), or a QName prefixed with "@", indicating an attribute. nodeName is optional:
 * if it is not present, a full text query of the entire document is indicated. The "node"
 * prefix is also optional. Concrete examples:</p>
 *
 * <pre>
 *  node&lt;:"Alas, poor Yorick"
 *  node&lt;title:Hamlet
 * </pre>
 *
 * <p>AND</p>
 *
 * <p>Boolean queries containing a marker term: lux_within:{slop} or lux_near:{slop} will be
 * replaced by a SpanNearQuery, if the term is required, or a SpanOrQuery, otherwise.  Note
 * that all BooleanQueries nested inside a marked query must also themselves be marked (as a Span).</p>
 */
public class LuxQueryParser extends ExtendableQueryParser {

    /** Marker field requesting an in-order span match; the term text carries the slop. */
    private static final String WITHIN_FIELD = "lux_within";

    /** Marker field requesting an unordered span match; the term text carries the slop. */
    private static final String NEAR_FIELD = "lux_near";

    private final NodeQueryBuilder queryBuilder;

    /**
     * Creates a parser; the first four arguments are passed straight through to
     * {@link ExtendableQueryParser}.
     * @param matchVersion the Lucene compatibility version
     * @param f the default field
     * @param a the analyzer handed to the superclass
     * @param ext the parser extensions handed to the superclass
     * @param queryBuilder receives the namespace bindings declared through this parser
     */
    public LuxQueryParser(Version matchVersion, String f, Analyzer a, Extensions ext, NodeQueryBuilder queryBuilder) {
        super(matchVersion, f, a, ext);
        this.queryBuilder = queryBuilder;
    }

    /**
     * Builds a parser wired up from the given index configuration: the default field is the
     * configuration's XML_TEXT field, and node queries are built with the ELEMENT_TEXT field's
     * analyzer when that field is defined.
     * @param config the index configuration supplying field names and analyzers
     * @return a new parser
     */
    public static LuxQueryParser makeLuxQueryParser(IndexConfiguration config) {
        FieldDefinition elementTextField = config.getField(FieldRole.ELEMENT_TEXT);
        Analyzer elementTextAnalyzer;
        if (elementTextField != null) {
            elementTextAnalyzer = elementTextField.getAnalyzer();
        } else {
            // no element-text field configured: use whatever the per-field wrapper returns for a null field name
            elementTextAnalyzer = config.getFieldAnalyzers().getWrappedAnalyzer(null);
        }
        NodeQueryBuilder queryBuilder = new NodeQueryBuilder(elementTextAnalyzer, config.isOption(IndexConfiguration.NAMESPACE_AWARE));
        NodeParser nodeParser = new NodeParser(
                config.getTextFieldName(),
                config.getFieldName(FieldRole.ELEMENT_TEXT),
                config.getFieldName(FieldRole.ATTRIBUTE_TEXT),
                queryBuilder);
        NodeExtensions ext = new NodeExtensions (nodeParser);
        return new LuxQueryParser(IndexConfiguration.LUCENE_VERSION,
                config.getFieldName(FieldRole.XML_TEXT),
                config.getFieldAnalyzers(),
                ext,
                queryBuilder);
    }

    /**
     * declares a namespace binding.
     * @param prefix The namespace prefix to bind.  No attempt is made to ensure that the prefix is syntactically valid.
     * It is not possible to declare a default namespace using this API: the default namespace is always no namespace
     * in this query syntax.  If this method is called with a null or empty prefix, the effect is undefined.
     * @param namespaceURI The namespace "URI" to bind. If empty or null, any existing binding for the prefix is removed.
     */
    public void bindNamespacePrefix (String prefix, String namespaceURI) {
        queryBuilder.bindNamespacePrefix(prefix, namespaceURI);
    }

    /**
     * Delegates to the query builder's {@code clearNamespaces()}; presumably this removes every
     * binding made with {@link #bindNamespacePrefix(String, String)}.
     */
    public void clearNamespaces () {
        queryBuilder.clearNamespaces();
    }

    /**
     * Parses the query string with the superclass parser, then rewrites any boolean query
     * that starts with a lux_within / lux_near marker term into a span query.
     * @param queryString the query to parse
     * @return the parsed (and possibly rewritten) query
     * @throws ParseException if the superclass parser rejects the query string
     * @throws NumberFormatException if a marker term's slop is not an integer
     * @throws IllegalStateException if a marked query contains a clause that cannot be
     * expressed as a span query
     */
    @Override
    public Query parse (String queryString) throws ParseException {
        Query q = super.parse(queryString);
        return maybeConvert (q);
    }

    /**
     * Rewrites q as a span query if it is a BooleanQuery whose first clause is a marker term;
     * otherwise returns q itself, after rewriting any marked BooleanQueries nested inside it.
     */
    private Query maybeConvert (Query q) {
        if (! (q instanceof BooleanQuery)) {
            return q;
        }
        BooleanQuery bq = (BooleanQuery) q;
        BooleanClause[] clauses = bq.getClauses();
        if (clauses.length == 0) {
            return bq;
        }
        Query first = clauses[0].getQuery();
        if (first instanceof TermQuery && isSpanMarker (((TermQuery) first).getTerm())) {
            return toSpanQuery (bq);
        }
        // This query is not marked itself, but queries nested inside it may be.
        convertNestedSpans (bq);
        return bq;
    }

    /** @return whether the term's field is one of the two span marker fields */
    private static boolean isSpanMarker (Term term) {
        String field = term.field();
        return WITHIN_FIELD.equals(field) || NEAR_FIELD.equals(field);
    }

    /** Replaces, in place, each clause of bq whose query gets rewritten by maybeConvert. */
    private void convertNestedSpans (BooleanQuery bq) {
        for (BooleanClause clause : bq.clauses()) {
            Query original = clause.getQuery();
            Query converted = maybeConvert (original);
            if (converted != original) {
                clause.setQuery (converted);
            }
        }
    }

    /**
       Converts BooleanQuery to SpanNearQuery, SpanOrQuery.
       Converts TermQuery to SpanTermQuery.
       It is an error to pass other Queries to this method.

       A BooleanQuery must start with a TermQuery.  If that term is a lux_within marker the
       result is in-order, if it is a lux_near marker the result is unordered; in both cases
       the term text is the slop and the marker clause itself is dropped.  With no marker,
       all clauses are kept and combined in order with slop 0.

       @throws NumberFormatException if a marker term's text is not an integer
       @throws IllegalStateException if q (or a query nested in it) cannot be converted
    */
    private SpanQuery toSpanQuery(Query q) {
        if (q instanceof BooleanQuery) {
            BooleanClause[] booleanClauses = ((BooleanQuery) q).getClauses();
            if (booleanClauses.length == 0) {
                return new SpanOrQuery();
            }
            Query first = booleanClauses[0].getQuery();
            if (first instanceof TermQuery) {
                Term term = ((TermQuery) first).getTerm();
                String field = term.field();
                boolean isMarker = isSpanMarker (term);
                // Only lux_near asks for an unordered match.
                boolean inOrder = ! NEAR_FIELD.equals(field);
                int slop = isMarker ? Integer.parseInt(term.text()) : 0;
                // Skip the marker clause; it carries parameters, not content.
                int start = isMarker ? 1 : 0;
                SpanQuery [] clauses = convertClauses (booleanClauses, start);
                if (clauses.length == 1) {
                    return clauses[0];
                }
                // The first clause's occurrence decides between "all, near each other" and "any".
                if (booleanClauses[0].isRequired()) {
                    return new SpanNearQuery(clauses, slop, inOrder);
                }
                return new SpanOrQuery(clauses);
            }
            // A BooleanQuery starting with anything but a TermQuery falls through to the error below.
        }
        if (q instanceof TermQuery) {
            return new SpanTermQuery(((TermQuery)q).getTerm());
        }
        throw new IllegalStateException("Can't convert query <" + q + "> of type " + q.getClass().getName() + " to a SpanQuery");
    }

    /** Converts clauses[start..] to span queries, preserving their order. */
    private SpanQuery[] convertClauses(BooleanClause[] clauses, int start) {
        SpanQuery[] spans = new SpanQuery[clauses.length - start];
        for (int i = start; i < clauses.length; i++) {
            spans[i - start] = toSpanQuery(clauses[i].getQuery());
        }
        return spans;
    }

    /**
     * Escapes a string for use as a single term or phrase in the query syntax.
     * A string containing a space is wrapped in double quotes, with embedded double quotes
     * backslash-escaped; any other string is escaped with {@link ExtendableQueryParser#escape(String)}.
     * @param s the raw text; must not be null
     * @return the escaped text
     */
    public final static String escapeQParser (String s) {
        if (s.indexOf(' ') >= 0) {
            // Quote phrases.  String.replace is literal; replaceAll would treat the backslash in
            // the replacement as an escape character and leave the quotes unescaped.
            return '"' + s.replace("\"", "\\\"") + '"';
        }
        return ExtendableQueryParser.escape (s);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy