// org.eclipse.rdf4j.sail.lucene.QuerySpecBuilder (Maven / Gradle / Ivy)
// Artifact: rdf4j-sail-lucene-api
/*******************************************************************************
* Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
*
* All rights reserved. This program and the accompanying materials
* are made available under the terms of the Eclipse Distribution License v1.0
* which accompanies this distribution, and is available at
* http://www.eclipse.org/org/documents/edl-v10.php.
*
* SPDX-License-Identifier: BSD-3-Clause
*******************************************************************************/
package org.eclipse.rdf4j.sail.lucene;
import static org.eclipse.rdf4j.model.vocabulary.RDF.TYPE;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.BOOST;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.INDEXID;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.LUCENE_QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.MATCHES;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.NUM_DOCS;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.PROPERTY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.QUERY;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SCORE;
import static org.eclipse.rdf4j.sail.lucene.LuceneSailSchema.SNIPPET;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Resource;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.util.Values;
import org.eclipse.rdf4j.query.BindingSet;
import org.eclipse.rdf4j.query.algebra.Join;
import org.eclipse.rdf4j.query.algebra.StatementPattern;
import org.eclipse.rdf4j.query.algebra.TupleExpr;
import org.eclipse.rdf4j.query.algebra.TupleFunctionCall;
import org.eclipse.rdf4j.query.algebra.ValueConstant;
import org.eclipse.rdf4j.query.algebra.Var;
import org.eclipse.rdf4j.query.algebra.helpers.AbstractQueryModelVisitor;
import org.eclipse.rdf4j.sail.SailException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A QueryInterpreter creates a set of QuerySpecs based on Lucene-related StatementPatterns that it finds in a
* TupleExpr.
*
* QuerySpecs will only be created when the set of StatementPatterns is complete (i.e. contains at least a matches and a
* query statement connected properly) and correct (query pattern has a literal object, matches a resource subject,
* etc.).
*/
public class QuerySpecBuilder implements SearchQueryInterpreter {
// Receives the warnings emitted for invalid query patterns and the debug notes about missing rdf:type patterns.
private final static Logger logger = LoggerFactory.getLogger(QuerySpecBuilder.class);
// When true, an invalid or incomplete query raises a SailException; when false it is only logged as a warning.
private final boolean incompleteQueryFails;
// Id of the index to answer for; when null, matches patterns are processed without any index id filtering.
private final IRI indexId;
/**
* Initialize a new QuerySpecBuilder that does not filter by index id. Delegates to
* {@link #QuerySpecBuilder(boolean, IRI)} with a {@code null} index id.
*
* @param incompleteQueryFails see {@link LuceneSail#isIncompleteQueryFails()}
*/
public QuerySpecBuilder(boolean incompleteQueryFails) {
this(incompleteQueryFails, null);
}
/**
* Initialize a new QuerySpecBuilder
*
* @param incompleteQueryFails see {@link LuceneSail#isIncompleteQueryFails()}
* @param indexId the id of the index, or {@code null} to not filter by index id, see
* {@link LuceneSail#INDEX_ID}
*/
public QuerySpecBuilder(boolean incompleteQueryFails, IRI indexId) {
this.incompleteQueryFails = incompleteQueryFails;
this.indexId = indexId;
}
/**
* Returns a set of QuerySpecs embodying all necessary information to perform the Lucene query embedded in a
* TupleExpr. To be removed, prefer {@link #process(TupleExpr, BindingSet, Collection)}.
*/
@SuppressWarnings("unchecked")
@Deprecated
public Set process(TupleExpr tupleExpr, BindingSet bindings) throws SailException {
HashSet result = new HashSet<>();
process(tupleExpr, bindings, (Collection) (Collection>) result);
return result;
}
/**
 * Appends a set of QuerySpecs embodying all necessary information to perform the Lucene query embedded in a
 * TupleExpr.
 * <p>
 * Every {@code MATCHES} pattern found in the expression is validated together with the patterns whose subject
 * is its object variable. Invalid combinations are reported through {@code failOrWarn} and skipped. When the
 * resulting QuerySpec reports itself as evaluable it is added to {@code result}; otherwise the {@code MATCHES}
 * pattern is replaced by a join of itself with a {@link TupleFunctionCall}, so that the search is evaluated
 * later, and {@code removeQueryPatterns()} is invoked on the spec.
 *
 * @param tupleExpr the expression to scan for Lucene-related statement patterns
 * @param bindings  bindings used to resolve variables that carry no constant value
 * @param result    collection the evaluable QuerySpecs are appended to
 * @throws SailException if an invalid pattern is found and {@code incompleteQueryFails} is set
 */
@Override
public void process(TupleExpr tupleExpr, BindingSet bindings, Collection<SearchQueryEvaluator> result)
        throws SailException {
    // find Lucene-related StatementPatterns
    PatternFilter filter = new PatternFilter();
    tupleExpr.visit(filter);

    // loop over all matches statements
    for (StatementPattern matchesPattern : filter.matchesPatterns) {
        // the subject of the matches statements should be a variable or a Resource
        Var subjectVar = matchesPattern.getSubjectVar();
        Value subjectValue = resolveValue(subjectVar, bindings);
        if (subjectValue != null && !(subjectValue instanceof Resource)) {
            // report the resolved value: the Var itself holds null when the value comes from the bindings
            failOrWarn(MATCHES + " properties should have Resource subjects: " + subjectValue);
            continue;
        }
        Resource subject = (Resource) subjectValue;

        // the matches var should have no value
        Var matchesVar = matchesPattern.getObjectVar();
        if (matchesVar.hasValue()) {
            failOrWarn(MATCHES + " properties should have variable objects: " + matchesVar.getValue());
            continue;
        }

        // do we need to filter by id?
        StatementPattern idPattern = null;
        if (indexId != null) {
            try {
                idPattern = getPattern(matchesVar, filter.idPatterns);
            } catch (IllegalArgumentException e) {
                failOrWarn(e);
                continue;
            }
            if (idPattern == null) {
                continue;
            }
            // compare the resolved value; the Var's own value is null when the id is supplied by the bindings
            Value indexIdValue = resolveValue(idPattern.getObjectVar(), bindings);
            if (!(indexIdValue instanceof IRI && indexIdValue.equals(indexId))) {
                continue; // this match isn't for this index, continue for the next one
            }
        }

        // find the relevant outgoing patterns
        StatementPattern typePattern, propertyPattern, scorePattern, snippetPattern, numDocsPattern;
        List<StatementPattern> queryPatterns;
        try {
            typePattern = getPattern(matchesVar, filter.typePatterns);
            queryPatterns = getQueryVar(matchesVar, filter.queryPatterns);
            propertyPattern = getPattern(matchesVar, filter.propertyPatterns);
            scorePattern = getPattern(matchesVar, filter.scorePatterns);
            snippetPattern = getPattern(matchesVar, filter.snippetPatterns);
            numDocsPattern = getPattern(matchesVar, filter.numDocsPatterns);
        } catch (IllegalArgumentException e) {
            failOrWarn(e);
            continue;
        }

        // fetch the query String
        String queryString = null;
        List<QuerySpec.QueryParam> queries = new ArrayList<>();
        StatementPattern litQueryPattern = null;
        boolean multiFieldQuery = false;
        if (!queryPatterns.isEmpty()) {
            Value firstQueryValue = resolveValue(queryPatterns.get(0).getObjectVar(), bindings);
            // a query object that is not (yet) a literal is treated as a set of per-field query nodes
            multiFieldQuery = firstQueryValue == null || !firstQueryValue.isLiteral();
            if (multiFieldQuery) {
                // multiple queries
                for (StatementPattern queryPattern : queryPatterns) {
                    Var queryPatternVar = queryPattern.getObjectVar();
                    StatementPattern fieldQueryQueryPattern, fieldQueryBoostPattern, fieldQueryPropertyPattern,
                            fieldQuerySnippetPattern, fieldTypePattern;
                    try {
                        fieldQueryQueryPattern = getPattern(queryPatternVar, filter.queryPatterns);
                        fieldQueryBoostPattern = getPattern(queryPatternVar, filter.boostPatterns);
                        fieldQueryPropertyPattern = getPattern(queryPatternVar, filter.propertyPatterns);
                        fieldQuerySnippetPattern = getPattern(queryPatternVar, filter.snippetPatterns);
                        fieldTypePattern = getPattern(queryPatternVar, filter.typePatterns);
                    } catch (IllegalArgumentException e) {
                        // route duplicate-pattern errors through failOrWarn like the top-level lookups do
                        failOrWarn(e);
                        continue;
                    }

                    String query = null;
                    IRI property = null;
                    Float boost = null;
                    Var fieldSnippetVar = fieldQuerySnippetPattern == null ? null
                            : fieldQuerySnippetPattern.getObjectVar();

                    if (fieldQueryQueryPattern != null) {
                        Value queryValue = resolveValue(fieldQueryQueryPattern.getObjectVar(), bindings);
                        if (queryValue instanceof Literal) {
                            query = ((Literal) queryValue).getLabel();
                        }
                    }

                    if (fieldQueryBoostPattern != null) {
                        Value boostValue = resolveValue(fieldQueryBoostPattern.getObjectVar(), bindings);
                        if (boostValue instanceof Literal) {
                            boost = ((Literal) boostValue).floatValue();
                        }
                    }

                    if (fieldQueryPropertyPattern != null) {
                        Value propertyValue = resolveValue(fieldQueryPropertyPattern.getObjectVar(), bindings);
                        // if property is a restriction, it should be an URI
                        if (propertyValue instanceof IRI) {
                            property = (IRI) propertyValue;
                        }
                        // otherwise, it should be a variable
                        else if (propertyValue != null) {
                            failOrWarn(PROPERTY + " should have a property URI or a variable as object: "
                                    + propertyValue);
                            continue;
                        }
                    }

                    // check the snippet variable, if any
                    if (fieldSnippetVar != null && fieldSnippetVar.hasValue()) {
                        failOrWarn(SNIPPET + " should have a variable as object: " + fieldSnippetVar.getValue());
                        continue;
                    }

                    // check type pattern
                    if (fieldTypePattern == null) {
                        logger.debug("Query variable '{}' has not rdf:type, assuming {}", queryPatternVar,
                                LUCENE_QUERY);
                    }

                    queries.add(new QuerySpec.QueryParam(queryPattern, fieldQueryQueryPattern,
                            fieldQueryPropertyPattern, fieldQuerySnippetPattern, fieldQueryBoostPattern,
                            fieldTypePattern, query, property, boost));
                }
            } else {
                // using literal query
                queryString = ((Literal) firstQueryValue).getLabel();
                litQueryPattern = queryPatterns.get(0);
            }
        }

        // check property restriction or variable
        IRI propertyURI = null;
        if (propertyPattern != null) {
            if (multiFieldQuery) {
                failOrWarn(PROPERTY + " can't be used with " + MATCHES + " for non literal query");
                continue;
            }
            Value propertyValue = resolveValue(propertyPattern.getObjectVar(), bindings);
            // if property is a restriction, it should be an URI
            if (propertyValue instanceof IRI) {
                propertyURI = (IRI) propertyValue;
            }
            // otherwise, it should be a variable
            else if (propertyValue != null) {
                failOrWarn(PROPERTY + " should have a property URI or a variable as object: " + propertyValue);
                continue;
            }
        }

        // check the score variable, if any
        Var scoreVar = scorePattern == null ? null : scorePattern.getObjectVar();
        if (scoreVar != null && scoreVar.hasValue()) {
            failOrWarn(SCORE + " should have a variable as object: " + scoreVar.getValue());
            continue;
        }

        // check the snippet variable, if any
        Var snippetVar = snippetPattern == null ? null : snippetPattern.getObjectVar();
        if (snippetVar != null && snippetVar.hasValue()) {
            failOrWarn(SNIPPET + " should have a variable as object: " + snippetVar.getValue());
            continue;
        }

        // check type pattern (it is looked up on the matches object variable, so that is what gets logged)
        if (typePattern == null) {
            logger.debug("Query variable '{}' has not rdf:type, assuming {}", matchesVar, LUCENE_QUERY);
        }

        if (!multiFieldQuery) {
            queries.add(new QuerySpec.QueryParam(litQueryPattern, propertyPattern, snippetPattern, null,
                    queryString, propertyURI, null));
        }

        QuerySpec querySpec = new QuerySpec(matchesPattern, queries, scorePattern, typePattern, idPattern,
                numDocsPattern, subject);

        if (querySpec.isEvaluable()) {
            // constant optimizer
            result.add(querySpec);
        } else {
            // Without any query statement there is no query variable to hand to the search function;
            // report the incomplete pattern instead of failing on queryPatterns.get(0) below.
            if (!multiFieldQuery && queryPatterns.isEmpty()) {
                failOrWarn(MATCHES + " properties should be connected to a " + QUERY + " statement");
                continue;
            }

            // evaluate later
            TupleFunctionCall funcCall = new TupleFunctionCall();
            funcCall.setURI(LuceneSailSchema.SEARCH.toString());

            if (multiFieldQuery) {
                funcCall.addArg(new ValueConstant(QUERY));
                funcCall.addArg(new ValueConstant(Values.literal(queryPatterns.size())));
                queryPatterns.stream().map(StatementPattern::getObjectVar).forEach(funcCall::addArg);
            } else {
                funcCall.addArg(queryPatterns.get(0).getObjectVar());
            }

            if (subject != null) {
                funcCall.addArg(matchesPattern.getSubjectVar());
            } else {
                funcCall.addArg(new ValueConstant(LuceneSailSchema.ALL_MATCHES));
                funcCall.addResultVar(matchesPattern.getSubjectVar());
            }

            if (propertyPattern != null) {
                funcCall.addArg(new ValueConstant(LuceneSailSchema.PROPERTY));
                if (propertyURI != null) {
                    funcCall.addArg(propertyPattern.getObjectVar());
                } else {
                    funcCall.addArg(new ValueConstant(LuceneSailSchema.ALL_PROPERTIES));
                    funcCall.addResultVar(propertyPattern.getObjectVar());
                }
            }

            if (scoreVar != null) {
                funcCall.addArg(new ValueConstant(LuceneSailSchema.SCORE));
                funcCall.addResultVar(scoreVar);
            }

            if (snippetVar != null) {
                funcCall.addArg(new ValueConstant(LuceneSailSchema.SNIPPET));
                funcCall.addResultVar(snippetVar);
            }

            if (numDocsPattern != null) {
                funcCall.addArg(new ValueConstant(LuceneSailSchema.NUM_DOCS));
                funcCall.addArg(numDocsPattern.getObjectVar());
            }

            // splice "matchesPattern JOIN funcCall" into the tree at the position of matchesPattern
            Join join = new Join();
            matchesPattern.replaceWith(join);
            join.setLeftArg(matchesPattern);
            join.setRightArg(funcCall);

            querySpec.removeQueryPatterns();
        }
    }

    // NOTE: superfluous type, query, score, or snippet patterns left over in the filter are not reported here.
}

/**
 * Resolves a variable to a value: its own constant value when it has one, otherwise whatever the bindings hold
 * under the variable's name (possibly {@code null}).
 */
private static Value resolveValue(Var variable, BindingSet bindings) {
    return variable.hasValue() ? variable.getValue() : bindings.getValue(variable.getName());
}
/**
 * Reports a problem described by an exception: logged as a warning when incomplete queries are tolerated,
 * otherwise rethrown (as-is if it already is a SailException, wrapped in one if not).
 *
 * @param exception the problem to report
 * @throws SailException when {@code incompleteQueryFails} is set
 */
private void failOrWarn(Exception exception) throws SailException {
    if (!incompleteQueryFails) {
        logger.warn(exception.getMessage(), exception);
        return;
    }
    if (exception instanceof SailException) {
        throw (SailException) exception;
    }
    throw new SailException(exception);
}
/**
 * Reports a problem described by a message: logged as a warning when incomplete queries are tolerated,
 * otherwise thrown as a SailException whose message is prefixed with "Invalid Text Query: ".
 *
 * @param message description of the problem
 * @throws SailException when {@code incompleteQueryFails} is set
 */
private void failOrWarn(String message) throws SailException {
    if (!incompleteQueryFails) {
        logger.warn(message);
        return;
    }
    throw new SailException("Invalid Text Query: " + message);
}
/**
 * Returns the StatementPattern, if any, from the specified list that has the specified subject var. If multiple
 * StatementPatterns exist with this subject var, an IllegalArgumentException is thrown. The returned pattern is
 * also removed from the list, to be able to check if some patterns are added without a MATCHES property.
 *
 * @param subjectVar the subject variable to look for
 * @param patterns   candidate patterns; the match, if any, is removed from this list
 * @return the single pattern with that subject, or {@code null} if there is none
 * @throws IllegalArgumentException if more than one pattern has that subject (the list is left untouched)
 */
private StatementPattern getPattern(Var subjectVar, ArrayList<StatementPattern> patterns)
        throws IllegalArgumentException {
    StatementPattern found = null;
    for (StatementPattern candidate : patterns) {
        if (!candidate.getSubjectVar().equals(subjectVar)) {
            continue;
        }
        if (found != null) {
            throw new IllegalArgumentException(
                    "multiple StatementPatterns with the same subject: " + found + ", " + candidate);
        }
        found = candidate;
    }
    // remove the result from the list, to filter out superfluous patterns
    if (found != null) {
        patterns.remove(found);
    }
    return found;
}
/**
 * Returns the query patterns whose subject is subjectVar and removes them from the given list. If one of them
 * has a constant literal object, it must be the only one and is returned as a singleton list; otherwise all
 * patterns with a non-literal (or unbound) object are returned, or an empty list when there is none.
 *
 * @param subjectVar the subject variable to look for
 * @param patterns   candidate query patterns; the returned patterns are removed from this list
 * @return the matching patterns, never {@code null}
 * @throws IllegalArgumentException if literal and non-literal objects are mixed, or if several literal objects
 *                                  are present for the same subject
 */
private List<StatementPattern> getQueryVar(Var subjectVar, ArrayList<StatementPattern> patterns)
        throws IllegalArgumentException {
    StatementPattern litResult = null;
    List<StatementPattern> objectResult = null;

    for (StatementPattern pattern : patterns) {
        // ignore other subject
        if (!pattern.getSubjectVar().equals(subjectVar)) {
            continue;
        }

        Var queryPatternVar = pattern.getObjectVar();
        boolean literalObject = queryPatternVar.hasValue() && queryPatternVar.getValue().isLiteral();

        if (literalObject) {
            if (objectResult != null) {
                throw new IllegalArgumentException("query can't be done over both literal and resource!");
            }
            if (litResult != null) {
                throw new IllegalArgumentException(
                        "multiple StatementPatterns with the same subject: " + litResult + ", " + pattern);
            }
            litResult = pattern;
        } else {
            if (litResult != null) {
                throw new IllegalArgumentException("query can't be done over both literal and resource!");
            }
            if (objectResult == null) {
                objectResult = new ArrayList<>();
            }
            objectResult.add(pattern);
        }
    }

    // remove the result from the list, to filter out superfluous patterns

    // we have one literal
    if (litResult != null) {
        patterns.remove(litResult);
        return List.of(litResult);
    }

    // we have resources
    if (objectResult != null) {
        patterns.removeAll(objectResult);
        return objectResult;
    }

    // no query
    return List.of();
}
/**
 * Visitor that sorts the StatementPatterns of a query model into one list per LuceneSail predicate. The lists
 * are deliberately mutable: the lookup helpers of the enclosing class remove the patterns they consume.
 */
private static class PatternFilter extends AbstractQueryModelVisitor<RuntimeException> {

    /** Patterns with predicate rdf:type and object LUCENE_QUERY. */
    public final ArrayList<StatementPattern> typePatterns = new ArrayList<>();

    /** Patterns with predicate MATCHES. */
    public final ArrayList<StatementPattern> matchesPatterns = new ArrayList<>();

    /** Patterns with predicate QUERY. */
    public final ArrayList<StatementPattern> queryPatterns = new ArrayList<>();

    /** Patterns with predicate PROPERTY. */
    public final ArrayList<StatementPattern> propertyPatterns = new ArrayList<>();

    /** Patterns with predicate SCORE. */
    public final ArrayList<StatementPattern> scorePatterns = new ArrayList<>();

    /** Patterns with predicate SNIPPET. */
    public final ArrayList<StatementPattern> snippetPatterns = new ArrayList<>();

    /** Patterns with predicate INDEXID. */
    public final ArrayList<StatementPattern> idPatterns = new ArrayList<>();

    /** Patterns with predicate BOOST. */
    public final ArrayList<StatementPattern> boostPatterns = new ArrayList<>();

    /** Patterns with predicate NUM_DOCS. */
    public final ArrayList<StatementPattern> numDocsPatterns = new ArrayList<>();

    /**
     * Method implementing the visitor pattern that gathers all statements using a predicate from the LuceneSail's
     * namespace. Patterns whose predicate is unbound or not recognised are ignored; rdf:type patterns are kept
     * only when their object is LUCENE_QUERY.
     */
    @Override
    public void meet(StatementPattern node) {
        // null when the predicate is an unbound variable; every equals() below is then false
        Value predicate = node.getPredicateVar().getValue();

        if (MATCHES.equals(predicate)) {
            matchesPatterns.add(node);
        } else if (QUERY.equals(predicate)) {
            queryPatterns.add(node);
        } else if (PROPERTY.equals(predicate)) {
            propertyPatterns.add(node);
        } else if (SCORE.equals(predicate)) {
            scorePatterns.add(node);
        } else if (SNIPPET.equals(predicate)) {
            snippetPatterns.add(node);
        } else if (INDEXID.equals(predicate)) {
            idPatterns.add(node);
        } else if (BOOST.equals(predicate)) {
            boostPatterns.add(node);
        } else if (NUM_DOCS.equals(predicate)) {
            numDocsPatterns.add(node);
        } else if (TYPE.equals(predicate)) {
            Value object = node.getObjectVar().getValue();
            if (LUCENE_QUERY.equals(object)) {
                typePatterns.add(node);
            }
        }
    }
}
}