org.modeshape.jcr.query.lucene.LuceneQueryEngine Maven / Gradle / Ivy
/*
* ModeShape (http://www.modeshape.org)
* See the COPYRIGHT.txt file distributed with this work for information
* regarding copyright ownership. Some portions may be licensed
* to Red Hat, Inc. under one or more contributor license agreements.
* See the AUTHORS.txt file in the distribution for a full listing of
* individual contributors.
*
* ModeShape is free software. Unless otherwise indicated, all code in ModeShape
* is licensed to you under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation; either version 2.1 of
* the License, or (at your option) any later version.
*
* ModeShape is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this software; if not, write to the Free
* Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA
* 02110-1301 USA, or see the FSF site: http://www.fsf.org.
*/
package org.modeshape.jcr.query.lucene;
import java.io.IOException;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.jcr.RepositoryException;
import javax.jcr.query.InvalidQueryException;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.hibernate.search.SearchFactory;
import org.hibernate.search.engine.spi.SearchFactoryImplementor;
import org.hibernate.search.indexes.impl.IndexManagerHolder;
import org.modeshape.common.logging.Logger;
import org.modeshape.jcr.ExecutionContext;
import org.modeshape.jcr.api.query.QueryCancelledException;
import org.modeshape.jcr.cache.NodeCache;
import org.modeshape.jcr.cache.RepositoryCache;
import org.modeshape.jcr.query.CancellableQuery;
import org.modeshape.jcr.query.QueryContext;
import org.modeshape.jcr.query.QueryIndexing;
import org.modeshape.jcr.query.QueryResults;
import org.modeshape.jcr.query.QueryResults.Columns;
import org.modeshape.jcr.query.lucene.basic.BasicLuceneSchema;
import org.modeshape.jcr.query.lucene.basic.ExistsTupleCollector;
import org.modeshape.jcr.query.model.Constraint;
import org.modeshape.jcr.query.model.FullTextSearchScore;
import org.modeshape.jcr.query.model.QueryCommand;
import org.modeshape.jcr.query.optimize.Optimizer;
import org.modeshape.jcr.query.plan.PlanHints;
import org.modeshape.jcr.query.plan.PlanNode;
import org.modeshape.jcr.query.plan.Planner;
import org.modeshape.jcr.query.process.AbstractAccessComponent;
import org.modeshape.jcr.query.process.ProcessingComponent;
import org.modeshape.jcr.query.process.QueryEngine;
import org.modeshape.jcr.query.process.QueryProcessor;
import org.modeshape.jcr.query.process.SelectComponent;
import org.modeshape.jcr.query.validate.Schemata;
/**
* A {@link QueryEngine} that uses Lucene for answering queries. Each repository uses a single {@link LuceneQueryEngine} instance,
* and all sessions share the same engine. Therefore, this engine is threadsafe (and actually immutable), creating objects for
* each {@link #execute(QueryContext, QueryCommand) submitted query}.
*/
public class LuceneQueryEngine extends QueryEngine {
protected static final Logger LOGGER = Logger.getLogger(LuceneQueryEngine.class);
private final ExecutionContext repositoryContext;
private final BasicLuceneSchema schema;
// private final String repositoryName;
/**
* @param context the execution context for the repository
* @param repositoryName the name of the repository
* @param planner the planner that should be used
* @param optimizer the optimizer that should be used
* @param searchFactory the search factory for accessing the indexes
* @param version the Lucene version used by the indexes
* @param enableFullTextSearch true if full-text searching is enabled, or false otherwise
*/
public LuceneQueryEngine( ExecutionContext context,
String repositoryName,
Planner planner,
Optimizer optimizer,
SearchFactoryImplementor searchFactory,
Version version,
boolean enableFullTextSearch ) {
super(planner, optimizer, new LuceneQueryProcessor(repositoryName, searchFactory));
this.repositoryContext = context;
// this.repositoryName = repositoryName;
this.schema = new BasicLuceneSchema(this.repositoryContext, searchFactory, version, enableFullTextSearch);
((LuceneQueryProcessor)this.processor).initialize(schema);
}
/**
* Shuts down the query engine
*/
public void shutdown() {
this.schema.shutdown();
}
/**
* Returns the global index manager.
*
* @return a {@code IndexManagerHolder} instance.
*/
public IndexManagerHolder getAllIndexesManager() {
return this.schema.getAllIndexesManager();
}
/**
* Get the interface for updating the indexes.
*
* @return the indexing interface
*/
public QueryIndexing getQueryIndexing() {
return this.schema;
}
/**
* Execute the supplied query against the named workspace, using the supplied hints, schemata and variables.
*
* @param context the context in which the query is being executed; may not be null
* @param repositoryCache the repository cache that should be used to load results; may not be null
* @param workspaceNames the name of each workspace to be queried, or an empty set if all the workspaces should be queried;
* may not be null
* @param overriddenNodeCachesByWorkspaceName the NodeCache instances that should be used to load results, which will be used
* instead of the RepositoryCache's NodeCache for a given workspace name; may be null or empty
* @param query the query
* @param schemata the schemata information that should be used for all processing of this query
* @param hints the hints
* @param variables the variables
* @return the results
* @throws InvalidQueryException if the
*/
public CancellableQuery query( ExecutionContext context,
RepositoryCache repositoryCache,
Set workspaceNames,
Map overriddenNodeCachesByWorkspaceName,
final QueryCommand query,
Schemata schemata,
PlanHints hints,
Map variables ) throws InvalidQueryException {
final QueryContext queryContext = new QueryContext(context, repositoryCache, workspaceNames,
overriddenNodeCachesByWorkspaceName, schemata, hints, null, variables);
return new CancellableQuery() {
private final Lock lock = new ReentrantLock();
private QueryResults results;
@Override
public QueryResults getResults() throws QueryCancelledException, RepositoryException {
try {
lock.lock();
if (results == null) {
results = execute(queryContext, query); // this will block and will hold the lock until it is done ...
}
return results;
} finally {
lock.unlock();
}
}
@Override
public boolean cancel() {
return queryContext.cancel();
}
};
}
/**
* The {@link QueryProcessor} implementation used by this {@link LuceneQueryEngine}. It varies from the abstract class only by
* creating a {@link LuceneAccessQuery} for each access query, and which is resposible for submitting the access query to
* Lucene.
*/
protected static class LuceneQueryProcessor extends QueryProcessor {
private final SearchFactory searchFactory;
private final String repositoryName;
private LuceneSchema schema;
protected LuceneQueryProcessor( String repositoryName,
SearchFactory searchFactory ) {
this.searchFactory = searchFactory;
this.repositoryName = repositoryName;
}
protected void initialize( LuceneSchema schema ) {
this.schema = schema;
assert this.schema != null;
}
@Override
protected boolean supportsPushDownExistConstraints() {
return true;
}
@Override
protected LuceneProcessingContext createProcessingContext( QueryContext queryContext ) {
return new LuceneProcessingContext(queryContext, repositoryName, searchFactory, schema);
}
@Override
protected void closeProcessingContext( LuceneProcessingContext processingContext ) {
// This method will _ALWAYS_ be called by the superclass if createProcessingContext() is called
assert processingContext != null;
// Close the index readers ...
processingContext.close();
}
@Override
protected ProcessingComponent createAccessComponent( QueryCommand originalQuery,
QueryContext context,
PlanNode accessNode,
Columns resultColumns,
LuceneProcessingContext processingContext ) {
assert this.schema != null;
return new LuceneAccessQuery(schema, processingContext, context, resultColumns, accessNode);
}
}
/**
* The component that is created to represent a single access query and, when executed, transforms that access query into a
* single Lucene query and issues it against Lucene.
*/
protected static class LuceneAccessQuery extends AbstractAccessComponent {
private final LuceneProcessingContext processingContext;
private final LuceneSchema schema;
public LuceneAccessQuery( LuceneSchema schema,
LuceneProcessingContext processingContext,
QueryContext context,
Columns resultColumns,
PlanNode accessNode ) {
super(context, resultColumns, accessNode);
this.schema = schema;
this.processingContext = processingContext;
}
/**
* {@inheritDoc}
*
* Some kinds of constraints are not easily pushed down to Lucene as are of a Lucene Query, and instead are applied by
* filtering the results. For example, a {@link FullTextSearchScore} applies to the score of the tuple, which cannot be
* (easily?) applied as a {@link Query Lucene Query}.
*
*
* Therefore, each of the AND-ed constraints of the query are evaluated separately. After all, each of the tuples returned
* by the planned access query must satisfy all of the AND-ed constraints. Or, to put it another way, if a tuple does not
* satisfy one of the AND-ed constraints, the tuple should not be included in the query results.
*
*
* Logically, any AND-ed criteria that cannot be pushed down to Lucene can of course be applied as a filter on the
* results. Thus, each AND-ed constraint is processed to first determine if it can be represented as a Lucene query; all
* other AND-ed constraints must be handled as a results filter. Since most queries will likely use one or more simple
* constraints AND-ed together, this approach will likely work very well.
*
*
* The only hairy case is when any AND-ed constraint is actually an OR-ed combination of multiple constraints of which at
* least one cannot be pushed down to Lucene. In this case, the entire AND-ed constraint must be treated as a results
* filter (even if many of those constraints that make up the OR-ed constraint can be pushed down). Hopefully, this will
* not be a common case in actual queries.
*
*
* @see AbstractAccessComponent#execute()
*/
@Override
public List