
// com.adobe.cq.searchcollections.lucene.LuceneSearchCollection Maven / Gradle / Ivy
/*************************************************************************
*
* ADOBE CONFIDENTIAL
* __________________
*
* Copyright 2012 Adobe Systems Incorporated
* All Rights Reserved.
*
* NOTICE: All information contained herein is, and remains
* the property of Adobe Systems Incorporated and its suppliers,
* if any. The intellectual and technical concepts contained
* herein are proprietary to Adobe Systems Incorporated and its
* suppliers and are protected by trade secret or copyright law.
* Dissemination of this information or reproduction of this material
* is strictly forbidden unless prior written permission is obtained
* from Adobe Systems Incorporated.
**************************************************************************/
package com.adobe.cq.searchcollections.lucene;
import static com.adobe.cq.searchcollections.lucene.IndexerUtil.escapeQuery;
import static javax.jcr.query.Query.JCR_JQOM;
import static javax.jcr.query.Query.JCR_SQL2;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import javax.jcr.Node;
import javax.jcr.PathNotFoundException;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.qom.QueryObjectModelFactory;
import org.apache.jackrabbit.commons.query.sql2.SQL2QOMBuilder;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.Version;
import org.apache.sling.api.SlingConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.adobe.cq.searchcollections.api.SearchCollection;
/**
 * {@link SearchCollection} implementation backed by a Lucene index stored in
 * a {@link JCRDirectory}, with an optional second directory holding a facet
 * taxonomy.
 *
 * <p>Index updates are asynchronous: {@link #update(Set)} queues paths and
 * wakes a single worker thread (started by the constructor) which re-indexes
 * the queued paths. {@link #stop()} shuts the worker down and logs out the
 * session obtained from the directory node.
 *
 * @deprecated This class is marked deprecated; no replacement is named in
 *             this source file.
 */
@Deprecated
public class LuceneSearchCollection implements SearchCollection {

    private static final Logger log = LoggerFactory
            .getLogger(LuceneSearchCollection.class);

    //To prevent synchronization issues, use this global lock to only do one
    //update at a time. We should really be using JCR locks, but that doesn't
    //seem to be working even though mix:lockable is used.
    private static final Object writeSync = new Object();

    /** Session of the directory node; used to resolve paths while indexing. */
    private final Session session;

    /** Repository path of the directory node; updates below it are skipped. */
    private final String path;

    private final NodeIndexer indexer;

    private final JCRDirectory directory;

    /** Taxonomy directory, or {@code null} when facets are not available. */
    private final JCRDirectory taxoDirectory;

    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);

    private final Analyzer keywordAnalyzer = new KeywordAnalyzer();

    private final LuceneSearchCollectionConfig indexConfig;

    /** Cleared by {@link #stop()} to make the worker thread exit. */
    private volatile boolean running = true;

    /** Paths waiting to be indexed; guarded by the instance monitor. */
    private final Collection<String> queue = new LinkedHashSet<String>();

    private final Thread wThread;

    /** Monitor used to signal the worker thread; guards {@link #requestSeq}. */
    private final Object execSync = new Object();

    /**
     * Incremented for every work request. A counter is used rather than a
     * timestamp so that two requests issued within the same millisecond are
     * still distinguishable by the worker.
     */
    private long requestSeq = 0;

    /**
     * Background worker: sleeps until a new work request is signalled on
     * {@code execSync}, then indexes all pending paths while holding the
     * global {@code writeSync} lock.
     */
    class WorkerThread implements Runnable {

        /** Value of {@code requestSeq} that this worker has already served. */
        private long handledSeq = 0;

        public void run() {
            while (running) {
                synchronized (execSync) {
                    if (requestSeq == handledSeq) {
                        try {
                            //Wait until another request for work is received.
                            execSync.wait();
                        } catch (InterruptedException e) {
                            log.error("Error executing execSync.wait().", e);
                            continue;
                        }
                    }
                }
                if (!running) {
                    return;
                }
                try {
                    //Remember which request is being served before draining
                    //the queue, so a request arriving during the update
                    //triggers another pass.
                    synchronized (execSync) {
                        handledSeq = requestSeq;
                    }
                    synchronized (writeSync) {
                        doUpdate(getPendingPaths());
                    }
                } catch (Throwable t) {
                    // Keep the worker alive whatever the update throws.
                    log.error("Error executing doUpdate.", t);
                }
            }
        }
    }

    /**
     * Creates the collection and starts its worker thread.
     *
     * @param directory     node that stores the Lucene index
     * @param taxoDirectory node that stores the facet taxonomy, or
     *                      {@code null} to disable facets
     * @param indexer       strategy used to turn nodes into Lucene documents
     * @param useJCRLocks   {@code true} to use a {@link JCRLockFactory},
     *                      {@code false} to use a
     *                      {@link SingleInstanceLockFactory}
     * @throws RepositoryException if the session or path of the directory
     *                             node cannot be read
     */
    public LuceneSearchCollection(Node directory, Node taxoDirectory, NodeIndexer indexer, boolean useJCRLocks) throws RepositoryException {
        this.session = directory.getSession();
        this.path = directory.getPath();
        this.indexer = indexer;
        this.directory = new JCRDirectory(directory);
        this.taxoDirectory = (null != taxoDirectory)
                ? new JCRDirectory(taxoDirectory)
                : null;
        try {
            if (useJCRLocks) {
                // path HAS to be mix:lockable if you want to use the
                // JCRLockFactory
                this.directory.setLockFactory(new JCRLockFactory(this.session,
                        directory.getPath()));
                if (null != taxoDirectory) {
                    // NOTE(review): the taxonomy lock factory is given the
                    // main directory's path, not the taxonomy node's path -
                    // confirm this is intended.
                    this.taxoDirectory.setLockFactory(new JCRLockFactory(this.session,
                            directory.getPath()));
                }
            } else {
                this.directory.setLockFactory(new SingleInstanceLockFactory());
                if (null != taxoDirectory) {
                    this.taxoDirectory.setLockFactory(new SingleInstanceLockFactory());
                }
            }
        } catch (Exception e) {
            // Construction continues as before, but the failure is no longer
            // silently discarded.
            log.warn("Unable to configure the lock factory for search collection: "
                    + this.path, e);
        }
        indexConfig = new LuceneSearchCollectionConfig(directory);
        wThread = new Thread(new WorkerThread());
        wThread.start();
    }

    /**
     * Creates a collection without taxonomy, using a
     * {@link DefaultNodeIndexer} and no JCR locks.
     *
     * @param directory node that stores the Lucene index
     * @throws RepositoryException if the directory node cannot be read
     */
    public LuceneSearchCollection(Node directory) throws RepositoryException {
        this(directory, null, new DefaultNodeIndexer(), false);
    }

    /**
     * Creates a collection without taxonomy and without JCR locks.
     *
     * @param directory node that stores the Lucene index
     * @param indexer   strategy used to turn nodes into Lucene documents
     * @throws RepositoryException if the directory node cannot be read
     */
    public LuceneSearchCollection(Node directory, NodeIndexer indexer)
            throws RepositoryException {
        this(directory, null, indexer, false);
    }

    /**
     * Creates a collection with an optional taxonomy and without JCR locks.
     *
     * @param directory     node that stores the Lucene index
     * @param taxoDirectory node that stores the facet taxonomy, may be
     *                      {@code null}
     * @param indexer       strategy used to turn nodes into Lucene documents
     * @throws RepositoryException if the directory node cannot be read
     */
    public LuceneSearchCollection(Node directory, Node taxoDirectory, NodeIndexer indexer)
            throws RepositoryException {
        this(directory, taxoDirectory, indexer, false);
    }

    /**
     * Counts facet values of the category {@code fieldName} among the
     * documents matching the given filters.
     *
     * @param fieldName          facet category to count
     * @param resourceTypeFilter query text parsed against the Sling resource
     *                           type field, or {@code null}
     * @param componentFilter    value matched (after escaping) against the
     *                           {@code component} field, or {@code null}
     * @param count              number of facet results requested
     * @return map from the last component of each facet label to its count
     *         (truncated to {@code int}); {@code null} when no taxonomy
     *         directory is configured or the index files are missing
     * @throws RepositoryException wrapping any I/O or query parse failure
     */
    public Map findFacets(String fieldName, String resourceTypeFilter, String componentFilter, int count) throws RepositoryException {
        if (null == taxoDirectory) {
            return null;
        }
        IndexReader index = null;
        IndexSearcher searcher = null;
        TaxonomyReader taxo = null;
        try {
            index = IndexReader.open(directory);
            searcher = new IndexSearcher(index);
            taxo = new DirectoryTaxonomyReader(taxoDirectory);

            // NOTE(review): when both filters are null this query has no
            // clauses; presumably it then matches no documents - confirm
            // whether a match-all query was intended.
            BooleanQuery booleanQuery = new BooleanQuery();
            if (resourceTypeFilter != null) {
                org.apache.lucene.search.Query resourceTypeQuery = new QueryParser(
                        Version.LUCENE_36, SlingConstants.PROPERTY_RESOURCE_TYPE, analyzer).parse(
                        resourceTypeFilter);
                booleanQuery.add(resourceTypeQuery, Occur.MUST);
            }
            if (componentFilter != null) {
                // NOTE(review): findMLT escapes with escapeQuery and parses
                // with the standard analyzer instead - confirm which is right.
                String escapedComponentFilter = QueryParser.escape(componentFilter);
                org.apache.lucene.search.Query componentQuery = new QueryParser(
                        Version.LUCENE_36, "component", keywordAnalyzer).parse(
                        escapedComponentFilter);
                booleanQuery.add(componentQuery, Occur.MUST);
            }

            FacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
            FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams);
            facetSearchParams.addFacetRequest(new CountFacetRequest(
                    new CategoryPath(fieldName), count));
            TopScoreDocCollector topDocsCollector = TopScoreDocCollector.create(10, true);
            FacetsCollector facetsCollector = new FacetsCollector(facetSearchParams, index, taxo);
            searcher.search(booleanQuery, MultiCollector.wrap(topDocsCollector, facetsCollector));

            List<FacetResult> res = facetsCollector.getFacetResults();
            Map<String, Integer> result = new HashMap<String, Integer>();
            for (FacetResult fRes : res) {
                for (FacetResultNode n : fRes.getFacetResultNode().getSubResults()) {
                    result.put(n.getLabel().lastComponent(), (int) n.getValue());
                }
            }
            return result;
        } catch (FileNotFoundException e) {
            log.warn("Empty search collection.", e);
            return null;
        } catch (IOException e) {
            throw new RepositoryException(e);
        } catch (ParseException e) {
            throw new RepositoryException(e);
        } finally {
            // The readers were previously never closed.
            closeQuietly(searcher);
            closeQuietly(taxo);
            closeQuietly(index);
        }
    }

    /**
     * Runs a "more like this" search for the given text.
     *
     * @param target             text to find similar documents for
     * @param resourceTypeFilter query text parsed against the Sling resource
     *                           type field, or {@code null}
     * @param componentFilter    value matched (after escaping) against the
     *                           {@code component} field, or {@code null}
     * @param mltFields          index fields used for the similarity query
     * @param maxResults         maximum number of hits to return
     * @param minTermFreq        minimum term frequency passed to
     *                           {@link MoreLikeThis}
     * @param minDocFreq         minimum document frequency passed to
     *                           {@link MoreLikeThis}
     * @return the {@code :path} field of each hit, in hit order;
     *         {@code null} when the index files are missing
     * @throws RepositoryException wrapping any I/O or query parse failure
     */
    public List findMLT(String target, String resourceTypeFilter,
            String componentFilter, String[] mltFields, int maxResults,
            int minTermFreq, int minDocFreq)
            throws RepositoryException {
        IndexReader index = null;
        IndexSearcher searcher = null;
        try {
            index = IndexReader.open(directory);
            // Search through the reader opened above instead of opening a
            // second reader on the same directory.
            searcher = new IndexSearcher(index);

            MoreLikeThis mlt = new MoreLikeThis(index);
            mlt.setFieldNames(mltFields);
            mlt.setMinTermFreq(minTermFreq);
            mlt.setMinDocFreq(minDocFreq);
            org.apache.lucene.search.Query query = mlt.like(new StringReader(target));

            QueryWrapperFilter additionalFilters = null;
            if (resourceTypeFilter != null || componentFilter != null) {
                BooleanQuery booleanQuery = new BooleanQuery();
                if (resourceTypeFilter != null) {
                    org.apache.lucene.search.Query resourceTypeQuery = new QueryParser(
                            Version.LUCENE_36, SlingConstants.PROPERTY_RESOURCE_TYPE, analyzer).parse(
                            resourceTypeFilter);
                    booleanQuery.add(resourceTypeQuery, Occur.MUST);
                }
                if (componentFilter != null) {
                    String escapedComponentFilter = escapeQuery(componentFilter);
                    org.apache.lucene.search.Query componentQuery = new QueryParser(
                            Version.LUCENE_36, "component", analyzer).parse(
                            escapedComponentFilter);
                    booleanQuery.add(componentQuery, Occur.MUST);
                }
                additionalFilters = new QueryWrapperFilter(booleanQuery);
            }

            TopDocs topDocs = additionalFilters == null
                    ? searcher.search(query, maxResults)
                    : searcher.search(query, additionalFilters, maxResults);

            String pathField = ":path".intern();
            List<String> results = new LinkedList<String>();
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                Document doc = searcher.doc(scoreDoc.doc);
                results.add(doc.get(pathField));
            }
            return results;
        } catch (FileNotFoundException e) {
            log.warn("Empty search collection.", e);
            return null;
        } catch (IOException e) {
            throw new RepositoryException(e);
        } catch (ParseException e) {
            throw new RepositoryException(e);
        } finally {
            // The reader and searcher were previously never closed.
            closeQuietly(searcher);
            closeQuietly(index);
        }
    }

    /**
     * Builds a query object model for the given statement against this
     * collection's index.
     *
     * @param session   session supplying the value factory
     * @param statement query statement
     * @param language  must be {@code JCR_SQL2} or {@code JCR_JQOM}
     * @return the query built by {@link SQL2QOMBuilder}
     * @throws RepositoryException if the language is not supported or the
     *                             statement cannot be turned into a query
     */
    public Query createQuery(Session session, String statement, String language)
            throws RepositoryException {
        if (!JCR_SQL2.equals(language) && !JCR_JQOM.equals(language)) {
            throw new RepositoryException("Query language not supported: "
                    + language);
        }
        QueryObjectModelFactory factory = new QueryObjectModelFactoryImpl(
                directory, analyzer, session);
        SQL2QOMBuilder b = new SQL2QOMBuilder();
        return b.createQueryObjectModel(statement, factory,
                session.getValueFactory());
    }

    /**
     * Queues the given paths for indexing and wakes the worker thread if
     * anything is pending. Paths that start with this collection's own
     * directory path, or that the index configuration excludes, are skipped.
     *
     * @param paths repository paths (strings) that changed
     */
    public synchronized void update(Set paths) {
        int skip = 0;
        // The parameter is declared as a raw Set, so each element is cast
        // explicitly.
        for (Object entry : paths) {
            String p = (String) entry;
            if (p.startsWith(path) || !indexConfig.shouldInclude(p)) {
                skip++;
                continue;
            }
            queue.add(p);
        }
        int index = queue.size();
        log.debug(
                "Lucene SearchCollection pinged, will index {}, will skip {}.",
                index, skip);
        if (index > 0) {
            run();
        }
    }

    /**
     * Signals the worker thread that there is work to do. Does nothing once
     * the collection has been stopped.
     */
    public synchronized void run() {
        if (!running) {
            return;
        }
        synchronized (execSync) {
            //Notify worker thread that there is new request for work.
            requestSeq++;
            execSync.notify();
        }
    }

    /**
     * Stops the worker thread, waiting at most 10 seconds for it to finish,
     * and then logs out the session.
     *
     * <p>The instance monitor is deliberately not held while waiting: the
     * worker needs it to drain the queue, and holding it would also block
     * callers of {@link #update(Set)} for the whole wait.
     */
    public void stop() {
        running = false;
        synchronized (execSync) {
            //Bump the request counter so a worker that is about to wait sees
            //a pending request and re-checks "running" instead of sleeping.
            requestSeq++;
            execSync.notifyAll();
        }
        try {
            //Wait at most 10 seconds for worker thread to shut down.
            wThread.join(10000);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("Error waiting for working thread to shut down.", e);
        }
        if (wThread.isAlive()) {
            log.warn("Worker thread is still running; logging out the session anyway.");
        }
        // cleanup
        session.logout();
    }

    /**
     * Re-indexes the given paths: for each path the documents matching its
     * subtree query are deleted, and if the node still exists a fresh
     * document is added. I/O failures are logged and the batch is dropped.
     *
     * @param paths repository paths to re-index
     */
    private void doUpdate(Collection<String> paths) {
        if (paths.isEmpty()) {
            return;
        }
        long time = System.currentTimeMillis();
        try {
            IndexWriter writer = new IndexWriter(directory, analyzer,
                    MaxFieldLength.LIMITED);
            TaxonomyWriter taxo = null;
            try {
                writer.setRAMBufferSizeMB(48);
                CategoryDocumentBuilder categoryDocBuilder = null;
                if (null != taxoDirectory) {
                    // Opened inside the try so that a failure here still
                    // closes the index writer.
                    taxo = new DirectoryTaxonomyWriter(taxoDirectory, OpenMode.CREATE_OR_APPEND);
                    categoryDocBuilder = new CategoryDocumentBuilder(taxo);
                }
                for (String nodePath : paths) {
                    writer.deleteDocuments(indexer.getSubtreeQuery(nodePath));
                    Node node = null;
                    try {
                        node = session.getNode(nodePath);
                    } catch (PathNotFoundException e) {
                        // delete / move event, just ignore
                    } catch (RepositoryException e) {
                        log.warn(
                                "Lucene SearchCollection problem, ignoring update: "
                                        + nodePath, e);
                    }
                    try {
                        if (node != null) {
                            Document doc = indexer.createDocument(node, categoryDocBuilder);
                            if (null != taxo) {
                                taxo.commit();
                            }
                            if (doc != null) {
                                writer.addDocument(doc);
                            }
                        }
                    } catch (RepositoryException e) {
                        log.warn(
                                "Lucene SearchCollection problem, ignoring update: "
                                        + nodePath, e);
                    }
                }
            } finally {
                try {
                    writer.close();
                } finally {
                    // Close the taxonomy writer even if closing the index
                    // writer failed.
                    if (null != taxo) {
                        taxo.close();
                    }
                }
                log.debug("Lucene SearchCollection done indexing, took {} ms.",
                        System.currentTimeMillis() - time);
            }
        } catch (IOException e) {
            log.warn("Lucene SearchCollection problem, ignoring updates: "
                    + paths, e);
        }
    }

    /**
     * Removes and returns every queued path.
     *
     * @return snapshot of the queue in insertion order; the queue is empty
     *         afterwards
     */
    private synchronized Collection<String> getPendingPaths() {
        ArrayList<String> paths = new ArrayList<String>(queue);
        queue.clear();
        return paths;
    }

    /**
     * Closes a resource, logging instead of propagating a failure so that
     * cleanup never masks the outcome of the operation being cleaned up.
     *
     * @param resource resource to close, may be {@code null}
     */
    private static void closeQuietly(java.io.Closeable resource) {
        if (resource == null) {
            return;
        }
        try {
            resource.close();
        } catch (IOException e) {
            log.warn("Error closing Lucene resource.", e);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy