All downloads are free. Search and download functionality uses the official Maven repository.

com.adobe.cq.searchcollections.lucene.LuceneSearchCollection Maven / Gradle / Ivy

/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/

package com.adobe.cq.searchcollections.lucene;

import static com.adobe.cq.searchcollections.lucene.IndexerUtil.escapeQuery;
import static javax.jcr.query.Query.JCR_JQOM;
import static javax.jcr.query.Query.JCR_SQL2;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;

import javax.jcr.Node;
import javax.jcr.PathNotFoundException;
import javax.jcr.RepositoryException;
import javax.jcr.Session;
import javax.jcr.query.Query;
import javax.jcr.query.qom.QueryObjectModelFactory;

import org.apache.jackrabbit.commons.query.sql2.SQL2QOMBuilder;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.KeywordAnalyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.facet.index.params.DefaultFacetIndexingParams;
import org.apache.lucene.facet.index.params.FacetIndexingParams;
import org.apache.lucene.facet.search.FacetsCollector;
import org.apache.lucene.facet.search.params.CountFacetRequest;
import org.apache.lucene.facet.search.params.FacetSearchParams;
import org.apache.lucene.facet.search.results.FacetResult;
import org.apache.lucene.facet.search.results.FacetResultNode;
import org.apache.lucene.facet.taxonomy.CategoryPath;
import org.apache.lucene.facet.taxonomy.TaxonomyReader;
import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyWriter;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriter.MaxFieldLength;
import org.apache.lucene.index.IndexWriterConfig.OpenMode;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MultiCollector;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopScoreDocCollector;
import org.apache.lucene.search.similar.MoreLikeThis;
import org.apache.lucene.store.SingleInstanceLockFactory;
import org.apache.lucene.util.Version;
import org.apache.sling.api.SlingConstants;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.adobe.cq.searchcollections.api.SearchCollection;

/**
 * {@link SearchCollection} implementation that keeps a Lucene index (and,
 * optionally, a facet taxonomy) inside JCR nodes via {@link JCRDirectory}.
 *
 * <p>Index updates are asynchronous: {@link #update(Set)} queues paths and
 * {@link #run()} wakes a single background worker thread (started by the
 * constructor) which drains the queue and rewrites the affected documents.
 * All index writes in the JVM are serialized through one static lock.
 * {@link #stop()} must be called to terminate the worker and log out of the
 * session.</p>
 *
 * @deprecated This class is marked deprecated; no replacement is named here.
 */
@Deprecated
public class LuceneSearchCollection implements SearchCollection {

    private static final Logger log = LoggerFactory
            .getLogger(LuceneSearchCollection.class);

    /** Stored document field from which {@link #findMLT} reads the node path. */
    private static final String PATH_FIELD = ":path";

    /** Longest time, in milliseconds, {@link #stop()} waits for the worker. */
    private static final long STOP_TIMEOUT_MS = 10000L;

    //To prevent synchronization issues, use this global lock to only do one
    //update at a time.  We should really be using JCR locks, but that doesn't
    //seem to be working even though mix:lockable is used.
    private static final Object writeSync = new Object();

    private final Session session;

    /** Path of the node holding the index; updates below it are skipped. */
    private final String path;

    private final NodeIndexer indexer;

    private final JCRDirectory directory;

    /** Facet taxonomy storage, or {@code null} when facets are not used. */
    private final JCRDirectory taxoDirectory;

    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);

    private final Analyzer keywordAnalyzer = new KeywordAnalyzer();

    private final LuceneSearchCollectionConfig indexConfig;

    private volatile boolean running = true;

    /** Paths waiting to be indexed; guarded by its own monitor. */
    private final Collection<String> queue = new LinkedHashSet<String>();

    private final Thread wThread;

    /** Monitor the worker waits on; also guards {@link #requestCount}. */
    private final Object execSync = new Object();

    /**
     * Number of work requests issued so far; guarded by {@link #execSync}.
     * A counter is used instead of a timestamp so that two requests arriving
     * within the same millisecond are still told apart.
     */
    private long requestCount = 0;

    /**
     * Background loop that sleeps until a new work request is signalled and
     * then indexes whatever paths are pending.
     */
    class WorkerThread implements Runnable {
        /** Value of {@code requestCount} this worker has already served. */
        private long handledCount = 0;

        public void run() {
            while (running) {
                synchronized (execSync) {
                    // Loop guards against spurious wake-ups and re-checks
                    // "running" so stop() can end the wait.
                    while (running && requestCount == handledCount) {
                        try {
                            //Wait until another request for work is received.
                            execSync.wait();
                        } catch (InterruptedException e) {
                            log.error("Error executing execSync.wait().", e);
                        }
                    }
                    if (!running) {
                        return;
                    }
                    handledCount = requestCount;
                }

                try {
                    synchronized (writeSync) {
                        doUpdate(getPendingPaths());
                    }
                } catch (Throwable t) {
                    // Keep the worker alive whatever a single update does.
                    log.error("Error executing doUpdate.", t);
                }
            }
        }
    }

    /**
     * Creates the collection and starts its worker thread.
     *
     * @param directory     node that stores the Lucene index
     * @param taxoDirectory node that stores the facet taxonomy, or
     *                      {@code null} to disable facets
     * @param indexer       converts nodes into Lucene documents
     * @param useJCRLocks   {@code true} to lock through {@link JCRLockFactory}
     *                      (the index node has to be mix:lockable),
     *                      {@code false} for in-JVM locking only
     * @throws RepositoryException if the nodes cannot be accessed
     */
    public LuceneSearchCollection(Node directory, Node taxoDirectory, NodeIndexer indexer, boolean useJCRLocks) throws RepositoryException {
        this.session = directory.getSession();
        this.path = directory.getPath();
        this.indexer = indexer;
        this.directory = new JCRDirectory(directory);
        this.taxoDirectory = (null != taxoDirectory)
                ? new JCRDirectory(taxoDirectory) : null;

        try {
            if (useJCRLocks) {
                // path HAS to be mix:lockable if you want to use the
                // JCRLockFactory
                this.directory.setLockFactory(new JCRLockFactory(this.session,
                        directory.getPath()));
                if (null != this.taxoDirectory) {
                    // NOTE(review): the taxonomy lock factory is built with
                    // the index node's path, not the taxonomy node's path.
                    // Kept as in the original; confirm this is intended.
                    this.taxoDirectory.setLockFactory(new JCRLockFactory(this.session,
                            directory.getPath()));
                }
            } else {
                this.directory.setLockFactory(new SingleInstanceLockFactory());
                if (null != this.taxoDirectory) {
                    this.taxoDirectory.setLockFactory(new SingleInstanceLockFactory());
                }
            }
        } catch (Exception e) {
            // Best effort, as before, but no longer silent.
            log.warn("Unable to configure the Lucene lock factory for "
                    + this.path, e);
        }

        indexConfig = new LuceneSearchCollectionConfig(directory);

        wThread = new Thread(new WorkerThread());
        wThread.start();
    }

    /**
     * Creates a collection without facets, using {@link DefaultNodeIndexer}
     * and in-JVM locking.
     */
    public LuceneSearchCollection(Node directory) throws RepositoryException {
        this(directory, null, new DefaultNodeIndexer(), false);
    }

    /** Creates a collection without facets, using in-JVM locking. */
    public LuceneSearchCollection(Node directory, NodeIndexer indexer)
            throws RepositoryException {
        this(directory, null, indexer, false);
    }

    /** Creates a collection with an optional taxonomy, using in-JVM locking. */
    public LuceneSearchCollection(Node directory,Node taxoDirectory, NodeIndexer indexer)
            throws RepositoryException {
        this(directory, taxoDirectory, indexer, false);
    }

    /**
     * Counts facet values of {@code fieldName} over the documents matching
     * the optional filters.
     *
     * @param fieldName          facet category to count
     * @param resourceTypeFilter Lucene query text matched against the
     *                           resource type field, or {@code null}
     * @param componentFilter    literal value matched against the
     *                           "component" field, or {@code null}
     * @param count              number of facet values requested
     * @return facet label to hit count; {@code null} when no taxonomy is
     *         configured or the index does not exist yet
     * @throws RepositoryException on I/O failure or unparsable filter
     */
    public Map<String, Integer> findFacets(String fieldName, String resourceTypeFilter, String componentFilter, int count) throws RepositoryException{
        if (null == taxoDirectory) {
            return null;
        }
        try {
            IndexReader index = IndexReader.open(directory);
            try {
                IndexSearcher searcher = new IndexSearcher(index);
                try {
                    TaxonomyReader taxo = new DirectoryTaxonomyReader(taxoDirectory);
                    try {
                        String escapedComponentFilter = (componentFilter == null)
                                ? null : QueryParser.escape(componentFilter);
                        org.apache.lucene.search.Query query = buildFilterQuery(
                                resourceTypeFilter, escapedComponentFilter,
                                keywordAnalyzer);
                        if (query == null) {
                            // A BooleanQuery without clauses matches nothing,
                            // so "no filter" has to be spelled as match-all.
                            query = new org.apache.lucene.search.MatchAllDocsQuery();
                        }

                        FacetIndexingParams indexingParams = new DefaultFacetIndexingParams();
                        FacetSearchParams facetSearchParams = new FacetSearchParams(indexingParams);
                        facetSearchParams.addFacetRequest(new CountFacetRequest(
                                new CategoryPath(fieldName), count));

                        FacetsCollector facetsCollector = new FacetsCollector(
                                facetSearchParams, index, taxo);
                        searcher.search(query, facetsCollector);

                        List<FacetResult> facetResults = facetsCollector.getFacetResults();
                        Map<String, Integer> result = new HashMap<String, Integer>();
                        for (FacetResult facetResult : facetResults) {
                            for (FacetResultNode n : facetResult.getFacetResultNode().getSubResults()) {
                                result.put(n.getLabel().lastComponent(),
                                        Integer.valueOf((int) n.getValue()));
                            }
                        }
                        return result;
                    } finally {
                        taxo.close();
                    }
                } finally {
                    searcher.close();
                }
            } finally {
                index.close();
            }
        } catch (FileNotFoundException e) {
            log.warn("Empty search collection.", e);
            return null;
        } catch (IOException e) {
            throw new RepositoryException(e);
        } catch (ParseException e) {
            throw new RepositoryException(e);
        }
    }

    /**
     * Runs a "more like this" search for the given text.
     *
     * @param target             text to find similar documents for
     * @param resourceTypeFilter Lucene query text matched against the
     *                           resource type field, or {@code null}
     * @param componentFilter    value matched against the "component"
     *                           field, or {@code null}
     * @param mltFields          fields the similarity query is built from
     * @param maxResults         maximum number of hits returned
     * @param minTermFreq        minimum term frequency for a term to count
     * @param minDocFreq         minimum document frequency for a term to count
     * @return paths of the matching documents, best hit first; {@code null}
     *         when the index does not exist yet
     * @throws RepositoryException on I/O failure or unparsable filter
     */
    public List<String> findMLT(String target, String resourceTypeFilter,
            String componentFilter, String[] mltFields, int maxResults,
            int minTermFreq, int minDocFreq)
            throws RepositoryException {
        try {
            IndexReader index = IndexReader.open(directory);
            try {
                // Share the single reader instead of opening a second one.
                IndexSearcher searcher = new IndexSearcher(index);
                try {
                    MoreLikeThis mlt = new MoreLikeThis(index);
                    mlt.setFieldNames(mltFields);
                    mlt.setMinTermFreq(minTermFreq);
                    mlt.setMinDocFreq(minDocFreq);
                    org.apache.lucene.search.Query query = mlt.like(new StringReader(target));

                    // NOTE(review): findFacets escapes the component filter
                    // with QueryParser.escape and parses it with the keyword
                    // analyzer; here escapeQuery and the standard analyzer
                    // are used. Kept as is; confirm the difference is wanted.
                    String escapedComponentFilter = (componentFilter == null)
                            ? null : escapeQuery(componentFilter);
                    BooleanQuery filterQuery = buildFilterQuery(
                            resourceTypeFilter, escapedComponentFilter, analyzer);

                    TopDocs topDocs;
                    if (filterQuery == null) {
                        topDocs = searcher.search(query, maxResults);
                    } else {
                        topDocs = searcher.search(query,
                                new QueryWrapperFilter(filterQuery), maxResults);
                    }

                    List<String> results = new ArrayList<String>(topDocs.scoreDocs.length);
                    for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                        Document doc = searcher.doc(scoreDoc.doc);
                        results.add(doc.get(PATH_FIELD));
                    }
                    return results;
                } finally {
                    searcher.close();
                }
            } finally {
                index.close();
            }
        } catch (FileNotFoundException e) {
            log.warn("Empty search collection.", e);
            return null;
        } catch (IOException e) {
            throw new RepositoryException(e);
        } catch (ParseException e) {
            throw new RepositoryException(e);
        }
    }

    /**
     * Builds the conjunction of the optional resource-type and component
     * filters.
     *
     * @param resourceTypeFilter     query text for the resource type field,
     *                               or {@code null}
     * @param escapedComponentFilter already escaped query text for the
     *                               "component" field, or {@code null}
     * @param componentAnalyzer      analyzer used to parse the component text
     * @return the combined query, or {@code null} when both filters are absent
     * @throws ParseException if either filter cannot be parsed
     */
    private BooleanQuery buildFilterQuery(String resourceTypeFilter,
            String escapedComponentFilter, Analyzer componentAnalyzer)
            throws ParseException {
        if (resourceTypeFilter == null && escapedComponentFilter == null) {
            return null;
        }
        BooleanQuery filterQuery = new BooleanQuery();
        if (resourceTypeFilter != null) {
            org.apache.lucene.search.Query resourceTypeQuery = new QueryParser(
                    Version.LUCENE_36, SlingConstants.PROPERTY_RESOURCE_TYPE,
                    analyzer).parse(resourceTypeFilter);
            filterQuery.add(resourceTypeQuery, Occur.MUST);
        }
        if (escapedComponentFilter != null) {
            org.apache.lucene.search.Query componentQuery = new QueryParser(
                    Version.LUCENE_36, "component", componentAnalyzer)
                    .parse(escapedComponentFilter);
            filterQuery.add(componentQuery, Occur.MUST);
        }
        return filterQuery;
    }

    /**
     * Creates a query that is evaluated against this collection's index.
     *
     * @param session   session used for value creation and result access
     * @param statement query statement
     * @param language  {@code JCR-SQL2} or {@code JCR-JQOM}
     * @throws RepositoryException if the language is not one of the two above
     *                             or the statement cannot be turned into a query
     */
    public Query createQuery(Session session, String statement, String language)
            throws RepositoryException {
        if (!JCR_SQL2.equals(language) && !JCR_JQOM.equals(language)) {
            throw new RepositoryException("Query language not supported: "
                    + language);
        }

        QueryObjectModelFactory factory = new QueryObjectModelFactoryImpl(
                directory, analyzer, session);

        SQL2QOMBuilder b = new SQL2QOMBuilder();

        return b.createQueryObjectModel(statement, factory,
                session.getValueFactory());
    }

    /**
     * Queues the given paths for (re)indexing and wakes the worker when the
     * queue is not empty. Paths that start with the index node's own path or
     * that the index configuration excludes are skipped.
     *
     * @param paths repository paths that changed
     */
    public synchronized void update(Set<String> paths) {
        int skip = 0;
        List<String> accepted = new ArrayList<String>(paths.size());
        for (String p : paths) {
            if (p.startsWith(path) || !indexConfig.shouldInclude(p)) {
                skip++;
            } else {
                accepted.add(p);
            }
        }

        int index;
        synchronized (queue) {
            queue.addAll(accepted);
            index = queue.size();
        }

        log.debug(
                "Lucene SearchCollection pinged, will index {}, will skip {}.",
                index, skip);
        if (index > 0) {
            run();
        }
    }

    /**
     * Signals the worker thread that there is work to do. Does nothing once
     * {@link #stop()} has been called.
     */
    public synchronized void run() {
        if (!running) {
            return;
        }
        synchronized (execSync) {
            //Notify worker thread that there is new request for work.
            requestCount++;
            execSync.notifyAll();
        }
    }

    /**
     * Stops the worker thread, waiting at most 10 seconds for it to finish,
     * and then logs out of the session.
     */
    public synchronized void stop() {
        running = false;

        synchronized (execSync) {
            // Wake the worker if it is waiting; it re-checks "running"
            // under this same monitor, so the signal cannot be missed.
            execSync.notifyAll();
        }

        try {
            wThread.join(STOP_TIMEOUT_MS);
        } catch (InterruptedException e) {
            Thread.currentThread().interrupt();
            log.error("Error waiting for working thread to shut down.", e);
        }

        // cleanup
        session.logout();
    }

    /**
     * Re-indexes the given paths: documents under each path are deleted and,
     * if the node still exists, a fresh document is added. I/O failures are
     * logged and the whole batch is dropped.
     */
    private void doUpdate(Collection<String> paths) {
        if (paths.isEmpty()) {
            return;
        }

        long time = System.currentTimeMillis();

        try {
            IndexWriter writer = new IndexWriter(directory, analyzer,
                    MaxFieldLength.LIMITED);
            TaxonomyWriter taxo = null;
            try {
                writer.setRAMBufferSizeMB(48);
                CategoryDocumentBuilder categoryDocBuilder = null;
                if (null != taxoDirectory) {
                    // Opened inside the try so that a failure here still
                    // closes the IndexWriter and releases its write lock.
                    taxo = new DirectoryTaxonomyWriter(taxoDirectory, OpenMode.CREATE_OR_APPEND);
                    categoryDocBuilder = new CategoryDocumentBuilder(taxo);
                }

                for (String nodePath : paths) {
                    indexPath(writer, taxo, categoryDocBuilder, nodePath);
                }
            } finally {
                try {
                    writer.close();
                } finally {
                    // Runs even when writer.close() throws.
                    if (null != taxo) {
                        taxo.close();
                    }
                    log.debug("Lucene SearchCollection done indexing, took {} ms.",
                            System.currentTimeMillis() - time);
                }
            }
        } catch (IOException e) {
            log.warn("Lucene SearchCollection problem, ignoring updates: "
                    + paths, e);
        }
    }

    /**
     * Replaces the index entries for one path. Repository problems are
     * logged and skipped; I/O problems are propagated to abort the batch.
     */
    private void indexPath(IndexWriter writer, TaxonomyWriter taxo,
            CategoryDocumentBuilder categoryDocBuilder, String nodePath)
            throws IOException {
        writer.deleteDocuments(indexer.getSubtreeQuery(nodePath));

        Node node = null;
        try {
            node = session.getNode(nodePath);
        } catch (PathNotFoundException e) {
            // delete / move event, just ignore
        } catch (RepositoryException e) {
            log.warn(
                    "Lucene SearchCollection problem, ignoring update: "
                            + nodePath, e);
        }
        if (node == null) {
            return;
        }

        try {
            Document doc = indexer.createDocument(node, categoryDocBuilder);
            if (null != taxo) {
                // Commit new categories before the document that uses them.
                taxo.commit();
            }
            if (doc != null) {
                writer.addDocument(doc);
            }
        } catch (RepositoryException e) {
            log.warn(
                    "Lucene SearchCollection problem, ignoring update: "
                            + nodePath, e);
        }
    }

    /**
     * Atomically removes and returns every queued path. Uses the queue's own
     * monitor so the worker never has to wait for {@link #stop()}.
     */
    private Collection<String> getPendingPaths() {
        synchronized (queue) {
            List<String> paths = new ArrayList<String>(queue);
            queue.clear();
            return paths;
        }
    }
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy