org.apache.jetspeed.search.lucene.SearchEngineImpl Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of jetspeed-search Show documentation
Jetspeed-2 Search Component
There is a newer version: 2.3.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jetspeed.search.lucene;

import java.io.File;
import java.io.IOException;
import java.net.URL;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.commons.collections.MultiHashMap;
import org.apache.commons.collections.MultiMap;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.jetspeed.search.BaseParsedObject;
import org.apache.jetspeed.search.HandlerFactory;
import org.apache.jetspeed.search.ObjectHandler;
import org.apache.jetspeed.search.ParsedObject;
import org.apache.jetspeed.search.SearchEngine;
import org.apache.jetspeed.search.SearchResults;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Searcher;

/**
 * @author Jeremy Ford
 *
 */
public class SearchEngineImpl implements SearchEngine
{
    protected final static Log log = LogFactory.getLog(SearchEngineImpl.class);
    private File rootIndexDir = null;
    private String analyzerClassName = null;
    private boolean optimizeAfterUpdate = true;
    private HandlerFactory handlerFactory;
    
    private static final int KEYWORD = 0;
    private static final int TEXT = 1;
    
    public SearchEngineImpl(String indexRoot, String analyzerClassName, boolean optimzeAfterUpdate, HandlerFactory handlerFactory)
    throws Exception
    {
        //assume it's full path for now
        rootIndexDir = new File(indexRoot);
        this.analyzerClassName = analyzerClassName;
        this.optimizeAfterUpdate = optimzeAfterUpdate;
        this.handlerFactory = handlerFactory;
        
        try
        {
            Searcher searcher = null;
            searcher = new IndexSearcher(rootIndexDir.getPath());
            searcher.close();
        }
        catch (Exception e)
        {
            if (rootIndexDir.exists())
            {
                log.error("Failed to open Portal Registry indexes in " + rootIndexDir.getPath(), e);
            }
            try
            {
                rootIndexDir.delete();
                rootIndexDir.mkdirs();
                
                IndexWriter indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), true);
                indexWriter.close();
                indexWriter = null;
                log.warn("Re-created Lucene Index in " + rootIndexDir.getPath());
            }
            catch (Exception e1)
            {
                String message = "Cannot RECREATE Portlet Registry indexes in "  + rootIndexDir.getPath();
                log.error(message, e1);
                throw new Exception(message);
            }
        }
    }

    /* (non-Javadoc)
     * @see org.apache.jetspeed.search.SearchEnging#add(java.lang.Object)
     */
    public boolean add(Object o)
    {
        Collection c = new ArrayList(1);
        c.add(o);

        return add(c);
    }

    /* (non-Javadoc)
     * @see org.apache.jetspeed.search.SearchEnging#add(java.util.Collection)
     */
    public synchronized boolean add(Collection objects)
    {
        boolean result = false;
        
        IndexWriter indexWriter;
        try
        {
            indexWriter = new IndexWriter(rootIndexDir, newAnalyzer(), false);
        }
        catch (IOException e)
        {
            //logger.error("Error while creating index writer. Skipping add...", e);
            return result;
        }

        Iterator it = objects.iterator();
        while (it.hasNext()) 
        {
            Object o = it.next();
            // Look up appropriate handler
            ObjectHandler handler = null;
            try
            {
                handler = handlerFactory.getHandler(o);
            }
            catch (Exception e)
            {
                //logger.error("Failed to create hanlder for object " + o.getClass().getName());
                continue;
            }

            // Parse the object
            ParsedObject parsedObject = handler.parseObject(o);

            // Create document
            Document doc = new Document();

            // Populate document from the parsed object
            if (parsedObject.getKey() != null)
            {                
                doc.add(new Field(ParsedObject.FIELDNAME_KEY, parsedObject.getKey(), Field.Store.YES, Field.Index.UN_TOKENIZED));
            }
            if (parsedObject.getType() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_TYPE, parsedObject.getType(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if (parsedObject.getTitle() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_TITLE, parsedObject.getTitle(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if (parsedObject.getDescription() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_DESCRIPTION, parsedObject.getDescription(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if (parsedObject.getContent() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_CONTENT, parsedObject.getContent(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if (parsedObject.getLanguage() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_LANGUAGE, parsedObject.getLanguage(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if (parsedObject.getURL() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_URL, parsedObject.getURL().toString(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            if(parsedObject.getClassName() != null)
            {
                doc.add(new Field(ParsedObject.FIELDNAME_CLASSNAME, parsedObject.getClassName(), Field.Store.YES, Field.Index.TOKENIZED));
            }
            
            String[] keywordArray = parsedObject.getKeywords();
            if(keywordArray != null)
            {
            	for(int i=0; i