All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.impetus.kundera.index.LuceneIndexer Maven / Gradle / Ivy

There is a newer version: 2.9
Show newest version
/*******************************************************************************
 * * Copyright 2012 Impetus Infotech.
 *  *
 *  * Licensed under the Apache License, Version 2.0 (the "License");
 *  * you may not use this file except in compliance with the License.
 *  * You may obtain a copy of the License at
 *  *
 *  *      http://www.apache.org/licenses/LICENSE-2.0
 *  *
 *  * Unless required by applicable law or agreed to in writing, software
 *  * distributed under the License is distributed on an "AS IS" BASIS,
 *  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  * See the License for the specific language governing permissions and
 *  * limitations under the License.
 ******************************************************************************/
package com.impetus.kundera.index;

import java.io.File;
import java.io.IOException;
import java.lang.reflect.Field;
import java.util.Collection;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

import javax.persistence.metamodel.EmbeddableType;
import javax.persistence.metamodel.EntityType;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogDocMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

import com.impetus.kundera.Constants;
import com.impetus.kundera.cache.ElementCollectionCacheManager;
import com.impetus.kundera.metadata.model.EntityMetadata;
import com.impetus.kundera.metadata.model.KunderaMetadata;
import com.impetus.kundera.metadata.model.MetamodelImpl;
import com.impetus.kundera.property.PropertyAccessException;
import com.impetus.kundera.property.PropertyAccessorHelper;

/**
 * Provides indexing functionality using lucene library.
 * 
 * @author amresh.singh
 */
public class LuceneIndexer extends DocumentIndexer
{

    /** log for this class. */
    private static Log log = LogFactory.getLog(LuceneIndexer.class);

    /** The w. */
    private static IndexWriter w;

    /** The reader. */
    private static IndexReader reader;

    /** The index. */
    private static Directory index;

    /** The is initialized. */
    private static boolean isInitialized;

    /** The indexer. */
    private static LuceneIndexer indexer;

    /** The ready for commit. */
    private static boolean readyForCommit;

    /** The lucene dir path. */
    private static String luceneDirPath;

    /**
     * Instantiates a new lucene indexer.
     * 
     * @param analyzer
     *            the analyzer
     * @param lucDirPath
     *            the luc dir path
     */
    private LuceneIndexer(Analyzer analyzer, String lucDirPath)
    {
        super(analyzer);
        try
        {
            luceneDirPath = lucDirPath;
            File file = new File(luceneDirPath);
            if (file.exists())
            {
                Directory sourceDir = FSDirectory.open(getIndexDirectory());
                index = new RAMDirectory(sourceDir);
            }
            else
            {
                index = new RAMDirectory();
            }
            /*
             * FSDirectory.open(getIndexDirectory( ))
             */
            // isInitialized
            /* writer */
            w = new IndexWriter(index, new IndexWriterConfig(Version.LUCENE_34, analyzer));
            /* reader = */
            w.setMergePolicy(new LogDocMergePolicy());
            w.setMergeFactor(1000);
            w.getConfig().setRAMBufferSizeMB(32);
        }
        catch (CorruptIndexException e)
        {
            throw new LuceneIndexingException(e);
        }
        catch (LockObtainFailedException e)
        {
            throw new LuceneIndexingException(e);
        }
        catch (IOException e)
        {
            throw new LuceneIndexingException(e);
        }
    }

    /**
     * Gets the single instance of LuceneIndexer.
     * 
     * @param analyzer
     *            the analyzer
     * @param lucDirPath
     *            the luc dir path
     * @return single instance of LuceneIndexer
     */
    public static synchronized LuceneIndexer getInstance(Analyzer analyzer, String lucDirPath)
    {
        // super(analyzer);
        if (indexer == null && lucDirPath != null)
        {
            indexer = new LuceneIndexer(analyzer, lucDirPath);

        }
        return indexer;
    }

    /**
     * Added for HBase support.
     * 
     * @return default index writer
     */
    private IndexWriter getIndexWriter()
    {
        return w;
    }

    /**
     * Returns default index reader.
     * 
     * @return index reader.
     */
    private IndexReader getIndexReader()
    {
        flushInternal();

        if (reader == null)
        {
            try
            {
                if (!isInitialized)
                {
                    Directory sourceDir = FSDirectory.open(getIndexDirectory());
                    sourceDir.copy(sourceDir, index, true);
                    isInitialized = true;
                }
                reader = IndexReader.open(index, true);
            }
            catch (CorruptIndexException e)
            {
                throw new LuceneIndexingException(e);
            }
            catch (IOException e)
            {
                throw new LuceneIndexingException(e);
            }
        }
        return reader;
    }

    /**
     * Creates a Lucene index directory if it does not exist.
     * 
     * @return the index directory
     */
    private File getIndexDirectory()
    {
        File file = new File(luceneDirPath);

        if (!file.isDirectory())
        {
            file.mkdir();
        }
        return file;
    }

    @Override
    public final void index(EntityMetadata metadata, Object object)
    {
        indexDocument(metadata, object, null, null);
        onCommit();
    }

    @Override
    public final void unindex(EntityMetadata metadata, Object id) throws LuceneIndexingException
    {
        log.debug("Unindexing @Entity[" + metadata.getEntityClazz().getName() + "] for key:" + id);
        try
        {
            /* String indexName, Query query, boolean autoCommit */
            getIndexWriter().deleteDocuments(new Term(KUNDERA_ID_FIELD, getKunderaId(metadata, id)));
        }
        catch (CorruptIndexException e)
        {
            throw new LuceneIndexingException(e);
        }
        catch (IOException e)
        {
            throw new LuceneIndexingException(e);
        }
    }

    @SuppressWarnings("deprecation")
    @Override
    public final Map search(String luceneQuery, int start, int count, boolean fetchRelation)
    {

        reader = getIndexReader();
        if (Constants.INVALID == count)
        {
            count = 100;
        }

        log.debug("Searching index with query[" + luceneQuery + "], start:" + start + ", count:" + count);

        // Set entityIds = new HashSet();
        Map indexCol = new HashMap();

        if (reader == null)
        {
            throw new LuceneIndexingException("Index reader is not initialized!");
        }

        IndexSearcher searcher = new IndexSearcher(reader);
        QueryParser qp = new QueryParser(Version.LUCENE_34, DEFAULT_SEARCHABLE_FIELD, new StandardAnalyzer(
                Version.LUCENE_34));

        try
        {
            qp.setLowercaseExpandedTerms(false);
            qp.setAllowLeadingWildcard(true);
            // qp.set
            Query q = qp.parse(luceneQuery);
            TopDocs docs = searcher.search(q, count);

            int nullCount = 0;
            // Assuming Supercol will be null in case if alias only.
            // This is a quick fix
            for (ScoreDoc sc : docs.scoreDocs)
            {
                Document doc = searcher.doc(sc.doc);
                String entityId = doc.get(fetchRelation ? PARENT_ID_FIELD : ENTITY_ID_FIELD);
                String superCol = doc.get(SUPERCOLUMN_INDEX);

                if (superCol == null)
                {
                    superCol = "SuperCol" + nullCount++;
                }
                // In case of super column and association.
                indexCol.put(superCol + "|" + entityId, entityId);
            }
        }
        catch (ParseException e)
        {
            throw new LuceneIndexingException("Error while parsing Lucene Query " + luceneQuery, e);
        }
        catch (IOException e)
        {
            throw new LuceneIndexingException(e);
        }

        reader = null;
        return indexCol;
    }

    /**
     * Indexes document in file system using lucene.
     * 
     * @param metadata
     *            the metadata
     * @param document
     *            the document
     */
    public void indexDocument(EntityMetadata metadata, Document document)
    {
        log.debug("Indexing document: " + document + " for in file system using Lucene");

        IndexWriter w = getIndexWriter();
        try
        {
            // w.setR
            w.addDocument(document);
            // w.optimize();
            // w.commit();
            // w.close();
        }
        catch (CorruptIndexException e)
        {
            log.error("Error while indexing document " + document + " into Lucene. Details:" + e.getMessage());
            throw new LuceneIndexingException("Error while indexing document " + document + " into Lucene.", e);
        }
        catch (IOException e)
        {
            log.error("Error while indexing document " + document + " into Lucene. Details:" + e.getMessage());
            throw new LuceneIndexingException("Error while indexing document " + document + " into Lucene.", e);
        }
    }

    /**
     * Flush internal.
     */
    private void flushInternal()
    {
        try
        {
            if (w != null && readyForCommit)
            {
                w.commit();
                index.copy(index, FSDirectory.open(getIndexDirectory()), false);
                readyForCommit = false;
            }
        }

        catch (CorruptIndexException e)
        {
            log.error("Error while Flushing Lucene Indexes. Details:" + e.getMessage());
            throw new LuceneIndexingException("Error while Flushing Lucene Indexes", e);
        }
        catch (IOException e)
        {
            log.error("Error while Flushing Lucene Indexes" + e.getMessage());
            throw new LuceneIndexingException("Error while Flushing Lucene Indexes", e);
        }
    }

    /**
     * Close of transaction.
     */
    public void close()
    {
        try
        {
            if (w != null && readyForCommit)
            {
                w.commit();
                index.copy(index, FSDirectory.open(getIndexDirectory()), false);
            }
        }

        catch (CorruptIndexException e)
        {
            log.error("Error while closing lucene indexes. Details:" + e.getMessage());
            throw new LuceneIndexingException("Error while closing lucene indexes.", e);
        }
        catch (IOException e)
        {
            log.error("Error while closing lucene indexes. Details:" + e.getMessage());
            throw new LuceneIndexingException("Error while closing lucene indexes.", e);
        }
    }

    /*
     * (non-Javadoc)
     * 
     * @see com.impetus.kundera.index.Indexer#flush()
     */
    @Override
    public void flush()
    {
        /*
         * if (w != null) {
         * 
         * // w.commit(); // w.close(); // index.copy(index,
         * FSDirectory.open(getIndexDirectory()), // false); }
         */
    }

    @Override
    public void index(EntityMetadata metadata, Object object, String parentId, Class clazz)
    {

        indexDocument(metadata, object, parentId, clazz);
        onCommit();
    }

    @Override
    public boolean entityExistsInIndex(Class entityClass)
    {
        String luceneQuery = "+" + ENTITY_CLASS_FIELD + ":" + entityClass.getCanonicalName().toLowerCase();
        Map results;
        try
        {
            results = search(luceneQuery, 0, 10, false);
        }
        catch (LuceneIndexingException e)
        {
            return false;
        }
        if (results == null || results.isEmpty())
        {
            return false;
        }
        else
        {
            return true;
        }
    }

    /**
     * Index document.
     * 
     * @param metadata
     *            the metadata
     * @param object
     *            the object
     * @param parentId
     *            the parent id
     * @param clazz
     *            the clazz
     * @return the document
     */
    private Document indexDocument(EntityMetadata metadata, Object object, String parentId, Class clazz)
    {
        if (!metadata.isIndexable())
        {
            return null;
        }

        log.debug("Indexing @Entity[" + metadata.getEntityClazz().getName() + "] " + object);

        Document currentDoc = null;
        Object embeddedObject = null;
        Object rowKey = null;
        try
        {
            rowKey = PropertyAccessorHelper.getId(object, metadata);
        }
        catch (PropertyAccessException e1)
        {
            throw new LuceneIndexingException("Can't access Primary key property from " + metadata.getEntityClazz(), e1);
        }

        // In case defined entity is Super column family.
        // we need to create seperate lucene document for indexing.

        if (metadata.getType().equals(EntityMetadata.Type.SUPER_COLUMN_FAMILY))
        {

            MetamodelImpl metaModel = (MetamodelImpl) KunderaMetadata.INSTANCE.getApplicationMetadata().getMetamodel(
                    metadata.getPersistenceUnit());

            Map embeddables = metaModel.getEmbeddables(metadata.getEntityClazz());

            // Map embeddedColumnMap =
            // metadata.getEmbeddedColumnsMap();

            Iterator iter = embeddables.keySet().iterator();

            while (iter.hasNext())
            {
                // for(EmbeddableType embeddableAttribute :
                // embeddables.values())
                // {
                String attributeName = iter.next();
                EmbeddableType embeddableAttribute = embeddables.get(attributeName);
                EntityType entityType = metaModel.entity(metadata.getEntityClazz());
                embeddedObject = PropertyAccessorHelper.getObject(object, (Field) entityType
                        .getAttribute(attributeName).getJavaMember());

                if (embeddedObject == null)
                {
                    continue;
                }
                if (embeddedObject instanceof Collection)
                {
                    ElementCollectionCacheManager ecCacheHandler = ElementCollectionCacheManager.getInstance();
                    // Check whether it's first time insert or updation
                    if (ecCacheHandler.isCacheEmpty())
                    { // First time
                      // insert
                        int count = 0;
                        for (Object obj : (Collection) embeddedObject)
                        {
                            String elementCollectionObjectName = attributeName
                                    + Constants.EMBEDDED_COLUMN_NAME_DELIMITER + count;

                            currentDoc = prepareDocumentForSuperColumn(metadata, object, elementCollectionObjectName,
                                    parentId, clazz);
                            indexSuperColumn(metadata, object, currentDoc, obj, embeddableAttribute);
                            count++;
                        }
                    }
                    else
                    {
                        // Updation, Check whether this object is already in
                        // cache, which means we already have an embedded
                        // column
                        // Otherwise we need to generate a fresh embedded
                        // column name
                        int lastEmbeddedObjectCount = ecCacheHandler.getLastElementCollectionObjectCount(rowKey);
                        for (Object obj : (Collection) embeddedObject)
                        {
                            String elementCollectionObjectName = ecCacheHandler.getElementCollectionObjectName(rowKey,
                                    obj);
                            if (elementCollectionObjectName == null)
                            { // Fresh
                              // row
                                elementCollectionObjectName = attributeName + Constants.EMBEDDED_COLUMN_NAME_DELIMITER
                                        + (++lastEmbeddedObjectCount);
                            }

                            currentDoc = prepareDocumentForSuperColumn(metadata, object, elementCollectionObjectName,
                                    parentId, clazz);
                            indexSuperColumn(metadata, object, currentDoc, obj, embeddableAttribute);
                        }
                    }

                }
                else
                {
                    currentDoc = prepareDocumentForSuperColumn(metadata, object, attributeName, parentId, clazz);
                    indexSuperColumn(metadata, object, currentDoc,
                            metaModel.isEmbeddable(embeddedObject.getClass()) ? embeddedObject : object,
                            embeddableAttribute);
                }
            }
        }
        else
        {
            currentDoc = new Document();

            // Add entity class, PK info into document
            addEntityClassToDocument(metadata, object, currentDoc);

            // Add all entity fields(columns) into document
            addEntityFieldsToDocument(metadata, object, currentDoc);

            indexParentKey(parentId, currentDoc, clazz);
            // Store document into index
            indexDocument(metadata, currentDoc);
        }

        return currentDoc;
    }

    /**
     * On commit.
     */
    private void onCommit()
    {
        // TODO: Sadly this required to keep lucene happy, in case of indexing
        // and searching with same entityManager.
        // Other alternative would be to issue flush on each search
        // try
        // {
        // w.commit();
        isInitialized = true;
        readyForCommit = true;
        // }
        // catch (CorruptIndexException e)
        // {
        // throw new IndexingException(e.getMessage());
        // }
        // catch (IOException e)
        // {
        // throw new IndexingException(e.getMessage());
        // }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy