/* *##% 
 * ToPIA :: Service Index
 * Copyright (C) 2004 - 2009 CodeLutin
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * ##%*/

/**
 * LuceneIndexer.java
 *
 * Created: 8 Oct. 06 15:48:37
 *
 * @author poussin
 * @version $Revision: 1459 $
 *
 * Last update: $Date: 2009-05-16 09:56:47 +0200 (Sat, 16 May 2009) $
 * by : $Author: tchemit $
 */
package org.nuiton.topia.index;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.hibernate.HibernateException;
import org.hibernate.metadata.ClassMetadata;
import org.nuiton.topia.TopiaNotFoundException;
import org.nuiton.topia.framework.TopiaContextImplementor;
import org.nuiton.topia.persistence.TopiaId;

/**
 * To use this indexer you must have two properties defined in the config file:
 * <ul>
 * <li>topia.index.engin=org.nuiton.topia.index.LuceneIndexer</li>
 * <li>topia.index.lucene.directory=[/path/to/index/directory]</li>
 * </ul>
 *
 * Usage:
 * <pre>
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search("something");
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search(
 *         "class:org.nuiton.chorem.entities.Person name:poussin");
 * // or the equivalent with a map
 * Map m = new HashMap();
 * m.put("class", "org.nuiton.chorem.entities.Person");
 * m.put("name", "poussin");
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search(m);
 * </pre>
 *
 * @author poussin
 */
public class LuceneIndexer implements TopiaIndexImplementor {

    /** To use the log facility, just put in your code: log.info("..."). */
    private static Log log = LogFactory.getLog(LuceneIndexer.class);

    private static final String TOPIA_ID = "topiaId";

    protected File directory = null;

    protected TopiaContextImplementor context;

    /**
     * Holds the objects to reindex because they were created, modified or
     * deleted. key: id, value: field values, or null for a deletion.
     */
    protected Map<Object, Object[]> indexationMap = new HashMap<Object, Object[]>();

    /** @see org.nuiton.topia.framework.TopiaService#getServiceName() */
    @Override
    public String getServiceName() {
        return TopiaIndexService.SERVICE_NAME;
    }

    /** @see org.nuiton.topia.framework.TopiaService#getPersistenceClasses() */
    @Override
    public Class[] getPersistenceClasses() {
        return null;
    }

    @Override
    public void recordForIndexation(Object id, Object[] fields) {
        if (id == null) {
            log.warn("Id must not be null, this object will not be indexed");
        } else {
            indexationMap.put(id, fields);
        }
    }

    /** @see org.nuiton.topia.framework.TopiaService#init(org.nuiton.topia.framework.TopiaContextImplementor) */
    @Override
    public boolean preInit(TopiaContextImplementor context) {
        return true;
    }

    @Override
    public boolean postInit(TopiaContextImplementor context) {
        this.context = context;
        Properties prop = context.getConfig();
        String dirname = prop.getProperty("topia.index.lucene.directory");
        directory = new File(dirname);
        directory.mkdirs();
        return true;
    }

    @Override
    public void clearLastRecordedIndexation() {
        indexationMap.clear();
    }

    @Override
    public void doIndexation() {
        try {
            Directory indexDirectory = new NIOFSDirectory(directory);
            // TODO maybe put in configuration to allow localized analysers
            Analyzer analyzer = new SimpleAnalyzer();

            boolean create = false;
            if (!IndexReader.indexExists(indexDirectory)) {
                // the index does not exist yet, force its creation
                create = true;
            } else {
                // remove the stale entries of every recorded object; the
                // reader must be writable for deleteDocuments to work
                // (read-only is the Lucene 3.0 default)
                IndexReader reader = IndexReader.open(indexDirectory, false);
                for (Map.Entry<Object, Object[]> e : indexationMap.entrySet()) {
                    String id = e.getKey().toString();
                    removeIndex(reader, id);
                }
                reader.close();
            }

            IndexWriter writer = new IndexWriter(indexDirectory, analyzer,
                    create, IndexWriter.MaxFieldLength.LIMITED);
            for (Map.Entry<Object, Object[]> e : indexationMap.entrySet()) {
                String id = e.getKey().toString();
                Object[] fields = e.getValue();
                // a null value means the object was deleted: nothing to re-add
                if (fields != null) {
                    try {
                        index(writer, id, fields);
                    } catch (HibernateException eee) {
                        if (log.isWarnEnabled()) {
                            log.warn("Can't index: " + id);
                            if (log.isDebugEnabled()) {
                                log.debug("StackTrace", eee);
                            }
                        }
                    } catch (TopiaNotFoundException eee) {
                        if (log.isWarnEnabled()) {
                            log.warn("Can't index: " + id);
                            if (log.isDebugEnabled()) {
                                log.debug("StackTrace", eee);
                            }
                        }
                    }
                }
            }
            writer.close();
        } catch (IOException eee) {
            if (log.isWarnEnabled()) {
                log.warn("Can't index");
                if (log.isDebugEnabled()) {
                    log.debug("StackTrace", eee);
                }
            }
        }
    }
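    /*
     * A minimal sketch of the indexing lifecycle above (illustration only:
     * "indexer", "entity" and "fieldValues" are hypothetical, and
     * getTopiaId() is assumed to return the entity's topiaId):
     *
     *   // created or modified entity: record its current field values
     *   indexer.recordForIndexation(entity.getTopiaId(), fieldValues);
     *   // deleted entity: a null value marks the id for removal only
     *   indexer.recordForIndexation(deleted.getTopiaId(), null);
     *   // rewrite the Lucene index, then drop the recorded changes
     *   indexer.doIndexation();
     *   indexer.clearLastRecordedIndexation();
     */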
    /**
     * @param writer the index writer to add the document to
     * @param id     the identifier of the object to index
     * @param fields the field values of the object
     * @throws IOException
     * @throws TopiaNotFoundException
     * @throws HibernateException
     */
    private void index(IndexWriter writer, String id, Object[] fields)
            throws IOException, HibernateException, TopiaNotFoundException {
        String classname = TopiaId.getClassNameAsString(id);
        ClassMetadata cm = context.getHibernateFactory().getClassMetadata(
                classname + "Impl");
        String[] names = cm.getPropertyNames();

        Document doc = new Document();
        doc.add(new Field("class", classname, Store.YES, Index.ANALYZED));
        doc.add(new Field("topiaId", id, Store.YES, Index.ANALYZED));

        StringBuilder all = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            String name = names[i];
            Object o = fields[i];
            if (o != null) {
                String val = String.valueOf(o);
                doc.add(new Field(name, val, Store.YES, Index.ANALYZED));
                all.append(val);
                all.append(" ");
            }
        }
        if (all.length() > 0) {
            // "__all__" aggregates all field values to support unqualified queries
            doc.add(new Field("__all__", all.toString(), Store.NO, Index.ANALYZED));
        }
        writer.addDocument(doc);
    }

    /**
     * @param id the identifier of the object to remove from the index
     * @throws IOException
     */
    private void removeIndex(IndexReader reader, String id) throws IOException {
        Term term = new Term(TOPIA_ID, id);
        reader.deleteDocuments(term);
    }

    public SortedSet search(String queryText) {
        TopDocs topDocs = null;
        Searcher searcher = null;
        try {
            Directory indexDirectory = new NIOFSDirectory(directory);
            // TODO maybe put in configuration to allow localized analysers
            Analyzer analyzer = new SimpleAnalyzer();
            if (IndexReader.indexExists(indexDirectory)) {
                searcher = new IndexSearcher(indexDirectory);
                if (queryText == null || queryText.length() == 0) {
                    // the query string is empty
                    log.debug("Empty query, no result to return.");
                } else {
                    QueryParser parser = new QueryParser(Version.LUCENE_30,
                            "__all__", analyzer);
                    Query luceneQuery = parser.parse(queryText);
                    if (log.isDebugEnabled()) {
                        log.debug("Searching for: " + luceneQuery.toString());
                    }
                    // TODO improve using doc collectors
                    topDocs = searcher.search(luceneQuery, null /*filter*/, 100);
                }
            } else {
                // the index has not been created yet
                if (log.isDebugEnabled()) {
                    log.debug("No index yet, no result to return!");
                }
            }
        } catch (IOException ioe) {
            if (log.isDebugEnabled()) {
                log.debug(ioe.getMessage(), ioe);
            }
        } catch (ParseException pe) {
            if (log.isDebugEnabled()) {
                log.debug(pe.getMessage(), pe);
            }
        }

        // collect the results found; the searcher may only be closed once the
        // matching documents have been read from it
        TreeSet result = new TreeSet();
        if (searcher != null && topDocs != null) {
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                try {
                    Document doc = searcher.doc(scoreDoc.doc);
                    String topiaId = doc.get(TOPIA_ID);
                    IndexEntry ie = new IndexEntry(scoreDoc.score, topiaId);
                    result.add(ie);
                } catch (IOException eee) {
                    if (log.isWarnEnabled()) {
                        log.warn("Can't get result");
                        if (log.isDebugEnabled()) {
                            log.debug("StackTrace is", eee);
                        }
                    }
                }
            }
        }
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException eee) {
                if (log.isDebugEnabled()) {
                    log.debug("Can't close searcher", eee);
                }
            }
        }
        return result;
    }

    /**
     * @param query Map that contains a field name as key and the query for
     *              that field as value
     * @return the matching entries, sorted by score
     */
    @Override
    public SortedSet search(Map query) {
        // build the lucene query string
        StringBuilder queryText = new StringBuilder();
        Iterator keys = query.keySet().iterator();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            String value = (String) query.get(key);
            if (value != null) {
                // split the value into words, otherwise lucene searches for
                // the whole string instead of the individual words
                String[] tokens = value.split("\\s");
                for (String token : tokens) {
                    if (token != null && !token.equals("")) {
                        queryText.append(key).append(":").append(token).append(" ");
                    }
                }
            }
        }
        // return the results found
        return search(queryText.toString());
    }
}
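/*
 * Illustration of how search(Map) builds its query string (the values below
 * are made up): the map
 *
 *   {"class" -> "org.nuiton.chorem.entities.Person", "name" -> "benoit poussin"}
 *
 * is tokenized on whitespace and becomes
 *
 *   "class:org.nuiton.chorem.entities.Person name:benoit name:poussin "
 *
 * which search(String) then hands to the QueryParser, matching the per-field
 * terms written by index(IndexWriter, String, Object[]).
 */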


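/*
 * Sketch of the matching entries in the ToPIA config file read by postInit
 * (the directory path is a made-up placeholder):
 *
 *   topia.index.engin=org.nuiton.topia.index.LuceneIndexer
 *   topia.index.lucene.directory=/var/data/myapp/lucene
 */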

