/* *##% 
 * ToPIA :: Service Index
 * Copyright (C) 2004 - 2009 CodeLutin
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/>.
 * ##%*/

/**
 * LuceneIndexer.java
 *
 * Created: 8 Oct. 06 15:48:37
 *
 * @author poussin
 * @version $Revision: 1459 $
 *
 * Last update: $Date: 2009-05-16 09:56:47 +0200 (Sat, 16 May 2009) $
 * by : $Author: tchemit $
 */
package org.nuiton.topia.index;

import java.io.File;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.Properties;
import java.util.SortedSet;
import java.util.TreeSet;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.NIOFSDirectory;
import org.apache.lucene.util.Version;
import org.hibernate.HibernateException;
import org.hibernate.metadata.ClassMetadata;
import org.nuiton.topia.TopiaNotFoundException;
import org.nuiton.topia.framework.TopiaContextImplementor;
import org.nuiton.topia.persistence.TopiaId;

/**
 * To use this indexer you must have two properties defined in the config file:
 * <ul>
 * <li>topia.index.engin=org.nuiton.topia.index.LuceneIndexer</li>
 * <li>topia.index.lucene.directory=[/path/to/index/directory]</li>
 * </ul>
 *
 * Usage:
 * <pre>
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search("something");
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search(
 *         "class:org.nuiton.chorem.entities.Person name:poussin");
 * // or the equivalent with a map
 * Map m = new HashMap();
 * m.put("class", "org.nuiton.chorem.entities.Person");
 * m.put("name", "poussin");
 * SortedSet&lt;IndexEntry&gt; result = context.getIndexEngin().search(m);
 * </pre>
 *
 * @author poussin
 */
public class LuceneIndexer implements TopiaIndexImplementor {

    /** To use the log facility, just put in your code: log.info("..."). */
    private static Log log = LogFactory.getLog(LuceneIndexer.class);

    private static final String TOPIA_ID = "topiaId";

    protected File directory = null;

    protected TopiaContextImplementor context;

    /**
     * Holds the objects to reindex because they were created, modified or
     * deleted. key: id, value: field values, or null for a deletion.
     */
    protected Map<Object, Object[]> indexationMap = new HashMap<Object, Object[]>();

    /** @see org.nuiton.topia.framework.TopiaService#getServiceName() */
    @Override
    public String getServiceName() {
        return TopiaIndexService.SERVICE_NAME;
    }

    /** @see org.nuiton.topia.framework.TopiaService#getPersistenceClasses() */
    @Override
    public Class[] getPersistenceClasses() {
        return null;
    }

    @Override
    public void recordForIndexation(Object id, Object[] fields) {
        if (id == null) {
            log.warn("Id must not be null, this object will not be indexed");
        } else {
            indexationMap.put(id, fields);
        }
    }

    /** @see org.nuiton.topia.framework.TopiaService#init(org.nuiton.topia.framework.TopiaContextImplementor) */
    @Override
    public boolean preInit(TopiaContextImplementor context) {
        return true;
    }

    @Override
    public boolean postInit(TopiaContextImplementor context) {
        this.context = context;
        Properties prop = context.getConfig();
        String dirname = prop.getProperty("topia.index.lucene.directory");
        directory = new File(dirname);
        directory.mkdirs();
        return true;
    }

    @Override
    public void clearLastRecordedIndexation() {
        indexationMap.clear();
    }

    @Override
    public void doIndexation() {
        try {
            Directory indexDirectory = new NIOFSDirectory(directory);
            // TODO maybe put in configuration to allow localized analysers
            Analyzer analyzer = new SimpleAnalyzer();

            boolean create = false;
            if (!IndexReader.indexExists(indexDirectory)) {
                // the index does not exist yet, force its creation
                create = true;
            } else {
                // remove the stale entries of every recorded object; the
                // reader must be writable for deleteDocuments to work
                // (read-only is the Lucene 3.0 default)
                IndexReader reader = IndexReader.open(indexDirectory, false);
                for (Map.Entry<Object, Object[]> e : indexationMap.entrySet()) {
                    String id = e.getKey().toString();
                    removeIndex(reader, id);
                }
                reader.close();
            }

            IndexWriter writer = new IndexWriter(indexDirectory, analyzer,
                    create, IndexWriter.MaxFieldLength.LIMITED);
            for (Map.Entry<Object, Object[]> e : indexationMap.entrySet()) {
                String id = e.getKey().toString();
                Object[] fields = e.getValue();
                // a null value means the object was deleted: nothing to re-add
                if (fields != null) {
                    try {
                        index(writer, id, fields);
                    } catch (HibernateException eee) {
                        if (log.isWarnEnabled()) {
                            log.warn("Can't index: " + id);
                            if (log.isDebugEnabled()) {
                                log.debug("StackTrace", eee);
                            }
                        }
                    } catch (TopiaNotFoundException eee) {
                        if (log.isWarnEnabled()) {
                            log.warn("Can't index: " + id);
                            if (log.isDebugEnabled()) {
                                log.debug("StackTrace", eee);
                            }
                        }
                    }
                }
            }
            writer.close();
        } catch (IOException eee) {
            if (log.isWarnEnabled()) {
                log.warn("Can't index");
                if (log.isDebugEnabled()) {
                    log.debug("StackTrace", eee);
                }
            }
        }
    }
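    /*
     * A minimal sketch of the indexing lifecycle above (illustration only:
     * "indexer", "entity" and "fieldValues" are hypothetical, and
     * getTopiaId() is assumed to return the entity's topiaId):
     *
     *   // created or modified entity: record its current field values
     *   indexer.recordForIndexation(entity.getTopiaId(), fieldValues);
     *   // deleted entity: a null value marks the id for removal only
     *   indexer.recordForIndexation(deleted.getTopiaId(), null);
     *   // rewrite the Lucene index, then drop the recorded changes
     *   indexer.doIndexation();
     *   indexer.clearLastRecordedIndexation();
     */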
    /**
     * @param writer the index writer to add the document to
     * @param id     the identifier of the object to index
     * @param fields the field values of the object
     * @throws IOException
     * @throws TopiaNotFoundException
     * @throws HibernateException
     */
    private void index(IndexWriter writer, String id, Object[] fields)
            throws IOException, HibernateException, TopiaNotFoundException {
        String classname = TopiaId.getClassNameAsString(id);
        ClassMetadata cm = context.getHibernateFactory().getClassMetadata(
                classname + "Impl");
        String[] names = cm.getPropertyNames();

        Document doc = new Document();
        doc.add(new Field("class", classname, Store.YES, Index.ANALYZED));
        doc.add(new Field("topiaId", id, Store.YES, Index.ANALYZED));

        StringBuilder all = new StringBuilder();
        for (int i = 0; i < fields.length; i++) {
            String name = names[i];
            Object o = fields[i];
            if (o != null) {
                String val = String.valueOf(o);
                doc.add(new Field(name, val, Store.YES, Index.ANALYZED));
                all.append(val);
                all.append(" ");
            }
        }
        if (all.length() > 0) {
            // "__all__" aggregates all field values to support unqualified queries
            doc.add(new Field("__all__", all.toString(), Store.NO, Index.ANALYZED));
        }
        writer.addDocument(doc);
    }

    /**
     * @param id the identifier of the object to remove from the index
     * @throws IOException
     */
    private void removeIndex(IndexReader reader, String id) throws IOException {
        Term term = new Term(TOPIA_ID, id);
        reader.deleteDocuments(term);
    }

    public SortedSet search(String queryText) {
        TopDocs topDocs = null;
        Searcher searcher = null;
        try {
            Directory indexDirectory = new NIOFSDirectory(directory);
            // TODO maybe put in configuration to allow localized analysers
            Analyzer analyzer = new SimpleAnalyzer();
            if (IndexReader.indexExists(indexDirectory)) {
                searcher = new IndexSearcher(indexDirectory);
                if (queryText == null || queryText.length() == 0) {
                    // the query string is empty
                    log.debug("Empty query, no result to return.");
                } else {
                    QueryParser parser = new QueryParser(Version.LUCENE_30,
                            "__all__", analyzer);
                    Query luceneQuery = parser.parse(queryText);
                    if (log.isDebugEnabled()) {
                        log.debug("Searching for: " + luceneQuery.toString());
                    }
                    // TODO improve using doc collectors
                    topDocs = searcher.search(luceneQuery, null /*filter*/, 100);
                }
            } else {
                // the index has not been created yet
                if (log.isDebugEnabled()) {
                    log.debug("No index yet, no result to return!");
                }
            }
        } catch (IOException ioe) {
            if (log.isDebugEnabled()) {
                log.debug(ioe.getMessage(), ioe);
            }
        } catch (ParseException pe) {
            if (log.isDebugEnabled()) {
                log.debug(pe.getMessage(), pe);
            }
        }

        // collect the results found; the searcher may only be closed once the
        // matching documents have been read from it
        TreeSet result = new TreeSet();
        if (searcher != null && topDocs != null) {
            for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                try {
                    Document doc = searcher.doc(scoreDoc.doc);
                    String topiaId = doc.get(TOPIA_ID);
                    IndexEntry ie = new IndexEntry(scoreDoc.score, topiaId);
                    result.add(ie);
                } catch (IOException eee) {
                    if (log.isWarnEnabled()) {
                        log.warn("Can't get result");
                        if (log.isDebugEnabled()) {
                            log.debug("StackTrace is", eee);
                        }
                    }
                }
            }
        }
        if (searcher != null) {
            try {
                searcher.close();
            } catch (IOException eee) {
                if (log.isDebugEnabled()) {
                    log.debug("Can't close searcher", eee);
                }
            }
        }
        return result;
    }

    /**
     * @param query Map that contains a field name as key and the query for
     *              that field as value
     * @return the matching entries, sorted by score
     */
    @Override
    public SortedSet search(Map query) {
        // build the lucene query string
        StringBuilder queryText = new StringBuilder();
        Iterator keys = query.keySet().iterator();
        while (keys.hasNext()) {
            String key = (String) keys.next();
            String value = (String) query.get(key);
            if (value != null) {
                // split the value into words, otherwise lucene searches for
                // the whole string instead of the individual words
                String[] tokens = value.split("\\s");
                for (String token : tokens) {
                    if (token != null && !token.equals("")) {
                        queryText.append(key).append(":").append(token).append(" ");
                    }
                }
            }
        }
        // return the results found
        return search(queryText.toString());
    }
}
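/*
 * Illustration of how search(Map) builds its query string (the values below
 * are made up): the map
 *
 *   {"class" -> "org.nuiton.chorem.entities.Person", "name" -> "benoit poussin"}
 *
 * is tokenized on whitespace and becomes
 *
 *   "class:org.nuiton.chorem.entities.Person name:benoit name:poussin "
 *
 * which search(String) then hands to the QueryParser, matching the per-field
 * terms written by index(IndexWriter, String, Object[]).
 */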


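/*
 * Sketch of the matching entries in the ToPIA config file read by postInit
 * (the directory path is a made-up placeholder):
 *
 *   topia.index.engin=org.nuiton.topia.index.LuceneIndexer
 *   topia.index.lucene.directory=/var/data/myapp/lucene
 */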

