proj.zoie.impl.indexing.internal.LuceneIndexDataLoader

package proj.zoie.impl.indexing.internal;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import it.unimi.dsi.fastutil.longs.Long2ObjectMap;
import it.unimi.dsi.fastutil.longs.Long2ObjectOpenHashMap;
import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
import it.unimi.dsi.fastutil.longs.LongSet;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

import org.apache.log4j.Logger;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.Similarity;

import proj.zoie.api.DataConsumer;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.ZoieIndexReader;
import proj.zoie.api.ZoieSegmentReader;
import proj.zoie.api.indexing.AbstractZoieIndexable;
import proj.zoie.api.indexing.IndexingEventListener;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexable.IndexingReq;

public abstract class LuceneIndexDataLoader<R extends IndexReader> implements DataConsumer<ZoieIndexable>
{
	private static final Logger log = Logger.getLogger(LuceneIndexDataLoader.class);
	protected final Analyzer _analyzer;
	protected final Similarity _similarity;
	protected final SearchIndexManager _idxMgr;
	protected final Comparator<String> _versionComparator;
	private Filter _purgeFilter;

	private final Queue<IndexingEventListener> _lsnrList;

	protected LuceneIndexDataLoader(Analyzer analyzer, Similarity similarity, SearchIndexManager<R> idxMgr,
			Comparator<String> versionComparator, Queue<IndexingEventListener> lsnrList) {
		_analyzer = analyzer;
		_similarity = similarity;
		_idxMgr=idxMgr;
		_versionComparator = versionComparator;
		_purgeFilter = null;
		_lsnrList = lsnrList;
	}
	
	public void setPurgeFilter(Filter purgeFilter){
		_purgeFilter = purgeFilter;
	}

	protected abstract BaseSearchIndex<R> getSearchIndex();
	
    protected abstract void propagateDeletes(LongSet delDocs) throws IOException;
    protected abstract void commitPropagatedDeletes() throws IOException;
    
    private final void purgeDocuments(){
    	if (_purgeFilter!=null){
    		BaseSearchIndex idx = getSearchIndex();
    		IndexReader writeReader = null;
    		log.info("purging docs started...");
    		int count = 0;
    		long start = System.currentTimeMillis();

        ZoieIndexReader<R> reader = null;
    		try{
          synchronized(idx)
          {
            reader = idx.openIndexReader();
            if (reader != null)
              reader.incZoieRef();
          }

    		  writeReader = idx.openIndexReaderForDelete();

    			DocIdSetIterator iter = _purgeFilter.getDocIdSet(reader).iterator();
    			
    			int doc;
    			while((doc = iter.nextDoc()) != DocIdSetIterator.NO_MORE_DOCS){
    				count++;
    				writeReader.deleteDocument(doc);
    			}
    		}
    		catch(Throwable th){
    			log.error("problem creating purge filter: "+th.getMessage(),th);
    		}
    		finally{
          if (reader != null)
            reader.decZoieRef();
    			if (writeReader!=null){
    				try{
    				  writeReader.close();
    				}
    				catch(IOException ioe){
    					ZoieHealth.setFatal();
    					log.error(ioe.getMessage(),ioe);
    				}
    			}
    		}
    		
    		long end = System.currentTimeMillis();
    		log.info("purging docs completed in "+(end-start)+"ms");
    		log.info("total docs purged: " +count);
    	}
    }

  /**
   * @Precondition incoming events are sorted by version number;
   * every event in the events collection must be non-null
   *
   * @see proj.zoie.api.DataConsumer#consume(java.util.Collection)
   */
  public void consume(Collection<DataEvent<ZoieIndexable>> events) throws ZoieException
  {
    if (events == null)
      return;
    int eventCount = events.size();
    if (eventCount == 0)
    {
      return;
    }
    BaseSearchIndex<R> idx = getSearchIndex();
    if (idx == null)
    {
      throw new ZoieException("trying to consume to null index");
    }
    Long2ObjectMap<List<IndexingReq>> addList = new Long2ObjectOpenHashMap<List<IndexingReq>>();
    String version = idx.getVersion(); // current version
    LongSet delSet = new LongOpenHashSet();
    try
    {
      for (DataEvent<ZoieIndexable> evt : events)
      {
        if (evt == null)
          continue;
        //version = Math.max(version, evt.getVersion());
        version = version == null ? evt.getVersion()
            : (_versionComparator.compare(version, evt.getVersion()) < 0 ? evt.getVersion() : version);

        if (evt instanceof MarkerDataEvent)
          continue;

        // interpret and get the indexable instance
        ZoieIndexable indexable = evt.getData();
        if (indexable == null || indexable.isSkip())
          continue;

        long uid = indexable.getUID();

        delSet.add(uid);
        addList.remove(uid);

        if (!(indexable.isDeleted() || evt.isDelete())) // update event
        {
          try
          {
            IndexingReq[] reqs = indexable.buildIndexingReqs();
            for (IndexingReq req : reqs)
            {
              if (req != null) // if no doc is provided, interpret as a delete, e.g. an update with nothing
              {
                Document doc = req.getDocument();
                if (doc != null)
                {
                  ZoieSegmentReader.fillDocumentID(doc, uid);
                  if (indexable.isStorable())
                  {
                    byte[] bytes = indexable.getStoreValue();
                    if (bytes != null)
                    {
                      doc.add(new Field(AbstractZoieIndexable.DOCUMENT_STORE_FIELD, bytes));
                    }
                  }
                }
                // add to the insert list
                List<IndexingReq> docList = addList.get(uid);
                if (docList == null)
                {
                  docList = new LinkedList<IndexingReq>();
                  addList.put(uid, docList);
                }
                docList.add(req);
              }
            }
          }
          catch (Exception ex)
          {
            log.error("Couldn't index the event with uid - " + uid, ex);
          }
        }
        // hao: we do not need the following few lines
        //else {
        //  addList.remove(uid);
        //}
      }

      List<IndexingReq> docList = new ArrayList<IndexingReq>(addList.size());
      for (List<IndexingReq> tmpList : addList.values())
      {
        docList.addAll(tmpList);
      }

      purgeDocuments();
      idx.updateIndex(delSet, docList, _analyzer, _similarity);
      propagateDeletes(delSet);
      synchronized (_idxMgr)
      {
        idx.refresh();
        commitPropagatedDeletes();
      }
    }
    catch (IOException ioe)
    {
      ZoieHealth.setFatal();
      log.error("Problem indexing batch: " + ioe.getMessage(), ioe);
    }
    finally
    {
      try
      {
        if (idx != null)
        {
          idx.setVersion(version); // update the version of the index
          idx.incrementEventCount(eventCount);
        }
      }
      catch (Exception e) // catch all exceptions, or it would screw up the jobs framework
      {
        log.warn(e.getMessage());
      }
      finally
      {
        if (idx instanceof DiskSearchIndex)
        {
          log.info("disk indexing requests flushed.");
        }
      }
    }
  }

  public void loadFromIndex(RAMSearchIndex<R> ramIndex) throws ZoieException
  {
    try
    {
      // hao: get the disk search idx
      BaseSearchIndex<R> idx = getSearchIndex();
      // hao: merge the read-only ram idx with the disk idx
      idx.loadFromIndex(ramIndex);
      // duplicate clearDeletes, delDoc may change for realtime delete after loadFromIndex()
      // idx.clearDeletes(); // clear old deletes as deletes are written to the lucene index
      // hao: update the disk idx reader
      idx.refresh(); // load the index reader
      purgeDocuments();
      idx.markDeletes(ramIndex.getDelDocs()); // inherit deletes
      idx.commitDeletes();
      idx.incrementEventCount(ramIndex.getEventsHandled());

      //Map commitData = idx.getCommitData();
      //System.out.println("disk version from the commit data" + commitData);
      //V newVersion = idx.getVersion().compareTo(ramIndex.getVersion()) < 0 ? ramIndex.getVersion() : idx.getVersion();
      String newVersion = idx.getVersion() == null ? ramIndex.getVersion()
          : (_versionComparator.compare(idx.getVersion(), ramIndex.getVersion()) < 0 ? ramIndex.getVersion() : idx.getVersion());
      idx.setVersion(newVersion);
      //System.out.println("disk version from the signature" + newVersion.toString());
      //idx.setVersion(Math.max(idx.getVersion(), ramIndex.getVersion()));
    }
    catch (IOException ioe)
    {
      ZoieHealth.setFatal();
      log.error("Problem copying segments: " + ioe.getMessage(), ioe);
      throw new ZoieException(ioe);
    }
  }

  /**
   * @return the version number of the search index.
   */
  public String getVersion()
  {
    BaseSearchIndex<R> idx = getSearchIndex();
    String version = null;
    if (idx != null)
      version = idx.getVersion();
    return version;
  }

  /**
   * @return the version comparator.
   */
  public Comparator<String> getVersionComparator()
  {
    return _versionComparator;
  }
}
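
For reference, a minimal usage sketch of the purge hook exposed by setPurgeFilter above, assuming a concrete loader instance is available. The helper class, the loader parameter, and the "status"/"expired" field values are hypothetical; the filter itself uses only core Lucene 3.x classes.

import org.apache.lucene.index.Term;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.QueryWrapperFilter;
import org.apache.lucene.search.TermQuery;

class PurgeFilterWiring {
  // Hypothetical helper: documents matching the filter are deleted the next time
  // purgeDocuments() runs, i.e. during the next consume() batch or loadFromIndex() call.
  static void installExpiredPurge(LuceneIndexDataLoader<?> loader) {
    Filter expiredDocs = new QueryWrapperFilter(new TermQuery(new Term("status", "expired")));
    loader.setPurgeFilter(expiredDocs);
  }
}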



