package proj.zoie.impl.indexing.internal;
/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;

import org.apache.log4j.Logger;
import org.apache.lucene.index.IndexReader;

import proj.zoie.api.DataConsumer;
import proj.zoie.api.DataConsumer.DataEvent;
import proj.zoie.api.LifeCycleCotrolledDataConsumer;
import proj.zoie.api.ZoieException;
import proj.zoie.api.ZoieHealth;
import proj.zoie.api.indexing.AbstractZoieIndexable;
import proj.zoie.api.indexing.IndexingEventListener;
import proj.zoie.api.indexing.IndexingEventListener.IndexingEvent;
import proj.zoie.api.indexing.ZoieIndexable;
import proj.zoie.api.indexing.ZoieIndexableInterpreter;
import proj.zoie.impl.indexing.IndexUpdatedEvent;
import proj.zoie.impl.indexing.IndexingThread;

/**
 * Runs a background thread that flushes incoming data events in batches to the underlying DataConsumer.
 * Incoming data is buffered first.
 * A flush is carried out when the batch size becomes significant,
 * a client requests a flush, or a significant amount of time has passed.
 * The data is flushed to the underlying data loader, which is a DataConsumer.
 * When incoming data arrives too fast, the thread sending the data is put on hold.
 * This acts as incoming data throttling.
 * 
 * @param <R> the type of IndexReader managed by the associated SearchIndexManager
 * @param <D> the type of raw data object interpreted into ZoieIndexable instances
 */
public class BatchedIndexDataLoader<R extends IndexReader, D> implements LifeCycleCotrolledDataConsumer<D> {

	protected int _batchSize;
	protected long _delay;
	protected final DataConsumer<ZoieIndexable> _dataLoader;
	protected List<DataEvent<ZoieIndexable>> _batchList;
	protected final LoaderThread _loadMgrThread;
	protected long _lastFlushTime;
	protected int _eventCount;
	protected int _maxBatchSize;
	protected volatile boolean _stop;
	protected boolean _flush;
	protected final SearchIndexManager<R> _idxMgr;
	protected final ZoieIndexableInterpreter<D> _interpreter;
	private final Queue<IndexingEventListener> _lsnrList;
	  
	  private static Logger log = Logger.getLogger(BatchedIndexDataLoader.class);
	  
	  /**
	   * @param dataLoader
	   * @param batchSize
	   * @param maxBatchSize
	   * @param delay
	   * @param idxMgr
	   * @param lsnrList the list of IndexingEventListeners. This should be a synchronized list if its content is mutable.
	   */
	  public BatchedIndexDataLoader(DataConsumer<ZoieIndexable> dataLoader, int batchSize, int maxBatchSize, long delay,
                                    SearchIndexManager<R> idxMgr,
                                    ZoieIndexableInterpreter<D> interpreter,
                                    Queue<IndexingEventListener> lsnrList)
	  {
	    _maxBatchSize=Math.max(maxBatchSize, batchSize);
	    _batchSize=Math.min(batchSize, _maxBatchSize);
	    _delay=delay;
	    _dataLoader=dataLoader;
	    _batchList=new LinkedList<DataEvent<ZoieIndexable>>();
	    _lastFlushTime=0L;
	    _eventCount=0;
	    _loadMgrThread=new LoaderThread();
	    _loadMgrThread.setName("disk indexer data loader");
	    _stop=false;
	    _flush=false;
	    _idxMgr = idxMgr;
	    _interpreter = interpreter;
	    _lsnrList = lsnrList;
      log.info("constructor: _maxBatchSize: " + _maxBatchSize + " _batchSize: " + _batchSize +" _delay: " + _delay);
	  }
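
	  // Illustrative construction and lifecycle sketch. The variable names below
	  // (diskConsumer, idxMgr, interpreter, listenerQueue) and the numeric settings are
	  // hypothetical placeholders supplied by the surrounding Zoie setup:
	  //
	  //   BatchedIndexDataLoader<R, D> loader =
	  //       new BatchedIndexDataLoader<R, D>(diskConsumer, 100, 10000, 300000L,
	  //                                        idxMgr, interpreter, listenerQueue);
	  //   loader.start();   // spawns the background LoaderThread
	  //   ...               // producer threads feed data via consume(...)
	  //   loader.stop();    // signals the thread to stop and joins it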
	  
	  protected final void fireIndexingEvent(IndexingEvent evt){
		  if (_lsnrList!=null && _lsnrList.size() > 0){
  		    synchronized(_lsnrList) {
  			  for (IndexingEventListener lsnr : _lsnrList){
  				  try{
  				    lsnr.handleIndexingEvent(evt);
  				  }
  				  catch(Exception e){
  					  log.error(e.getMessage(),e);
  				  }
  			  }
  		    }
		  }
	  }
	  
	  protected final void fireNewVersionEvent(String newVersion){
		  if (_lsnrList!=null && _lsnrList.size() > 0){
  		    synchronized(_lsnrList) {
  			  for (IndexingEventListener lsnr : _lsnrList){
  				  try{
  				    lsnr.handleUpdatedDiskVersion(newVersion);
  				  }
  				  catch(Exception e){
  					  log.error(e.getMessage(),e);
  				  }
  			  }
  		    }
		  }
	  }
	  
	  public synchronized int getMaxBatchSize()
	  {
	    return _maxBatchSize;
	  }
	  
	  public synchronized void setMaxBatchSize(int maxBatchSize)
	  {
	    _maxBatchSize = Math.max(maxBatchSize, _batchSize);
	    _batchSize = Math.min(_batchSize, _maxBatchSize);
	    log.info("setMaxBatchSize: " + _maxBatchSize);
	  }
	  
	  public synchronized int getBatchSize()
	  {
	    return _batchSize;
	  }
	  
	  public synchronized void setBatchSize(int batchSize)
	  {
	    _batchSize=Math.min(Math.max(1, batchSize), _maxBatchSize);
      log.info("setBatchSize: " + _batchSize);
	  }
	  
	  public synchronized long getDelay()
	  {
	    return _delay;
	  }
	  
	  public synchronized void setDelay(long delay)
	  {
	    _delay=delay;
	    log.info("setDelay: " + _delay);
	  }
	  
	  public synchronized int getEventCount()
	  {
	    return _eventCount;
	  }
	  
	  /**
	   * 
	   * @see proj.zoie.api.DataConsumer#consume(java.util.Collection)
	   */
	  public void consume(Collection<DataEvent<D>> events) throws ZoieException
	  {
	    if (events != null)
	    {
	      ArrayList<DataEvent<ZoieIndexable>> indexableList =
	          new ArrayList<DataEvent<ZoieIndexable>>(events.size());
	      Iterator<DataEvent<D>> iter = events.iterator();
	      while (iter.hasNext())
	      {
	        try
	        {
	          DataEvent<D> event = iter.next();
	          ZoieIndexable indexable = ((ZoieIndexableInterpreter<D>) _interpreter).convertAndInterpret(event.getData());
	          DataEvent<ZoieIndexable> newEvent = new DataEvent<ZoieIndexable>(indexable,
	                                                                           event.getVersion(),
	                                                                           event.isDelete());
	          indexableList.add(newEvent);
	        }
	        catch (Exception e)
	        {
	          ZoieHealth.setFatal();
	          log.error(e.getMessage(), e);
	        }
	      }

	      synchronized (this) // this blocks the batch disk loader thread while indexing to RAM
	      {
	        while (_batchList.size() > _maxBatchSize)
	        {
	          // check if load manager thread is alive
	          if(_loadMgrThread == null || !_loadMgrThread.isAlive())
	          {
	            throw new ZoieException("fatal: indexing thread loader manager has stopped");
	          }
	          
	          try
	          {
	            this.wait(60000); // 1 min
	          }
	          catch (InterruptedException e)
	          {
	            continue;
	          }
	        }
	        _eventCount += indexableList.size();
	        _batchList.addAll(indexableList);
	        this.notifyAll();
	      }
	    }
	  }
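
	  // Producer-side sketch for consume(...): the calling thread is blocked while the
	  // buffered backlog exceeds the configured max batch size, which is how incoming
	  // data is throttled. The names loader, source and chunk are hypothetical:
	  //
	  //   Collection<DataEvent<D>> chunk = source.next();
	  //   loader.consume(chunk);   // may block until the backlog drains below the limit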
	  
      public synchronized int getCurrentBatchSize()
      {
        return (_batchList != null ? _batchList.size() : 0);
      }
      
      /**
       * This method needs to be called within a synchronized block on 'this'.
       * @return the list of data events already received. A new list is created to receive new data events.
       */
      protected List<DataEvent<ZoieIndexable>> getBatchList()
	  {
        List<DataEvent<ZoieIndexable>> tmpList=_batchList;
        _batchList=new LinkedList<DataEvent<ZoieIndexable>>();
        return tmpList;
	  }
	  
	  /**
	   * Waits up to timeOut milliseconds for the indexing thread to process the buffered data events.
	   * If unprocessed events still remain at the end of the timeOut duration,
	   * a ZoieException is thrown.
	   * @param timeOut a timeout value in milliseconds.
	   * @throws ZoieException
	   */
	  public void flushEvents(long timeOut) throws ZoieException
	  {
	    synchronized(this)
	    {
	      while(_eventCount>0)
	      {
	        _flush=true;
	        this.notifyAll();
	        long now1 = System.currentTimeMillis();
		    
	        if (timeOut<=0)
	        {
	          log.error("sync timed out");
	          throw new ZoieException("timed out");          
	        }
	        try
	        {
	          long waittime = Math.min(200, timeOut);
	          this.wait(waittime);
	        }
	        catch (InterruptedException e)
	        {
	          throw new ZoieException(e.getMessage());
	        }
	        
	        long now2 = System.currentTimeMillis();
	        
	        timeOut -= (now2 - now1);
	      }
	    }
	  }
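
	  // Caller-side sketch for flushEvents(...): waits up to ten seconds for all buffered
	  // events to reach the underlying DataConsumer, throwing a ZoieException if events
	  // remain after the timeout. The name loader is hypothetical:
	  //
	  //   loader.flushEvents(10000L);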

	  /**
	   * Used by the indexing thread to flush incoming data events in batches.
	   * A flush is carried out when the batch size becomes significant,
	   * a client requests a flush, or a significant amount of time has passed.
	   * The data is flushed to the underlying data loader, which is a DataConsumer.
	   */
	  protected void processBatch()
	  {
        List<DataEvent<ZoieIndexable>> tmpList=null;
        long now=System.currentTimeMillis();
        long duration=now-_lastFlushTime;

        String currentVersion;
	      
	    try{
	      currentVersion = _idxMgr.getCurrentDiskVersion();
	    }
	    catch(IOException ioe){
	      currentVersion = null;
	    }
	    
        synchronized(this)
        {
          while(_batchList.size()<_batchSize && !_stop && !_flush && duration<_delay)
          {
            try
            {
              this.wait(_delay - duration);
            }
            catch (InterruptedException e)
            {
              log.warn(e.getMessage());
            }
            now=System.currentTimeMillis();
            duration=now-_lastFlushTime;
          }
          _flush=false;
          _lastFlushTime=now;

          if (_batchList.size()>0)
          {
            // change the status and get the batch list
            // this has to be done in the block synchronized on BatchIndexDataLoader
            _idxMgr.setDiskIndexerStatus(SearchIndexManager.Status.Working);
            tmpList = getBatchList();
          }
        }
        
        if (tmpList != null)
        {
          long t1=System.currentTimeMillis();
          int eventCount = tmpList.size();
          Comparator<String> versionComparator = _idxMgr.getVersionComparator();
          for (DataEvent<ZoieIndexable> evt : tmpList){
            String newVersion = evt.getVersion();
            if (currentVersion==null){
              currentVersion = newVersion;
            }
            else{
              if (versionComparator.compare(currentVersion, newVersion)<0){
                currentVersion = newVersion;
              }
            }
          }
          try
          {
            _dataLoader.consume(tmpList);
          }
          catch (ZoieException e)
          {
            ZoieHealth.setFatal();
            log.error(e.getMessage(),e);
          }
          finally
          {
            long t2=System.currentTimeMillis();
            synchronized(this)
            {
              _eventCount -= eventCount;
              this.notifyAll();
              log.info(this+" flushed batch of "+eventCount+" events to disk indexer, took: "+(t2-t1)+" current event count: "+_eventCount);
             
              IndexUpdatedEvent evt = new IndexUpdatedEvent(eventCount,t1,t2,_eventCount);
              fireIndexingEvent(evt);
              try{
                String oldVersion = _idxMgr.getCurrentDiskVersion();
                if (currentVersion!=null && !currentVersion.equals(oldVersion)){
                	fireNewVersionEvent(currentVersion);
                }
              }
              catch(IOException ioe){
            	 log.error(ioe.getMessage(),ioe); 
              }
            }
          }
        }
        else
        {
          log.debug("batch size is 0");
        }
	  }
	  
	  protected class LoaderThread extends IndexingThread
	  {		  
	    LoaderThread()
	    {
	      super("disk indexer data loader");
	    }
	    
	    public void run()
	    {
	      while(!_stop)
	      {
	        processBatch();
	      }
	    }
	  }
	  
	  /**
	   * Starts the built-in indexing thread.
	   */
	  public void start()
	  {
	    _loadMgrThread.setName(String.valueOf(this));
	    _loadMgrThread.start();
	  }

      /**
       * Shuts down the built-in indexing thread and waits until it dies.
       */
	  public void stop()
	  {
	    synchronized(this)
	    {
	      _stop = true;
	      this.notifyAll();
	    }
	    try 
	    {
	      _loadMgrThread.join();
	    }
	    catch (InterruptedException e)
	    {
	      log.error(e.getMessage(), e);
	    }
	  }

	  protected static class ZoieIndexableDecorator extends AbstractZoieIndexable
	  {
	    private final ZoieIndexable _inner;
	    private ZoieIndexableDecorator(ZoieIndexable inner)
	    {
	      _inner = inner;
	    }

	    public static ZoieIndexableDecorator decorate(ZoieIndexable inner)
	    {
	      return (inner == null ? null : new ZoieIndexableDecorator(inner));
	    }
	    
	    

	    @Override
	    public IndexingReq[] buildIndexingReqs() {
	      return _inner.buildIndexingReqs();
	    }

	    public long getUID() {
	      return _inner.getUID();
	    }

	    public boolean isDeleted() {
	      return _inner.isDeleted();
	    }

	    public boolean isSkip() {
	      return _inner.isSkip();
	    }

	  }
	  
	public String getVersion()
	{
	  throw new UnsupportedOperationException();
	}

	public Comparator<String> getVersionComparator()
	{
	  throw new UnsupportedOperationException();
	}
}



