All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.culturegraph.mf.search.index.BatchIndexer Maven / Gradle / Ivy

package org.culturegraph.mf.search.index;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**

 * @author Markus Michael Geipel
 *
 */
public final class BatchIndexer{

	public static final String DEFAULT_FIELD = "all";
	private static final Logger LOG = LoggerFactory.getLogger(BatchIndexer.class);
	private static final int DEFAUT_BATCH_SIZE =  10000;
	private static final String ID_NAME = "_id";
	private final IndexWriter indexWriter;

	private int batchSize = DEFAUT_BATCH_SIZE;
	private final List docBuffer = new ArrayList();
	private Document currentDoc;
	private int count;
	

	public BatchIndexer(final IndexWriter indexWriter) {
			this.indexWriter = indexWriter;
	}
	
	public int getCount() {
		return count;
	}
	
	public void startDocument(final String identifier) {
		currentDoc = new Document();
		add(new Field(ID_NAME, identifier, Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
	}
	
	public void startDocument() {
		currentDoc = new Document();
	}

	public void endDocument() {
		if(!currentDoc.getFields().isEmpty()){
			docBuffer.add(currentDoc);
			++count;
		}
		
		if(count%batchSize==0){
			flush();			
		}
	}

	public void add(final Field field) {
		//LOG.info("adding field " + field);
		currentDoc.add(field);
	}
	
	public void flush() {
		try {
			indexWriter.addDocuments(docBuffer);
			docBuffer.clear();
			LOG.info(count + " records indexed");
		} catch (CorruptIndexException e) {
			throw new IndexException(e);
		} catch (IOException e) {
			throw new IndexException(e);
		}
	}
	

	public void setBatchSize(final int batchSize) {
		this.batchSize = batchSize;
	}

	public int getBatchSize() {
		return batchSize;
	}

	public void close()  {
		flush();

		try {
			LOG.info("commiting index");
			indexWriter.commit();

		} catch (CorruptIndexException e) {
			throw new IndexException(e);
		} catch (IOException e) {
			throw new IndexException(e);
		}
	}

	public IndexWriter getIndexWriter() {
		return indexWriter;
		
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy