All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.nicosensei.elasticindexbatch.IndexBatchJob Maven / Gradle / Ivy

package com.github.nicosensei.elasticindexbatch;

import java.util.Collection;
import java.util.Map;

import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentBuilder;

import com.github.nicosensei.textbatch.Tool;
import com.github.nicosensei.textbatch.ToolException;
import com.github.nicosensei.textbatch.job.InputFileReader;
import com.github.nicosensei.textbatch.job.InputLine;
import com.github.nicosensei.textbatch.job.Job;

/**
 * Base class for batch jobs that turn input lines into documents and index
 * them into an Elasticsearch index through bulk requests.
 * <p>
 * Every line accepted by {@code canIngest} is handed to
 * {@code buildBulkElement}; when a section completes, the documents returned
 * by {@code getBulkContents} are sent as a single bulk request and the buffer
 * is cleaned through {@code cleanBulkContents}.
 * <p>
 * NOTE(review): the type names {@code I} (input line) and {@code D}
 * (document) are used throughout this class but are not declared in the
 * visible text; the generic type-parameter lists (class header, and probably
 * on {@code Job}, {@code Collection} and {@code Map}) appear to have been
 * lost and must be restored from the original source. {@code D} needs a bound
 * that provides {@code getDocumentId()}.
 *
 * @author ngiraud
 */
public abstract class IndexBatchJob extends Job {
	
	/** Shared tool instance, used here to log warnings about failed bulk items. */
	Tool tool = Tool.getInstance();

	/** Elasticsearch client used to prepare and execute the bulk requests. */
	private final TransportClient client;
	
	/** Name of the index the documents are written to. */
	private final String indexName;
	
	/** Name of the document type the documents are indexed under. */
	private final String documentTypeName;
	
	/**
	 * Creates a job bound to the given input, state and Elasticsearch target.
	 *
	 * @param input reader passed on to the parent {@code Job}
	 * @param state progress state passed on to the parent {@code Job}
	 * @param client client used to build and send bulk requests
	 * @param indexName name of the target index
	 * @param documentTypeName name of the target document type
	 */
	protected IndexBatchJob(
			final InputFileReader input,
			final IndexBatchState state,
			final TransportClient client,
			final String indexName,
			final String documentTypeName) {
		super(input, state);
		
		this.indexName = indexName;
		this.documentTypeName = documentTypeName;
		this.client = client;
	}

	/**
	 * Returns the parent's progress object narrowed to {@link IndexBatchState}.
	 *
	 * @return the progress state of this job
	 */
	@Override
	public IndexBatchState getProgress() {
		return (IndexBatchState) super.getProgress();
	}
	
	/**
	 * Tells whether the given line should be turned into a bulk element.
	 *
	 * @param line the input line to examine
	 * @return {@code true} if the line must be ingested, {@code false} to
	 *         ignore it
	 * @throws ToolException if the line cannot be evaluated
	 */
	protected abstract boolean canIngest(I line) throws ToolException;

	/**
	 * Builds a bulk element for the line, unless {@code canIngest} rejects it.
	 *
	 * @param line the input line to process
	 * @throws ToolException if the check or the element construction fails
	 */
	@Override
	protected void processLine(I line) throws ToolException {
		
		if (!canIngest(line)) {
			return;
		}
	
		buildBulkElement(line);
	}
	
	/**
	 * Converts the line into whatever the implementation buffers for the next
	 * bulk request.
	 *
	 * @param line the accepted input line
	 * @throws ToolException if the element cannot be built
	 */
	protected abstract void buildBulkElement(I line) throws ToolException;
	
	/**
	 * Returns the documents buffered for the next bulk request.
	 *
	 * @return the pending documents; an empty collection means there is
	 *         nothing to send
	 */
	protected abstract Collection getBulkContents();
	
	/**
	 * Discards the buffered documents once a bulk request has been attempted.
	 *
	 * @throws ToolException if the buffer cannot be cleaned
	 */
	protected abstract void cleanBulkContents() throws ToolException;
	
	/**
	 * Sends the buffered documents as one bulk index request and records
	 * every failed item as a skipped line.
	 * <p>
	 * Nothing is sent (and the buffer is left untouched) when there are no
	 * buffered documents. Otherwise the buffer is cleaned in all cases, whether
	 * building the request, executing it, or processing the response fails.
	 *
	 * @throws ToolException if a document source cannot be built, if the
	 *         buffer cannot be cleaned, or (presumably, as a
	 *         {@code SkipLimitExceededException}) when the number of skipped
	 *         lines has reached {@code IndexBatch.SKIP_LIMIT}
	 * @throws IllegalStateException if {@code getDocumentSourceType} returns a
	 *         constant this method does not handle
	 */
	@Override
	protected void sectionComplete() throws ToolException {
		Collection bulkContents = getBulkContents();
		
		// A null buffer is treated like an empty one: nothing to send.
		if (bulkContents == null || bulkContents.isEmpty()) {
			return;
		}
		
		IndexBatchState state = getProgress();
		
		try {
			// Building the request is inside the try block so that a failure
			// while producing a document source still cleans the buffer.
			BulkRequestBuilder bulk = client.prepareBulk();
			for (D doc : bulkContents) {
				IndexRequestBuilder indexReqBuilder = client.prepareIndex(
						indexName,
						documentTypeName,
						doc.getDocumentId());
				DocumentSourceType sourceType = getDocumentSourceType();
				switch (sourceType) {
					case documentAsMap:
						indexReqBuilder.setSource(getDocumentAsMap(doc));
						break;
					case xContentBuilder:
						indexReqBuilder.setSource(getDocumentContentBuilder(doc));
						break;
					default:
						// Never send an index request without a source.
						throw new IllegalStateException(
								"Unsupported document source type: " + sourceType);
				}
				bulk.add(indexReqBuilder);
			}
			
			BulkResponse bulkResponse = bulk.execute().actionGet();
			if (bulkResponse.hasFailures()) {
				tool.logWarning(bulkResponse.buildFailureMessage());
				for (BulkItemResponse itemResp : bulkResponse.getItems()) {
					if (!itemResp.isFailed()) {
						continue;
					}
					// The limit is checked before counting, so the failure
					// that exceeds it aborts the job instead of being recorded.
					if (state.getLinesSkipped() >= IndexBatch.SKIP_LIMIT) {
						throw new SkipLimitExceededException(IndexBatch.SKIP_LIMIT);
					}
					state.notifyLineSkipped();
					// Log the document id and failure cause rather than relying
					// on the item's toString(), which may carry no useful detail.
					tool.logWarning("Skipped item " + itemResp.getId()
							+ ": " + itemResp.getFailureMessage());
				}
			}
		} finally {
			cleanBulkContents();
		}
	}

	/**
	 * Hook invoked on job completion; this class has nothing to do here.
	 */
	@Override
	protected void jobComplete() {
		// Intentionally empty.
	}
	
	/**
	 * The two ways an implementation can supply a document source to the
	 * index request.
	 */
	public enum DocumentSourceType {
		/** The source is obtained from {@code getDocumentAsMap}. */
		documentAsMap,
		/** The source is obtained from {@code getDocumentContentBuilder}. */
		xContentBuilder
	}
	
	/**
	 * Selects which of the two source accessors is used when building index
	 * requests. It is queried once per document.
	 *
	 * @return the source type to use; must not be {@code null}
	 */
	protected abstract DocumentSourceType getDocumentSourceType();
	
	/**
	 * Returns the document source as a map; used when the source type is
	 * {@link DocumentSourceType#documentAsMap}.
	 *
	 * @param doc the document to convert
	 * @return the document source as a map
	 * @throws ToolException if the document cannot be converted
	 */
	protected abstract Map getDocumentAsMap(D doc) throws ToolException;
	
	/**
	 * Returns the document source as a content builder; used when the source
	 * type is {@link DocumentSourceType#xContentBuilder}.
	 *
	 * @param doc the document to convert
	 * @return the content builder holding the document source
	 * @throws ToolException if the document cannot be converted
	 */
	protected abstract XContentBuilder getDocumentContentBuilder(D doc) throws ToolException;
	
}