
// com.github.nicosensei.elasticindexbatch.IndexBatchJob Maven / Gradle / Ivy
package com.github.nicosensei.elasticindexbatch;
import java.util.Collection;
import java.util.Map;
import org.elasticsearch.action.bulk.BulkItemResponse;
import org.elasticsearch.action.bulk.BulkRequestBuilder;
import org.elasticsearch.action.bulk.BulkResponse;
import org.elasticsearch.action.index.IndexRequestBuilder;
import org.elasticsearch.client.transport.TransportClient;
import org.elasticsearch.common.xcontent.XContentBuilder;
import com.github.nicosensei.textbatch.Tool;
import com.github.nicosensei.textbatch.ToolException;
import com.github.nicosensei.textbatch.job.InputFileReader;
import com.github.nicosensei.textbatch.job.InputLine;
import com.github.nicosensei.textbatch.job.Job;
/**
 * Base class for batch jobs that turn input lines into documents and submit them
 * to an Elasticsearch index in bulk.
 *
 * <p>Every line handed to {@code processLine} is filtered through {@code canIngest};
 * accepted lines are passed to {@code buildBulkElement}. When a section completes,
 * the documents returned by {@code getBulkContents()} are sent as one bulk request,
 * failed items are counted as skipped lines on the {@link IndexBatchState}, and
 * {@code cleanBulkContents()} is invoked.
 *
 * <p>NOTE(review): the type names {@code I} and {@code D} are used throughout this
 * class but are not declared in the class header, and {@code Collection}/{@code Map}
 * are raw. Presumably the generic type parameters were lost when this file was
 * extracted - confirm against the original source and restore them.
 *
 * @author ngiraud
 */
public abstract class IndexBatchJob extends Job {

    /** Tool instance used by this job to log warnings. */
    Tool tool = Tool.getInstance();

    /** Client used to prepare the bulk request and the individual index requests. */
    private final TransportClient client;

    /** Name of the index the documents are written to. */
    private final String indexName;

    /** Document type name passed to every index request. */
    private final String documentTypeName;

    /**
     * Creates a job bound to an input, a progress state and a target index.
     *
     * @param input reader forwarded to the parent {@code Job}
     * @param state progress state forwarded to the parent {@code Job}
     * @param client client used to build and execute the bulk requests
     * @param indexName name of the target index
     * @param documentTypeName document type used for every indexed document
     */
    protected IndexBatchJob(
            final InputFileReader input,
            final IndexBatchState state,
            final TransportClient client,
            final String indexName,
            final String documentTypeName) {
        super(input, state);
        this.indexName = indexName;
        this.documentTypeName = documentTypeName;
        this.client = client;
    }

    /**
     * Returns the parent's progress object narrowed to {@link IndexBatchState}.
     *
     * @return the progress state of this job
     */
    @Override
    public IndexBatchState getProgress() {
        return (IndexBatchState) super.getProgress();
    }

    /**
     * Decides whether a line should be turned into a bulk element.
     *
     * @param line the line to examine
     * @return {@code false} to make {@code processLine} ignore the line
     * @throws ToolException if the check itself fails
     */
    protected abstract boolean canIngest(I line) throws ToolException;

    /**
     * Builds a bulk element for the line, unless {@code canIngest} rejects it.
     *
     * @param line the line to process
     * @throws ToolException if filtering or building fails
     */
    @Override
    protected void processLine(I line) throws ToolException {
        if (!canIngest(line)) {
            return;
        }
        buildBulkElement(line);
    }

    /**
     * Called by {@code processLine} for every line accepted by {@code canIngest}.
     * Presumably implementations add a document to the collection exposed by
     * {@code getBulkContents()} - verify against the subclasses.
     *
     * @param line the accepted line
     * @throws ToolException if the element cannot be built
     */
    protected abstract void buildBulkElement(I line) throws ToolException;

    /**
     * Returns the documents to submit when the current section completes.
     *
     * @return the pending documents; {@code sectionComplete} calls
     *         {@code isEmpty()} on the result, so it must not be {@code null}
     */
    protected abstract Collection getBulkContents();

    /**
     * Invoked by {@code sectionComplete} after every attempt to submit a non-empty
     * set of bulk contents, whether or not the attempt succeeded.
     *
     * @throws ToolException if the cleanup fails
     */
    protected abstract void cleanBulkContents() throws ToolException;

    /**
     * Submits the pending documents as a single bulk request.
     *
     * <p>Does nothing when there are no pending documents. Otherwise one index
     * request per document is added to a bulk request, the request is executed, and
     * any failed items are logged and counted as skipped lines.
     * {@code cleanBulkContents()} runs in a {@code finally} block that also covers
     * the request-building phase, so a failure while converting a document does
     * not leave the pending documents behind for the next section.
     *
     * @throws ToolException if a document cannot be converted, if the skip limit is
     *         exceeded, or if the cleanup fails
     * @throws IllegalStateException if {@code getDocumentSourceType()} returns a
     *         constant this method does not handle
     */
    @Override
    protected void sectionComplete() throws ToolException {
        Collection bulkContents = getBulkContents();
        if (bulkContents.isEmpty()) {
            return;
        }

        try {
            BulkRequestBuilder bulk = client.prepareBulk();
            for (D doc : bulkContents) {
                IndexRequestBuilder indexReqBuilder = client.prepareIndex(
                        indexName,
                        documentTypeName,
                        doc.getDocumentId());
                DocumentSourceType sourceType = getDocumentSourceType();
                switch (sourceType) {
                    case documentAsMap:
                        indexReqBuilder.setSource(getDocumentAsMap(doc));
                        break;
                    case xContentBuilder:
                        indexReqBuilder.setSource(getDocumentContentBuilder(doc));
                        break;
                    default:
                        // Fail loudly rather than queue an index request without a source.
                        throw new IllegalStateException(
                                "Unsupported document source type: " + sourceType);
                }
                bulk.add(indexReqBuilder);
            }

            BulkResponse bulkResponse = bulk.execute().actionGet();
            if (bulkResponse.hasFailures()) {
                reportFailures(bulkResponse);
            }
        } finally {
            cleanBulkContents();
        }
    }

    /**
     * Logs the failures of a bulk response and records each failed item as a
     * skipped line.
     *
     * @param bulkResponse a response for which {@code hasFailures()} is true
     * @throws ToolException if the number of skipped lines has already reached
     *         {@code IndexBatch.SKIP_LIMIT} when another failed item is found
     */
    private void reportFailures(BulkResponse bulkResponse) throws ToolException {
        IndexBatchState state = getProgress();
        tool.logWarning(bulkResponse.buildFailureMessage());
        for (BulkItemResponse itemResp : bulkResponse.getItems()) {
            if (!itemResp.isFailed()) {
                continue;
            }
            // The limit is checked before counting the current failure.
            if (state.getLinesSkipped() >= IndexBatch.SKIP_LIMIT) {
                throw new SkipLimitExceededException(IndexBatch.SKIP_LIMIT);
            }
            state.notifyLineSkipped();
            tool.logWarning("Skipped item " + itemResp.toString());
        }
    }

    /** No work is performed when the job completes. */
    @Override
    protected void jobComplete() {
    }

    /** Selects which accessor {@code sectionComplete} uses to obtain a document's source. */
    public enum DocumentSourceType {
        /** The source is obtained from {@code getDocumentAsMap}. */
        documentAsMap,
        /** The source is obtained from {@code getDocumentContentBuilder}. */
        xContentBuilder
    }

    /**
     * Tells {@code sectionComplete} how document sources are supplied. It is
     * queried once per document.
     *
     * @return the source type to use
     */
    protected abstract DocumentSourceType getDocumentSourceType();

    /**
     * Returns the document source as a map; used when the source type is
     * {@code documentAsMap}.
     *
     * @param doc the document to convert
     * @return the source passed to the index request
     * @throws ToolException if the conversion fails
     */
    protected abstract Map getDocumentAsMap(D doc) throws ToolException;

    /**
     * Returns the document source as a content builder; used when the source type
     * is {@code xContentBuilder}.
     *
     * @param doc the document to convert
     * @return the source passed to the index request
     * @throws ToolException if the conversion fails
     */
    protected abstract XContentBuilder getDocumentContentBuilder(D doc) throws ToolException;
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy