All downloads are free. The search and download features use the official Maven repository.

com.github.euler.elasticsearch.ElasticsearchFragmentSink Maven / Gradle / Ivy

There is a newer version: 0.10.0
Show newest version
package com.github.euler.elasticsearch;

import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;

import org.elasticsearch.action.index.IndexRequest;
import org.elasticsearch.client.RestHighLevelClient;

import com.github.euler.core.ProcessingContext;
import com.github.euler.elasticsearch.req.InsertRequestFactory;
import com.github.euler.tika.FlushConfig;
import com.github.euler.tika.FragmentBatchSink;
import com.github.euler.tika.SinkResponse;

/**
 * Metadata sink that additionally stores text fragments as separate documents
 * in the same index, linked to their parent item through a {@code join_field}.
 *
 * <p>Item documents produced by {@link #buildSource(URI, ProcessingContext)} are
 * tagged with {@code join_field = "item"}; fragment documents produced by
 * {@link #storeFragment(String, int, String)} carry a {@code join_field} object
 * naming the relation {@code "fragment"} and the parent id.
 */
public class ElasticsearchFragmentSink extends ElasticsearchMetadataSink implements FragmentBatchSink {

    /**
     * Creates a sink that writes to {@code index} through {@code client}, using an
     * {@link InsertRequestFactory} to build the requests handled by the superclass.
     *
     * @param client      the Elasticsearch client handed to the superclass
     * @param index       the target index name
     * @param flushConfig the flush configuration handed to the superclass
     */
    public ElasticsearchFragmentSink(RestHighLevelClient client, String index, FlushConfig flushConfig) {
        super(client, index, flushConfig, new InsertRequestFactory());
    }

    /**
     * Builds the source of an item document: a copy of whatever the superclass
     * produces, plus {@code join_field = "item"}.
     *
     * <p>NOTE(review): the return type is left exactly as declared because the
     * superclass signature is not visible from this file; the keys are assumed to
     * be strings - confirm against {@code ElasticsearchMetadataSink}.
     *
     * @param uri the URI of the item being stored
     * @param ctx the processing context of the item
     * @return a new mutable map; the map returned by the superclass is not modified
     */
    @Override
    protected Map buildSource(URI uri, ProcessingContext ctx) {
        // Copy first so the map returned by the superclass is never mutated.
        Map<String, Object> metadata = new HashMap<>(super.buildSource(uri, ctx));
        metadata.put("join_field", "item");
        return metadata;
    }

    /**
     * Queues one fragment of an item's content as its own document and triggers
     * a flush check.
     *
     * <p>The document contains the fragment text ({@code content}), its length in
     * {@code char}s ({@code size}), its position ({@code fragment-index}) and a
     * {@code join_field} pointing at {@code parentId}. Its id is a freshly
     * generated random UUID.
     *
     * @param parentId  id of the item document this fragment belongs to; also used
     *                  as the routing value of the request
     * @param fragIndex position of the fragment within its parent
     * @param fragment  the fragment text; must not be {@code null}
     * @return the result of {@code flush(fragId, false)} for the generated id
     */
    @Override
    public SinkResponse storeFragment(String parentId, int fragIndex, String fragment) {
        Map<String, Object> data = new HashMap<>();
        data.put("content", fragment);
        data.put("size", fragment.length());
        data.put("fragment-index", fragIndex);

        // Child side of the relation: relation name plus the parent's id.
        Map<String, String> joinField = new HashMap<>(2);
        joinField.put("name", "fragment");
        joinField.put("parent", parentId);

        data.put("join_field", joinField);

        String fragId = UUID.randomUUID().toString();

        IndexRequest req = new IndexRequest(this.getIndex());
        // Route by the parent id so the fragment is indexed on the same shard as
        // its parent, as the Elasticsearch join field type expects for children.
        req.routing(parentId);
        req.id(fragId);
        req.source(data);
        add(req);
        return flush(fragId, false);
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy