
com.digitalpebble.stormcrawler.elasticsearch.persistence.ScrollSpout

/**
 * Licensed to DigitalPebble Ltd under one or more contributor license agreements. See the NOTICE
 * file distributed with this work for additional information regarding copyright ownership.
 * DigitalPebble licenses this file to You under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License. You may obtain a copy of the
 * License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the
 * License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
 * express or implied. See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.digitalpebble.stormcrawler.elasticsearch.persistence;

import com.digitalpebble.stormcrawler.Constants;
import com.digitalpebble.stormcrawler.Metadata;
import com.digitalpebble.stormcrawler.persistence.AbstractStatusUpdaterBolt;
import com.digitalpebble.stormcrawler.persistence.Status;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Queue;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.tuple.Fields;
import org.apache.storm.tuple.Values;
import org.apache.storm.utils.Utils;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.action.search.SearchScrollRequest;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.core.TimeValue;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.SearchHit;
import org.elasticsearch.search.SearchHits;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Reads all the documents from a shard and emits them on the status stream. Used for copying an
 * index.
 */
public class ScrollSpout extends AbstractSpout implements ActionListener<SearchResponse> {

    private String scrollId = null;
    private boolean hasFinished = false;
    private final Queue<Values> queue = new LinkedList<>();

    private static final Logger LOG = LoggerFactory.getLogger(ScrollSpout.class);

    @Override
    // simplified version of the super method so that we can store the fields
    // in the map of things being processed
    public void nextTuple() {
        synchronized (queue) {
            if (!queue.isEmpty()) {
                List<Object> fields = queue.remove();
                String url = fields.get(0).toString();
                _collector.emit(Constants.StatusStreamName, fields, url);
                beingProcessed.put(url, fields);
                eventCounter.scope("emitted").incrBy(1);
                LOG.debug("{} emitted {}", logIdprefix, url);
                return;
            }
        }

        if (isInQuery.get()) {
            LOG.trace("{} isInQuery true", logIdprefix);
            // sleep for a bit but not too much in order to give ack/fail a chance
            Utils.sleep(10);
            return;
        }

        // re-populate the buffer
        populateBuffer();
    }

    @Override
    protected void populateBuffer() {
        // the previous scroll page was empty: the shard has been fully read
        if (hasFinished) {
            Utils.sleep(10);
            return;
        }

        // initial request
        if (scrollId == null) {
            SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder();
            searchSourceBuilder.query(QueryBuilders.matchAllQuery());
            searchSourceBuilder.size(maxURLsPerBucket * maxBucketNum);

            SearchRequest searchRequest = new SearchRequest(indexName);
            searchRequest.source(searchSourceBuilder);
            searchRequest.scroll(TimeValue.timeValueMinutes(5L));

            // query a specific shard, ideally a local copy of it
            if (shardID != -1) {
                searchRequest.preference("_shards:" + shardID + "|_local");
            }

            isInQuery.set(true);
            LOG.trace("{} isInQuery set to true", logIdprefix);
            client.searchAsync(searchRequest, RequestOptions.DEFAULT, this);

            // dump query to log
            LOG.debug("{} ES query {}", logIdprefix, searchRequest.toString());
            return;
        }

        // subsequent requests: continue the existing scroll context
        SearchScrollRequest scrollRequest = new SearchScrollRequest(scrollId);
        scrollRequest.scroll(TimeValue.timeValueMinutes(5L));

        isInQuery.set(true);
        client.scrollAsync(scrollRequest, RequestOptions.DEFAULT, this);

        // dump query to log
        LOG.debug("{} ES query {}", logIdprefix, scrollRequest.toString());
    }

    @Override
    public void onResponse(SearchResponse response) {
        SearchHits hits = response.getHits();
        LOG.info(
                "{} ES query returned {} hits in {} msec",
                logIdprefix,
                hits.getHits().length,
                response.getTook().getMillis());

        // an empty page of results means the scroll is exhausted
        hasFinished = hits.getHits().length == 0;

        synchronized (this.queue) {
            // Unlike standard spouts, the scroll queries should never return
            // the same document twice -> no need to look in the buffer or cache
            for (SearchHit hit : hits) {
                Map<String, Object> keyValues = hit.getSourceAsMap();
                String url = (String) keyValues.get("url");
                String status = (String) keyValues.get("status");
                String nextFetchDate = (String) keyValues.get("nextFetchDate");
                Metadata metadata = fromKeyValues(keyValues);
                // pass the original nextFetchDate through unchanged so that the
                // status updater does not recompute it
                metadata.setValue(
                        AbstractStatusUpdaterBolt.AS_IS_NEXTFETCHDATE_METADATA, nextFetchDate);
                this.queue.add(new Values(url, metadata, Status.valueOf(status)));
            }
        }

        scrollId = response.getScrollId();

        // remove lock
        markQueryReceivedNow();
    }

    @Override
    public void onFailure(Exception e) {
        LOG.error("{} Exception with ES query", logIdprefix, e);
        markQueryReceivedNow();
    }

    @Override
    public void fail(Object msgId) {
        LOG.info("{} Fail for {}", logIdprefix, msgId);
        eventCounter.scope("failed").incrBy(1);
        // retrieve the values from being processed and send them back to the queue
        Values v = (Values) beingProcessed.remove(msgId);
        queue.add(v);
    }

    @Override
    public void declareOutputFields(OutputFieldsDeclarer declarer) {
        declarer.declareStream(Constants.StatusStreamName, new Fields("url", "metadata", "status"));
    }
}
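
For context, here is a minimal sketch of how this spout might be wired into a Storm topology to copy a status index. It is not part of the class above: the pairing with StatusUpdaterBolt, the parallelism matching the number of shards of the source index, and the per-component "es.status.index.name" overrides are assumptions based on typical StormCrawler Elasticsearch topologies, and the index names are hypothetical.

import com.digitalpebble.stormcrawler.Constants;
import com.digitalpebble.stormcrawler.elasticsearch.persistence.ScrollSpout;
import com.digitalpebble.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt;
import org.apache.storm.Config;
import org.apache.storm.LocalCluster;
import org.apache.storm.topology.TopologyBuilder;

public class CopyIndexTopology {
    public static void main(String[] args) throws Exception {
        TopologyBuilder builder = new TopologyBuilder();

        // one spout task per shard of the source index (assumed: 10 shards),
        // so that each task scrolls through a single shard
        builder.setSpout("scroll", new ScrollSpout(), 10)
                // hypothetical per-component override: read from the source index
                .addConfiguration("es.status.index.name", "status");

        // the updater bolt consumes the tuples emitted on the status stream
        // and writes them to the target index
        builder.setBolt("updater", new StatusUpdaterBolt(), 4)
                .localOrShuffleGrouping("scroll", Constants.StatusStreamName)
                // hypothetical per-component override: write to the copy
                .addConfiguration("es.status.index.name", "status-copy");

        Config conf = new Config();
        try (LocalCluster cluster = new LocalCluster()) {
            cluster.submitTopology("copy-index", conf, builder.createTopology());
            Thread.sleep(60_000); // let the copy run; stop once the spout is idle
        }
    }
}

In practice the usual es.status.* connection settings (addresses, credentials, etc.) would also have to be supplied; the sketch only illustrates the wiring between the scroll spout and the status updater.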