
com.digitalpebble.stormcrawler.elasticsearch.metrics.StatusMetricsBolt Maven / Gradle / Ivy
Show all versions of storm-crawler-elasticsearch Show documentation
/**
* Licensed to DigitalPebble Ltd under one or more contributor license agreements. See the NOTICE
* file distributed with this work for additional information regarding copyright ownership.
* DigitalPebble licenses this file to You under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License. You may obtain a copy of the
* License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
*
Unless required by applicable law or agreed to in writing, software distributed under the
* License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.digitalpebble.stormcrawler.elasticsearch.metrics;
import com.digitalpebble.stormcrawler.elasticsearch.ElasticSearchConnection;
import com.digitalpebble.stormcrawler.util.ConfUtils;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.utils.TupleUtils;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.client.RequestOptions;
import org.elasticsearch.client.core.CountRequest;
import org.elasticsearch.client.core.CountResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Queries the status index periodically to get the count of URLs per status. This bolt can be
* connected to the output of any other bolt and will not produce anything as output.
*/
public class StatusMetricsBolt extends BaseRichBolt {
private static final Logger LOG = LoggerFactory.getLogger(StatusMetricsBolt.class);
private static final String ESBoltType = "status";
private static final String ESStatusIndexNameParamName = "es.status.index.name";
private String indexName;
private ElasticSearchConnection connection;
private Map latestStatusCounts = new HashMap<>(6);
private int freqStats = 60;
private OutputCollector _collector;
private transient StatusActionListener[] listeners;
private class StatusActionListener implements ActionListener {
private final String name;
private boolean ready = true;
public boolean isReady() {
return ready;
}
public void busy() {
this.ready = false;
}
StatusActionListener(String statusName) {
name = statusName;
}
@Override
public void onResponse(CountResponse response) {
ready = true;
LOG.debug("Got {} counts for status:{}", response.getCount(), name);
latestStatusCounts.put(name, response.getCount());
}
@Override
public void onFailure(Exception e) {
ready = true;
LOG.error("Failure when getting counts for status:{}", name, e);
}
}
@Override
public void prepare(
Map stormConf, TopologyContext context, OutputCollector collector) {
_collector = collector;
indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName, "status");
try {
connection = ElasticSearchConnection.getConnection(stormConf, ESBoltType);
} catch (Exception e1) {
LOG.error("Can't connect to ElasticSearch", e1);
throw new RuntimeException(e1);
}
context.registerMetric(
"status.count",
() -> {
return latestStatusCounts;
},
freqStats);
listeners = new StatusActionListener[6];
listeners[0] = new StatusActionListener("DISCOVERED");
listeners[1] = new StatusActionListener("FETCHED");
listeners[2] = new StatusActionListener("FETCH_ERROR");
listeners[3] = new StatusActionListener("REDIRECTION");
listeners[4] = new StatusActionListener("ERROR");
listeners[5] = new StatusActionListener("TOTAL");
}
@Override
public Map getComponentConfiguration() {
Config conf = new Config();
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, freqStats);
return conf;
}
@Override
public void execute(Tuple input) {
_collector.ack(input);
// this bolt can be connected to anything
// we just want to trigger a new search when the input is a tick tuple
if (!TupleUtils.isTick(input)) {
return;
}
for (StatusActionListener listener : listeners) {
// still waiting for results from previous request
if (!listener.isReady()) {
LOG.debug("Not ready to get counts for status {}", listener.name);
continue;
}
CountRequest request = new CountRequest(indexName);
if (!listener.name.equalsIgnoreCase("TOTAL")) {
SearchSourceBuilder sourceBuilder = new SearchSourceBuilder();
sourceBuilder.query(QueryBuilders.termQuery("status", listener.name));
request.source(sourceBuilder);
}
listener.busy();
connection.getClient().countAsync(request, RequestOptions.DEFAULT, listener);
}
}
@Override
public void cleanup() {
connection.close();
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// NONE - THIS BOLT DOES NOT GET CONNECTED TO ANY OTHERS
}
}