
// com.digitalpebble.stormcrawler.elasticsearch.metrics.StatusMetricsBolt Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of storm-crawler-elasticsearch Show documentation
// Elasticsearch resources for StormCrawler
/**
* Licensed to DigitalPebble Ltd under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* DigitalPebble licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.digitalpebble.stormcrawler.elasticsearch.metrics;
import java.util.HashMap;
import java.util.Map;
import org.apache.storm.Config;
import org.apache.storm.metric.api.IMetric;
import org.apache.storm.task.OutputCollector;
import org.apache.storm.task.TopologyContext;
import org.apache.storm.topology.OutputFieldsDeclarer;
import org.apache.storm.topology.base.BaseRichBolt;
import org.apache.storm.tuple.Tuple;
import org.apache.storm.utils.TupleUtils;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ListenableActionFuture;
import org.elasticsearch.action.search.SearchRequestBuilder;
import org.elasticsearch.action.search.SearchResponse;
import org.elasticsearch.index.query.QueryBuilders;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.digitalpebble.stormcrawler.elasticsearch.ElasticSearchConnection;
import com.digitalpebble.stormcrawler.persistence.Status;
import com.digitalpebble.stormcrawler.util.ConfUtils;
/**
* Queries the status index periodically to get the count of URLs per status.
* This bolt can be connected to the output of any other bolt and will not
* produce anything as output.
**/
public class StatusMetricsBolt extends BaseRichBolt {
private static final Logger LOG = LoggerFactory
.getLogger(StatusMetricsBolt.class);
private static final String ESBoltType = "status";
private static final String ESStatusIndexNameParamName = "es.status.index.name";
private static final String ESStatusDocTypeParamName = "es.status.doc.type";
private String indexName;
private String docType;
private ElasticSearchConnection connection;
private Map latestStatusCounts = new HashMap<>(5);
private int freqStats = 60;
private OutputCollector _collector;
@Override
public void prepare(Map stormConf, TopologyContext context,
OutputCollector collector) {
_collector = collector;
indexName = ConfUtils.getString(stormConf, ESStatusIndexNameParamName,
"status");
docType = ConfUtils.getString(stormConf, ESStatusDocTypeParamName,
"doc");
try {
connection = ElasticSearchConnection.getConnection(stormConf,
ESBoltType);
} catch (Exception e1) {
LOG.error("Can't connect to ElasticSearch", e1);
throw new RuntimeException(e1);
}
context.registerMetric("status.count", new IMetric() {
@Override
public Object getValueAndReset() {
return latestStatusCounts;
}
}, freqStats);
}
@Override
public Map getComponentConfiguration() {
Config conf = new Config();
conf.put(Config.TOPOLOGY_TICK_TUPLE_FREQ_SECS, freqStats);
return conf;
}
@Override
public void execute(Tuple input) {
_collector.ack(input);
// this bolt can be connected to anything
// we just want to trigger a new search when the input is a tick tuple
if (!TupleUtils.isTick(input)) {
return;
}
Status[] slist = new Status[] { Status.DISCOVERED, Status.ERROR,
Status.FETCH_ERROR, Status.FETCHED, Status.REDIRECTION };
SearchRequestBuilder build = connection.getClient()
.prepareSearch(indexName).setTypes(docType).setFrom(0)
.setSize(0).setExplain(false);
// should be faster than running the aggregations
for (Status s : slist) {
build.setQuery(QueryBuilders.termQuery("status", s.name()));
ListenableActionFuture future = build.execute();
future.addListener(new ActionListener() {
public void onResponse(SearchResponse response) {
long total = response.getHits().getTotalHits();
latestStatusCounts.put(s.name(), total);
}
public void onFailure(Exception e) {
LOG.error("Problem retrieving counts for status {}",
s.name(), e);
}
});
future.actionGet();
}
}
@Override
public void cleanup() {
connection.close();
}
@Override
public void declareOutputFields(OutputFieldsDeclarer declarer) {
// NONE - THIS BOLT DOES NOT GET CONNECTED TO ANY OTHERS
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy