// org.graylog.events.search.MoreSearch
/**
* This file is part of Graylog.
*
* Graylog is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* Graylog is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with Graylog. If not, see <http://www.gnu.org/licenses/>.
*/
package org.graylog.events.search;
import com.codahale.metrics.MetricRegistry;
import com.google.auto.value.AutoValue;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Stopwatch;
import com.google.common.collect.ImmutableSet;
import io.searchbox.client.JestClient;
import io.searchbox.core.Search;
import io.searchbox.core.SearchResult;
import io.searchbox.core.search.sort.Sort;
import io.searchbox.params.Parameters;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.index.query.BoolQueryBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.builder.SearchSourceBuilder;
import org.graylog.events.event.EventDto;
import org.graylog.events.processor.EventProcessorException;
import org.graylog.plugins.views.search.Parameter;
import org.graylog.plugins.views.search.Query;
import org.graylog.plugins.views.search.SearchJob;
import org.graylog.plugins.views.search.elasticsearch.ESQueryDecorators;
import org.graylog.plugins.views.search.elasticsearch.ElasticsearchQueryString;
import org.graylog.plugins.views.search.elasticsearch.IndexRangeContainsOneOfStreams;
import org.graylog.plugins.views.search.errors.EmptyParameterError;
import org.graylog.plugins.views.search.errors.SearchException;
import org.graylog2.Configuration;
import org.graylog2.database.NotFoundException;
import org.graylog2.indexer.IndexHelper;
import org.graylog2.indexer.IndexMapping;
import org.graylog2.indexer.IndexSetRegistry;
import org.graylog2.indexer.cluster.jest.JestUtils;
import org.graylog2.indexer.indices.Indices;
import org.graylog2.indexer.ranges.IndexRange;
import org.graylog2.indexer.ranges.IndexRangeService;
import org.graylog2.indexer.results.ResultMessage;
import org.graylog2.indexer.results.ScrollResult;
import org.graylog2.indexer.searches.Searches;
import org.graylog2.indexer.searches.Sorting;
import org.graylog2.plugin.Message;
import org.graylog2.plugin.indexer.searches.timeranges.TimeRange;
import org.graylog2.plugin.streams.Stream;
import org.graylog2.streams.StreamService;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.inject.Inject;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.stream.Collectors;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;
import static java.util.Objects.requireNonNull;
import static org.elasticsearch.index.query.QueryBuilders.boolQuery;
import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
import static org.elasticsearch.index.query.QueryBuilders.queryStringQuery;
import static org.elasticsearch.index.query.QueryBuilders.termsQuery;
/**
* This class contains search helpers for the events system.
*/
public class MoreSearch extends Searches {
private static final Logger LOG = LoggerFactory.getLogger(MoreSearch.class);
private final StreamService streamService;
private final IndexRangeService indexRangeService;
private final ScrollResult.Factory scrollResultFactory;
private final JestClient jestClient;
private final boolean allowLeadingWildcardSearches;
private final ESQueryDecorators esQueryDecorators;
/**
 * Creates the events search helper.
 *
 * All collaborators except {@code esQueryDecorators} are handed on to the {@link Searches} parent; the ones this
 * class uses directly are additionally kept in its own fields.
 *
 * @param streamService       loads streams by id
 * @param indices             passed through to {@link Searches}
 * @param indexRangeService   finds the index ranges covering a time range
 * @param indexSetRegistry    passed through to {@link Searches}
 * @param metricRegistry      passed through to {@link Searches}
 * @param scrollResultFactory creates {@link ScrollResult} instances for scroll searches
 * @param jestClient          client used to execute scroll searches
 * @param configuration       source of the "allow leading wildcard searches" setting
 * @param esQueryDecorators   decorators applied to query strings before scrolling
 */
@Inject
public MoreSearch(StreamService streamService,
                  Indices indices,
                  IndexRangeService indexRangeService,
                  IndexSetRegistry indexSetRegistry,
                  MetricRegistry metricRegistry,
                  ScrollResult.Factory scrollResultFactory,
                  JestClient jestClient,
                  Configuration configuration,
                  ESQueryDecorators esQueryDecorators) {
    super(configuration, indexRangeService, metricRegistry, streamService, indices, indexSetRegistry, jestClient, scrollResultFactory);

    // Query construction
    this.esQueryDecorators = esQueryDecorators;
    this.allowLeadingWildcardSearches = configuration.isAllowLeadingWildcardSearches();

    // Index and stream lookup
    this.indexRangeService = indexRangeService;
    this.streamService = streamService;

    // Search execution
    this.jestClient = jestClient;
    this.scrollResultFactory = scrollResultFactory;
}
/**
 * Executes a paginated events search for the given parameters.
 *
 * The query string from {@code parameters}, the event stream restriction, the time range, the optional
 * {@code filterString} and the forbidden source streams exclusion are combined as filter clauses of a single
 * bool query. A query string that is empty or {@code *} after trimming matches all documents.
 *
 * @param parameters             event search parameters; supplies query, time range, sorting, the 1-based page
 *                               and the page size
 * @param filterString           additional query string filter; ignored when {@code null} or empty
 * @param eventStreams           event streams to search in; must not be {@code null} or empty
 * @param forbiddenSourceStreams source streams the caller may not access; events referencing any of them are
 *                               excluded from the result. Must not be {@code null}, may be empty.
 * @return the result
 * @throws IllegalArgumentException if {@code parameters}, {@code eventStreams} or {@code forbiddenSourceStreams}
 *                                  is {@code null}, or if {@code eventStreams} is empty
 */
// TODO: We cannot use Searches#search() at the moment because that method cannot handle multiple streams. (because of Searches#extractStreamId())
//       We also cannot use the new search code at the moment because it doesn't do pagination.
Result eventSearch(EventsSearchParameters parameters, String filterString, Set<String> eventStreams, Set<String> forbiddenSourceStreams) {
    checkArgument(parameters != null, "parameters cannot be null");
    // Check for null explicitly so callers get an IllegalArgumentException instead of a NullPointerException.
    checkArgument(eventStreams != null, "eventStreams cannot be null");
    checkArgument(!eventStreams.isEmpty(), "eventStreams cannot be empty");
    checkArgument(forbiddenSourceStreams != null, "forbiddenSourceStreams cannot be null");

    final Sorting.Direction sortDirection = parameters.sortDirection() == EventsSearchParameters.SortDirection.ASC ? Sorting.Direction.ASC : Sorting.Direction.DESC;
    final Sorting sorting = new Sorting(parameters.sortBy(), sortDirection);
    final String queryString = parameters.query().trim();
    final Set<String> affectedIndices = getAffectedIndices(eventStreams, parameters.timerange());

    final QueryBuilder query = (queryString.isEmpty() || queryString.equals("*")) ?
            matchAllQuery() :
            queryStringQuery(queryString).allowLeadingWildcard(allowLeadingWildcardSearches);

    final BoolQueryBuilder filter = boolQuery()
            .filter(query)
            .filter(termsQuery(EventDto.FIELD_STREAMS, eventStreams))
            .filter(requireNonNull(IndexHelper.getTimestampRangeFilter(parameters.timerange())));

    if (!isNullOrEmpty(filterString)) {
        filter.filter(queryStringQuery(filterString));
    }

    if (!forbiddenSourceStreams.isEmpty()) {
        // If an event has any stream in "source_streams" that the calling search user is not allowed to access,
        // the event must not be in the search result.
        filter.filter(boolQuery().mustNot(termsQuery(EventDto.FIELD_SOURCE_STREAMS, forbiddenSourceStreams)));
    }

    final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
            .query(filter)
            // Pages are 1-based, the Elasticsearch offset is 0-based.
            .from((parameters.page() - 1) * parameters.perPage())
            .size(parameters.perPage())
            .sort(sorting.getField(), sorting.asElastic());
    final String searchSource = searchSourceBuilder.toString();

    // Build the request once and reuse it for logging and execution.
    final Search search = new Search.Builder(searchSource)
            .addType(IndexMapping.TYPE_MESSAGE)
            // Fall back to a single empty index name when no index is affected by the search.
            .addIndex(affectedIndices.isEmpty() ? Collections.singleton("") : affectedIndices)
            .allowNoIndices(false)
            .ignoreUnavailable(false)
            .build();

    if (LOG.isDebugEnabled()) {
        LOG.debug("Query:\n{}", searchSourceBuilder.toString(new ToXContent.MapParams(Collections.singletonMap("pretty", "true"))));
        LOG.debug("Execute search: {}", search);
    }

    final SearchResult searchResult = wrapInMultiSearch(search, () -> "Unable to perform search query");

    @SuppressWarnings("unchecked") final List<ResultMessage> hits = searchResult.getHits(Map.class, false).stream()
            .map(hit -> ResultMessage.parseFromSource(hit.id, hit.index, (Map<String, Object>) hit.source, hit.highlight))
            .collect(Collectors.toList());

    return Result.builder()
            .results(hits)
            .resultsCount(searchResult.getTotal())
            .duration(tookMsFromSearchResult(searchResult))
            .usedIndexNames(affectedIndices)
            .executedQuery(searchSource)
            .build();
}
/**
 * Returns the names of the indices that have to be searched for the given streams and time range.
 *
 * @param streamIds ids of the streams to search in; an empty set selects every index in the time range
 * @param timeRange the time range used to look up index ranges
 * @return the names of the affected indices, possibly empty
 */
private Set<String> getAffectedIndices(Set<String> streamIds, TimeRange timeRange) {
    final SortedSet<IndexRange> indexRanges = indexRangeService.find(timeRange.getFrom(), timeRange.getTo());

    // We support an empty streams list and return all affected indices in that case.
    if (streamIds.isEmpty()) {
        return indexRanges.stream()
                .map(IndexRange::indexName)
                .collect(Collectors.toSet());
    }

    // Otherwise keep only the index ranges accepted by the stream predicate.
    final Set<Stream> streams = loadStreams(streamIds);
    final IndexRangeContainsOneOfStreams indexRangeContainsOneOfStreams = new IndexRangeContainsOneOfStreams(streams);
    return indexRanges.stream()
            .filter(indexRangeContainsOneOfStreams)
            .map(IndexRange::indexName)
            .collect(Collectors.toSet());
}
/**
 * This scrolls results for the given query, streams and time range from Elasticsearch. The result is passed to
 * the given callback in batches. (using the given batch size)
 *
 * The search will continue until it is done, an error occurs or the search is stopped by setting the
 * {@code continueScrolling} boolean to {@code false} from the {@link ScrollCallback}.
 *
 * Before the search is executed, the query string is decorated using the given query parameters. If decoration
 * fails because of an empty parameter, the query is treated as non-matching: the method returns without
 * searching and without invoking the callback.
 *
 * TODO: Elasticsearch has a default limit of 500 concurrent scrolls. Every caller of this method should check
 *       if there is capacity to create a new scroll request. This can be done by using the ES nodes stats API.
 *       See: https://www.elastic.co/guide/en/elasticsearch/reference/current/search-request-scroll.html#scroll-search-context
 *
 * @param queryString     the search query string
 * @param streams         the set of streams to search in; an empty set disables the stream filter
 * @param queryParameters the query parameters used to decorate the query string
 * @param timeRange       the time range for the search
 * @param batchSize       the number of documents to retrieve at once
 * @param resultCallback  the callback that gets executed for each batch
 * @throws EventProcessorException if the callback throws it
 * @throws SearchException         if decorating the query fails for any reason other than an empty parameter
 * @throws UncheckedIOException    if fetching a chunk of results fails with an {@link IOException}
 */
public void scrollQuery(String queryString, Set<String> streams, Set<Parameter> queryParameters, TimeRange timeRange, int batchSize, ScrollCallback resultCallback) throws EventProcessorException {
    final String scrollTime = "1m"; // TODO: Does scroll time need to be configurable?

    final Set<String> affectedIndices = getAffectedIndices(streams, timeRange);

    try {
        queryString = decorateQuery(queryParameters, timeRange, queryString);
    } catch (SearchException e) {
        if (e.error() instanceof EmptyParameterError) {
            LOG.debug("Empty parameter from lookup table. Assuming non-matching query. Error: {}", e.getMessage());
            return;
        }
        throw e;
    }

    // Only the match-all decision uses the trimmed query; the query itself is passed on untrimmed.
    final String trimmedQuery = queryString.trim();
    final QueryBuilder query = (trimmedQuery.isEmpty() || trimmedQuery.equals("*")) ?
            matchAllQuery() :
            queryStringQuery(queryString).allowLeadingWildcard(allowLeadingWildcardSearches);

    final BoolQueryBuilder filter = boolQuery()
            .filter(query)
            .filter(requireNonNull(IndexHelper.getTimestampRangeFilter(timeRange)));

    // Filtering with an empty streams list doesn't work and would return zero results
    if (!streams.isEmpty()) {
        filter.filter(termsQuery(Message.FIELD_STREAMS, streams));
    }

    final SearchSourceBuilder searchSourceBuilder = new SearchSourceBuilder()
            .query(filter)
            .size(batchSize);
    final String searchSource = searchSourceBuilder.toString();

    // Build the request once and reuse it for logging and execution.
    final Search search = new Search.Builder(searchSource)
            .addType(IndexMapping.TYPE_MESSAGE)
            // Scroll requests contain the indices in the URL. If the list of indices is too long, the request can
            // fail. There is no way of executing a scroll search without having the list of indices in the URL,
            // as of this writing. (ES 6.8/7.1)
            .addIndex(affectedIndices.isEmpty() ? Collections.singleton("") : affectedIndices)
            // For correlation need the oldest messages to come in first
            .addSort(new Sort("timestamp", Sort.Sorting.ASC))
            .allowNoIndices(false)
            .ignoreUnavailable(false)
            .setParameter(Parameters.SCROLL, scrollTime)
            .build();

    if (LOG.isDebugEnabled()) {
        LOG.debug("Query:\n{}", searchSourceBuilder.toString(new ToXContent.MapParams(Collections.singletonMap("pretty", "true"))));
        LOG.debug("Execute search: {}", search);
    }

    final SearchResult result = JestUtils.execute(jestClient, search, () -> "Unable to scroll indices.");
    final ScrollResult scrollResult = scrollResultFactory.create(result, searchSource, scrollTime, Collections.emptyList());
    final AtomicBoolean continueScrolling = new AtomicBoolean(true);

    final Stopwatch stopwatch = Stopwatch.createStarted();
    try {
        ScrollResult.ScrollChunk scrollChunk = scrollResult.nextChunk();
        while (continueScrolling.get() && scrollChunk != null) {
            final List<ResultMessage> messages = scrollChunk.getMessages();

            LOG.debug("Passing <{}> messages to callback", messages.size());
            resultCallback.call(Collections.unmodifiableList(messages), continueScrolling);

            // Stop if the resultCallback told us to stop, before fetching another chunk
            if (!continueScrolling.get()) {
                break;
            }

            scrollChunk = scrollResult.nextChunk();
        }
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    } finally {
        try {
            // Tell Elasticsearch that we are done with the scroll so it can release resources as soon as possible
            // instead of waiting for the scroll timeout to kick in.
            scrollResult.cancel();
        } catch (Exception e) {
            // Best effort only: a failed cancel must not mask the outcome of the scroll itself.
            LOG.debug("Unable to cancel scroll, it will only be released when the scroll timeout expires", e);
        }
        LOG.debug("Scrolling done - took {} ms", stopwatch.stop().elapsed(TimeUnit.MILLISECONDS));
    }
}
/**
 * Loads the streams for the given stream ids.
 *
 * Ids for which the stream service reports {@link NotFoundException} are skipped, so the returned set can be
 * smaller than {@code streamIds}.
 *
 * @param streamIds the ids of the streams to load
 * @return a new mutable set containing the streams that could be loaded
 */
public Set<Stream> loadStreams(Set<String> streamIds) {
    // TODO: Use method from `StreamService` which loads a collection of ids (when implemented) to prevent n+1.
    // Track https://github.com/Graylog2/graylog2-server/issues/4897 for progress.
    final Set<Stream> streams = new HashSet<>();
    for (final String streamId : streamIds) {
        try {
            streams.add(streamService.load(streamId));
        } catch (NotFoundException e) {
            // A missing stream is not fatal for the search; keep the cause in the log for troubleshooting.
            LOG.debug("Failed to load stream <{}>", streamId, e);
        }
    }
    return streams;
}
/**
 * Substitute query string parameters using ESQueryDecorators.
 *
 * @param queryParameters the parameters made available to the decorators
 * @param timeRange       the time range set on the placeholder query
 * @param queryString     the query string to decorate
 * @return the query string returned by the decorators
 */
private String decorateQuery(Set<Parameter> queryParameters, TimeRange timeRange, String queryString) {
    // TODO
    // We need to create a dummy SearchJob and a Query to use the decorator API.
    // Maybe the decorate call could be refactored to make this easier.
    // The job id, query id and owner below are fixed placeholder values.
    final org.graylog.plugins.views.search.Search search = org.graylog.plugins.views.search.Search.builder()
            .parameters(ImmutableSet.copyOf(queryParameters))
            .build();
    final SearchJob searchJob = new SearchJob("1234", search, "events backend");
    final Query dummyQuery = Query.builder()
            .id("123")
            .timerange(timeRange)
            .query(ElasticsearchQueryString.builder().queryString(queryString).build())
            .build();

    return esQueryDecorators.decorate(queryString, searchJob, dummyQuery, ImmutableSet.of());
}
/**
 * Callback that receives message batches from {@link #scrollQuery(String, Set, Set, TimeRange, int, ScrollCallback)}.
 */
@FunctionalInterface
public interface ScrollCallback {
    /**
     * This will be called with message batches from a scroll query. To stop the scroll query, the
     * {@code continueScrolling} boolean can be set to {@code false}.
     *
     * @param messages          the message batch, passed as an unmodifiable list
     * @param continueScrolling the boolean that can be set to {@code false} to stop the scroll query
     * @throws EventProcessorException if processing the batch fails; the scroll query is aborted and the
     *                                 exception is passed on to the caller of the scroll query
     */
    void call(List<ResultMessage> messages, AtomicBoolean continueScrolling) throws EventProcessorException;
}
/**
 * Builds a query string fragment that matches any of the given streams, e.g.
 * {@code (streams:a OR streams:b)}.
 *
 * Every stream value is trimmed before it is used. The order of the terms follows the iteration order of
 * the given set.
 *
 * @param streams the stream values to match; must not be {@code null} or empty
 * @return the parenthesized filter with the terms joined by {@code OR}
 * @throws IllegalArgumentException if {@code streams} is {@code null} or empty
 */
@VisibleForTesting
static String buildStreamFilter(Set<String> streams) {
    checkArgument(streams != null, "streams parameter cannot be null");
    checkArgument(!streams.isEmpty(), "streams parameter cannot be empty");

    return streams.stream()
            .map(String::trim)
            .map(stream -> String.format(Locale.ENGLISH, "streams:%s", stream))
            .collect(Collectors.joining(" OR ", "(", ")"));
}
/**
 * Result of an events search as produced by {@code eventSearch}.
 */
@AutoValue
public abstract static class Result {
    /** The messages parsed from the search hits of the requested page. */
    public abstract List<ResultMessage> results();

    /** The total hit count reported by the search result. */
    public abstract long resultsCount();

    /** The search duration taken from the search result. */
    public abstract long duration();

    /** The names of the indices that were selected for the search. */
    public abstract Set<String> usedIndexNames();

    /** The search source that was sent to Elasticsearch, as a string. */
    public abstract String executedQuery();

    /** Creates a new, empty builder. */
    public static Builder builder() {
        return new AutoValue_MoreSearch_Result.Builder();
    }

    /**
     * Builder for {@link Result}.
     */
    @AutoValue.Builder
    public abstract static class Builder {
        public abstract Builder results(List<ResultMessage> results);

        public abstract Builder resultsCount(long resultsCount);

        public abstract Builder duration(long duration);

        public abstract Builder usedIndexNames(Set<String> usedIndexNames);

        public abstract Builder executedQuery(String executedQuery);

        public abstract Result build();
    }
}
}