org.graylog2.indexer.searches.Searches Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of graylog2-server Show documentation
Graylog
There is a newer version: 6.1.4
/*
 * Copyright (C) 2020 Graylog, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * .
 */
package org.graylog2.indexer.searches;

import com.codahale.metrics.Counter;
import com.codahale.metrics.Histogram;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableSortedSet;
import org.graylog2.database.NotFoundException;
import org.graylog2.indexer.IndexSet;
import org.graylog2.indexer.IndexSetRegistry;
import org.graylog2.indexer.indices.Indices;
import org.graylog2.indexer.ranges.IndexRange;
import org.graylog2.indexer.ranges.IndexRangeService;
import org.graylog2.indexer.results.ChunkedResult;
import org.graylog2.indexer.results.CountResult;
import org.graylog2.indexer.results.FieldStatsResult;
import org.graylog2.indexer.results.SearchResult;
import org.graylog2.indexer.searches.timeranges.TimeRanges;
import org.graylog2.plugin.indexer.searches.timeranges.TimeRange;
import org.graylog2.plugin.streams.Stream;
import org.graylog2.streams.StreamService;

import javax.annotation.Nullable;
import javax.inject.Inject;
import javax.inject.Singleton;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;

import static com.codahale.metrics.MetricRegistry.name;
import static com.google.common.base.Strings.isNullOrEmpty;
import static java.util.Objects.requireNonNull;
import static org.graylog2.indexer.EventIndexTemplateProvider.EVENT_TEMPLATE_TYPE;
import static org.graylog2.indexer.MessageIndexTemplateProvider.MESSAGE_TEMPLATE_TYPE;

@Singleton
public class Searches {
    private static final Pattern filterStreamIdPattern = Pattern.compile("^(.+[^\\p{Alnum}])?streams:([\\p{XDigit}]+)");

    private final IndexRangeService indexRangeService;
    private final Timer esRequestTimer;
    private final Histogram esTimeRangeHistogram;
    private final Counter esTotalSearchesCounter;
    private final StreamService streamService;
    private final Indices indices;
    private final IndexSetRegistry indexSetRegistry;
    private final SearchesAdapter searchesAdapter;

    @Inject
    public Searches(IndexRangeService indexRangeService,
                    MetricRegistry metricRegistry,
                    StreamService streamService,
                    Indices indices,
                    IndexSetRegistry indexSetRegistry,
                    SearchesAdapter searchesAdapter) {
        this.indexRangeService = requireNonNull(indexRangeService, "indexRangeService");

        this.esRequestTimer = metricRegistry.timer(name(Searches.class, "elasticsearch", "requests"));
        this.esTimeRangeHistogram = metricRegistry.histogram(name(Searches.class, "elasticsearch", "ranges"));
        this.esTotalSearchesCounter = metricRegistry.counter(name(Searches.class, "elasticsearch", "total-searches"));
        this.streamService = requireNonNull(streamService, "streamService");
        this.indices = requireNonNull(indices, "indices");
        this.indexSetRegistry = requireNonNull(indexSetRegistry, "indexSetRegistry");
        this.searchesAdapter = searchesAdapter;
    }

    public CountResult count(String query, TimeRange range) {
        return count(query, range, null);
    }

    public CountResult count(String query, TimeRange range, String filter) {
        final Set affectedIndices = determineAffectedIndices(range, filter);
        if (affectedIndices.isEmpty()) {
            return CountResult.empty();
        }

        final CountResult result = searchesAdapter.count(affectedIndices, query, range, filter);

        recordEsMetrics(result.tookMs(), range);

        return result;
    }

    @Deprecated
    public ChunkedResult scroll(String query, TimeRange range, int limit, int offset, List fields, String filter, int batchSize) {
        final Set affectedIndices = determineAffectedIndices(range, filter);
        final Set indexWildcards = indexSetRegistry.getForIndices(affectedIndices).stream()
                .map(IndexSet::getIndexWildcard)
                .collect(Collectors.toSet());

        final Sorting sorting = new Sorting("_doc", Sorting.Direction.ASC);

        ChunkCommand.Builder scrollCommandBuilder = ChunkCommand.builder()
                .query(query)
                .range(range)
                .offset(offset)
                .fields(fields)
                .filter(filter)
                .sorting(sorting)
                .indices(indexWildcards);

        // limit > 0 instead of ScrollCommand.NO_LIMIT is a fix for #9817, the caller of this method are only in the legacy-API
        scrollCommandBuilder = limit > 0 ? scrollCommandBuilder.limit(limit) : scrollCommandBuilder;
        scrollCommandBuilder = batchSize != ChunkCommand.NO_BATCHSIZE ? scrollCommandBuilder.batchSize(batchSize) : scrollCommandBuilder;

        final ChunkedResult result = searchesAdapter.scroll(scrollCommandBuilder.build());

        recordEsMetrics(result.tookMs(), range);

        return result;
    }

    public SearchResult search(String query, TimeRange range, int limit, int offset, Sorting sorting) {
        return search(query, null, range, limit, offset, sorting);
    }

    public SearchResult search(String query, String filter, TimeRange range, int limit, int offset, Sorting sorting) {
        final SearchesConfig searchesConfig = SearchesConfig.builder()
                .query(query)
                .filter(filter)
                .range(range)
                .limit(limit)
                .offset(offset)
                .sorting(sorting)
                .build();

        return search(searchesConfig);
    }

    public SearchResult search(SearchesConfig config) {
        final Set indexRanges = determineAffectedIndicesWithRanges(config.range(), config.filter());

        final Set indices = extractIndexNamesFromIndexRanges(indexRanges);
        final SearchResult result = searchesAdapter.search(indices, indexRanges, config);
        recordEsMetrics(result.tookMs(), config.range());
        return result;
    }

    public FieldStatsResult fieldStats(String field, String query, TimeRange range) {
        return fieldStats(field, query, null, range, true, true, true);
    }

    public FieldStatsResult fieldStats(String field,
                                       String query,
                                       String filter,
                                       TimeRange range,
                                       boolean includeCardinality,
                                       boolean includeStats,
                                       boolean includeCount) {
        final Set indices = indicesContainingField(determineAffectedIndices(range, filter), field);

        final FieldStatsResult result = searchesAdapter.fieldStats(query, filter, range, indices, field, includeCardinality, includeStats, includeCount);

        recordEsMetrics(result.tookMs(), range);
        return result;
    }

    private Set indicesContainingField(Set strings, String field) {
        return indices.getAllMessageFieldsForIndices(strings.toArray(new String[0]))
            .entrySet()
            .stream()
            .filter(entry -> entry.getValue().contains(field))
            .map(Map.Entry::getKey)
            .collect(Collectors.toSet());
    }

    private void recordEsMetrics(long tookMs, TimeRange range) {
        esTotalSearchesCounter.inc();

        esRequestTimer.update(tookMs, TimeUnit.MILLISECONDS);

        if (range != null) {
            esTimeRangeHistogram.update(TimeRanges.toSeconds(range));
        }
    }

    /**
     * Extracts the last stream id from the filter string passed as part of the elasticsearch query. This is used later
     * to pass to possibly existing message decorators for stream-specific configurations.
     *
     * The assumption is that usually (when listing/searching messages for a stream) only a single stream filter is passed.
     * When this is not the case, only the last stream id will be taked into account.
     *
     * This is currently a workaround. A better solution would be to pass the stream id which is supposed to be the scope
     * for a search query as a separate parameter.
     *
     * @param filter the filter string like "streams:xxxyyyzzz"
     * @return the optional stream id
     */
    public static Optional extractStreamId(String filter) {
        if (isNullOrEmpty(filter)) {
            return Optional.empty();
        }
        final Matcher streamIdMatcher = filterStreamIdPattern.matcher(filter);
        if (streamIdMatcher.find()) {
            return Optional.of(streamIdMatcher.group(2));
        }
        return Optional.empty();
    }

    @VisibleForTesting
    Set determineAffectedIndices(TimeRange range, @Nullable String filter) {
        return extractIndexNamesFromIndexRanges(determineAffectedIndicesWithRanges(range, filter));
    }

    private Set extractIndexNamesFromIndexRanges(Set indexRanges) {
        return indexRanges.stream()
                .map(IndexRange::indexName)
                .collect(Collectors.toSet());
    }

    @VisibleForTesting
    Set determineAffectedIndicesWithRanges(TimeRange range, @Nullable String filter) {
        final Optional streamId = extractStreamId(filter);
        IndexSet indexSet = null;
        // if we are searching in a stream, we are further restricting the indices using the currently
        // configure index set of that stream.
        // later on we will also test against each index range (we load all of them) to see if there are
        // additional index ranges that match, this can happen with restored archives or when the index set for
        // a stream has changed: a stream only knows about its currently configured index set, no the history
        if (streamId.isPresent()) {
            try {
                final Stream stream = streamService.load(streamId.get());
                indexSet = stream.getIndexSet();
            } catch (NotFoundException ignored) {
            }
        }

        final ImmutableSortedSet.Builder indices = ImmutableSortedSet.orderedBy(IndexRange.COMPARATOR);
        final SortedSet indexRanges = indexRangeService.find(range.getFrom(), range.getTo());
        final Set affectedIndexNames = indexRanges.stream().map(IndexRange::indexName).collect(Collectors.toSet());
        final Set eventIndexSets = indexSetRegistry.getForIndices(affectedIndexNames).stream()
                .filter(indexSet1 -> EVENT_TEMPLATE_TYPE.equals(indexSet1.getConfig().indexTemplateType().orElse(MESSAGE_TEMPLATE_TYPE)))
                .collect(Collectors.toSet());
        for (IndexRange indexRange : indexRanges) {
            // if we aren't in a stream search, we look at all the ranges matching the time range.
            if (indexSet == null && filter == null) {
                // Don't include the index range if it's for an event index set to avoid sorting issues.
                // See the following issues for details:
                // - https://github.com/Graylog2/graylog2-server/issues/6384
                // - https://github.com/Graylog2/graylog2-server/issues/6490
                if (eventIndexSets.stream().anyMatch(set -> set.isManagedIndex(indexRange.indexName()))) {
                    continue;
                }
                indices.add(indexRange);
                continue;
            }

            // A range applies to this search if either: the current index set of the stream matches or a previous index set matched.
            final boolean streamInIndexRange = streamId.isPresent() && indexRange.streamIds() != null && indexRange.streamIds().contains(streamId.get());
            final boolean streamInCurrentIndexSet = indexSet != null && indexSet.isManagedIndex(indexRange.indexName());

            if (streamInIndexRange) {
                indices.add(indexRange);
            }
            if (streamInCurrentIndexSet) {
                indices.add(indexRange);
            }
        }

        return indices.build();
    }
}