All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.graylog2.system.processing.DBProcessingStatusService Maven / Gradle / Ivy

/*
 * Copyright (C) 2020 Graylog, Inc.
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the Server Side Public License, version 1,
 * as published by MongoDB, Inc.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * Server Side Public License for more details.
 *
 * You should have received a copy of the Server Side Public License
 * along with this program. If not, see
 * <http://www.mongodb.com/licensing/server-side-public-license>.
 */
package org.graylog2.system.processing;

import com.github.joschi.jadconfig.util.Duration;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.collect.ImmutableList;
import com.mongodb.BasicDBObject;
import com.mongodb.MongoException;
import org.bson.types.ObjectId;
import org.graylog.scheduler.clock.JobSchedulerClock;
import org.graylog2.bindings.providers.MongoJackObjectMapperProvider;
import org.graylog2.database.MongoConnection;
import org.graylog2.database.MongoDBUpsertRetryer;
import org.graylog2.plugin.BaseConfiguration;
import org.graylog2.plugin.system.NodeId;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import org.mongojack.DBCursor;
import org.mongojack.DBQuery;
import org.mongojack.DBSort;
import org.mongojack.JacksonDBCollection;

import javax.inject.Inject;
import javax.inject.Named;
import java.util.Arrays;
import java.util.List;
import java.util.Optional;

import static org.graylog2.system.processing.ProcessingStatusDto.FIELD_UPDATED_AT;

/**
 * Manages the database collection for processing status.
 */
public class DBProcessingStatusService {
    public static final String COLLECTION_NAME = "processing_status";
    private static final String FIELD_WRITTEN_MESSAGES_1M = ProcessingStatusDto.FIELD_INPUT_JOURNAL + "." + ProcessingStatusDto.JournalInfo.FIELD_WRITTEN_MESSAGES_1M_RATE;
    private static final String FIELD_UNCOMMITTED_ENTRIES = ProcessingStatusDto.FIELD_INPUT_JOURNAL + "." + ProcessingStatusDto.JournalInfo.FIELD_UNCOMMITTED_ENTRIES;
    private static final String FIELD_JOURNAL_ENABLED = ProcessingStatusDto.FIELD_INPUT_JOURNAL + "." + ProcessingStatusDto.JournalInfo.FIELD_JOURNAL_ENABLED;

    private final String nodeId;
    private final JobSchedulerClock clock;
    private final Duration updateThreshold;
    private final double journalWriteRateThreshold;
    private final JacksonDBCollection db;
    private final BaseConfiguration baseConfiguration;

    @Inject
    public DBProcessingStatusService(MongoConnection mongoConnection,
                                     NodeId nodeId,
                                     JobSchedulerClock clock,
                                     @Named(ProcessingStatusConfig.UPDATE_THRESHOLD) Duration updateThreshold,
                                     @Named(ProcessingStatusConfig.JOURNAL_WRITE_RATE_THRESHOLD) int journalWriteRateThreshold,
                                     MongoJackObjectMapperProvider mapper,
                                     BaseConfiguration baseConfiguration) {
        this.nodeId = nodeId.toString();
        this.clock = clock;
        this.updateThreshold = updateThreshold;
        this.journalWriteRateThreshold = ((Number) journalWriteRateThreshold).doubleValue();
        this.baseConfiguration = baseConfiguration;
        this.db = JacksonDBCollection.wrap(mongoConnection.getDatabase().getCollection(COLLECTION_NAME),
                ProcessingStatusDto.class,
                ObjectId.class,
                mapper.get());

        db.createIndex(new BasicDBObject(ProcessingStatusDto.FIELD_NODE_ID, 1), new BasicDBObject("unique", true));

        // Remove the old (3.1.0) index before creating the new one. This is needed, because mongodb >= 4.2 won't allow
        // the creation of identical indices with a different name. We don't use a migration,
        // because it can race with the code below that creates the same index with a shorter name.
        // TODO remove this in a future release (maybe at 3.5)
        final String OLD_INDEX_NAME = "updated_at_1_input_journal.uncommitted_entries_1_input_journal.written_messages_1m_rate_1";
        try {
            if (db.getIndexInfo().stream().anyMatch(dbo -> dbo.get("name").equals(OLD_INDEX_NAME))) {
                db.dropIndex(OLD_INDEX_NAME);
            }
        } catch (MongoException ignored) {
            // index was either never created or already deleted
        }

        // Use a custom index name to avoid the automatically generated index name which will be pretty long and
        // might cause errors due to the 127 character index name limit. (e.g. when using a long database name)
        // See: https://github.com/Graylog2/graylog2-server/issues/6322
        db.createIndex(new BasicDBObject(FIELD_UPDATED_AT, 1)
                .append(FIELD_UNCOMMITTED_ENTRIES, 1)
                .append(FIELD_WRITTEN_MESSAGES_1M, 1), new BasicDBObject("name", "compound_0"));
    }

    /**
     * Rerturns all existing processing status entries from the database.
     *
     * @return a list of all processing status entries
     */
    public List all() {
        return ImmutableList.copyOf(db.find().sort(DBSort.asc("_id")).iterator());
    }

    /**
     * Returns the processing status entry for the calling node.
     *
     * @return the processing status entry or an empty optional if none exists
     */
    public Optional get() {
        return Optional.ofNullable(db.findOne(DBQuery.is(ProcessingStatusDto.FIELD_NODE_ID, nodeId)));
    }

    /**
     * Returns the earliest post-indexing receive timestamp of all active Graylog nodes in the cluster.
     * This can be used to find out if a certain timerange is already searchable in Elasticsearch.
     * 

* Beware: This only takes the message receive time into account. It doesn't help when log sources send their * messages late. * * @return earliest post-indexing timestamp or empty optional if no processing status entries exist */ public Optional earliestPostIndexingTimestamp() { final String sortField = ProcessingStatusDto.FIELD_RECEIVE_TIMES + "." + ProcessingStatusDto.ReceiveTimes.FIELD_POST_INDEXING; final DateTime updateThresholdTimestamp = clock.nowUTC().minus(updateThreshold.toMilliseconds()); final DBQuery.Query queryWithoutMetrics = DBQuery.greaterThan(FIELD_UPDATED_AT, updateThresholdTimestamp); final DBQuery.Query queryWithMetrics = getDataSelectionQuery(clock, updateThreshold, journalWriteRateThreshold); // First try to query processing status from nodes that are active (include journal metrics restrictions). // If no result is found, query the processing status again, but without weeding out nodes with a low input volume. // This prevents to completely stall the event processing if the ingestion volume is too low. for (DBQuery.Query query: Arrays.asList(queryWithMetrics, queryWithoutMetrics)) { // Get the earliest timestamp of the post-indexing receive timestamp by sorting and returning the first one. // We use the earliest timestamp because some nodes can be faster than others and we need to make sure // to return the timestamp of the slowest one. try (DBCursor cursor = db.find(query).sort(DBSort.asc(sortField)).limit(1)) { if (cursor.hasNext()) { return Optional.of(cursor.next().receiveTimes().postIndexing()); } } } return Optional.empty(); } // This has been put into a static method to simplify testing the processing status selection @VisibleForTesting static DBQuery.Query getDataSelectionQuery(JobSchedulerClock clock, Duration updateThreshold, double journalWriteRateThreshold) { final DateTime updateThresholdTimestamp = clock.nowUTC().minus(updateThreshold.toMilliseconds()); return DBQuery.and( // Only select processing status for a node ... // ... 
that has been updated recently DBQuery.greaterThan(FIELD_UPDATED_AT, updateThresholdTimestamp), // ... and either ... DBQuery.or( // ... received a certain amount of messages in the last minute DBQuery.greaterThanEquals(FIELD_WRITTEN_MESSAGES_1M, journalWriteRateThreshold), // ... or has messages left in the journal DBQuery.greaterThanEquals(FIELD_UNCOMMITTED_ENTRIES, 1L), // ... or has journaling disabled DBQuery.is(FIELD_JOURNAL_ENABLED, false) ) ); } /** * Create or update (upsert) a processing status entry for the given {@link ProcessingStatusRecorder} using the * caller's node ID. * * @param processingStatusRecorder the processing recorder object to create/update * @return the created/updated entry */ public ProcessingStatusDto save(ProcessingStatusRecorder processingStatusRecorder) { return save(processingStatusRecorder, DateTime.now(DateTimeZone.UTC)); } @VisibleForTesting ProcessingStatusDto save(ProcessingStatusRecorder processingStatusRecorder, DateTime updatedAt) { // TODO: Using a timestamp provided by the node for "updated_at" can be bad if the node clock is skewed. // Ideally we would use MongoDB's "$currentDate" but there doesn't seem to be a way to use that // with mongojack. return MongoDBUpsertRetryer.run(() -> db.findAndModify( DBQuery.is(ProcessingStatusDto.FIELD_NODE_ID, nodeId), null, null, false, ProcessingStatusDto.of(nodeId, processingStatusRecorder, updatedAt, baseConfiguration.isMessageJournalEnabled()), true, // We want to return the updated document to the caller true)); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy