/*
* Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH under
* one or more contributor license agreements. See the NOTICE file distributed
* with this work for additional information regarding copyright ownership.
* Licensed under the Camunda License 1.0. You may not use this file
* except in compliance with the Camunda License 1.0.
*/

package io.camunda.zeebe.broker.jobstream;

import io.camunda.zeebe.broker.PartitionListener;
import io.camunda.zeebe.broker.bootstrap.BrokerStartupContext;
import io.camunda.zeebe.logstreams.log.LogStreamWriter;
import io.camunda.zeebe.logstreams.log.WriteContext;
import io.camunda.zeebe.protocol.Protocol;
import io.camunda.zeebe.protocol.impl.stream.job.ActivatedJob;
import io.camunda.zeebe.stream.api.scheduling.ScheduledCommandCache.NoopScheduledCommandCache;
import io.camunda.zeebe.stream.api.scheduling.TaskResult;
import io.camunda.zeebe.stream.impl.BufferedTaskResultBuilder;
import io.camunda.zeebe.transport.stream.api.RemoteStreamErrorHandler;
import io.camunda.zeebe.util.logging.ThrottledLogger;
import java.time.Duration;
import org.agrona.collections.Int2ObjectHashMap;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
* A {@link RemoteStreamErrorHandler} for {@link ActivatedJob} payloads, which will write any
* followup commands produced by the given {@link JobStreamErrorHandler} delegate. The followup
* commands are written on the same partition as the job's.
*
* <p>In order to obtain partition writers, this implementation is therefore also a {@link
* PartitionListener} which must be added to the {@link
* BrokerStartupContext#getPartitionListeners()} during the broker's startup process.
*
* <p>It's possible that a job was pushed when this node was leader for its partition, but that the
* error handling occurs after an election change, at which point the job will remain activated
* until it times out.
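*
* <p>Illustrative usage sketch (the variable names below are assumptions for the example, not part
* of the actual broker wiring):
*
* <pre>{@code
* final var streamErrorHandler = new RemoteJobStreamErrorHandler(delegateErrorHandler);
* // when this node becomes leader for a partition, register that partition's writer
* streamErrorHandler.addWriter(partitionId, logStreamWriter);
* // when it loses leadership, remove the writer again
* streamErrorHandler.removeWriter(partitionId);
* }</pre>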
*/
final class RemoteJobStreamErrorHandler implements RemoteStreamErrorHandler<ActivatedJob> {
private static final Logger LOGGER = LoggerFactory.getLogger(RemoteJobStreamErrorHandler.class);
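// warnings are throttled to at most one message per second to avoid flooding the log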
private static final Logger NO_WRITER_LOGGER = new ThrottledLogger(LOGGER, Duration.ofSeconds(1));
private static final Logger FAILED_WRITER_LOGGER =
new ThrottledLogger(LOGGER, Duration.ofSeconds(1));
private final JobStreamErrorHandler errorHandler;
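// log stream writers indexed by partition ID, maintained through addWriter/removeWriter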
private final Int2ObjectHashMap<LogStreamWriter> partitionWriters = new Int2ObjectHashMap<>();
RemoteJobStreamErrorHandler(final JobStreamErrorHandler errorHandler) {
this.errorHandler = errorHandler;
}
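
/**
* Decodes the partition ID from the job key, looks up that partition's writer, and delegates to
* the configured {@link JobStreamErrorHandler} to produce followup commands, which are then
* written back to the partition's log stream. If no writer is registered for the partition, the
* error is only logged.
*/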
@Override
public void handleError(final Throwable error, final ActivatedJob job) {
final var partitionId = Protocol.decodePartitionId(job.jobKey());
final var writer = partitionWriters.get(partitionId);
if (writer == null) {
NO_WRITER_LOGGER.warn(
"""
Cannot handle failed job push on partition {} as there is no writer registered;
this can occur during an election""",
partitionId);
return;
}
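// collect the delegate's followup commands in a result builder bounded by what the writer accepts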
final var resultBuilder =
new BufferedTaskResultBuilder(writer::canWriteEvents, new NoopScheduledCommandCache());
errorHandler.handleError(job, error, resultBuilder);
final var result = resultBuilder.build();
writeEntries(partitionId, job, writer, result);
}
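
/** Registers the log stream writer to use for jobs belonging to the given partition. */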
void addWriter(final int partitionId, final LogStreamWriter writer) {
partitionWriters.put(partitionId, writer);
}
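
/** Removes the writer for the given partition; failed pushes for it will then only be logged. */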
void removeWriter(final int partitionId) {
partitionWriters.remove(partitionId);
}
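
/** Writes the followup commands produced by the error handler to the job's partition. */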
private void writeEntries(
final int partitionId,
final ActivatedJob job,
final LogStreamWriter writer,
final TaskResult result) {
final var writeResult =
writer.tryWrite(WriteContext.processingResult(), result.getRecordBatch().entries());
if (writeResult.isLeft()) {
FAILED_WRITER_LOGGER.warn(
"""
Failed to handle failed job push {} on partition {}. Write to logstream failed with {};
job will remain activated until it times out.""",
job.jobKey(),
partitionId,
writeResult.getLeft());
}
}
}