All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.camunda.zeebe.transport.stream.impl.ClientStreamRequestManager Maven / Gradle / Ivy

There is a newer version: 8.7.0-alpha2
Show newest version
/*
 * Copyright Camunda Services GmbH and/or licensed to Camunda Services GmbH under
 * one or more contributor license agreements. See the NOTICE file distributed
 * with this work for additional information regarding copyright ownership.
 * Licensed under the Camunda License 1.0. You may not use this file
 * except in compliance with the Camunda License 1.0.
 */
package io.camunda.zeebe.transport.stream.impl;

import io.atomix.cluster.MemberId;
import io.atomix.cluster.messaging.ClusterCommunicationService;
import io.atomix.cluster.messaging.MessagingException.NoSuchMemberException;
import io.atomix.cluster.messaging.MessagingException.ProtocolException;
import io.atomix.cluster.messaging.MessagingException.RemoteHandlerFailure;
import io.camunda.zeebe.scheduler.ConcurrencyControl;
import io.camunda.zeebe.transport.stream.impl.ClientStreamRegistration.State;
import io.camunda.zeebe.transport.stream.impl.messages.AddStreamRequest;
import io.camunda.zeebe.transport.stream.impl.messages.AddStreamResponse;
import io.camunda.zeebe.transport.stream.impl.messages.ErrorResponse;
import io.camunda.zeebe.transport.stream.impl.messages.RemoveStreamRequest;
import io.camunda.zeebe.transport.stream.impl.messages.RemoveStreamResponse;
import io.camunda.zeebe.transport.stream.impl.messages.StreamResponseDecoder;
import io.camunda.zeebe.transport.stream.impl.messages.StreamTopics;
import io.camunda.zeebe.util.Either;
import io.camunda.zeebe.util.VisibleForTesting;
import io.camunda.zeebe.util.buffer.BufferUtil;
import io.camunda.zeebe.util.buffer.BufferWriter;
import io.camunda.zeebe.util.exception.UnrecoverableException;
import java.time.Duration;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.UUID;
import java.util.function.Function;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Coordinates registering/de-registering {@link AggregatedClientStream} with arbitrary set of
 * servers.
 *
 * 

NOTE: all operations are potentially asynchronous as network I/O is involved. To avoid race * conditions w.r.t to the stream lifecycle, the manager keeps track of the registration state per * server via a {@link ClientStreamRegistration}. A registration follows a strict lifecycle via a * state machine: INITIAL -> ADDING -> ADDED -> REMOVING -> REMOVED -> CLOSED. As such, * registrations cannot be reused, and are recreated per stream/host pair. * *

This class is NOT thread-safe, and must be executed within the same thread context as the * {@link ClientStreamManager}. */ final class ClientStreamRequestManager { private static final Logger LOGGER = LoggerFactory.getLogger(ClientStreamRequestManager.class); private static final byte[] REMOVE_ALL_REQUEST = new byte[0]; private static final Duration RETRY_DELAY = Duration.ofSeconds(1); private static final Duration REQUEST_TIMEOUT = Duration.ofSeconds(5); // maps the registration state, for each known host, of each stream private final Map>> registrations = new HashMap<>(); private final StreamResponseDecoder responseDecoder = new StreamResponseDecoder(); private final ClusterCommunicationService communicationService; private final ConcurrencyControl executor; ClientStreamRequestManager( final ClusterCommunicationService communicationService, final ConcurrencyControl executor) { this.communicationService = communicationService; this.executor = executor; } /** * Registers the given stream to all the given servers. See {@link #add(AggregatedClientStream, * MemberId)} for the individual operation. * * @param stream the stream to register * @param serverIds the list of servers on which to register the stream * @throws IllegalStateException if the stream was already removed, is currently being removed, or * has been closed, on any of the servers */ void add(final AggregatedClientStream stream, final Collection serverIds) { for (final var serverId : serverIds) { add(stream, serverId); } } /** * Registers the given stream to all the given servers. When a stream is finally registered * remotely to a specific server, its {@link AggregatedClientStream#isConnected(MemberId)} will * return true. * *

This operation is idempotent: if the stream is already registered on a server, it will not * be registered again. * *

NOTE: if the stream is currently being removed, this will throw an exception. The idea is * that if the stream is being removed, then it doesn't exist in the top level registry anymore, * and shouldn't be reused. * * @param stream the stream to register * @param serverId the server on which to register the stream * @throws IllegalStateException if the stream was already removed, is currently being removed, or * has been closed */ void add(final AggregatedClientStream stream, final MemberId serverId) { final var registration = registrationFor(stream, serverId); add(registration); } /** * De-registers a given stream from all given servers. See {@link #remove(AggregatedClientStream, * MemberId)} for more. * * @param stream the stream to remove * @param serverIds the list of servers to remove it from */ void remove(final AggregatedClientStream stream, final Collection serverIds) { for (final var serverId : serverIds) { remove(stream, serverId); } } /** * De-registers the stream from the given server. Does nothing if the stream was already closed or * is already removed, or in the process of being removed, from the server. This operation is * fully idempotent. * * @param stream the stream to remove * @param serverId the server on which it should be removed */ void remove(final AggregatedClientStream stream, final MemberId serverId) { final var streamsPerHost = registrations.get(serverId); if (streamsPerHost == null) { return; } final var registration = streamsPerHost.get(stream.streamId()); if (registration != null) { remove(registration); } } /** * Sends a single remove all request to each given server, and closes all pending registrations. * * @param servers the list of servers to notify */ void removeAll(final Collection servers) { registrations.values().stream() .flatMap(m -> m.values().stream()) .forEach(ClientStreamRegistration::transitionToClosed); registrations.clear(); servers.forEach(this::doRemoveAll); } /** * Send remove stream request to servers without waiting for ack and without retry. As this is * used to send even when no stream was originally registered, we ignore any existing * registrations. */ void removeUnreliable(final UUID streamId, final Collection servers) { final var request = new RemoveStreamRequest().streamId(streamId); final var payload = BufferUtil.bufferAsArray(request); servers.forEach( serverId -> { communicationService.unicast( StreamTopics.REMOVE.topic(), payload, Function.identity(), serverId, true); purgeRegistration(streamId, serverId); }); } private void add(final ClientStreamRegistration registration) { if (registration.state() == State.ADDING || !registration.transitionToAdding()) { return; } final var request = new AddStreamRequest() .streamId(registration.streamId()) .streamType(registration.logicalId().streamType()) .metadata(registration.logicalId().metadata()); final var pendingRequest = registration.pendingRequest(); if (pendingRequest != null) { // error - should not have a pending request if we're registering! throw new IllegalStateException( "Failed to add remote client stream %s to %s; there is an incomplete pending request" .formatted(registration.streamId(), registration.serverId())); } final var payload = BufferUtil.bufferAsArray(request); sendAddRequest(registration, payload); } private void remove(final ClientStreamRegistration registration) { if (registration.state() == State.INITIAL) { // bail early, as we never added this stream registration.transitionToRemoved(); return; } if (registration.state() == State.REMOVING || !registration.transitionToRemoving()) { return; } final var request = new RemoveStreamRequest().streamId(registration.streamId()); final var payload = BufferUtil.bufferAsArray(request); final var pendingRequest = registration.pendingRequest(); if (pendingRequest == null) { sendRemoveRequest(registration, payload); return; } // to minimize the likelihood of out-of-order requests on the server side, wait until the // current request is finished before sending the next request, regardless of the result pendingRequest.whenCompleteAsync( (ok, error) -> sendRemoveRequest(registration, payload), executor::run); } /** * Closes all pending registrations for this server, and removes them from the in-memory cache. */ void onServerRemoved(final MemberId serverId) { final var perHost = registrations.remove(serverId); if (perHost == null) { return; } LOGGER.trace("Closing all registrations for server {}", serverId); perHost.values().forEach(ClientStreamRegistration::transitionToClosed); } @VisibleForTesting("Allows easier test set up and validation") ClientStreamRegistration registrationFor( final AggregatedClientStream stream, final MemberId serverId) { final var streamsPerHost = registrations.computeIfAbsent(serverId, ignored -> new HashMap<>()); return streamsPerHost.computeIfAbsent( stream.streamId(), streamId -> new ClientStreamRegistration<>(stream, serverId)); } private void sendAddRequest( final ClientStreamRegistration registration, final byte[] request) { if (registration.state() != State.ADDING) { return; } final var pendingRequest = communicationService.send( StreamTopics.ADD.topic(), request, Function.identity(), Function.identity(), registration.serverId(), REQUEST_TIMEOUT); registration.setPendingRequest(pendingRequest); pendingRequest.whenCompleteAsync( (response, error) -> handleAddResponse(registration, request, response, error), executor::run); } private void handleAddResponse( final ClientStreamRegistration registration, final byte[] request, final byte[] responseBuffer, final Throwable error) { final var state = registration.state(); if (state != State.ADDING) { LOGGER.trace("Skip handling ADD response since the state is {}", state, error); return; } final Throwable failure; final Either response; if (error == null) { response = responseDecoder.decode(responseBuffer, new AddStreamResponse()); if (response.isRight()) { registration.transitionToAdded(); return; } failure = response.getLeft().asException(); } else { failure = error; } // For now, always retry; in some cases, the request will eventually go through. However, in // some cases it never will. This should be covered in a follow-up issue. // Unrecoverable cases: // - RemoteHandlerError // - ErrorResponse.code in [ MALFORMED, INVALID ] LOGGER.warn( "Failed to add stream {} on {}; will retry in {}", registration.streamId(), registration.serverId(), RETRY_DELAY, failure); executor.schedule(RETRY_DELAY, () -> sendAddRequest(registration, request)); } private void sendRemoveRequest( final ClientStreamRegistration registration, final byte[] request) { if (registration.state() != State.REMOVING) { return; } final var pendingRequest = communicationService.send( StreamTopics.REMOVE.topic(), request, Function.identity(), Function.identity(), registration.serverId(), REQUEST_TIMEOUT); registration.setPendingRequest(pendingRequest); pendingRequest.whenCompleteAsync( (response, error) -> handleRemoveResponse(registration, request, response, error), executor::run); } private void handleRemoveResponse( final ClientStreamRegistration registration, final byte[] request, final byte[] responseBuffer, final Throwable error) { final var state = registration.state(); if (state != State.REMOVING) { LOGGER.trace("Skip handling REMOVE response since the state is {}", state, error); return; } final Throwable failure; final Either response; if (error == null) { response = responseDecoder.decode(responseBuffer, new RemoveStreamResponse()); if (response.isRight()) { registration.transitionToRemoved(); purgeRegistration(registration.streamId(), registration.serverId()); return; } failure = response.getLeft().asException(); } else { failure = error; } // For now, always retry; in some cases, the request will eventually go through. However, in // some cases it never will. This should be covered in a follow-up issue. // Unrecoverable cases: // - RemoteHandlerError // - ErrorResponse.code in [ MALFORMED, INVALID ] switch (failure) { // all returned errors are currently unnecessary to retry case final UnrecoverableException e -> handleUnrecoverableExceptionOnRemove(registration, e); // no point retrying if the remote handler failed to handle our request case final RemoteHandlerFailure e -> handleUnrecoverableExceptionOnRemove(registration, e); // should not happen, since it means the member was removed from the topology, but keep as a // failsafe case final NoSuchMemberException e -> handleUnrecoverableExceptionOnRemove(registration, e); // essentially happens due to malformed request, no point retrying case final ProtocolException e -> handleUnrecoverableExceptionOnRemove(registration, e); default -> { LOGGER.debug( "Failed to remove remote stream {} on {}, will retry in {}", registration.streamId(), registration.serverId(), RETRY_DELAY, failure); executor.schedule(RETRY_DELAY, () -> sendRemoveRequest(registration, request)); } } } private void handleUnrecoverableExceptionOnRemove( final ClientStreamRegistration registration, final Throwable e) { LOGGER.debug( """ Failed to remove stream '{}' for member '{}'; unrecoverable error occurred on recipient side, will not retry.""", registration.streamId(), registration.serverId(), e); registration.transitionToRemoved(); purgeRegistration(registration.streamId(), registration.serverId()); } private void purgeRegistration(final UUID streamId, final MemberId serverId) { final var perHost = registrations.get(serverId); if (perHost != null) { final var registration = perHost.remove(streamId); if (registration != null) { registration.transitionToClosed(); } if (perHost.isEmpty()) { registrations.remove(serverId); } } } private void doRemoveAll(final MemberId brokerId) { // Do not wait for response, as this is expected to be called while shutting down. communicationService.unicast( StreamTopics.REMOVE_ALL.topic(), REMOVE_ALL_REQUEST, Function.identity(), brokerId, true); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy