All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.luminis.quic.recovery.RecoveryManager Maven / Gradle / Ivy

There is a newer version: 0.10.1
Show newest version
/*
 * Copyright © 2019, 2020, 2021, 2022, 2023, 2024 Peter Doornbosch
 *
 * This file is part of Kwik, an implementation of the QUIC protocol in Java.
 *
 * Kwik is free software: you can redistribute it and/or modify it under
 * the terms of the GNU Lesser General Public License as published by the
 * Free Software Foundation, either version 3 of the License, or (at your option)
 * any later version.
 *
 * Kwik is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for
 * more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with this program. If not, see .
 */
package net.luminis.quic.recovery;

import net.luminis.quic.cc.CongestionController;
import net.luminis.quic.concurrent.DaemonThreadFactory;
import net.luminis.quic.common.EncryptionLevel;
import net.luminis.quic.common.PnSpace;
import net.luminis.quic.frame.AckFrame;
import net.luminis.quic.frame.Padding;
import net.luminis.quic.frame.PingFrame;
import net.luminis.quic.frame.QuicFrame;
import net.luminis.quic.impl.*;
import net.luminis.quic.log.Logger;
import net.luminis.quic.packet.QuicPacket;
import net.luminis.quic.send.Sender;

import java.time.Clock;
import java.time.Duration;
import java.time.Instant;
import java.time.LocalTime;
import java.time.ZoneId;
import java.time.format.DateTimeFormatter;
import java.util.Collections;
import java.util.List;
import java.util.Optional;
import java.util.concurrent.*;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * QUIC Loss Detection is specified in https://www.rfc-editor.org/rfc/rfc9002.html.
 *
 * "QUIC senders use acknowledgments to detect lost packets and a PTO to ensure acknowledgments are received"
 * It uses a single timer, because either there are lost packets to detect, or a probe must be scheduled, never both.
 *
 * 

Ack based loss detection

* When an Ack is received, packets that are sent "long enough" before the largest acked, are deemed lost; for the * packets not send "long enough", a timer is set to mark them as lost when "long enough" time has been passed. * * An example: *
{@code
 *         -----------------------time------------------->>
 * sent:   1           2      3        4
 * acked:                                    4
 *         \--- long enough before 4 --/                       => 1 is marked lost immediately
 *                     \--not long enough before 4 --/
 *                                                   |
 *                                                   Set timer at this point in time, as that will be "long enough".
 *                                                   At that time, a new timer will be set for 3, unless acked meanwhile.
 * }

* *

Detecting tail loss with probe timeout

* When no Acks arrive, no packets will be marked as lost. To trigger the peer to send an ack (so loss detection can do * its job again), a probe (ack-eliciting packet) will be sent after the probe timeout. If the situation does not change * (i.e. no Acks received), additional probes will be sent, but with an exponentially growing delay. * * An example: *
{@code
 *         -----------------------time------------------->>
 * sent:   1           2      3        4
 * acked:                                    4
 *                            \-- timer set at loss time  --/
 *                                                          |
 *                                                          When the timer fires, there is no new ack received, so
 *                                                          nothing can be marked as lost. A probe is scheduled for
 *                                                          "probe timeout" time after the time 3 was sent:
 *                            \-- timer set at "probe timeout" time after 3 was sent --\
 *                                                                                     |
 *                                                                                     Send probe!
 * }
* Note that packet 3 will not be marked as lost as long no ack is received! *

* *

Exceptions * Because a server might be blocked by the anti-amplification limit, a client must also send probes when it has no * ack eliciting packets in flight, but is not sure whether the peer has validated the client address. *

*/ public class RecoveryManager implements FrameReceivedListener, HandshakeStateListener { private final Clock clock; private final Role role; private final RttEstimator rttEstimater; private final LossDetector[] lossDetectors = new LossDetector[PnSpace.values().length]; private final Sender sender; private final Logger log; private final ScheduledExecutorService scheduler; private int receiverMaxAckDelay; private ScheduledFuture lossDetectionFuture; // Concurrency: guarded by scheduleLock private final Object scheduleLock = new Object(); private volatile int ptoCount; private volatile Instant timerExpiration; private volatile HandshakeState handshakeState = HandshakeState.Initial; private volatile boolean hasBeenReset = false; public RecoveryManager(Role role, RttEstimator rttEstimater, CongestionController congestionController, Sender sender, Logger logger) { this(Clock.systemUTC(), role, rttEstimater, congestionController, sender, logger); } public RecoveryManager(Clock clock, Role role, RttEstimator rttEstimater, CongestionController congestionController, Sender sender, Logger logger) { this.clock = clock; this.role = role; this.rttEstimater = rttEstimater; for (PnSpace pnSpace: PnSpace.values()) { lossDetectors[pnSpace.ordinal()] = new LossDetector(clock ,this, rttEstimater, congestionController, () -> sender.flush(), logger.getQLog()); } this.sender = sender; log = logger; scheduler = Executors.newScheduledThreadPool(1, new DaemonThreadFactory("loss-detection")); synchronized (scheduleLock) { lossDetectionFuture = new NullScheduledFuture(); } } void setLossDetectionTimer() { PnSpaceTime earliestLossTime = getEarliestLossTime(LossDetector::getLossTime); Instant lossTime = earliestLossTime != null? earliestLossTime.lossTime: null; if (lossTime != null) { rescheduleLossDetectionTimeout(lossTime); } else { boolean ackElicitingInFlight = ackElicitingInFlight(); boolean peerAwaitingAddressValidation = peerAwaitingAddressValidation(); // https://datatracker.ietf.org/doc/html/draft-ietf-quic-recovery-34#section-6.2.2.1 // "That is, the client MUST set the probe timer if the client has not received an acknowledgment for any of // its Handshake packets and the handshake is not confirmed (...), even if there are no packets in flight." if (ackElicitingInFlight || peerAwaitingAddressValidation) { PnSpaceTime ptoTimeAndSpace = getPtoTimeAndSpace(); if (ptoTimeAndSpace == null) { log.recovery("cancelling loss detection timer (no loss time set, no ack eliciting in flight, peer not awaiting address validation (1))"); unschedule(); } else { rescheduleLossDetectionTimeout(ptoTimeAndSpace.lossTime); if (log.logRecovery()) { int timeout = (int) Duration.between(clock.instant(), ptoTimeAndSpace.lossTime).toMillis(); log.recovery("reschedule loss detection timer for PTO over " + timeout + " millis, " + "based on %s/" + ptoTimeAndSpace.pnSpace + ", because " + (peerAwaitingAddressValidation ? "peerAwaitingAddressValidation " : "") + (ackElicitingInFlight ? "ackElicitingInFlight " : "") + "| RTT:" + rttEstimater.getSmoothedRtt() + "/" + rttEstimater.getRttVar(), ptoTimeAndSpace.lossTime); } } } else { log.recovery("cancelling loss detection timer (no loss time set, no ack eliciting in flight, peer not awaiting address validation (2))"); unschedule(); } } } /** * Determines the current probe timeout. * This method is defined in https://www.rfc-editor.org/rfc/rfc9002.html#name-setting-the-loss-detection-. * @return a PnSpaceTime object defining the next probe: its time and for which packet number space. */ private PnSpaceTime getPtoTimeAndSpace() { int ptoDuration = rttEstimater.getSmoothedRtt() + Integer.max(1, 4 * rttEstimater.getRttVar()); ptoDuration *= (int) (Math.pow(2, ptoCount)); // The pseudo code in https://www.rfc-editor.org/rfc/rfc9002.html#name-setting-the-loss-detection- test for // ! ackElicitingInFlight() to determine whether peer is awaiting address validation. In a multi-threaded // implementation, that solution is subject to all kinds of race conditions, so its better to just check: if (peerAwaitingAddressValidation()) { if (handshakeState.hasNoHandshakeKeys()) { log.recovery("getPtoTimeAndSpace: no ack eliciting in flight and no handshake keys -> probe Initial"); return new PnSpaceTime(PnSpace.Initial, clock.instant().plusMillis(ptoDuration)); } else { log.recovery("getPtoTimeAndSpace: no ack eliciting in flight but handshake keys -> probe Handshake"); return new PnSpaceTime(PnSpace.Handshake, clock.instant().plusMillis(ptoDuration)); } } // Find earliest pto time Instant ptoTime = Instant.MAX; PnSpace ptoSpace = null; for (PnSpace pnSpace: PnSpace.values()) { if (lossDetectors[pnSpace.ordinal()].ackElicitingInFlight()) { if (pnSpace == PnSpace.App && handshakeState.isNotConfirmed()) { // https://www.rfc-editor.org/rfc/rfc9002.html#name-setting-the-loss-detection- // "Skip Application Data until handshake confirmed" log.recovery("getPtoTimeAndSpace is skipping level App, because handshake not yet confirmed!"); continue; // Because App is the last, this is effectively a return. } if (pnSpace == PnSpace.App) { // https://www.rfc-editor.org/rfc/rfc9002.html#name-setting-the-loss-detection- // "Include max_ack_delay and backoff for Application Data" ptoDuration += receiverMaxAckDelay * (int) (Math.pow(2, ptoCount)); } Instant lastAckElicitingSent = lossDetectors[pnSpace.ordinal()].getLastAckElicitingSent(); if (lastAckElicitingSent != null && lastAckElicitingSent.plusMillis(ptoDuration).isBefore(ptoTime)) { ptoTime = lastAckElicitingSent.plusMillis(ptoDuration); ptoSpace = pnSpace; } } } if (ptoSpace != null) { return new PnSpaceTime(ptoSpace, ptoTime); } else { return null; } } private boolean peerAwaitingAddressValidation() { return role == Role.Client && handshakeState.isNotConfirmed() && lossDetectors[PnSpace.Handshake.ordinal()].noAckedReceived(); } private void lossDetectionTimeout() { // Because cancelling the ScheduledExecutor task quite often fails, double check whether the timer should expire. Instant expiration = timerExpiration; if (expiration == null) { // Timer was cancelled, but it still fired; ignore log.warn("Loss detection timeout: Timer was cancelled."); return; } else if (clock.instant().isBefore(expiration) && Duration.between(clock.instant(), expiration).toMillis() > 0) { // Might be due to an old task that was cancelled, but unfortunately, it also happens that the scheduler // executes tasks much earlier than requested (30 ~ 40 ms). In that case, rescheduling is necessary to avoid // losing the loss detection timeout event. // To be sure the latest timer expiration is used, use timerExpiration i.s.o. the expiration of this call. log.warn(String.format("Loss detection timeout running (at %s) is %s ms too early; rescheduling to %s", clock.instant(), Duration.between(clock.instant(), expiration).toMillis(), timerExpiration)); rescheduleLossDetectionTimeout(timerExpiration); } else { log.recovery("%s loss detection timeout handler running", clock.instant()); } PnSpaceTime earliestLossTime = getEarliestLossTime(LossDetector::getLossTime); Instant lossTime = earliestLossTime != null? earliestLossTime.lossTime: null; if (lossTime != null) { lossDetectors[earliestLossTime.pnSpace.ordinal()].detectLostPackets(); sender.flush(); setLossDetectionTimer(); } else { sendProbe(); // Calling setLossDetectionTimer here not necessary, because the event of sending the probe will trigger it anyway. // And if done here, time of last-ack-eliciting might not be set yet (because packets are sent async), leading to trouble. } } private void sendProbe() { if (log.logRecovery()) { PnSpaceTime earliestLastAckElicitingSentTime = getEarliestLossTime(LossDetector::getLastAckElicitingSent); if (earliestLastAckElicitingSentTime != null) { log.recovery(String.format("Sending probe %d, because no ack since %%s. Current RTT: %d/%d.", ptoCount, rttEstimater.getSmoothedRtt(), rttEstimater.getRttVar()), earliestLastAckElicitingSentTime.lossTime); } else { log.recovery(String.format("Sending probe %d. Current RTT: %d/%d.", ptoCount, rttEstimater.getSmoothedRtt(), rttEstimater.getRttVar())); } } ptoCount++; int nrOfProbes = ptoCount > 1 ? 2 : 1; if (ackElicitingInFlight()) { PnSpaceTime ptoTimeAndSpace = getPtoTimeAndSpace(); if (ptoTimeAndSpace == null) { // So, the "ack eliciting in flight" has just been acked; a new timeout will be set, no need to send a probe now log.recovery("Refraining from sending probe because received ack meanwhile"); return; } sendOneOrTwoAckElicitingPackets(ptoTimeAndSpace.pnSpace, nrOfProbes); } else { // Must be the peer awaiting address validation or race condition if (peerAwaitingAddressValidation()) { log.recovery("Sending probe because peer awaiting address validation"); // https://tools.ietf.org/html/draft-ietf-quic-recovery-33#section-6.2.2.1 // "When the PTO fires, the client MUST send a Handshake packet if it has Handshake keys, otherwise it // MUST send an Initial packet in a UDP datagram with a payload of at least 1200 bytes." if (handshakeState.hasNoHandshakeKeys()) { sendOneOrTwoAckElicitingPackets(PnSpace.Initial, 1); } else { sendOneOrTwoAckElicitingPackets(PnSpace.Handshake, 1); } } else { log.recovery("Refraining from sending probe as no ack eliciting in flight and no peer awaiting address validation"); } } } private void sendOneOrTwoAckElicitingPackets(PnSpace pnSpace, int numberOfPackets) { if (pnSpace == PnSpace.Initial) { List framesToRetransmit = getFramesToRetransmit(PnSpace.Initial); if (!framesToRetransmit.isEmpty()) { log.recovery("(Probe is an initial retransmit)"); repeatSend(numberOfPackets, () -> sender.sendProbe(framesToRetransmit , EncryptionLevel.Initial)); } else { // This can happen, when the probe is sent because of peer awaiting address validation log.recovery("(Probe is Initial ping, because there is no Initial data to retransmit)"); repeatSend(numberOfPackets, () -> sender.sendProbe(List.of(new PingFrame(), new Padding(2)), EncryptionLevel.Initial)); } } else if (pnSpace == PnSpace.Handshake) { // Client role: find ack eliciting handshake packet that is not acked and retransmit its contents. List framesToRetransmit = getFramesToRetransmit(PnSpace.Handshake); if (!framesToRetransmit.isEmpty()) { log.recovery("(Probe is a handshake retransmit)"); repeatSend(numberOfPackets, () -> sender.sendProbe(framesToRetransmit, EncryptionLevel.Handshake)); } else { log.recovery("(Probe is a handshake ping)"); repeatSend(numberOfPackets, () -> sender.sendProbe(List.of(new PingFrame(), new Padding(2)), EncryptionLevel.Handshake)); } } else { EncryptionLevel probeLevel = pnSpace.relatedEncryptionLevel(); List framesToRetransmit = getFramesToRetransmit(pnSpace); if (!framesToRetransmit.isEmpty()) { log.recovery(("(Probe is retransmit on level " + probeLevel + ")")); repeatSend(numberOfPackets, () -> sender.sendProbe(framesToRetransmit, probeLevel)); } else { log.recovery(("(Probe is ping on level " + probeLevel + ")")); repeatSend(numberOfPackets, () -> sender.sendProbe(List.of(new PingFrame(), new Padding(2)), probeLevel)); } } } List getFramesToRetransmit(PnSpace pnSpace) { List unAckedPackets = lossDetectors[pnSpace.ordinal()].unAcked(); Optional ackEliciting = unAckedPackets.stream() .filter(p -> p.isAckEliciting()) // Filter out Ping packets, ie. packets consisting of PingFrame's, padding and AckFrame's only. .filter(p -> ! p.getFrames().stream().allMatch(frame -> frame instanceof PingFrame || frame instanceof Padding || frame instanceof AckFrame)) .findFirst(); if (ackEliciting.isPresent()) { List framesToRetransmit = ackEliciting.get().getFrames().stream() .filter(frame -> !(frame instanceof AckFrame)) .collect(Collectors.toList()); return framesToRetransmit; } else { return Collections.emptyList(); } } PnSpaceTime getEarliestLossTime(Function pnSpaceTimeFunction) { PnSpaceTime earliestLossTime = null; for (PnSpace pnSpace: PnSpace.values()) { Instant pnSpaceLossTime = pnSpaceTimeFunction.apply(lossDetectors[pnSpace.ordinal()]); if (pnSpaceLossTime != null) { if (earliestLossTime == null) { earliestLossTime = new PnSpaceTime(pnSpace, pnSpaceLossTime); } else { if (! earliestLossTime.lossTime.isBefore(pnSpaceLossTime)) { earliestLossTime = new PnSpaceTime(pnSpace, pnSpaceLossTime); } } } } return earliestLossTime; } void rescheduleLossDetectionTimeout(Instant scheduledTime) { try { synchronized (scheduleLock) { // Cancelling the current future and setting the new must be in a sync'd block to ensure the right future is cancelled lossDetectionFuture.cancel(false); timerExpiration = scheduledTime; long delay = Duration.between(clock.instant(), scheduledTime).toMillis(); // Delay can be 0 or negative, but that's no problem for ScheduledExecutorService: "Zero and negative delays are also allowed, and are treated as requests for immediate execution." lossDetectionFuture = scheduler.schedule(this::runLossDetectionTimeout, delay, TimeUnit.MILLISECONDS); } } catch (RejectedExecutionException taskRejected) { // Can happen if has been reset concurrently if (!hasBeenReset) { throw taskRejected; } } } private void runLossDetectionTimeout() { try { lossDetectionTimeout(); } catch (Exception error) { log.error("Runtime exception occurred while running loss detection timeout handler", error); } } /** * Creates a Runnable to run the lossDetectionTimeout method, but first checks whether it is not running to early. * For debugging purposes only: it is / can be used to prove that scheduled tasks sometimes run 30 ~ 40 milliseconds too early. * @param scheduledTime * @return */ private Runnable createLossDetectionTimeoutRunnerWithTooEarlyDetection(final Instant scheduledTime) { return () -> { Instant now = clock.instant(); // Allow for 1 ms difference, as Instant has much more precision than the ScheduledExecutorService if (now.plusMillis(1).isBefore(scheduledTime)) { log.error(String.format("Task scheduled for %s is running already at %s (%s ms too early)", scheduledTime, now, Duration.between(now, scheduledTime).toMillis())); } runLossDetectionTimeout(); }; } void unschedule() { lossDetectionFuture.cancel(true); timerExpiration = null; } public void onAckReceived(AckFrame ackFrame, PnSpace pnSpace, Instant timeReceived) { if (! hasBeenReset) { if (ptoCount > 0) { // https://datatracker.ietf.org/doc/html/draft-ietf-quic-recovery-34#section-6.2.1 // "To protect such a server from repeated client probes, the PTO backoff is not reset at a client that // is not yet certain that the server has finished validating the client's address. if (!peerAwaitingAddressValidation()) { ptoCount = 0; } else { log.recovery("probe count not reset on ack because handshake not yet confirmed"); } } lossDetectors[pnSpace.ordinal()].onAckReceived(ackFrame, timeReceived); } } public void packetSent(QuicPacket packet, Instant sent, Consumer packetLostCallback) { if (! hasBeenReset) { if (packet.isInflightPacket()) { lossDetectors[packet.getPnSpace().ordinal()].packetSent(packet, sent, packetLostCallback); setLossDetectionTimer(); } } } private boolean ackElicitingInFlight() { return Stream.of(lossDetectors).anyMatch(detector -> detector.ackElicitingInFlight()); } public synchronized void setReceiverMaxAckDelay(int receiverMaxAckDelay) { this.receiverMaxAckDelay = receiverMaxAckDelay; } public void stopRecovery() { if (! hasBeenReset) { hasBeenReset = true; unschedule(); scheduler.shutdown(); for (PnSpace pnSpace: PnSpace.values()) { lossDetectors[pnSpace.ordinal()].reset(); } } } public void stopRecovery(PnSpace pnSpace) { if (! hasBeenReset) { lossDetectors[pnSpace.ordinal()].reset(); // https://tools.ietf.org/html/draft-ietf-quic-recovery-33#section-6.2.2 // "When Initial or Handshake keys are discarded, the PTO and loss detection timers MUST be reset" ptoCount = 0; setLossDetectionTimer(); } } public long getLost() { return Stream.of(lossDetectors).mapToLong(ld -> ld.getLost()).sum(); } @Override public void handshakeStateChangedEvent(HandshakeState newState) { if (! hasBeenReset) { HandshakeState oldState = handshakeState; handshakeState = newState; if (newState == HandshakeState.Confirmed && oldState != HandshakeState.Confirmed) { log.recovery("State is set to " + newState); // https://tools.ietf.org/html/draft-ietf-quic-recovery-30#section-6.2.1 // "A sender SHOULD restart its PTO timer (...), when the handshake is confirmed (...)," setLossDetectionTimer(); } } } @Override public void received(AckFrame frame, PnSpace pnSpace, Instant timeReceived) { onAckReceived(frame, pnSpace, timeReceived); } private static class NullScheduledFuture implements ScheduledFuture { @Override public int compareTo(Delayed o) { return 0; } @Override public long getDelay(TimeUnit unit) { return 0; } @Override public boolean cancel(boolean mayInterruptIfRunning) { return false; } @Override public boolean isCancelled() { return false; } @Override public boolean isDone() { return false; } @Override public Void get() throws InterruptedException, ExecutionException { return null; } @Override public Void get(long timeout, TimeUnit unit) throws InterruptedException, ExecutionException, TimeoutException { return null; } } private void repeatSend(int count, Runnable task) { for (int i = 0; i < count; i++) { task.run(); try { Thread.sleep(1); // Use a small delay when sending multiple packets } catch (InterruptedException e) { } } } String timeNow() { LocalTime localTimeNow = LocalTime.from(clock.instant().atZone(ZoneId.systemDefault())); DateTimeFormatter timeFormatter = DateTimeFormatter.ofPattern("mm:ss.SSS"); return timeFormatter.format(localTimeNow); } static class PnSpaceTime { public PnSpace pnSpace; public Instant lossTime; public PnSpaceTime(PnSpace pnSpace, Instant pnSpaceLossTime) { this.pnSpace = pnSpace; lossTime = pnSpaceLossTime; } @Override public String toString() { return lossTime.toString() + " (in " + pnSpace + ")"; } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy