All Downloads are FREE. Search and download functionalities are using the official Maven repository.

r.0.9.1.source-code.LogReplicator.kt Maven / Gradle / Ivy

The newest version!
package se.wollan.tolr

import kotlinx.coroutines.CancellationException
import kotlinx.coroutines.CoroutineScope
import kotlinx.coroutines.channels.Channel
import kotlinx.coroutines.channels.SendChannel
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import org.slf4j.Logger
import se.wollan.datascope.DataScope
import se.wollan.time.HLCTimestamp
import java.security.SecureRandom
import java.util.*
import kotlin.random.asKotlinRandom
import kotlin.time.Duration

/**
 * Contract for replicating the local log with remote hosts.
 *
 * Implemented in this file by [LogReplicatorImpl]; the notes below describe what that implementation does.
 */
internal interface LogReplicator {
    /** Starts replication; [LogReplicatorImpl] launches its per-remote loops and its heartbeat trigger here. */
    suspend fun startReplicating()

    /** Requests a replication round; [LogReplicatorImpl] sends a trigger to every remote it has a channel for. */
    suspend fun triggerReplication()

    /**
     * Requests a replication round with [remote] only.
     *
     * @return in [LogReplicatorImpl]: `false` when there is no trigger channel for [remote], `true` otherwise.
     */
    suspend fun triggerReplicationFor(remote: RemoteHostname): Boolean

    /**
     * Handles a replication [batch] received from another host.
     *
     * @return the batch to answer with; [LogReplicatorImpl] fills it with the latest local timestamps and the
     * local records the sender appears to be missing.
     */
    suspend fun handleIncomingBatch(batch: ReplicationBatch): ReplicationBatch
}

/**
 * What to do when a replication attempt fails (see `LogReplicatorImpl.startReplicationWithRetry`):
 * - [IMMEDIATE]: retry once right away, that retry running with [SCHEDULED],
 * - [SCHEDULED]: launch a delayed re-trigger for the remote, that re-trigger running with [NO_RETRY],
 * - [NO_RETRY]: only log the failure and wait for the next heartbeat.
 */
private enum class RetryPolicy { IMMEDIATE, SCHEDULED, NO_RETRY }

/** Policy used by trigger requests that do not specify one. */
private val DEFAULT_RETRY_POLICY = RetryPolicy.IMMEDIATE

/**
 * A request, sent over a per-remote trigger channel, to run one replication with that remote.
 *
 * @property responseChannel when non-null, the replication loop sends `Unit` on it once the replication attempt
 * has returned, or closes it with the exception that escaped the attempt.
 * @property retryPolicy how a failed attempt is followed up.
 */
private data class TriggerRequest(
    val responseChannel: SendChannel<Unit>?,
    val retryPolicy: RetryPolicy = DEFAULT_RETRY_POLICY
) {
    companion object {
        /** Fire-and-forget request with the default retry policy. */
        val noResponse = TriggerRequest(responseChannel = null)
    }
}

/**
 * [LogReplicator] that keeps one conflated trigger channel per configured remote hostname (see [triggers]).
 *
 * [startReplicating] launches, in [coroutineScope], one loop per remote that runs a replication for every
 * [TriggerRequest] received on that remote's channel, plus a heartbeat coroutine that periodically triggers
 * all remotes.
 */
internal class LogReplicatorImpl(
    private val coroutineScope: CoroutineScope,
    private val recordRepo: LogRecordRepo,
    private val serverServerAPI: ServerServerAPI,
    private val logger: Logger,
    private val dataScope: DataScope,
    private val configurationProvider: ConfigurationProvider,
) : LogReplicator {

    // always use [triggers] function instead to get values
    @Volatile
    private var _triggers: Map<RemoteHostname, Channel<TriggerRequest>>? = null
    private val random = SecureRandom().asKotlinRandom()

    /** Sends a fire-and-forget trigger to every remote's channel, in shuffled order. */
    override suspend fun triggerReplication() {
        for (requestChannel in triggers().values.shuffled())
            requestChannel.trySend(TriggerRequest.noResponse)
    }

    /**
     * Triggers a replication with [remote] using the default retry policy.
     *
     * @return `false` if there is no trigger channel for [remote], `true` otherwise.
     */
    override suspend fun triggerReplicationFor(remote: RemoteHostname): Boolean =
        triggerReplicationFor(remote, withPolicy = DEFAULT_RETRY_POLICY)

    /** Same as the public overload, but lets the caller choose the retry policy of the request. */
    private suspend fun triggerReplicationFor(remote: RemoteHostname, withPolicy: RetryPolicy): Boolean {
        val channel = triggers()[remote] ?: return false
        channel.trySend(TriggerRequest(null, withPolicy))
        return true
    }

    /** Launches the per-remote replication loops and then the heartbeat trigger. */
    override suspend fun startReplicating() {
        startReplicationLoops()
        startHeartbeatReplicationTrigger()
    }

    /**
     * Launches one coroutine per remote that consumes that remote's trigger channel and runs a replication for
     * each request. The requester's response channel (if any) gets `Unit` when the attempt returned, or is
     * closed with the exception that escaped it.
     */
    private suspend fun startReplicationLoops() {
        for ((remoteHostname, channel) in triggers()) {
            coroutineScope.launch {
                for ((responseChannel, retryPolicy) in channel) {
                    try {
                        startReplicationWithRetry(remoteHostname, retryPolicy)
                        responseChannel?.trySend(Unit)
                    } catch (e: Exception) {
                        responseChannel?.close(e)
                    }
                }
            }
        }
    }

    /**
     * Launches the heartbeat coroutine: first waits for one replication attempt per remote, then triggers all
     * remotes forever, sleeping the configured heartbeat interval (with jitter) between rounds.
     */
    private fun startHeartbeatReplicationTrigger() = coroutineScope.launch {
        awaitSingleReplicationForEachNodeBestEffort()

        while (true) {
            val interval = heartbeatTriggerInterval() * nextJitterFactor()
            delay(interval)
            logger.debug("heartbeat trigger after $interval interval")
            triggerReplication()
        }
    }

    /**
     * Requests one replication per remote, in shuffled order, waiting for each to be answered before moving on.
     *
     * Best effort: a replication whose response channel was closed with an error is logged and skipped rather
     * than failing the caller.
     */
    private suspend fun awaitSingleReplicationForEachNodeBestEffort() {
        for ((remote, requestChannel) in triggers().entries.shuffled(random)) {
            // Conflated rather than rendezvous: the loop answers with a non-suspending trySend, which a
            // rendezvous channel would drop if it happened before we are suspended in the receive below,
            // leaving this coroutine waiting forever.
            val responseChannel = Channel<Unit>(Channel.CONFLATED)
            // NOTE(review): the request channel is conflated too, so a trigger sent by someone else before the
            // loop picks this request up would replace it and no response would ever arrive - confirm that
            // cannot happen while this runs.
            requestChannel.send(TriggerRequest(responseChannel))
            val response = responseChannel.receiveCatching()
            if (response.isClosed)
                logger.warn("initial replication with $remote did not complete, continuing", response.exceptionOrNull())
        }
    }

    /**
     * Runs one replication with [remote] and follows up on failure according to [retryPolicy]:
     * an immediate retry (which itself runs with [RetryPolicy.SCHEDULED]), a delayed re-trigger (which runs
     * with [RetryPolicy.NO_RETRY]), or just a log entry.
     */
    private suspend fun startReplicationWithRetry(remote: RemoteHostname, retryPolicy: RetryPolicy) {
        logger.info("start replication with $remote")
        try {
            startReplicationWith(remote)
            logger.info("successfully completed replication with $remote")
        } catch (_: CancellationException) {
            // cancellation is logged and deliberately not retried
            logger.warn("replication with $remote was cancelled, no retry")
        } catch (e: Exception) {
            when (retryPolicy) {
                RetryPolicy.IMMEDIATE -> {
                    // let's do one fast retry, because let's face it - most likely it's a shaky network and will work next time.. right?
                    logger.warn("replication with $remote failed due to '${e.message}', retrying at once", e)
                    startReplicationWithRetry(remote, RetryPolicy.SCHEDULED)
                }
                RetryPolicy.SCHEDULED -> {
                    // ..okay I was wrong, fine, let schedule a retry then
                    val d = retryDelayOnFailure() * nextJitterFactor()
                    logger.warn("replication with $remote failed due to '${e.message}', will retry in $d", e)
                    // re-trigger through the channel after the delay instead of blocking this loop
                    coroutineScope.launch {
                        delay(d)
                        triggerReplicationFor(remote, withPolicy = RetryPolicy.NO_RETRY)
                    }
                }
                RetryPolicy.NO_RETRY -> {
                    logger.warn("replication with $remote failed due to '${e.message}', awaiting next heartbeat.", e)
                }
            }
        }
    }

    /**
     * Opens a replication with [remote] by sending the latest local timestamps (and no records), then keeps
     * exchanging batches via [processRespondReplicationBatch].
     */
    private suspend fun startReplicationWith(remote: RemoteHostname) {
        val lastLocalTimestamps = recordRepo.listLatestPerNodeTimestamps()
        val (initialRespondBatch, target) =
            serverServerAPI.replicateLogInitial(remote, ReplicationBatch(lastLocalTimestamps, emptyList()))
        processRespondReplicationBatch(remote, initialRespondBatch, target)
    }

    /**
     * Applies [respondBatch] locally and, unless both sides are in sync, sends the next batch to [remote] and
     * recurses on its answer.
     *
     * @throws IllegalStateException if the remote answers with a different target than [target].
     */
    private suspend fun processRespondReplicationBatch(
        remote: RemoteHostname,
        respondBatch: ReplicationBatch,
        target: RemoteHostname,
    ) {
        val (latestLocalTimestamps, missingRemoteRecords) = dataScope.write {
            recordRepo.insertIfMissing(respondBatch.records)
            val latestLocalTimestamps = recordRepo.listLatestPerNodeTimestamps()
            val hasLocallyMissingRecords = anyLocallyMissingRecords(
                latestLocally = latestLocalTimestamps,
                latestRemotely = respondBatch.latestTimestamps
            )
            val latestOfMissingRemoteTSs = getLatestRemoteTimestampsOfRemotelyMissingRecords(
                latestLocally = latestLocalTimestamps,
                latestRemotely = respondBatch.latestTimestamps
            )
            if (!hasLocallyMissingRecords && latestOfMissingRemoteTSs.isEmpty())
                return@write null // replication completed successfully

            val missingRemoteRecords = recordRepo.listLaterThanNodeTimestamps(latestOfMissingRemoteTSs)
            latestLocalTimestamps to missingRemoteRecords
        } ?: return

        val (nextRespondBatch, nextTarget) = serverServerAPI.replicateLog(
            remote = remote,
            batch = ReplicationBatch(latestLocalTimestamps, missingRemoteRecords),
            target = target
        )

        check(target == nextTarget) { "target cannot change mid-replication! (from $target to $nextTarget)" }

        processRespondReplicationBatch(remote, nextRespondBatch, target)
    }

    /**
     * Stores the records of [incomingBatch] that are missing locally and answers with the latest local
     * timestamps plus the local records the sender is missing. If anything was inserted, a replication
     * trigger is registered as a post-commit hook on [dataScope].
     */
    @Suppress("PARAMETER_NAME_CHANGED_ON_OVERRIDE")
    override suspend fun handleIncomingBatch(incomingBatch: ReplicationBatch): ReplicationBatch = dataScope.write {
        val locallyInserted = recordRepo.insertIfMissing(incomingBatch.records)
        val latestLocalTimestamps = recordRepo.listLatestPerNodeTimestamps()
        val latestOfMissingRemoteTSs = getLatestRemoteTimestampsOfRemotelyMissingRecords(
            latestLocally = latestLocalTimestamps, latestRemotely = incomingBatch.latestTimestamps
        )
        val missingRemoteRecords = recordRepo.listLaterThanNodeTimestamps(latestOfMissingRemoteTSs)

        // TODO: optimization: only trigger replication for public client replications
        if (locallyInserted > 0)
            dataScope.addPostCommitHook(::triggerReplication)

        ReplicationBatch(latestLocalTimestamps, missingRemoteRecords)
    }

    /** Random factor in `[0.9, 1.1)` used to spread out delays. */
    private fun nextJitterFactor(): Double = synchronized(random) {
        random.nextDouble(from = 0.9, until = 1.1)
    }

    /** Heartbeat interval read from the current configuration. */
    private suspend fun heartbeatTriggerInterval(): Duration =
        configurationProvider.getConfiguration().heartbeatTriggerInterval

    /** Retry delay read from the current configuration. */
    private suspend fun retryDelayOnFailure(): Duration =
        configurationProvider.getConfiguration().retryDelayOnFailure

    /**
     * Returns the trigger channel per remote hostname, building and caching the map on first call with one
     * conflated channel per hostname from the configuration.
     *
     * doesn't need to be thread-safe due to called during initialization of TOLR
     */
    private suspend fun triggers(): Map<RemoteHostname, Channel<TriggerRequest>> {
        _triggers?.let { return it }
        val triggers = configurationProvider.getConfiguration().remoteHostnamesSafe
            .associateWith<RemoteHostname, Channel<TriggerRequest>> { Channel(Channel.CONFLATED) }
        _triggers = triggers
        return triggers
    }
}

/**
 * Tells whether the remote side knows of records that are missing locally.
 *
 * @param latestLocally latest timestamp known locally, per key
 * @param latestRemotely latest timestamp reported by the remote, per key
 * @return `true` if some key of [latestRemotely] is absent from [latestLocally] or has a strictly later
 * timestamp remotely; `false` otherwise, including when [latestRemotely] is empty.
 */
internal fun <K> anyLocallyMissingRecords(
    latestLocally: Map<K, HLCTimestamp>,
    latestRemotely: Map<K, HLCTimestamp>,
): Boolean = latestRemotely.any { remoteTS ->
    // a key we have never seen locally means we are missing its records
    val localTS = latestLocally[remoteTS.key] ?: return@any true
    remoteTS.value > localTS
}

/**
 * For every key where the local side is ahead of the remote, returns the timestamp the remote has reached.
 *
 * @param latestLocally latest timestamp known locally, per key
 * @param latestRemotely latest timestamp reported by the remote, per key
 * @return a map holding, for each key of [latestLocally] that is absent from [latestRemotely] or strictly
 * later locally, the remote's timestamp for that key, or [HLCTimestamp.initial] when the remote has none.
 * Empty when the remote is not behind on any key.
 */
internal fun <K> getLatestRemoteTimestampsOfRemotelyMissingRecords(
    latestLocally: Map<K, HLCTimestamp>,
    latestRemotely: Map<K, HLCTimestamp>,
): Map<K, HLCTimestamp> {

    // first extract all local ts that are later than remote equivalent
    val laterLocalTimestamps = latestLocally.filter { localTS ->
        val remoteTS = latestRemotely[localTS.key] ?: return@filter true
        localTS.value > remoteTS
    }

    // then we want the remote equivalent ts of each of those remotely missing ones, so we can get records from local db
    return laterLocalTimestamps.mapValues { l ->
        latestRemotely[l.key] ?: HLCTimestamp.initial
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy