All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.common.collect.UrlCache.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.collect

import ai.platon.pulsar.common.Priority13
import ai.platon.pulsar.common.collect.queue.*
import ai.platon.pulsar.common.urls.UrlAware
import com.google.common.cache.LoadingCache
import java.time.Instant
import java.util.*
import java.util.concurrent.ConcurrentLinkedQueue

/**
 * The url cache holds urls.
 *
 * A url cache contains several queues for different purpose: reentrant, non-reentrant and n-reentrant.
 * A reentrant queue accepts the same url multiple times, a non-reentrant queue accepts the same url only once,
 * and an n-reentrant queue accepts the same url for n times at most.
 *
 * The URL cache is expected to be very large and items may be loaded from external sources such as MongoDB.
 * */
interface UrlCache {
    /**
     * The cache name
     * */
    val name: String
    /**
     * The priority
     * */
    val priority: Int
    /**
     * A non-reentrant queue accepts the same url only once
     * */
    val nonReentrantQueue: Queue
    /**
     * An n-reentrant queue accepts the same url for n times at most
     * */
    val nReentrantQueue: Queue
    /**
     * A reentrant queue accepts the same url multiple times
     * */
    val reentrantQueue: Queue
    /**
     * Create a list of all the queues
     * */
    val queues: List>
        get() = listOf(nonReentrantQueue, nReentrantQueue, reentrantQueue)
    /**
     * The total size of all the queues
     * */
    val size get() = queues.sumOf { it.size }
    /**
     * The precise count of urls in the external source, since the external source can be very large,
     * retrieving the precise size can be very slow in some external source.
     * */
    val externalSize: Int get() = 0
    /**
     * The estimated, imprecise count of urls in the external source, it should be very fast.
     * */
    val estimatedExternalSize get() = 0
    /**
     * The estimated, imprecise count of all urls both in local cache and the external source.
     * */
    val estimatedSize get() = size + estimatedExternalSize
    /**
     * Remove dead urls.
     * */
    fun removeDeceased()
    /**
     * Clear the local cache.
     * */
    fun clear()
    /**
     * Clear both the local cache and external source.
     * */
    fun deepClear() = clear()
}

abstract class AbstractUrlCache(
    override val name: String,
    override val priority: Int
) : UrlCache {
    override fun removeDeceased() {
        val now = Instant.now()
        queues.forEach { it.removeIf { it.deadline < now } }
    }

    override fun clear() {
        queues.forEach { it.clear() }
    }
}

open class ConcurrentUrlCache(
    name: String = "",
    priority: Int = Priority13.NORMAL.value
) : AbstractUrlCache(name, priority) {
    override val nonReentrantQueue = ConcurrentNonReentrantQueue()
    override val nReentrantQueue = ConcurrentNEntrantQueue(3)
    override val reentrantQueue = ConcurrentLinkedQueue()
}

/**
 * Contains a sets of loading queues which can load urls from external source using [urlLoader].
 * */
class LoadingUrlCache constructor(
    name: String,
    priority: Int,
    /**
     * A loader to load urls from external sources
     * */
    val urlLoader: ExternalUrlLoader,
    /**
     * The capacity for each queue
     * */
    val capacity: Int = LoadingQueue.DEFAULT_CAPACITY,
) : AbstractUrlCache(name, priority), Loadable {

    companion object {
        const val G_NON_REENTRANT = 1
        const val G_N_ENTRANT = 2
        const val G_REENTRANT = 3
    }

    override val nonReentrantQueue = ConcurrentNonReentrantLoadingQueue(urlLoader, topic(G_NON_REENTRANT))
    override val nReentrantQueue = ConcurrentNEntrantLoadingQueue(urlLoader, topic(G_N_ENTRANT), 3)
    override val reentrantQueue = ConcurrentLoadingQueue(urlLoader, topic(G_REENTRANT))
    override val queues: List> get() = listOf(nonReentrantQueue, nReentrantQueue, reentrantQueue)
    override val externalSize: Int
        get() = queues.filterIsInstance>().sumOf { it.externalSize }
    override val estimatedExternalSize: Int
        get() = queues.filterIsInstance>().sumOf { it.estimatedExternalSize }

    override fun load() {
        queues.filterIsInstance>().forEach { it.load() }
    }

    override fun loadNow(): Collection {
        return queues.filterIsInstance>().flatMap { it.loadNow() }
    }

    override fun deepClear() {
        queues.filterIsInstance>().forEach { it.deepClear() }
    }

    private fun topic(group: Int) = UrlTopic(name, group, priority, capacity)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy