All Downloads are FREE. Search and download functionalities are using the official Maven repository.

ai.platon.pulsar.common.collect.collector.UrlCacheCollector.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.collect.collector

import ai.platon.pulsar.common.collect.Loadable
import ai.platon.pulsar.common.collect.UrlCache
import ai.platon.pulsar.common.collect.queue.LoadingQueue
import ai.platon.pulsar.common.getLogger
import ai.platon.pulsar.common.urls.UrlAware
import java.time.Instant
import java.util.*

open class UrlCacheCollector constructor(
    val urlCache: UrlCache
) : AbstractPriorityDataCollector(urlCache.priority) {
    private val logger = getLogger(this)

    private val queues get() = urlCache.queues

    override var name: String = "FCC"

    override val size: Int
        get() = urlCache.size

    override val externalSize: Int
        get() = urlCache.externalSize

    override val estimatedExternalSize: Int
        get() = urlCache.estimatedExternalSize

    override val estimatedSize: Int
        get() = urlCache.estimatedSize

    /**
     * If the fetch cache is a LoadingurlCache, the items can be both in memory or in external source,
     * so even if all queues are empty, hasMore can return true
     * */
    @Synchronized
    override fun hasMore(): Boolean {
        if (size > 0) {
            return true
        }

        if (deadTime <= Instant.now()) {
            return false
        }

        // size is 0, estimatedSize > 0, there are items in the database
        // estimatedSize is updated at least every 5 seconds
        // load actually performed at least every 5 seconds
        if (estimatedSize > 0 && urlCache is Loadable<*>) {
            logger.debug("Loading tasks with estimatedSize be {}", estimatedSize)
            urlCache.loadNow()
        }

        return size > 0
    }

    @Synchronized
    override fun collectTo(sink: MutableList): Int {
        beforeCollect()
        val count = queues.sumOf { consume(it, sink) }
        return afterCollect(count)
    }

    private fun consume(queue: Queue, sink: MutableCollection): Int {
        if (queue is LoadingQueue && queue.size == 0 && queue.estimatedExternalSize == 0) {
            return 0
        }

        val e = queue.poll()
        if (e != null) {
            labels.add(e.label)
            sink.add(e)
            return 1
        }

        return 0
    }

    @Synchronized
    override fun dump(): List {
        return queues.flatMap { it.map { it.toString() } }
    }

    @Synchronized
    override fun clear() = urlCache.clear()

    @Synchronized
    override fun deepClear() {
        urlCache.deepClear()
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy