All downloads are free. Search and download functionality uses the official Maven repository.

ai.platon.pulsar.common.collect.queue.AbstractLoadingQueue.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.collect.queue

import ai.platon.pulsar.common.collect.ExternalUrlLoader
import ai.platon.pulsar.common.collect.UrlTopic
import ai.platon.pulsar.common.getLogger
import ai.platon.pulsar.common.stringify
import ai.platon.pulsar.common.urls.UrlAware
import ai.platon.pulsar.common.warnInterruptible
import java.util.*
import java.util.concurrent.ConcurrentLinkedQueue
import java.util.function.Predicate

/**
 * A url queue that keeps a small in-memory cache and delegates everything else to an
 * [ExternalUrlLoader].
 *
 * The in-memory part should be small since every url takes about 1s to fetch.
 * The capacity of the cache is `topic.pageSize`. Persistable urls offered while there is no
 * free slot are handed to the loader ([overflow]); non-persistable urls are always kept in
 * memory, so the cache may temporarily hold more than `capacity` items.
 *
 * When the cache is empty, [peek], [poll] and [iterator] first try to refill it via [load].
 *
 * All public operations are `@Synchronized` on this instance.
 *
 * @property loader the external storage used to load, save, count and delete urls
 * @property topic the topic passed to every loader call; its `pageSize` is the cache capacity
 * @property transformer passed through to `loader.loadToNow` when loading urls
 * */
abstract class AbstractLoadingQueue(
    val loader: ExternalUrlLoader,
    val topic: UrlTopic,
    val transformer: (UrlAware) -> UrlAware
) : AbstractQueue<UrlAware>(), LoadingQueue<UrlAware> {
    // NOTE(review): the element type UrlAware follows from offer/peek/poll below; this assumes
    // LoadingQueue is declared generic over its element type - confirm against its declaration.
    private val logger = getLogger(AbstractLoadingQueue::class)

    /** The in-memory cache that actually holds the queued urls. */
    protected val cacheImplementation = ConcurrentLinkedQueue<UrlAware>()

    /** The maximum number of items the cache is expected to hold, taken from `topic.pageSize`. */
    private val capacity = topic.pageSize

    /** The number of times [loadNow] has asked the loader for urls. */
    var loadCount: Int = 0
        protected set

    /** The number of urls handed to the loader by [overflow]. */
    var savedCount: Int = 0
        protected set

    /** A read-only typed view of the same in-memory cache instance. */
    val cache: Collection<UrlAware> = cacheImplementation

    /**
     * The cache size
     * */
    @get:Synchronized
    override val size: Int
        get() = cacheImplementation.size

    /**
     * Query the underlying database, this operation might be slow, try to use estimatedExternalSize.
     *
     * Returns 0 if the loader fails; the failure is reported via `warnInterruptible`.
     * */
    @get:Synchronized
    override val externalSize: Int
        get() {
            return loader.runCatching { countRemaining(topic) }
                .onFailure { warnInterruptible(this, it) }
                .getOrNull() ?: 0
        }

    /**
     * The loader's estimate of the number of remaining external items for [topic].
     *
     * Returns 0 if the loader fails; the failure is reported via `warnInterruptible`.
     * */
    @get:Synchronized
    override val estimatedExternalSize: Int
        get() {
            return loader.runCatching { estimateRemaining(topic) }
                .onFailure { warnInterruptible(this, it) }
                .getOrNull() ?: 0
        }

    /**
     * The cache size plus [estimatedExternalSize].
     * */
    @get:Synchronized
    override val estimatedSize: Int
        get() = size + estimatedExternalSize

    /**
     * The capacity minus the current cache size.
     *
     * The value can be negative, because [offer] caches non-persistable urls even when the
     * cache is already at capacity.
     * */
    @get:Synchronized
    val freeSlots: Int
        get() = capacity - cacheImplementation.size

    /**
     * True if there is no free slot left, including the case where the cache holds more items
     * than its capacity.
     * */
    @get:Synchronized
    val isFull: Boolean
        get() = freeSlots <= 0

    /**
     * Remove all items from the in-memory cache; items held by the loader are not touched.
     * */
    @Synchronized
    override fun clear() {
        cacheImplementation.clear()
    }

    /**
     * Clear both the external items ([externalClear]) and the in-memory cache ([clear]).
     * */
    @Synchronized
    override fun deepClear() {
        externalClear()
        clear()
    }

    /**
     * Ask the loader to delete all items of [topic].
     * */
    @Synchronized
    fun externalClear() {
        loader.deleteAll(topic)
    }

    /**
     * Load urls from the loader if there is at least one free slot, otherwise do nothing.
     * */
    @Synchronized
    override fun load() {
        if (freeSlots > 0) {
            loadNow()
        }
    }

    /**
     * Ask the loader to load urls into the cache, passing the number of free slots, the topic
     * and the transformer.
     *
     * @return what the loader returns, or an empty list if there is no free slot or the loader
     * throws an exception (which is logged, not rethrown)
     * */
    @Synchronized
    override fun loadNow(): Collection<UrlAware> {
        if (freeSlots <= 0) return listOf()

        return try {
            // counts attempts, not successes: incremented before the loader is called
            ++loadCount
            loader.loadToNow(cacheImplementation, freeSlots, topic, transformer)
        } catch (e: Exception) {
            logger.warn("Failed to load", e)
            listOf()
        }
    }

    /**
     * Shuffle the items in the cache: copy them out, clear the cache and add them back in
     * random order.
     * */
    @Synchronized
    override fun shuffle() {
        val l = cacheImplementation.toMutableList()
        cacheImplementation.clear()
        l.shuffle()
        cacheImplementation.addAll(l)
    }

    /**
     * Same as [offer].
     * */
    @Synchronized
    override fun add(url: UrlAware) = offer(url)

    /**
     * Add the urls to the queue. If there are more urls than free slots, the leading urls are
     * added through the inherited `addAll` and the rest are handed to [overflow].
     *
     * @return always true
     * */
    @Synchronized
    override fun addAll(urls: Collection<UrlAware>): Boolean {
        // freeSlots can be negative (see offer); take() rejects a negative count, so clamp it
        val n = freeSlots.coerceAtLeast(0)
        if (urls.size > n) {
            // TODO: can be optimized
            super.addAll(urls.take(n))
            overflow(urls.drop(n))
        } else {
            super.addAll(urls)
        }
        return true
    }

    /**
     * Add the url to the cache if it is not persistable or if there is a free slot; otherwise
     * hand it to the loader via [overflow].
     *
     * @return the result of adding to the cache, or true if the url was overflowed
     * */
    @Synchronized
    override fun offer(url: UrlAware): Boolean {
        return if (!url.isPersistable || freeSlots > 0) {
            cacheImplementation.add(url)
        } else {
            overflow(url)
            true
        }
    }

    /**
     * Remove all cached items matching the filter; external items are not touched.
     * */
    @Synchronized
    override fun removeIf(filter: Predicate<in UrlAware>): Boolean {
        return cacheImplementation.removeIf(filter)
    }

    /**
     * An iterator over the cache; the cache is refilled from the loader first if it is empty.
     * */
    @Synchronized
    override fun iterator(): MutableIterator<UrlAware> = refreshIfNecessary().cacheImplementation.iterator()

    /**
     * Retrieve, but do not remove, the head of the cache; the cache is refilled from the loader
     * first if it is empty.
     * */
    @Synchronized
    override fun peek(): UrlAware? {
        refreshIfNecessary()
        return cacheImplementation.peek()
    }

    /**
     * Retrieve and remove the head of the cache; the cache is refilled from the loader first
     * if it is empty.
     * */
    @Synchronized
    override fun poll(): UrlAware? {
        refreshIfNecessary()
        return cacheImplementation.poll()
    }

    /**
     * Save the url through the loader and count it in [savedCount].
     *
     * Unlike the list overload, an exception thrown by the loader propagates to the caller.
     * */
    @Synchronized
    override fun overflow(url: UrlAware) {
        loader.save(url, topic)
        ++savedCount
    }

    /**
     * Save the urls through the loader and count them in [savedCount].
     *
     * An exception thrown by the loader is logged and not rethrown; [savedCount] is left
     * unchanged in that case.
     * */
    @Synchronized
    override fun overflow(urls: List<UrlAware>) {
        try {
            loader.saveAll(urls, topic)
            savedCount += urls.size
        } catch (e: Exception) {
            logger.warn("Failed to save urls", e)
        }
    }

    /**
     * Try to load from the loader if the cache is empty.
     *
     * @return this queue, to allow call chaining
     * */
    private fun refreshIfNecessary(): AbstractLoadingQueue {
        if (cacheImplementation.isEmpty()) {
            load()
        }

        return this
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy