All downloads are free. Search and download functionality uses the official Maven repository.

ai.platon.pulsar.common.collect.UrlFeederHelper.kt Maven / Gradle / Ivy

There is a newer version: 2.1.0
Show newest version
package ai.platon.pulsar.common.collect

import ai.platon.pulsar.common.Priority13
import ai.platon.pulsar.common.collect.collector.DataCollector
import ai.platon.pulsar.common.collect.collector.PriorityDataCollector
import ai.platon.pulsar.common.collect.collector.QueueCollector
import ai.platon.pulsar.common.collect.collector.UrlCacheCollector
import ai.platon.pulsar.common.getLogger
import ai.platon.pulsar.common.urls.UrlAware
import java.util.*
import java.util.concurrent.ConcurrentLinkedQueue

/**
 * A helper that looks up, creates, registers and removes data collectors on a [UrlFeeder],
 * logging every collector it registers or removes.
 *
 * NOTE(review): the generic type arguments used in this class (`<UrlAware>`, `<String>`,
 * `<UrlCacheCollector>`) were absent from the reviewed text and have been reconstructed from
 * how the values are used here. Confirm them against the signatures declared by [UrlFeeder].
 *
 * @property feeder the feeder whose collectors and url pool are managed through this helper.
 */
class UrlFeederHelper(val feeder: UrlFeeder) {
    private val dcLogger = getLogger(DataCollector::class)
    private val urlPool get() = feeder.urlPool

    /** Returns whatever `feeder.findByName(name)` returns for [name]. */
    fun findByName(name: String): List<PriorityDataCollector<UrlAware>> = feeder.findByName(name)

    /** Returns whatever `feeder.findByName(names)` returns for the given [names]. */
    fun findByName(names: Iterable<String>): List<PriorityDataCollector<UrlAware>> =
        feeder.findByName(names)

    /** Returns whatever `feeder.findByName(regex)` returns for [regex]. */
    fun findByName(regex: Regex): List<PriorityDataCollector<UrlAware>> = feeder.findByName(regex)

    /** Returns whatever `feeder.findByNameLike(name)` returns for [name]. */
    fun findByNameLike(name: String): List<PriorityDataCollector<UrlAware>> = feeder.findByNameLike(name)

    /** Returns `true` if [findByName] yields at least one collector for [name]. */
    fun contains(name: String): Boolean = findByName(name).isNotEmpty()

    /** Returns `true` if [findByName] yields at least one collector for [names]. */
    fun contains(names: Iterable<String>): Boolean = findByName(names).isNotEmpty()

    /** Returns `true` if [findByName] yields at least one collector for [regex]. */
    fun contains(regex: Regex): Boolean = findByName(regex).isNotEmpty()

    /** Returns `true` if [findByNameLike] yields at least one collector for [name]. */
    fun containsLike(name: String): Boolean = findByNameLike(name).isNotEmpty()

    /** Asks the feeder to add its default collectors. */
    fun addDefaults() {
        feeder.addDefaultCollectors()
    }

    /** Registers a single [collector]; equivalent to [addAll] with a one-element list. */
    fun add(collector: PriorityDataCollector<UrlAware>) {
        addAll(listOf(collector))
    }

    /**
     * Registers all [collectors] with the feeder and reports each of them.
     *
     * For every [UrlCacheCollector] among them, its url cache is first stored in
     * `urlPool.orderedCaches` under the collector's priority. The indexed assignment replaces
     * any entry already stored under that priority.
     *
     * NOTE(review): [removeAll] only cleans `urlPool.unorderedCaches`, so caches stored here
     * stay in `orderedCaches` after their collector is removed. Confirm this is intended.
     */
    fun addAll(collectors: Iterable<PriorityDataCollector<UrlAware>>) {
        collectors.filterIsInstance<UrlCacheCollector>().forEach {
            urlPool.orderedCaches[it.priority] = it.urlCache
        }
        collectors.forEach { report(it) }
        feeder.addCollectors(collectors)
    }

    /**
     * Creates a [QueueCollector] named [name] over [queue], adds it to the feeder and reports it.
     *
     * @param name the name assigned to the new collector.
     * @param priority the priority passed to the collector; defaults to `Priority13.NORMAL.value`.
     * @param queue the backing queue; a fresh [ConcurrentLinkedQueue] by default.
     * @return the newly created collector.
     */
    fun create(
        name: String, priority: Int = Priority13.NORMAL.value, queue: Queue<UrlAware> = ConcurrentLinkedQueue()
    ): QueueCollector {
        val collector = QueueCollector(queue, priority).also { it.name = name }

        feeder.addCollector(collector)
        report(collector)

        return collector
    }

    /**
     * Creates a loading-cache collector with a generated name of the form `LFC@<id>`.
     *
     * The collector and its cache are created with an empty name, then reported and added to
     * the feeder; the generated name is assigned to the collector only afterwards.
     */
    fun create(priority: Int, urlLoader: ExternalUrlLoader): UrlCacheCollector {
        return create("", priority, urlLoader).also { it.name = "LFC@" + it.id }
    }

    /**
     * Creates a [UrlCacheCollector] backed by a new [LoadingUrlCache] built from [name],
     * [priority] and [urlLoader].
     *
     * The cache is added to `urlPool.unorderedCaches`; the collector is reported and then
     * added to the feeder.
     *
     * @return the newly created collector.
     */
    fun create(name: String, priority: Int, urlLoader: ExternalUrlLoader): UrlCacheCollector {
        val urlCache = LoadingUrlCache(name, priority, urlLoader)
        urlPool.unorderedCaches.add(urlCache)
        val collector = UrlCacheCollector(urlCache).also { it.name = name }

        report(collector)
        feeder.addCollector(collector)

        return collector
    }

    /**
     * Creates a concurrent-cache collector with a generated name of the form `FC@<id>`.
     *
     * The collector and its cache are created with an empty name, then added to the feeder
     * and reported; the generated name is assigned to the collector only afterwards.
     */
    fun create(priority: Int): UrlCacheCollector {
        return create("", priority).also { it.name = "FC@" + it.id }
    }

    /**
     * Creates a [UrlCacheCollector] backed by a new [ConcurrentUrlCache] named [name].
     *
     * The cache is added to `urlPool.unorderedCaches`; the collector is added to the feeder
     * and then reported.
     *
     * NOTE(review): [priority] is accepted but not passed to the cache or the collector in
     * this method. Confirm whether it should be.
     *
     * @return the newly created collector.
     */
    fun create(name: String, priority: Int): UrlCacheCollector {
        val urlCache = ConcurrentUrlCache(name)
        urlPool.unorderedCaches.add(urlCache)
        val collector = UrlCacheCollector(urlCache).also { it.name = name }

        feeder.addCollector(collector)
        report(collector)

        return collector
    }

    /**
     * Removes the collectors the feeder finds for [name].
     *
     * @return the first removed collector, or `null` if none was found.
     */
    fun remove(name: String): DataCollector<UrlAware>? {
        return removeAll(listOf(name)).firstOrNull()
    }

    /** Removes the collectors the feeder finds for [names] and returns them. */
    fun removeAll(names: Iterable<String>): Collection<DataCollector<UrlAware>> {
        val collectors = feeder.findByName(names)
        return removeAll(collectors)
    }

    /** Removes the collectors the feeder finds for [regex] and returns them. */
    fun removeAll(regex: Regex): Collection<DataCollector<UrlAware>> {
        val collectors = feeder.findByName(regex)
        return removeAll(collectors)
    }

    /**
     * Removes the collectors the feeder finds via `findByNameLike` for [name] and returns them.
     *
     * The lookup goes through the same feeder call as [findByNameLike] and [containsLike], so
     * the three always agree on what "like" means.
     */
    fun removeAllLike(name: String): Collection<DataCollector<UrlAware>> {
        return removeAll(feeder.findByNameLike(name))
    }

    /**
     * Removes [collectors] from the feeder, removes the url caches of any [UrlCacheCollector]
     * among them from `urlPool.unorderedCaches`, and logs the removed collectors.
     *
     * @return the same [collectors] collection that was passed in.
     */
    fun removeAll(collectors: Collection<DataCollector<UrlAware>>): Collection<DataCollector<UrlAware>> {
        feeder.removeAll(collectors)

        val removedCaches = collectors.filterIsInstance<UrlCacheCollector>().map { it.urlCache }
        urlPool.unorderedCaches.removeAll(removedCaches)

        if (collectors.isNotEmpty()) {
            dcLogger.info("Removed collectors: {}", collectors.joinToString { it.name })
            collectors.forEachIndexed { i, c -> dcLogger.info("{}.\t{}", i + 1, c) }
            // Emit an empty line so the listing is visually separated in the log.
            dcLogger.info("")
        }
        return collectors
    }

    /**
     * Logs the name, size and estimated size of [collector], together with the class name and
     * hash code of the feeder's open collectors, followed by the collector itself.
     *
     * @param message optional extra text, appended as ` | message` when it is not blank.
     */
    fun report(collector: DataCollector<*>, message: String = "") {
        val msg = if (message.isBlank()) "" else " | $message"

        dcLogger.info("Task <{}> has {}/{} items{}, adding to {}@{}",
            collector.name, collector.size, collector.estimatedSize, msg,
            feeder.openCollectors.javaClass.simpleName,
            feeder.openCollectors.hashCode())
        dcLogger.info("{}", collector)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy