// ai.platon.pulsar.common.collect.UrlFeederHelper.kt Maven / Gradle / Ivy
package ai.platon.pulsar.common.collect
import ai.platon.pulsar.common.Priority13
import ai.platon.pulsar.common.collect.collector.DataCollector
import ai.platon.pulsar.common.collect.collector.PriorityDataCollector
import ai.platon.pulsar.common.collect.collector.QueueCollector
import ai.platon.pulsar.common.collect.collector.UrlCacheCollector
import ai.platon.pulsar.common.getLogger
import ai.platon.pulsar.common.urls.UrlAware
import java.util.*
import java.util.concurrent.ConcurrentLinkedQueue
/**
 * A helper around a [UrlFeeder] that looks up, registers, creates and removes data collectors.
 *
 * Lookups are delegated to [feeder]. When a [UrlCacheCollector] is created or added through this
 * helper, its URL cache is also registered in the feeder's URL pool. Registrations and removals are
 * logged through the logger obtained for [DataCollector].
 *
 * @property feeder the feeder whose collectors and URL pool are managed through this helper.
 */
class UrlFeederHelper(val feeder: UrlFeeder) {
    private val dcLogger = getLogger(DataCollector::class)
    private val urlPool get() = feeder.urlPool

    /** Returns the collectors that [feeder] finds for [name]. */
    fun findByName(name: String): List<DataCollector<UrlAware>> = feeder.findByName(name)

    /** Returns the collectors that [feeder] finds for the given [names]. */
    fun findByName(names: Iterable<String>): List<DataCollector<UrlAware>> =
        feeder.findByName(names)

    /** Returns the collectors that [feeder] finds for [regex]. */
    fun findByName(regex: Regex): List<DataCollector<UrlAware>> = feeder.findByName(regex)

    /** Returns the result of the feeder's "name like" lookup for [name]. */
    fun findByNameLike(name: String): List<DataCollector<UrlAware>> = feeder.findByNameLike(name)

    /** Returns `true` when [findByName] yields at least one collector for [name]. */
    fun contains(name: String): Boolean = findByName(name).isNotEmpty()

    /** Returns `true` when [findByName] yields at least one collector for [names]. */
    fun contains(names: Iterable<String>): Boolean = findByName(names).isNotEmpty()

    /** Returns `true` when [findByName] yields at least one collector for [regex]. */
    fun contains(regex: Regex): Boolean = findByName(regex).isNotEmpty()

    /** Returns `true` when [findByNameLike] yields at least one collector for [name]. */
    fun containsLike(name: String): Boolean = findByNameLike(name).isNotEmpty()

    /** Asks [feeder] to add its default collectors. */
    fun addDefaults() {
        feeder.addDefaultCollectors()
    }

    /** Registers a single [collector]; see [addAll]. */
    fun add(collector: PriorityDataCollector<UrlAware>) {
        addAll(listOf(collector))
    }

    /**
     * Registers all [collectors] with [feeder].
     *
     * For every collector that is a [UrlCacheCollector], its URL cache is first stored in the URL
     * pool's ordered caches under the collector's priority. Each collector is then reported to the
     * log, and finally the whole batch is handed to the feeder.
     */
    fun addAll(collectors: Iterable<PriorityDataCollector<UrlAware>>) {
        collectors.filterIsInstance<UrlCacheCollector>().forEach { cacheCollector ->
            urlPool.orderedCaches[cacheCollector.priority] = cacheCollector.urlCache
        }
        collectors.forEach { report(it) }
        feeder.addCollectors(collectors)
    }

    /**
     * Creates a [QueueCollector] backed by [queue], names it [name], adds it to [feeder] and
     * reports it to the log.
     *
     * @param name the name assigned to the new collector.
     * @param priority the priority passed to the collector; defaults to [Priority13.NORMAL].
     * @param queue the backing queue; defaults to a new [ConcurrentLinkedQueue].
     * @return the newly created collector.
     */
    fun create(
        name: String, priority: Int = Priority13.NORMAL.value, queue: Queue<UrlAware> = ConcurrentLinkedQueue()
    ): QueueCollector {
        val collector = QueueCollector(queue, priority).also { it.name = name }
        feeder.addCollector(collector)
        report(collector)
        return collector
    }

    /**
     * Creates a loader-backed [UrlCacheCollector] with a generated name of the form `LFC@<id>`.
     *
     * The collector (and its underlying cache) is created with an empty name and is renamed only
     * after it has been registered and reported, so the log entry written during creation shows
     * the empty name.
     */
    fun create(priority: Int, urlLoader: ExternalUrlLoader): UrlCacheCollector {
        return create("", priority, urlLoader).also { it.name = "LFC@${it.id}" }
    }

    /**
     * Creates a [UrlCacheCollector] on top of a new [LoadingUrlCache] built from [name], [priority]
     * and [urlLoader].
     *
     * The cache is added to the URL pool's unordered caches; the collector is named [name],
     * reported to the log and added to [feeder].
     *
     * @return the newly created collector.
     */
    fun create(name: String, priority: Int, urlLoader: ExternalUrlLoader): UrlCacheCollector {
        val urlCache = LoadingUrlCache(name, priority, urlLoader)
        urlPool.unorderedCaches.add(urlCache)
        val collector = UrlCacheCollector(urlCache).also { it.name = name }

        report(collector)
        feeder.addCollector(collector)

        return collector
    }

    /**
     * Creates a [UrlCacheCollector] with a generated name of the form `FC@<id>`.
     *
     * The collector is created with an empty name and renamed after registration, so the log entry
     * written during creation shows the empty name.
     */
    fun create(priority: Int): UrlCacheCollector {
        return create("", priority).also { it.name = "FC@${it.id}" }
    }

    /**
     * Creates a [UrlCacheCollector] on top of a new [ConcurrentUrlCache] named [name].
     *
     * The cache is added to the URL pool's unordered caches; the collector is named [name], added
     * to [feeder] and reported to the log.
     *
     * NOTE(review): [priority] is not used by this function's body — confirm whether the cache or
     * the collector is expected to receive it.
     *
     * @return the newly created collector.
     */
    fun create(name: String, priority: Int): UrlCacheCollector {
        val urlCache = ConcurrentUrlCache(name)
        urlPool.unorderedCaches.add(urlCache)
        val collector = UrlCacheCollector(urlCache).also { it.name = name }

        feeder.addCollector(collector)
        report(collector)

        return collector
    }

    /**
     * Removes the collectors found for [name].
     *
     * @return the first removed collector, or `null` when none was found.
     */
    fun remove(name: String): DataCollector<UrlAware>? {
        return removeAll(listOf(name)).firstOrNull()
    }

    /** Removes every collector that [feeder] finds for [names] and returns the removed collectors. */
    fun removeAll(names: Iterable<String>): Collection<DataCollector<UrlAware>> {
        val collectors = feeder.findByName(names)
        return removeAll(collectors)
    }

    /** Removes every collector that [feeder] finds for [regex] and returns the removed collectors. */
    fun removeAll(regex: Regex): Collection<DataCollector<UrlAware>> {
        val collectors = feeder.findByName(regex)
        return removeAll(collectors)
    }

    /**
     * Removes every collector matched by the pattern `.*<name>.*`.
     *
     * [name] is interpolated into the pattern as-is, so regex metacharacters in it are interpreted
     * as regex syntax rather than matched literally.
     */
    fun removeAllLike(name: String): Collection<DataCollector<UrlAware>> {
        return removeAll(".*$name.*".toRegex())
    }

    /**
     * Removes [collectors] from [feeder] and drops the URL caches of the [UrlCacheCollector]s among
     * them from the URL pool's unordered caches. A summary is logged when [collectors] is not empty.
     *
     * NOTE(review): only the unordered caches are cleaned here, while [addAll] registers caches in
     * the ordered caches — confirm whether those entries should be removed as well.
     *
     * @return the same [collectors] instance that was passed in.
     */
    fun removeAll(collectors: Collection<DataCollector<UrlAware>>): Collection<DataCollector<UrlAware>> {
        feeder.removeAll(collectors)

        val removedCaches = collectors.filterIsInstance<UrlCacheCollector>().map { it.urlCache }
        urlPool.unorderedCaches.removeAll(removedCaches)

        if (collectors.isNotEmpty()) {
            dcLogger.info("Removed collectors: ${collectors.joinToString { it.name }}")
            collectors.forEachIndexed { index, collector -> dcLogger.info("${index + 1}.\t$collector") }
            // An empty line visually separates this summary from subsequent log output.
            dcLogger.info("")
        }

        return collectors
    }

    /**
     * Logs a one-line summary of [collector] (name, size, estimated size) together with the simple
     * class name and hash code of the feeder's open-collector container, followed by the
     * collector's own string form.
     *
     * Any collector is accepted because only its name, sizes and string form are read.
     *
     * @param message optional extra text; when not blank it is appended as ` | <message>`.
     */
    fun report(collector: DataCollector<*>, message: String = "") {
        val msg = if (message.isBlank()) "" else " | $message"

        dcLogger.info(
            "Task <{}> has {}/{} items{}, adding to {}@{}",
            collector.name, collector.size, collector.estimatedSize, msg,
            feeder.openCollectors.javaClass.simpleName,
            feeder.openCollectors.hashCode()
        )
        dcLogger.info("{}", collector)
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy